-rw-r--r--  Documentation/trace/kprobetrace.txt | 57
-rw-r--r--  arch/ia64/kernel/kprobes.c | 2
-rw-r--r--  arch/powerpc/kernel/perf_callchain.c | 3
-rw-r--r--  arch/powerpc/kernel/perf_event.c | 10
-rw-r--r--  arch/sh/kernel/perf_callchain.c | 3
-rw-r--r--  arch/sparc/kernel/perf_event.c | 10
-rw-r--r--  arch/x86/include/asm/alternative.h | 5
-rw-r--r--  arch/x86/include/asm/debugreg.h | 3
-rw-r--r--  arch/x86/include/asm/nmi.h | 1
-rw-r--r--  arch/x86/include/asm/perf_event.h | 16
-rw-r--r--  arch/x86/include/asm/ptrace.h | 4
-rw-r--r--  arch/x86/include/asm/stacktrace.h | 2
-rw-r--r--  arch/x86/kernel/alternative.c | 18
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 1854
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c | 416
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 971
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p6.c | 157
-rw-r--r--  arch/x86/kernel/cpu/perfctr-watchdog.c | 11
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 5
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 5
-rw-r--r--  arch/x86/kernel/hw_breakpoint.c | 10
-rw-r--r--  arch/x86/kernel/kprobes.c | 5
-rw-r--r--  arch/x86/kernel/ptrace.c | 24
-rw-r--r--  arch/x86/kernel/traps.c | 3
-rw-r--r--  include/linux/bitops.h | 29
-rw-r--r--  include/linux/ftrace.h | 7
-rw-r--r--  include/linux/ftrace_event.h | 20
-rw-r--r--  include/linux/list.h | 14
-rw-r--r--  include/linux/perf_event.h | 55
-rw-r--r--  include/linux/syscalls.h | 4
-rw-r--r--  include/trace/events/lock.h | 29
-rw-r--r--  include/trace/ftrace.h | 60
-rw-r--r--  include/trace/syscall.h | 4
-rw-r--r--  init/Kconfig | 13
-rw-r--r--  kernel/kprobes.c | 33
-rw-r--r--  kernel/perf_event.c | 627
-rw-r--r--  kernel/sched.c | 12
-rw-r--r--  kernel/trace/Makefile | 4
-rw-r--r--  kernel/trace/ftrace.c | 54
-rw-r--r--  kernel/trace/trace_event_profile.c | 52
-rw-r--r--  kernel/trace/trace_events_filter.c | 4
-rw-r--r--  kernel/trace/trace_kprobe.c | 196
-rw-r--r--  kernel/trace/trace_syscalls.c | 76
-rw-r--r--  tools/perf/.gitignore | 1
-rw-r--r--  tools/perf/Documentation/perf-archive.txt | 22
-rw-r--r--  tools/perf/Documentation/perf-buildid-cache.txt | 33
-rw-r--r--  tools/perf/Documentation/perf-probe.txt | 20
-rw-r--r--  tools/perf/Documentation/perf-top.txt | 2
-rw-r--r--  tools/perf/Documentation/perf-trace-perl.txt | 2
-rw-r--r--  tools/perf/Documentation/perf-trace-python.txt | 625
-rw-r--r--  tools/perf/Documentation/perf-trace.txt | 15
-rw-r--r--  tools/perf/Documentation/perf.txt | 2
-rw-r--r--  tools/perf/Makefile | 51
-rw-r--r--  tools/perf/builtin-annotate.c | 240
-rw-r--r--  tools/perf/builtin-buildid-cache.c | 133
-rw-r--r--  tools/perf/builtin-buildid-list.c | 31
-rw-r--r--  tools/perf/builtin-diff.c | 74
-rw-r--r--  tools/perf/builtin-help.c | 5
-rw-r--r--  tools/perf/builtin-kmem.c | 48
-rw-r--r--  tools/perf/builtin-lock.c | 678
-rw-r--r--  tools/perf/builtin-probe.c | 94
-rw-r--r--  tools/perf/builtin-record.c | 50
-rw-r--r--  tools/perf/builtin-report.c | 58
-rw-r--r--  tools/perf/builtin-sched.c | 32
-rw-r--r--  tools/perf/builtin-stat.c | 106
-rw-r--r--  tools/perf/builtin-timechart.c | 25
-rw-r--r--  tools/perf/builtin-top.c | 109
-rw-r--r--  tools/perf/builtin-trace.c | 34
-rw-r--r--  tools/perf/builtin.h | 2
-rw-r--r--  tools/perf/command-list.txt | 2
-rw-r--r--  tools/perf/design.txt | 8
-rw-r--r--  tools/perf/perf-archive.sh | 32
-rw-r--r--  tools/perf/perf.c | 25
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/Context.c | 5
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/Context.xs | 3
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm | 2
-rw-r--r--  tools/perf/scripts/perl/bin/check-perf-trace-record | 7
-rw-r--r--  tools/perf/scripts/perl/bin/check-perf-trace-report | 6
-rw-r--r--  tools/perf/scripts/perl/bin/failed-syscalls-record | 2
-rw-r--r--  tools/perf/scripts/perl/bin/failed-syscalls-report | 4
-rw-r--r--  tools/perf/scripts/perl/failed-syscalls.pl | 38
-rw-r--r--  tools/perf/scripts/python/Perf-Trace-Util/Context.c | 88
-rw-r--r--  tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py | 91
-rw-r--r--  tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py | 25
-rw-r--r--  tools/perf/scripts/python/bin/failed-syscalls-by-pid-record | 2
-rw-r--r--  tools/perf/scripts/python/bin/failed-syscalls-by-pid-report | 4
-rw-r--r--  tools/perf/scripts/python/bin/syscall-counts-by-pid-record | 2
-rw-r--r--  tools/perf/scripts/python/bin/syscall-counts-by-pid-report | 4
-rw-r--r--  tools/perf/scripts/python/bin/syscall-counts-record | 2
-rw-r--r--  tools/perf/scripts/python/bin/syscall-counts-report | 4
-rw-r--r--  tools/perf/scripts/python/check-perf-trace.py | 83
-rw-r--r--  tools/perf/scripts/python/failed-syscalls-by-pid.py | 68
-rw-r--r--  tools/perf/scripts/python/syscall-counts-by-pid.py | 64
-rw-r--r--  tools/perf/scripts/python/syscall-counts.py | 58
-rw-r--r--  tools/perf/util/build-id.c | 39
-rw-r--r--  tools/perf/util/build-id.h | 8
-rw-r--r--  tools/perf/util/data_map.c | 252
-rw-r--r--  tools/perf/util/debug.c | 1
-rw-r--r--  tools/perf/util/debugfs.c | 17
-rw-r--r--  tools/perf/util/debugfs.h | 2
-rw-r--r--  tools/perf/util/event.c | 220
-rw-r--r--  tools/perf/util/event.h | 79
-rw-r--r--  tools/perf/util/header.c | 284
-rw-r--r--  tools/perf/util/header.h | 9
-rw-r--r--  tools/perf/util/include/linux/hash.h | 5
-rw-r--r--  tools/perf/util/include/linux/kernel.h | 1
-rw-r--r--  tools/perf/util/map.c | 52
-rw-r--r--  tools/perf/util/map.h | 94
-rw-r--r--  tools/perf/util/parse-events.c | 48
-rw-r--r--  tools/perf/util/probe-event.c | 105
-rw-r--r--  tools/perf/util/probe-event.h | 2
-rw-r--r--  tools/perf/util/probe-finder.c | 203
-rw-r--r--  tools/perf/util/probe-finder.h | 33
-rw-r--r--  tools/perf/util/scripting-engines/trace-event-perl.c (renamed from tools/perf/util/trace-event-perl.c) | 115
-rw-r--r--  tools/perf/util/scripting-engines/trace-event-python.c | 573
-rw-r--r--  tools/perf/util/session.c | 431
-rw-r--r--  tools/perf/util/session.h | 55
-rw-r--r--  tools/perf/util/string.c | 65
-rw-r--r--  tools/perf/util/symbol.c | 529
-rw-r--r--  tools/perf/util/symbol.h | 52
-rw-r--r--  tools/perf/util/thread.c | 52
-rw-r--r--  tools/perf/util/thread.h | 24
-rw-r--r--  tools/perf/util/trace-event-info.c | 64
-rw-r--r--  tools/perf/util/trace-event-parse.c | 24
-rw-r--r--  tools/perf/util/trace-event-perl.h | 55
-rw-r--r--  tools/perf/util/trace-event-read.c | 18
-rw-r--r--  tools/perf/util/trace-event-scripting.c | 167
-rw-r--r--  tools/perf/util/trace-event.h | 10
-rw-r--r--  tools/perf/util/util.c | 94
-rw-r--r--  tools/perf/util/util.h | 3
-rw-r--r--  tools/perf/util/values.c | 1
131 files changed, 8460 insertions, 3335 deletions
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index 47aabeebbdf6..a9100b28eb84 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -24,6 +24,7 @@ Synopsis of kprobe_events
24------------------------- 24-------------------------
25 p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe 25 p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe
26 r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe 26 r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe
27 -:[GRP/]EVENT : Clear a probe
27 28
28 GRP : Group name. If omitted, use "kprobes" for it. 29 GRP : Group name. If omitted, use "kprobes" for it.
29 EVENT : Event name. If omitted, the event name is generated 30 EVENT : Event name. If omitted, the event name is generated
@@ -37,15 +38,12 @@ Synopsis of kprobe_events
37 @SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol) 38 @SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol)
38 $stackN : Fetch Nth entry of stack (N >= 0) 39 $stackN : Fetch Nth entry of stack (N >= 0)
39 $stack : Fetch stack address. 40 $stack : Fetch stack address.
40 $argN : Fetch function argument. (N >= 0)(*) 41 $retval : Fetch return value.(*)
41 $retval : Fetch return value.(**) 42 +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
42 +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(***)
43 NAME=FETCHARG: Set NAME as the argument name of FETCHARG. 43 NAME=FETCHARG: Set NAME as the argument name of FETCHARG.
44 44
45 (*) aN may not correct on asmlinkaged functions and at the middle of 45 (*) only for return probe.
46 function body. 46 (**) this is useful for fetching a field of data structures.
47 (**) only for return probe.
48 (***) this is useful for fetching a field of data structures.
49 47
50 48
51Per-Probe Event Filtering 49Per-Probe Event Filtering
@@ -82,13 +80,16 @@ Usage examples
82To add a probe as a new event, write a new definition to kprobe_events 80To add a probe as a new event, write a new definition to kprobe_events
83as below. 81as below.
84 82
85 echo p:myprobe do_sys_open dfd=$arg0 filename=$arg1 flags=$arg2 mode=$arg3 > /sys/kernel/debug/tracing/kprobe_events 83 echo 'p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack)' > /sys/kernel/debug/tracing/kprobe_events
86 84
87 This sets a kprobe on the top of do_sys_open() function with recording 85 This sets a kprobe on the top of do_sys_open() function with recording
881st to 4th arguments as "myprobe" event. As this example shows, users can 861st to 4th arguments as "myprobe" event. Note, which register/stack entry is
89choose more familiar names for each arguments. 87assigned to each function argument depends on arch-specific ABI. If you unsure
88the ABI, please try to use probe subcommand of perf-tools (you can find it
89under tools/perf/).
90As this example shows, users can choose more familiar names for each arguments.
90 91
91 echo r:myretprobe do_sys_open $retval >> /sys/kernel/debug/tracing/kprobe_events 92 echo 'r:myretprobe do_sys_open $retval' >> /sys/kernel/debug/tracing/kprobe_events
92 93
93 This sets a kretprobe on the return point of do_sys_open() function with 94 This sets a kretprobe on the return point of do_sys_open() function with
94recording return value as "myretprobe" event. 95recording return value as "myretprobe" event.
@@ -97,23 +98,24 @@ recording return value as "myretprobe" event.
97 98
98 cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format 99 cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format
99name: myprobe 100name: myprobe
100ID: 75 101ID: 780
101format: 102format:
102 field:unsigned short common_type; offset:0; size:2; 103 field:unsigned short common_type; offset:0; size:2; signed:0;
103 field:unsigned char common_flags; offset:2; size:1; 104 field:unsigned char common_flags; offset:2; size:1; signed:0;
104 field:unsigned char common_preempt_count; offset:3; size:1; 105 field:unsigned char common_preempt_count; offset:3; size:1;signed:0;
105 field:int common_pid; offset:4; size:4; 106 field:int common_pid; offset:4; size:4; signed:1;
106 field:int common_tgid; offset:8; size:4; 107 field:int common_lock_depth; offset:8; size:4; signed:1;
107 108
108 field: unsigned long ip; offset:16;tsize:8; 109 field:unsigned long __probe_ip; offset:12; size:4; signed:0;
109 field: int nargs; offset:24;tsize:4; 110 field:int __probe_nargs; offset:16; size:4; signed:1;
110 field: unsigned long dfd; offset:32;tsize:8; 111 field:unsigned long dfd; offset:20; size:4; signed:0;
111 field: unsigned long filename; offset:40;tsize:8; 112 field:unsigned long filename; offset:24; size:4; signed:0;
112 field: unsigned long flags; offset:48;tsize:8; 113 field:unsigned long flags; offset:28; size:4; signed:0;
113 field: unsigned long mode; offset:56;tsize:8; 114 field:unsigned long mode; offset:32; size:4; signed:0;
114 115
115print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->ip, REC->dfd, REC->filename, REC->flags, REC->mode
116 116
117print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->__probe_ip,
118REC->dfd, REC->filename, REC->flags, REC->mode
117 119
118 You can see that the event has 4 arguments as in the expressions you specified. 120 You can see that the event has 4 arguments as in the expressions you specified.
119 121
@@ -121,6 +123,12 @@ print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->ip, REC->dfd, R
121 123
122 This clears all probe points. 124 This clears all probe points.
123 125
126 Or,
127
128 echo -:myprobe >> kprobe_events
129
130 This clears probe points selectively.
131
124 Right after definition, each event is disabled by default. For tracing these 132 Right after definition, each event is disabled by default. For tracing these
125events, you need to enable it. 133events, you need to enable it.
126 134
@@ -146,4 +154,3 @@ events, you need to enable it.
146returns from SYMBOL(e.g. "sys_open+0x1b/0x1d <- do_sys_open" means kernel 154returns from SYMBOL(e.g. "sys_open+0x1b/0x1d <- do_sys_open" means kernel
147returns from do_sys_open to sys_open+0x1b). 155returns from do_sys_open to sys_open+0x1b).
148 156
149
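The "-:[GRP/]EVENT" form documented above makes it possible to remove a single probe without clearing the whole list. A minimal userspace sketch of driving the same interface programmatically, assuming the default debugfs mount point and a hypothetical probe named "myprobe" (register names are arch-specific, as the documentation notes):

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    /* Append one command string to the kprobe_events control file. */
    static int kprobe_ctl(const char *cmd)
    {
        const char *path = "/sys/kernel/debug/tracing/kprobe_events";
        int fd = open(path, O_WRONLY | O_APPEND);
        ssize_t n;

        if (fd < 0)
            return -1;
        n = write(fd, cmd, strlen(cmd));
        close(fd);
        return n < 0 ? -1 : 0;
    }

    int main(void)
    {
        /* define a probe, then clear only that probe with the new "-:" form */
        if (kprobe_ctl("p:myprobe do_sys_open dfd=%ax filename=%dx\n"))
            perror("add probe");
        if (kprobe_ctl("-:myprobe\n"))
            perror("clear probe");
        return 0;
    }
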
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 9adac441ac9b..7026b29e277a 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -870,7 +870,7 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
870 return 1; 870 return 1;
871 871
872ss_probe: 872ss_probe:
873#if !defined(CONFIG_PREEMPT) || defined(CONFIG_FREEZER) 873#if !defined(CONFIG_PREEMPT)
874 if (p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && !p->post_handler) { 874 if (p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && !p->post_handler) {
875 /* Boost up -- we can execute copied instructions directly */ 875 /* Boost up -- we can execute copied instructions directly */
876 ia64_psr(regs)->ri = p->ainsn.slot; 876 ia64_psr(regs)->ri = p->ainsn.slot;
diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c
index a3c11cac3d71..95ad9dad298e 100644
--- a/arch/powerpc/kernel/perf_callchain.c
+++ b/arch/powerpc/kernel/perf_callchain.c
@@ -495,9 +495,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
495 495
496 entry->nr = 0; 496 entry->nr = 0;
497 497
498 if (current->pid == 0) /* idle task? */
499 return entry;
500
501 if (!user_mode(regs)) { 498 if (!user_mode(regs)) {
502 perf_callchain_kernel(regs, entry); 499 perf_callchain_kernel(regs, entry);
503 if (current->mm) 500 if (current->mm)
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 1eb85fbf53a5..b6cf8f1f4d35 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -718,10 +718,10 @@ static int collect_events(struct perf_event *group, int max_count,
718 return n; 718 return n;
719} 719}
720 720
721static void event_sched_in(struct perf_event *event, int cpu) 721static void event_sched_in(struct perf_event *event)
722{ 722{
723 event->state = PERF_EVENT_STATE_ACTIVE; 723 event->state = PERF_EVENT_STATE_ACTIVE;
724 event->oncpu = cpu; 724 event->oncpu = smp_processor_id();
725 event->tstamp_running += event->ctx->time - event->tstamp_stopped; 725 event->tstamp_running += event->ctx->time - event->tstamp_stopped;
726 if (is_software_event(event)) 726 if (is_software_event(event))
727 event->pmu->enable(event); 727 event->pmu->enable(event);
@@ -735,7 +735,7 @@ static void event_sched_in(struct perf_event *event, int cpu)
735 */ 735 */
736int hw_perf_group_sched_in(struct perf_event *group_leader, 736int hw_perf_group_sched_in(struct perf_event *group_leader,
737 struct perf_cpu_context *cpuctx, 737 struct perf_cpu_context *cpuctx,
738 struct perf_event_context *ctx, int cpu) 738 struct perf_event_context *ctx)
739{ 739{
740 struct cpu_hw_events *cpuhw; 740 struct cpu_hw_events *cpuhw;
741 long i, n, n0; 741 long i, n, n0;
@@ -766,10 +766,10 @@ int hw_perf_group_sched_in(struct perf_event *group_leader,
766 cpuhw->event[i]->hw.config = cpuhw->events[i]; 766 cpuhw->event[i]->hw.config = cpuhw->events[i];
767 cpuctx->active_oncpu += n; 767 cpuctx->active_oncpu += n;
768 n = 1; 768 n = 1;
769 event_sched_in(group_leader, cpu); 769 event_sched_in(group_leader);
770 list_for_each_entry(sub, &group_leader->sibling_list, group_entry) { 770 list_for_each_entry(sub, &group_leader->sibling_list, group_entry) {
771 if (sub->state != PERF_EVENT_STATE_OFF) { 771 if (sub->state != PERF_EVENT_STATE_OFF) {
772 event_sched_in(sub, cpu); 772 event_sched_in(sub);
773 ++n; 773 ++n;
774 } 774 }
775 } 775 }
diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c
index 24ea837eac5b..a9dd3abde28e 100644
--- a/arch/sh/kernel/perf_callchain.c
+++ b/arch/sh/kernel/perf_callchain.c
@@ -68,9 +68,6 @@ perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
68 68
69 is_user = user_mode(regs); 69 is_user = user_mode(regs);
70 70
71 if (!current || current->pid == 0)
72 return;
73
74 if (is_user && current->state != TASK_RUNNING) 71 if (is_user && current->state != TASK_RUNNING)
75 return; 72 return;
76 73
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index e856456ec02f..9f2b2bac8b2b 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -980,10 +980,10 @@ static int collect_events(struct perf_event *group, int max_count,
980 return n; 980 return n;
981} 981}
982 982
983static void event_sched_in(struct perf_event *event, int cpu) 983static void event_sched_in(struct perf_event *event)
984{ 984{
985 event->state = PERF_EVENT_STATE_ACTIVE; 985 event->state = PERF_EVENT_STATE_ACTIVE;
986 event->oncpu = cpu; 986 event->oncpu = smp_processor_id();
987 event->tstamp_running += event->ctx->time - event->tstamp_stopped; 987 event->tstamp_running += event->ctx->time - event->tstamp_stopped;
988 if (is_software_event(event)) 988 if (is_software_event(event))
989 event->pmu->enable(event); 989 event->pmu->enable(event);
@@ -991,7 +991,7 @@ static void event_sched_in(struct perf_event *event, int cpu)
991 991
992int hw_perf_group_sched_in(struct perf_event *group_leader, 992int hw_perf_group_sched_in(struct perf_event *group_leader,
993 struct perf_cpu_context *cpuctx, 993 struct perf_cpu_context *cpuctx,
994 struct perf_event_context *ctx, int cpu) 994 struct perf_event_context *ctx)
995{ 995{
996 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 996 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
997 struct perf_event *sub; 997 struct perf_event *sub;
@@ -1015,10 +1015,10 @@ int hw_perf_group_sched_in(struct perf_event *group_leader,
1015 1015
1016 cpuctx->active_oncpu += n; 1016 cpuctx->active_oncpu += n;
1017 n = 1; 1017 n = 1;
1018 event_sched_in(group_leader, cpu); 1018 event_sched_in(group_leader);
1019 list_for_each_entry(sub, &group_leader->sibling_list, group_entry) { 1019 list_for_each_entry(sub, &group_leader->sibling_list, group_entry) {
1020 if (sub->state != PERF_EVENT_STATE_OFF) { 1020 if (sub->state != PERF_EVENT_STATE_OFF) {
1021 event_sched_in(sub, cpu); 1021 event_sched_in(sub);
1022 n++; 1022 n++;
1023 } 1023 }
1024 } 1024 }
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 69b74a7b877f..ac80b7d70014 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -65,12 +65,17 @@ extern void alternatives_smp_module_add(struct module *mod, char *name,
65 void *text, void *text_end); 65 void *text, void *text_end);
66extern void alternatives_smp_module_del(struct module *mod); 66extern void alternatives_smp_module_del(struct module *mod);
67extern void alternatives_smp_switch(int smp); 67extern void alternatives_smp_switch(int smp);
68extern int alternatives_text_reserved(void *start, void *end);
68#else 69#else
69static inline void alternatives_smp_module_add(struct module *mod, char *name, 70static inline void alternatives_smp_module_add(struct module *mod, char *name,
70 void *locks, void *locks_end, 71 void *locks, void *locks_end,
71 void *text, void *text_end) {} 72 void *text, void *text_end) {}
72static inline void alternatives_smp_module_del(struct module *mod) {} 73static inline void alternatives_smp_module_del(struct module *mod) {}
73static inline void alternatives_smp_switch(int smp) {} 74static inline void alternatives_smp_switch(int smp) {}
75static inline int alternatives_text_reserved(void *start, void *end)
76{
77 return 0;
78}
74#endif /* CONFIG_SMP */ 79#endif /* CONFIG_SMP */
75 80
76/* alternative assembly primitive: */ 81/* alternative assembly primitive: */
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 8240f76b531e..b81002f23614 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -14,6 +14,9 @@
14 which debugging register was responsible for the trap. The other bits 14 which debugging register was responsible for the trap. The other bits
15 are either reserved or not of interest to us. */ 15 are either reserved or not of interest to us. */
16 16
17/* Define reserved bits in DR6 which are always set to 1 */
18#define DR6_RESERVED (0xFFFF0FF0)
19
17#define DR_TRAP0 (0x1) /* db0 */ 20#define DR_TRAP0 (0x1) /* db0 */
18#define DR_TRAP1 (0x2) /* db1 */ 21#define DR_TRAP1 (0x2) /* db1 */
19#define DR_TRAP2 (0x4) /* db2 */ 22#define DR_TRAP2 (0x4) /* db2 */
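DR6_RESERVED names the DR6 bits that are architecturally fixed to 1, so a debug-exception handler can strip them before testing the per-breakpoint trap bits. A standalone illustration of that masking with a made-up raw DR6 value (the kernel call site is not part of this hunk):

    #include <stdio.h>

    #define DR6_RESERVED    (0xFFFF0FF0)    /* bits that always read as 1 */
    #define DR_TRAP0        (0x1)           /* breakpoint 0 hit */
    #define DR_TRAP1        (0x2)           /* breakpoint 1 hit */

    int main(void)
    {
        unsigned long dr6 = 0xFFFF0FF3;     /* illustrative raw value: reserved bits + db0/db1 */

        dr6 &= ~DR6_RESERVED;               /* keep only the meaningful status bits */

        if (dr6 & DR_TRAP0)
            printf("breakpoint 0 triggered\n");
        if (dr6 & DR_TRAP1)
            printf("breakpoint 1 triggered\n");
        return 0;
    }
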
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 139d4c1a33a7..93da9c3f3341 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -19,7 +19,6 @@ extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
19extern int check_nmi_watchdog(void); 19extern int check_nmi_watchdog(void);
20extern int nmi_watchdog_enabled; 20extern int nmi_watchdog_enabled;
21extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); 21extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
22extern int avail_to_resrv_perfctr_nmi(unsigned int);
23extern int reserve_perfctr_nmi(unsigned int); 22extern int reserve_perfctr_nmi(unsigned int);
24extern void release_perfctr_nmi(unsigned int); 23extern void release_perfctr_nmi(unsigned int);
25extern int reserve_evntsel_nmi(unsigned int); 24extern int reserve_evntsel_nmi(unsigned int);
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 1380367dabd9..befd172c82ad 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -27,7 +27,14 @@
27/* 27/*
28 * Includes eventsel and unit mask as well: 28 * Includes eventsel and unit mask as well:
29 */ 29 */
30#define ARCH_PERFMON_EVENT_MASK 0xffff 30
31
32#define INTEL_ARCH_EVTSEL_MASK 0x000000FFULL
33#define INTEL_ARCH_UNIT_MASK 0x0000FF00ULL
34#define INTEL_ARCH_EDGE_MASK 0x00040000ULL
35#define INTEL_ARCH_INV_MASK 0x00800000ULL
36#define INTEL_ARCH_CNT_MASK 0xFF000000ULL
37#define INTEL_ARCH_EVENT_MASK (INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK)
31 38
32/* 39/*
33 * filter mask to validate fixed counter events. 40 * filter mask to validate fixed counter events.
@@ -38,7 +45,12 @@
38 * The other filters are supported by fixed counters. 45 * The other filters are supported by fixed counters.
39 * The any-thread option is supported starting with v3. 46 * The any-thread option is supported starting with v3.
40 */ 47 */
41#define ARCH_PERFMON_EVENT_FILTER_MASK 0xff840000 48#define INTEL_ARCH_FIXED_MASK \
49 (INTEL_ARCH_CNT_MASK| \
50 INTEL_ARCH_INV_MASK| \
51 INTEL_ARCH_EDGE_MASK|\
52 INTEL_ARCH_UNIT_MASK|\
53 INTEL_ARCH_EVTSEL_MASK)
42 54
43#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c 55#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
44#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) 56#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
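Splitting the event-select fields into individual masks lets constraint code compare only the bits it cares about. A small standalone sketch of how a raw config value decomposes under these masks, using the architectural LLC-miss encoding (0x412e) that appears elsewhere in this patch:

    #include <stdio.h>

    #define INTEL_ARCH_EVTSEL_MASK  0x000000FFULL
    #define INTEL_ARCH_UNIT_MASK    0x0000FF00ULL
    #define INTEL_ARCH_EVENT_MASK   (INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK)

    int main(void)
    {
        unsigned long long config = 0x412e;  /* LLC Misses: event 0x2e, umask 0x41 */

        printf("event select:   0x%02llx\n", config & INTEL_ARCH_EVTSEL_MASK);
        printf("unit mask:      0x%02llx\n", (config & INTEL_ARCH_UNIT_MASK) >> 8);
        printf("constraint key: 0x%04llx\n", config & INTEL_ARCH_EVENT_MASK);
        return 0;
    }
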
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 9d369f680321..20102808b191 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -274,10 +274,6 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
274 return 0; 274 return 0;
275} 275}
276 276
277/* Get Nth argument at function call */
278extern unsigned long regs_get_argument_nth(struct pt_regs *regs,
279 unsigned int n);
280
281/* 277/*
282 * These are defined as per linux/ptrace.h, which see. 278 * These are defined as per linux/ptrace.h, which see.
283 */ 279 */
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 35e89122a42f..4dab78edbad9 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -3,8 +3,6 @@
3 3
4extern int kstack_depth_to_print; 4extern int kstack_depth_to_print;
5 5
6int x86_is_stack_id(int id, char *name);
7
8struct thread_info; 6struct thread_info;
9struct stacktrace_ops; 7struct stacktrace_ops;
10 8
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index de7353c0ce9c..e63b80e5861c 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -390,6 +390,24 @@ void alternatives_smp_switch(int smp)
390 mutex_unlock(&smp_alt); 390 mutex_unlock(&smp_alt);
391} 391}
392 392
393/* Return 1 if the address range is reserved for smp-alternatives */
394int alternatives_text_reserved(void *start, void *end)
395{
396 struct smp_alt_module *mod;
397 u8 **ptr;
398 u8 *text_start = start;
399 u8 *text_end = end;
400
401 list_for_each_entry(mod, &smp_alt_modules, next) {
402 if (mod->text > text_end || mod->text_end < text_start)
403 continue;
404 for (ptr = mod->locks; ptr < mod->locks_end; ptr++)
405 if (text_start <= *ptr && text_end >= *ptr)
406 return 1;
407 }
408
409 return 0;
410}
393#endif 411#endif
394 412
395#ifdef CONFIG_PARAVIRT 413#ifdef CONFIG_PARAVIRT
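alternatives_text_reserved() reports whether a text range overlaps an address recorded for SMP-alternatives patching, so callers can refuse to modify or probe such code. A userspace mock of the overlap test, with made-up lock-prefix addresses, to show the logic in isolation:

    #include <stdio.h>

    /* Illustrative stand-ins for the per-module lock-prefix address lists. */
    static unsigned long locks[] = { 0x1000, 0x2040, 0x30f8 };

    static int text_reserved(unsigned long start, unsigned long end)
    {
        unsigned int i;

        /* reserved if any recorded address falls inside [start, end] */
        for (i = 0; i < sizeof(locks) / sizeof(locks[0]); i++)
            if (start <= locks[i] && end >= locks[i])
                return 1;
        return 0;
    }

    int main(void)
    {
        printf("0x2000-0x20ff reserved? %d\n", text_reserved(0x2000, 0x20ff)); /* 1 */
        printf("0x4000-0x40ff reserved? %d\n", text_reserved(0x4000, 0x40ff)); /* 0 */
        return 0;
    }
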
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8c1c07073ccc..641ccb9dddbc 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -7,6 +7,7 @@
7 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter 7 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
8 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> 8 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
9 * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> 9 * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
10 * Copyright (C) 2009 Google, Inc., Stephane Eranian
10 * 11 *
11 * For licencing details see kernel-base/COPYING 12 * For licencing details see kernel-base/COPYING
12 */ 13 */
@@ -22,6 +23,7 @@
22#include <linux/uaccess.h> 23#include <linux/uaccess.h>
23#include <linux/highmem.h> 24#include <linux/highmem.h>
24#include <linux/cpu.h> 25#include <linux/cpu.h>
26#include <linux/bitops.h>
25 27
26#include <asm/apic.h> 28#include <asm/apic.h>
27#include <asm/stacktrace.h> 29#include <asm/stacktrace.h>
@@ -68,26 +70,59 @@ struct debug_store {
68 u64 pebs_event_reset[MAX_PEBS_EVENTS]; 70 u64 pebs_event_reset[MAX_PEBS_EVENTS];
69}; 71};
70 72
73struct event_constraint {
74 union {
75 unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
76 u64 idxmsk64[1];
77 };
78 int code;
79 int cmask;
80 int weight;
81};
82
83struct amd_nb {
84 int nb_id; /* NorthBridge id */
85 int refcnt; /* reference count */
86 struct perf_event *owners[X86_PMC_IDX_MAX];
87 struct event_constraint event_constraints[X86_PMC_IDX_MAX];
88};
89
71struct cpu_hw_events { 90struct cpu_hw_events {
72 struct perf_event *events[X86_PMC_IDX_MAX]; 91 struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
73 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
74 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; 92 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
75 unsigned long interrupts; 93 unsigned long interrupts;
76 int enabled; 94 int enabled;
77 struct debug_store *ds; 95 struct debug_store *ds;
78};
79 96
80struct event_constraint { 97 int n_events;
81 unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; 98 int n_added;
82 int code; 99 int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
100 u64 tags[X86_PMC_IDX_MAX];
101 struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
102 struct amd_nb *amd_nb;
83}; 103};
84 104
85#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) } 105#define __EVENT_CONSTRAINT(c, n, m, w) {\
86#define EVENT_CONSTRAINT_END { .code = 0, .idxmsk[0] = 0 } 106 { .idxmsk64[0] = (n) }, \
107 .code = (c), \
108 .cmask = (m), \
109 .weight = (w), \
110}
111
112#define EVENT_CONSTRAINT(c, n, m) \
113 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
87 114
88#define for_each_event_constraint(e, c) \ 115#define INTEL_EVENT_CONSTRAINT(c, n) \
89 for ((e) = (c); (e)->idxmsk[0]; (e)++) 116 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
90 117
118#define FIXED_EVENT_CONSTRAINT(c, n) \
119 EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK)
120
121#define EVENT_CONSTRAINT_END \
122 EVENT_CONSTRAINT(0, 0, 0)
123
124#define for_each_event_constraint(e, c) \
125 for ((e) = (c); (e)->cmask; (e)++)
91 126
92/* 127/*
93 * struct x86_pmu - generic x86 pmu 128 * struct x86_pmu - generic x86 pmu
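Each EVENT_CONSTRAINT() entry now carries the allowed-counter bitmask, the config bits to match, and a precomputed weight (how many counters the event can use). A standalone sketch of declaring and walking such a table, with illustrative codes loosely modeled on the Core 2 constraints removed further down (HWEIGHT() is replaced by a compiler builtin here for brevity):

    #include <stdio.h>

    struct event_constraint {
        unsigned long long idxmsk;  /* counters this event may use  */
        int code;                   /* event-select code to match   */
        int cmask;                  /* which config bits to compare */
        int weight;                 /* popcount of idxmsk           */
    };

    #define EVENT_CONSTRAINT(c, n, m) \
        { .idxmsk = (n), .code = (c), .cmask = (m), .weight = __builtin_popcountll(n) }
    #define EVENT_CONSTRAINT_END    EVENT_CONSTRAINT(0, 0, 0)

    int main(void)
    {
        const struct event_constraint constraints[] = {
            EVENT_CONSTRAINT(0x10, 0x1, 0xff),  /* FP_COMP_OPS_EXE: counter 0 only */
            EVENT_CONSTRAINT(0x12, 0x2, 0xff),  /* MUL: counter 1 only             */
            EVENT_CONSTRAINT_END
        };
        const struct event_constraint *c;

        /* mirrors for_each_event_constraint(): stop at the cmask == 0 sentinel */
        for (c = constraints; c->cmask; c++)
            printf("event 0x%02x -> counters 0x%llx (weight %d)\n",
                   c->code, c->idxmsk, c->weight);
        return 0;
    }
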
@@ -114,8 +149,14 @@ struct x86_pmu {
114 u64 intel_ctrl; 149 u64 intel_ctrl;
115 void (*enable_bts)(u64 config); 150 void (*enable_bts)(u64 config);
116 void (*disable_bts)(void); 151 void (*disable_bts)(void);
117 int (*get_event_idx)(struct cpu_hw_events *cpuc, 152
118 struct hw_perf_event *hwc); 153 struct event_constraint *
154 (*get_event_constraints)(struct cpu_hw_events *cpuc,
155 struct perf_event *event);
156
157 void (*put_event_constraints)(struct cpu_hw_events *cpuc,
158 struct perf_event *event);
159 struct event_constraint *event_constraints;
119}; 160};
120 161
121static struct x86_pmu x86_pmu __read_mostly; 162static struct x86_pmu x86_pmu __read_mostly;
@@ -124,111 +165,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
124 .enabled = 1, 165 .enabled = 1,
125}; 166};
126 167
127static const struct event_constraint *event_constraints; 168static int x86_perf_event_set_period(struct perf_event *event,
128 169 struct hw_perf_event *hwc, int idx);
129/*
130 * Not sure about some of these
131 */
132static const u64 p6_perfmon_event_map[] =
133{
134 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
135 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
136 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
137 [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
138 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
139 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
140 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
141};
142
143static u64 p6_pmu_event_map(int hw_event)
144{
145 return p6_perfmon_event_map[hw_event];
146}
147
148/*
149 * Event setting that is specified not to count anything.
150 * We use this to effectively disable a counter.
151 *
152 * L2_RQSTS with 0 MESI unit mask.
153 */
154#define P6_NOP_EVENT 0x0000002EULL
155
156static u64 p6_pmu_raw_event(u64 hw_event)
157{
158#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
159#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL
160#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL
161#define P6_EVNTSEL_INV_MASK 0x00800000ULL
162#define P6_EVNTSEL_REG_MASK 0xFF000000ULL
163
164#define P6_EVNTSEL_MASK \
165 (P6_EVNTSEL_EVENT_MASK | \
166 P6_EVNTSEL_UNIT_MASK | \
167 P6_EVNTSEL_EDGE_MASK | \
168 P6_EVNTSEL_INV_MASK | \
169 P6_EVNTSEL_REG_MASK)
170
171 return hw_event & P6_EVNTSEL_MASK;
172}
173
174static const struct event_constraint intel_p6_event_constraints[] =
175{
176 EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
177 EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
178 EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */
179 EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
180 EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
181 EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
182 EVENT_CONSTRAINT_END
183};
184
185/*
186 * Intel PerfMon v3. Used on Core2 and later.
187 */
188static const u64 intel_perfmon_event_map[] =
189{
190 [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
191 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
192 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
193 [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
194 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
195 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
196 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
197};
198
199static const struct event_constraint intel_core_event_constraints[] =
200{
201 EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
202 EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
203 EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
204 EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
205 EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
206 EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
207 EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
208 EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
209 EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
210 EVENT_CONSTRAINT_END
211};
212
213static const struct event_constraint intel_nehalem_event_constraints[] =
214{
215 EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
216 EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
217 EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
218 EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
219 EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
220 EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */
221 EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
222 EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
223 EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */
224 EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */
225 EVENT_CONSTRAINT_END
226};
227
228static u64 intel_pmu_event_map(int hw_event)
229{
230 return intel_perfmon_event_map[hw_event];
231}
232 170
233/* 171/*
234 * Generalized hw caching related hw_event table, filled 172 * Generalized hw caching related hw_event table, filled
@@ -245,424 +183,6 @@ static u64 __read_mostly hw_cache_event_ids
245 [PERF_COUNT_HW_CACHE_OP_MAX] 183 [PERF_COUNT_HW_CACHE_OP_MAX]
246 [PERF_COUNT_HW_CACHE_RESULT_MAX]; 184 [PERF_COUNT_HW_CACHE_RESULT_MAX];
247 185
248static __initconst u64 nehalem_hw_cache_event_ids
249 [PERF_COUNT_HW_CACHE_MAX]
250 [PERF_COUNT_HW_CACHE_OP_MAX]
251 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
252{
253 [ C(L1D) ] = {
254 [ C(OP_READ) ] = {
255 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
256 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
257 },
258 [ C(OP_WRITE) ] = {
259 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
260 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
261 },
262 [ C(OP_PREFETCH) ] = {
263 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
264 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
265 },
266 },
267 [ C(L1I ) ] = {
268 [ C(OP_READ) ] = {
269 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
270 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
271 },
272 [ C(OP_WRITE) ] = {
273 [ C(RESULT_ACCESS) ] = -1,
274 [ C(RESULT_MISS) ] = -1,
275 },
276 [ C(OP_PREFETCH) ] = {
277 [ C(RESULT_ACCESS) ] = 0x0,
278 [ C(RESULT_MISS) ] = 0x0,
279 },
280 },
281 [ C(LL ) ] = {
282 [ C(OP_READ) ] = {
283 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
284 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
285 },
286 [ C(OP_WRITE) ] = {
287 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
288 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
289 },
290 [ C(OP_PREFETCH) ] = {
291 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
292 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
293 },
294 },
295 [ C(DTLB) ] = {
296 [ C(OP_READ) ] = {
297 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
298 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
299 },
300 [ C(OP_WRITE) ] = {
301 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
302 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
303 },
304 [ C(OP_PREFETCH) ] = {
305 [ C(RESULT_ACCESS) ] = 0x0,
306 [ C(RESULT_MISS) ] = 0x0,
307 },
308 },
309 [ C(ITLB) ] = {
310 [ C(OP_READ) ] = {
311 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
312 [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
313 },
314 [ C(OP_WRITE) ] = {
315 [ C(RESULT_ACCESS) ] = -1,
316 [ C(RESULT_MISS) ] = -1,
317 },
318 [ C(OP_PREFETCH) ] = {
319 [ C(RESULT_ACCESS) ] = -1,
320 [ C(RESULT_MISS) ] = -1,
321 },
322 },
323 [ C(BPU ) ] = {
324 [ C(OP_READ) ] = {
325 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
326 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
327 },
328 [ C(OP_WRITE) ] = {
329 [ C(RESULT_ACCESS) ] = -1,
330 [ C(RESULT_MISS) ] = -1,
331 },
332 [ C(OP_PREFETCH) ] = {
333 [ C(RESULT_ACCESS) ] = -1,
334 [ C(RESULT_MISS) ] = -1,
335 },
336 },
337};
338
339static __initconst u64 core2_hw_cache_event_ids
340 [PERF_COUNT_HW_CACHE_MAX]
341 [PERF_COUNT_HW_CACHE_OP_MAX]
342 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
343{
344 [ C(L1D) ] = {
345 [ C(OP_READ) ] = {
346 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
347 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
348 },
349 [ C(OP_WRITE) ] = {
350 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
351 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
352 },
353 [ C(OP_PREFETCH) ] = {
354 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
355 [ C(RESULT_MISS) ] = 0,
356 },
357 },
358 [ C(L1I ) ] = {
359 [ C(OP_READ) ] = {
360 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
361 [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
362 },
363 [ C(OP_WRITE) ] = {
364 [ C(RESULT_ACCESS) ] = -1,
365 [ C(RESULT_MISS) ] = -1,
366 },
367 [ C(OP_PREFETCH) ] = {
368 [ C(RESULT_ACCESS) ] = 0,
369 [ C(RESULT_MISS) ] = 0,
370 },
371 },
372 [ C(LL ) ] = {
373 [ C(OP_READ) ] = {
374 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
375 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
376 },
377 [ C(OP_WRITE) ] = {
378 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
379 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
380 },
381 [ C(OP_PREFETCH) ] = {
382 [ C(RESULT_ACCESS) ] = 0,
383 [ C(RESULT_MISS) ] = 0,
384 },
385 },
386 [ C(DTLB) ] = {
387 [ C(OP_READ) ] = {
388 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
389 [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
390 },
391 [ C(OP_WRITE) ] = {
392 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
393 [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
394 },
395 [ C(OP_PREFETCH) ] = {
396 [ C(RESULT_ACCESS) ] = 0,
397 [ C(RESULT_MISS) ] = 0,
398 },
399 },
400 [ C(ITLB) ] = {
401 [ C(OP_READ) ] = {
402 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
403 [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
404 },
405 [ C(OP_WRITE) ] = {
406 [ C(RESULT_ACCESS) ] = -1,
407 [ C(RESULT_MISS) ] = -1,
408 },
409 [ C(OP_PREFETCH) ] = {
410 [ C(RESULT_ACCESS) ] = -1,
411 [ C(RESULT_MISS) ] = -1,
412 },
413 },
414 [ C(BPU ) ] = {
415 [ C(OP_READ) ] = {
416 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
417 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
418 },
419 [ C(OP_WRITE) ] = {
420 [ C(RESULT_ACCESS) ] = -1,
421 [ C(RESULT_MISS) ] = -1,
422 },
423 [ C(OP_PREFETCH) ] = {
424 [ C(RESULT_ACCESS) ] = -1,
425 [ C(RESULT_MISS) ] = -1,
426 },
427 },
428};
429
430static __initconst u64 atom_hw_cache_event_ids
431 [PERF_COUNT_HW_CACHE_MAX]
432 [PERF_COUNT_HW_CACHE_OP_MAX]
433 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
434{
435 [ C(L1D) ] = {
436 [ C(OP_READ) ] = {
437 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
438 [ C(RESULT_MISS) ] = 0,
439 },
440 [ C(OP_WRITE) ] = {
441 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
442 [ C(RESULT_MISS) ] = 0,
443 },
444 [ C(OP_PREFETCH) ] = {
445 [ C(RESULT_ACCESS) ] = 0x0,
446 [ C(RESULT_MISS) ] = 0,
447 },
448 },
449 [ C(L1I ) ] = {
450 [ C(OP_READ) ] = {
451 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
452 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
453 },
454 [ C(OP_WRITE) ] = {
455 [ C(RESULT_ACCESS) ] = -1,
456 [ C(RESULT_MISS) ] = -1,
457 },
458 [ C(OP_PREFETCH) ] = {
459 [ C(RESULT_ACCESS) ] = 0,
460 [ C(RESULT_MISS) ] = 0,
461 },
462 },
463 [ C(LL ) ] = {
464 [ C(OP_READ) ] = {
465 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
466 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
467 },
468 [ C(OP_WRITE) ] = {
469 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
470 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
471 },
472 [ C(OP_PREFETCH) ] = {
473 [ C(RESULT_ACCESS) ] = 0,
474 [ C(RESULT_MISS) ] = 0,
475 },
476 },
477 [ C(DTLB) ] = {
478 [ C(OP_READ) ] = {
479 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
480 [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
481 },
482 [ C(OP_WRITE) ] = {
483 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
484 [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
485 },
486 [ C(OP_PREFETCH) ] = {
487 [ C(RESULT_ACCESS) ] = 0,
488 [ C(RESULT_MISS) ] = 0,
489 },
490 },
491 [ C(ITLB) ] = {
492 [ C(OP_READ) ] = {
493 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
494 [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
495 },
496 [ C(OP_WRITE) ] = {
497 [ C(RESULT_ACCESS) ] = -1,
498 [ C(RESULT_MISS) ] = -1,
499 },
500 [ C(OP_PREFETCH) ] = {
501 [ C(RESULT_ACCESS) ] = -1,
502 [ C(RESULT_MISS) ] = -1,
503 },
504 },
505 [ C(BPU ) ] = {
506 [ C(OP_READ) ] = {
507 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
508 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
509 },
510 [ C(OP_WRITE) ] = {
511 [ C(RESULT_ACCESS) ] = -1,
512 [ C(RESULT_MISS) ] = -1,
513 },
514 [ C(OP_PREFETCH) ] = {
515 [ C(RESULT_ACCESS) ] = -1,
516 [ C(RESULT_MISS) ] = -1,
517 },
518 },
519};
520
521static u64 intel_pmu_raw_event(u64 hw_event)
522{
523#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
524#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
525#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
526#define CORE_EVNTSEL_INV_MASK 0x00800000ULL
527#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL
528
529#define CORE_EVNTSEL_MASK \
530 (CORE_EVNTSEL_EVENT_MASK | \
531 CORE_EVNTSEL_UNIT_MASK | \
532 CORE_EVNTSEL_EDGE_MASK | \
533 CORE_EVNTSEL_INV_MASK | \
534 CORE_EVNTSEL_REG_MASK)
535
536 return hw_event & CORE_EVNTSEL_MASK;
537}
538
539static __initconst u64 amd_hw_cache_event_ids
540 [PERF_COUNT_HW_CACHE_MAX]
541 [PERF_COUNT_HW_CACHE_OP_MAX]
542 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
543{
544 [ C(L1D) ] = {
545 [ C(OP_READ) ] = {
546 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
547 [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
548 },
549 [ C(OP_WRITE) ] = {
550 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
551 [ C(RESULT_MISS) ] = 0,
552 },
553 [ C(OP_PREFETCH) ] = {
554 [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
555 [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
556 },
557 },
558 [ C(L1I ) ] = {
559 [ C(OP_READ) ] = {
560 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
561 [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
562 },
563 [ C(OP_WRITE) ] = {
564 [ C(RESULT_ACCESS) ] = -1,
565 [ C(RESULT_MISS) ] = -1,
566 },
567 [ C(OP_PREFETCH) ] = {
568 [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
569 [ C(RESULT_MISS) ] = 0,
570 },
571 },
572 [ C(LL ) ] = {
573 [ C(OP_READ) ] = {
574 [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
575 [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
576 },
577 [ C(OP_WRITE) ] = {
578 [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
579 [ C(RESULT_MISS) ] = 0,
580 },
581 [ C(OP_PREFETCH) ] = {
582 [ C(RESULT_ACCESS) ] = 0,
583 [ C(RESULT_MISS) ] = 0,
584 },
585 },
586 [ C(DTLB) ] = {
587 [ C(OP_READ) ] = {
588 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
589 [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */
590 },
591 [ C(OP_WRITE) ] = {
592 [ C(RESULT_ACCESS) ] = 0,
593 [ C(RESULT_MISS) ] = 0,
594 },
595 [ C(OP_PREFETCH) ] = {
596 [ C(RESULT_ACCESS) ] = 0,
597 [ C(RESULT_MISS) ] = 0,
598 },
599 },
600 [ C(ITLB) ] = {
601 [ C(OP_READ) ] = {
602 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */
603 [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */
604 },
605 [ C(OP_WRITE) ] = {
606 [ C(RESULT_ACCESS) ] = -1,
607 [ C(RESULT_MISS) ] = -1,
608 },
609 [ C(OP_PREFETCH) ] = {
610 [ C(RESULT_ACCESS) ] = -1,
611 [ C(RESULT_MISS) ] = -1,
612 },
613 },
614 [ C(BPU ) ] = {
615 [ C(OP_READ) ] = {
616 [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
617 [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
618 },
619 [ C(OP_WRITE) ] = {
620 [ C(RESULT_ACCESS) ] = -1,
621 [ C(RESULT_MISS) ] = -1,
622 },
623 [ C(OP_PREFETCH) ] = {
624 [ C(RESULT_ACCESS) ] = -1,
625 [ C(RESULT_MISS) ] = -1,
626 },
627 },
628};
629
630/*
631 * AMD Performance Monitor K7 and later.
632 */
633static const u64 amd_perfmon_event_map[] =
634{
635 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
636 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
637 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
638 [PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
639 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
640 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
641};
642
643static u64 amd_pmu_event_map(int hw_event)
644{
645 return amd_perfmon_event_map[hw_event];
646}
647
648static u64 amd_pmu_raw_event(u64 hw_event)
649{
650#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL
651#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL
652#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL
653#define K7_EVNTSEL_INV_MASK 0x000800000ULL
654#define K7_EVNTSEL_REG_MASK 0x0FF000000ULL
655
656#define K7_EVNTSEL_MASK \
657 (K7_EVNTSEL_EVENT_MASK | \
658 K7_EVNTSEL_UNIT_MASK | \
659 K7_EVNTSEL_EDGE_MASK | \
660 K7_EVNTSEL_INV_MASK | \
661 K7_EVNTSEL_REG_MASK)
662
663 return hw_event & K7_EVNTSEL_MASK;
664}
665
666/* 186/*
667 * Propagate event elapsed time into the generic event. 187 * Propagate event elapsed time into the generic event.
668 * Can only be executed on the CPU where the event is active. 188 * Can only be executed on the CPU where the event is active.
@@ -914,42 +434,6 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
914 return 0; 434 return 0;
915} 435}
916 436
917static void intel_pmu_enable_bts(u64 config)
918{
919 unsigned long debugctlmsr;
920
921 debugctlmsr = get_debugctlmsr();
922
923 debugctlmsr |= X86_DEBUGCTL_TR;
924 debugctlmsr |= X86_DEBUGCTL_BTS;
925 debugctlmsr |= X86_DEBUGCTL_BTINT;
926
927 if (!(config & ARCH_PERFMON_EVENTSEL_OS))
928 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
929
930 if (!(config & ARCH_PERFMON_EVENTSEL_USR))
931 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
932
933 update_debugctlmsr(debugctlmsr);
934}
935
936static void intel_pmu_disable_bts(void)
937{
938 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
939 unsigned long debugctlmsr;
940
941 if (!cpuc->ds)
942 return;
943
944 debugctlmsr = get_debugctlmsr();
945
946 debugctlmsr &=
947 ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
948 X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
949
950 update_debugctlmsr(debugctlmsr);
951}
952
953/* 437/*
954 * Setup the hardware configuration for a given attr_type 438 * Setup the hardware configuration for a given attr_type
955 */ 439 */
@@ -988,6 +472,8 @@ static int __hw_perf_event_init(struct perf_event *event)
988 hwc->config = ARCH_PERFMON_EVENTSEL_INT; 472 hwc->config = ARCH_PERFMON_EVENTSEL_INT;
989 473
990 hwc->idx = -1; 474 hwc->idx = -1;
475 hwc->last_cpu = -1;
476 hwc->last_tag = ~0ULL;
991 477
992 /* 478 /*
993 * Count user and OS events unless requested not to. 479 * Count user and OS events unless requested not to.
@@ -1056,216 +542,323 @@ static int __hw_perf_event_init(struct perf_event *event)
1056 return 0; 542 return 0;
1057} 543}
1058 544
1059static void p6_pmu_disable_all(void) 545static void x86_pmu_disable_all(void)
1060{ 546{
1061 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 547 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1062 u64 val; 548 int idx;
1063
1064 if (!cpuc->enabled)
1065 return;
1066 549
1067 cpuc->enabled = 0; 550 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1068 barrier(); 551 u64 val;
1069 552
1070 /* p6 only has one enable register */ 553 if (!test_bit(idx, cpuc->active_mask))
1071 rdmsrl(MSR_P6_EVNTSEL0, val); 554 continue;
1072 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; 555 rdmsrl(x86_pmu.eventsel + idx, val);
1073 wrmsrl(MSR_P6_EVNTSEL0, val); 556 if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
557 continue;
558 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
559 wrmsrl(x86_pmu.eventsel + idx, val);
560 }
1074} 561}
1075 562
1076static void intel_pmu_disable_all(void) 563void hw_perf_disable(void)
1077{ 564{
1078 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 565 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1079 566
567 if (!x86_pmu_initialized())
568 return;
569
1080 if (!cpuc->enabled) 570 if (!cpuc->enabled)
1081 return; 571 return;
1082 572
573 cpuc->n_added = 0;
1083 cpuc->enabled = 0; 574 cpuc->enabled = 0;
1084 barrier(); 575 barrier();
1085 576
1086 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); 577 x86_pmu.disable_all();
1087
1088 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
1089 intel_pmu_disable_bts();
1090} 578}
1091 579
1092static void amd_pmu_disable_all(void) 580static void x86_pmu_enable_all(void)
1093{ 581{
1094 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 582 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1095 int idx; 583 int idx;
1096 584
1097 if (!cpuc->enabled)
1098 return;
1099
1100 cpuc->enabled = 0;
1101 /*
1102 * ensure we write the disable before we start disabling the
1103 * events proper, so that amd_pmu_enable_event() does the
1104 * right thing.
1105 */
1106 barrier();
1107
1108 for (idx = 0; idx < x86_pmu.num_events; idx++) { 585 for (idx = 0; idx < x86_pmu.num_events; idx++) {
586 struct perf_event *event = cpuc->events[idx];
1109 u64 val; 587 u64 val;
1110 588
1111 if (!test_bit(idx, cpuc->active_mask)) 589 if (!test_bit(idx, cpuc->active_mask))
1112 continue; 590 continue;
1113 rdmsrl(MSR_K7_EVNTSEL0 + idx, val); 591
1114 if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) 592 val = event->hw.config;
1115 continue; 593 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1116 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; 594 wrmsrl(x86_pmu.eventsel + idx, val);
1117 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
1118 } 595 }
1119} 596}
1120 597
1121void hw_perf_disable(void) 598static const struct pmu pmu;
599
600static inline int is_x86_event(struct perf_event *event)
1122{ 601{
1123 if (!x86_pmu_initialized()) 602 return event->pmu == &pmu;
1124 return;
1125 return x86_pmu.disable_all();
1126} 603}
1127 604
1128static void p6_pmu_enable_all(void) 605static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
1129{ 606{
1130 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 607 struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
1131 unsigned long val; 608 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
609 int i, j, w, wmax, num = 0;
610 struct hw_perf_event *hwc;
1132 611
1133 if (cpuc->enabled) 612 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
1134 return;
1135 613
1136 cpuc->enabled = 1; 614 for (i = 0; i < n; i++) {
1137 barrier(); 615 constraints[i] =
616 x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
617 }
1138 618
1139 /* p6 only has one enable register */ 619 /*
1140 rdmsrl(MSR_P6_EVNTSEL0, val); 620 * fastpath, try to reuse previous register
1141 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 621 */
1142 wrmsrl(MSR_P6_EVNTSEL0, val); 622 for (i = 0; i < n; i++) {
1143} 623 hwc = &cpuc->event_list[i]->hw;
624 c = constraints[i];
1144 625
1145static void intel_pmu_enable_all(void) 626 /* never assigned */
1146{ 627 if (hwc->idx == -1)
1147 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 628 break;
1148 629
1149 if (cpuc->enabled) 630 /* constraint still honored */
1150 return; 631 if (!test_bit(hwc->idx, c->idxmsk))
632 break;
1151 633
1152 cpuc->enabled = 1; 634 /* not already used */
1153 barrier(); 635 if (test_bit(hwc->idx, used_mask))
636 break;
637
638 set_bit(hwc->idx, used_mask);
639 if (assign)
640 assign[i] = hwc->idx;
641 }
642 if (i == n)
643 goto done;
644
645 /*
646 * begin slow path
647 */
648
649 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
650
651 /*
652 * weight = number of possible counters
653 *
654 * 1 = most constrained, only works on one counter
655 * wmax = least constrained, works on any counter
656 *
657 * assign events to counters starting with most
658 * constrained events.
659 */
660 wmax = x86_pmu.num_events;
661
662 /*
663 * when fixed event counters are present,
664 * wmax is incremented by 1 to account
665 * for one more choice
666 */
667 if (x86_pmu.num_events_fixed)
668 wmax++;
1154 669
1155 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); 670 for (w = 1, num = n; num && w <= wmax; w++) {
671 /* for each event */
672 for (i = 0; num && i < n; i++) {
673 c = constraints[i];
674 hwc = &cpuc->event_list[i]->hw;
1156 675
1157 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { 676 if (c->weight != w)
1158 struct perf_event *event = 677 continue;
1159 cpuc->events[X86_PMC_IDX_FIXED_BTS];
1160 678
1161 if (WARN_ON_ONCE(!event)) 679 for_each_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
1162 return; 680 if (!test_bit(j, used_mask))
681 break;
682 }
683
684 if (j == X86_PMC_IDX_MAX)
685 break;
686
687 set_bit(j, used_mask);
1163 688
1164 intel_pmu_enable_bts(event->hw.config); 689 if (assign)
690 assign[i] = j;
691 num--;
692 }
693 }
694done:
695 /*
696 * scheduling failed or is just a simulation,
697 * free resources if necessary
698 */
699 if (!assign || num) {
700 for (i = 0; i < n; i++) {
701 if (x86_pmu.put_event_constraints)
702 x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
703 }
1165 } 704 }
705 return num ? -ENOSPC : 0;
1166} 706}
1167 707
1168static void amd_pmu_enable_all(void) 708/*
709 * dogrp: true if must collect siblings events (group)
710 * returns total number of events and error code
711 */
712static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
1169{ 713{
1170 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 714 struct perf_event *event;
1171 int idx; 715 int n, max_count;
1172 716
1173 if (cpuc->enabled) 717 max_count = x86_pmu.num_events + x86_pmu.num_events_fixed;
1174 return;
1175 718
1176 cpuc->enabled = 1; 719 /* current number of events already accepted */
1177 barrier(); 720 n = cpuc->n_events;
1178 721
1179 for (idx = 0; idx < x86_pmu.num_events; idx++) { 722 if (is_x86_event(leader)) {
1180 struct perf_event *event = cpuc->events[idx]; 723 if (n >= max_count)
1181 u64 val; 724 return -ENOSPC;
725 cpuc->event_list[n] = leader;
726 n++;
727 }
728 if (!dogrp)
729 return n;
1182 730
1183 if (!test_bit(idx, cpuc->active_mask)) 731 list_for_each_entry(event, &leader->sibling_list, group_entry) {
732 if (!is_x86_event(event) ||
733 event->state <= PERF_EVENT_STATE_OFF)
1184 continue; 734 continue;
1185 735
1186 val = event->hw.config; 736 if (n >= max_count)
1187 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 737 return -ENOSPC;
1188 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
1189 }
1190}
1191 738
1192void hw_perf_enable(void) 739 cpuc->event_list[n] = event;
1193{ 740 n++;
1194 if (!x86_pmu_initialized()) 741 }
1195 return; 742 return n;
1196 x86_pmu.enable_all();
1197} 743}
1198 744
1199static inline u64 intel_pmu_get_status(void) 745static inline void x86_assign_hw_event(struct perf_event *event,
746 struct cpu_hw_events *cpuc, int i)
1200{ 747{
1201 u64 status; 748 struct hw_perf_event *hwc = &event->hw;
1202 749
1203 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); 750 hwc->idx = cpuc->assign[i];
751 hwc->last_cpu = smp_processor_id();
752 hwc->last_tag = ++cpuc->tags[i];
1204 753
1205 return status; 754 if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
755 hwc->config_base = 0;
756 hwc->event_base = 0;
757 } else if (hwc->idx >= X86_PMC_IDX_FIXED) {
758 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
759 /*
760 * We set it so that event_base + idx in wrmsr/rdmsr maps to
761 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
762 */
763 hwc->event_base =
764 MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
765 } else {
766 hwc->config_base = x86_pmu.eventsel;
767 hwc->event_base = x86_pmu.perfctr;
768 }
1206} 769}
1207 770
1208static inline void intel_pmu_ack_status(u64 ack) 771static inline int match_prev_assignment(struct hw_perf_event *hwc,
772 struct cpu_hw_events *cpuc,
773 int i)
1209{ 774{
1210 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); 775 return hwc->idx == cpuc->assign[i] &&
776 hwc->last_cpu == smp_processor_id() &&
777 hwc->last_tag == cpuc->tags[i];
1211} 778}
1212 779
1213static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) 780static void x86_pmu_stop(struct perf_event *event);
1214{
1215 (void)checking_wrmsrl(hwc->config_base + idx,
1216 hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
1217}
1218 781
1219static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx) 782void hw_perf_enable(void)
1220{ 783{
1221 (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); 784 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1222} 785 struct perf_event *event;
786 struct hw_perf_event *hwc;
787 int i;
1223 788
1224static inline void 789 if (!x86_pmu_initialized())
1225intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx) 790 return;
1226{
1227 int idx = __idx - X86_PMC_IDX_FIXED;
1228 u64 ctrl_val, mask;
1229 791
1230 mask = 0xfULL << (idx * 4); 792 if (cpuc->enabled)
793 return;
1231 794
1232 rdmsrl(hwc->config_base, ctrl_val); 795 if (cpuc->n_added) {
1233 ctrl_val &= ~mask; 796 /*
1234 (void)checking_wrmsrl(hwc->config_base, ctrl_val); 797 * apply assignment obtained either from
1235} 798 * hw_perf_group_sched_in() or x86_pmu_enable()
799 *
800 * step1: save events moving to new counters
801 * step2: reprogram moved events into new counters
802 */
803 for (i = 0; i < cpuc->n_events; i++) {
1236 804
1237static inline void 805 event = cpuc->event_list[i];
1238p6_pmu_disable_event(struct hw_perf_event *hwc, int idx) 806 hwc = &event->hw;
1239{
1240 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1241 u64 val = P6_NOP_EVENT;
1242 807
1243 if (cpuc->enabled) 808 /*
1244 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 809 * we can avoid reprogramming counter if:
810 * - assigned same counter as last time
811 * - running on same CPU as last time
812 * - no other event has used the counter since
813 */
814 if (hwc->idx == -1 ||
815 match_prev_assignment(hwc, cpuc, i))
816 continue;
1245 817
1246 (void)checking_wrmsrl(hwc->config_base + idx, val); 818 x86_pmu_stop(event);
1247}
1248 819
1249static inline void 820 hwc->idx = -1;
1250intel_pmu_disable_event(struct hw_perf_event *hwc, int idx) 821 }
1251{
1252 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
1253 intel_pmu_disable_bts();
1254 return;
1255 }
1256 822
1257 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { 823 for (i = 0; i < cpuc->n_events; i++) {
1258 intel_pmu_disable_fixed(hwc, idx); 824
1259 return; 825 event = cpuc->event_list[i];
826 hwc = &event->hw;
827
828 if (hwc->idx == -1) {
829 x86_assign_hw_event(event, cpuc, i);
830 x86_perf_event_set_period(event, hwc, hwc->idx);
831 }
832 /*
833 * need to mark as active because x86_pmu_disable()
 834 * clears active_mask and events[] yet it preserves
835 * idx
836 */
837 set_bit(hwc->idx, cpuc->active_mask);
838 cpuc->events[hwc->idx] = event;
839
840 x86_pmu.enable(hwc, hwc->idx);
841 perf_event_update_userpage(event);
842 }
843 cpuc->n_added = 0;
844 perf_events_lapic_init();
1260 } 845 }
1261 846
1262 x86_pmu_disable_event(hwc, idx); 847 cpuc->enabled = 1;
848 barrier();
849
850 x86_pmu.enable_all();
851}
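
The loop above skips reprogramming whenever the previous assignment still holds (same counter, same CPU, same tag). As a rough standalone sketch of that check — hypothetical names, not part of this patch:

	/*
	 * Sketch only: decide whether a counter must be stopped and
	 * reprogrammed, mirroring the match_prev_assignment() test.
	 */
	struct prev_assign { int idx; int cpu; u64 tag; };

	static int needs_reprogram(const struct prev_assign *prev,
				   int new_idx, int this_cpu, u64 cur_tag)
	{
		if (prev->idx == new_idx &&
		    prev->cpu == this_cpu &&
		    prev->tag == cur_tag)
			return 0;	/* counter can keep running untouched */
		return 1;		/* stop, reassign and restart the event */
	}
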
852
853static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
854{
855 (void)checking_wrmsrl(hwc->config_base + idx,
856 hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
1263} 857}
1264 858
1265static inline void 859static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1266amd_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1267{ 860{
1268 x86_pmu_disable_event(hwc, idx); 861 (void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
1269} 862}
1270 863
1271static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); 864static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -1326,220 +919,60 @@ x86_perf_event_set_period(struct perf_event *event,
1326 return ret; 919 return ret;
1327} 920}
1328 921
1329static inline void 922static void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1330intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
1331{
1332 int idx = __idx - X86_PMC_IDX_FIXED;
1333 u64 ctrl_val, bits, mask;
1334 int err;
1335
1336 /*
1337 * Enable IRQ generation (0x8),
1338 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
1339 * if requested:
1340 */
1341 bits = 0x8ULL;
1342 if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
1343 bits |= 0x2;
1344 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
1345 bits |= 0x1;
1346
1347 /*
1348 * ANY bit is supported in v3 and up
1349 */
1350 if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
1351 bits |= 0x4;
1352
1353 bits <<= (idx * 4);
1354 mask = 0xfULL << (idx * 4);
1355
1356 rdmsrl(hwc->config_base, ctrl_val);
1357 ctrl_val &= ~mask;
1358 ctrl_val |= bits;
1359 err = checking_wrmsrl(hwc->config_base, ctrl_val);
1360}
1361
1362static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1363{ 923{
1364 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 924 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1365 u64 val;
1366
1367 val = hwc->config;
1368 if (cpuc->enabled) 925 if (cpuc->enabled)
1369 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 926 __x86_pmu_enable_event(hwc, idx);
1370
1371 (void)checking_wrmsrl(hwc->config_base + idx, val);
1372} 927}
1373 928
1374 929/*
1375static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) 930 * activate a single event
1376{ 931 *
1377 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { 932 * The event is added to the group of enabled events
1378 if (!__get_cpu_var(cpu_hw_events).enabled) 933 * but only if it can be scheduled with existing events.
1379 return; 934 *
1380 935 * Called with PMU disabled. If successful and return value 1,
1381 intel_pmu_enable_bts(hwc->config); 936 * then guaranteed to call perf_enable() and hw_perf_enable()
1382 return; 937 */
1383 } 938static int x86_pmu_enable(struct perf_event *event)
1384
1385 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
1386 intel_pmu_enable_fixed(hwc, idx);
1387 return;
1388 }
1389
1390 x86_pmu_enable_event(hwc, idx);
1391}
1392
1393static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1394{ 939{
1395 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 940 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
941 struct hw_perf_event *hwc;
942 int assign[X86_PMC_IDX_MAX];
943 int n, n0, ret;
1396 944
1397 if (cpuc->enabled) 945 hwc = &event->hw;
1398 x86_pmu_enable_event(hwc, idx);
1399}
1400
1401static int fixed_mode_idx(struct hw_perf_event *hwc)
1402{
1403 unsigned int hw_event;
1404
1405 hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK;
1406
1407 if (unlikely((hw_event ==
1408 x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
1409 (hwc->sample_period == 1)))
1410 return X86_PMC_IDX_FIXED_BTS;
1411 946
1412 if (!x86_pmu.num_events_fixed) 947 n0 = cpuc->n_events;
1413 return -1; 948 n = collect_events(cpuc, event, false);
949 if (n < 0)
950 return n;
1414 951
952 ret = x86_schedule_events(cpuc, n, assign);
953 if (ret)
954 return ret;
1415 /* 955 /*
1416 * fixed counters do not take all possible filters 956 * copy new assignment, now we know it is possible
957 * will be used by hw_perf_enable()
1417 */ 958 */
1418 if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK) 959 memcpy(cpuc->assign, assign, n*sizeof(int));
1419 return -1;
1420 960
1421 if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) 961 cpuc->n_events = n;
1422 return X86_PMC_IDX_FIXED_INSTRUCTIONS; 962 cpuc->n_added = n - n0;
1423 if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
1424 return X86_PMC_IDX_FIXED_CPU_CYCLES;
1425 if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
1426 return X86_PMC_IDX_FIXED_BUS_CYCLES;
1427 963
1428 return -1; 964 return 0;
1429}
1430
1431/*
1432 * generic counter allocator: get next free counter
1433 */
1434static int
1435gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
1436{
1437 int idx;
1438
1439 idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events);
1440 return idx == x86_pmu.num_events ? -1 : idx;
1441}
1442
1443/*
1444 * intel-specific counter allocator: check event constraints
1445 */
1446static int
1447intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
1448{
1449 const struct event_constraint *event_constraint;
1450 int i, code;
1451
1452 if (!event_constraints)
1453 goto skip;
1454
1455 code = hwc->config & CORE_EVNTSEL_EVENT_MASK;
1456
1457 for_each_event_constraint(event_constraint, event_constraints) {
1458 if (code == event_constraint->code) {
1459 for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
1460 if (!test_and_set_bit(i, cpuc->used_mask))
1461 return i;
1462 }
1463 return -1;
1464 }
1465 }
1466skip:
1467 return gen_get_event_idx(cpuc, hwc);
1468}
1469
1470static int
1471x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
1472{
1473 int idx;
1474
1475 idx = fixed_mode_idx(hwc);
1476 if (idx == X86_PMC_IDX_FIXED_BTS) {
1477 /* BTS is already occupied. */
1478 if (test_and_set_bit(idx, cpuc->used_mask))
1479 return -EAGAIN;
1480
1481 hwc->config_base = 0;
1482 hwc->event_base = 0;
1483 hwc->idx = idx;
1484 } else if (idx >= 0) {
1485 /*
1486 * Try to get the fixed event, if that is already taken
1487 * then try to get a generic event:
1488 */
1489 if (test_and_set_bit(idx, cpuc->used_mask))
1490 goto try_generic;
1491
1492 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
1493 /*
1494 * We set it so that event_base + idx in wrmsr/rdmsr maps to
1495 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
1496 */
1497 hwc->event_base =
1498 MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
1499 hwc->idx = idx;
1500 } else {
1501 idx = hwc->idx;
1502 /* Try to get the previous generic event again */
1503 if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
1504try_generic:
1505 idx = x86_pmu.get_event_idx(cpuc, hwc);
1506 if (idx == -1)
1507 return -EAGAIN;
1508
1509 set_bit(idx, cpuc->used_mask);
1510 hwc->idx = idx;
1511 }
1512 hwc->config_base = x86_pmu.eventsel;
1513 hwc->event_base = x86_pmu.perfctr;
1514 }
1515
1516 return idx;
1517} 965}
1518 966
1519/* 967static int x86_pmu_start(struct perf_event *event)
1520 * Find a PMC slot for the freshly enabled / scheduled in event:
1521 */
1522static int x86_pmu_enable(struct perf_event *event)
1523{ 968{
1524 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1525 struct hw_perf_event *hwc = &event->hw; 969 struct hw_perf_event *hwc = &event->hw;
1526 int idx;
1527
1528 idx = x86_schedule_event(cpuc, hwc);
1529 if (idx < 0)
1530 return idx;
1531 970
1532 perf_events_lapic_init(); 971 if (hwc->idx == -1)
1533 972 return -EAGAIN;
1534 x86_pmu.disable(hwc, idx);
1535 973
1536 cpuc->events[idx] = event; 974 x86_perf_event_set_period(event, hwc, hwc->idx);
1537 set_bit(idx, cpuc->active_mask); 975 x86_pmu.enable(hwc, hwc->idx);
1538
1539 x86_perf_event_set_period(event, hwc, idx);
1540 x86_pmu.enable(hwc, idx);
1541
1542 perf_event_update_userpage(event);
1543 976
1544 return 0; 977 return 0;
1545} 978}
@@ -1583,7 +1016,7 @@ void perf_event_print_debug(void)
1583 pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); 1016 pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
1584 pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); 1017 pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
1585 } 1018 }
1586 pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); 1019 pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
1587 1020
1588 for (idx = 0; idx < x86_pmu.num_events; idx++) { 1021 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1589 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); 1022 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
@@ -1607,67 +1040,7 @@ void perf_event_print_debug(void)
1607 local_irq_restore(flags); 1040 local_irq_restore(flags);
1608} 1041}
1609 1042
1610static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) 1043static void x86_pmu_stop(struct perf_event *event)
1611{
1612 struct debug_store *ds = cpuc->ds;
1613 struct bts_record {
1614 u64 from;
1615 u64 to;
1616 u64 flags;
1617 };
1618 struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
1619 struct bts_record *at, *top;
1620 struct perf_output_handle handle;
1621 struct perf_event_header header;
1622 struct perf_sample_data data;
1623 struct pt_regs regs;
1624
1625 if (!event)
1626 return;
1627
1628 if (!ds)
1629 return;
1630
1631 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
1632 top = (struct bts_record *)(unsigned long)ds->bts_index;
1633
1634 if (top <= at)
1635 return;
1636
1637 ds->bts_index = ds->bts_buffer_base;
1638
1639
1640 data.period = event->hw.last_period;
1641 data.addr = 0;
1642 data.raw = NULL;
1643 regs.ip = 0;
1644
1645 /*
1646 * Prepare a generic sample, i.e. fill in the invariant fields.
1647 * We will overwrite the from and to address before we output
1648 * the sample.
1649 */
1650 perf_prepare_sample(&header, &data, event, &regs);
1651
1652 if (perf_output_begin(&handle, event,
1653 header.size * (top - at), 1, 1))
1654 return;
1655
1656 for (; at < top; at++) {
1657 data.ip = at->from;
1658 data.addr = at->to;
1659
1660 perf_output_sample(&handle, &header, &data, event);
1661 }
1662
1663 perf_output_end(&handle);
1664
1665 /* There's new data available. */
1666 event->hw.interrupts++;
1667 event->pending_kill = POLL_IN;
1668}
1669
1670static void x86_pmu_disable(struct perf_event *event)
1671{ 1044{
1672 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1045 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1673 struct hw_perf_event *hwc = &event->hw; 1046 struct hw_perf_event *hwc = &event->hw;
@@ -1681,183 +1054,38 @@ static void x86_pmu_disable(struct perf_event *event)
1681 x86_pmu.disable(hwc, idx); 1054 x86_pmu.disable(hwc, idx);
1682 1055
1683 /* 1056 /*
1684 * Make sure the cleared pointer becomes visible before we
1685 * (potentially) free the event:
1686 */
1687 barrier();
1688
1689 /*
1690 * Drain the remaining delta count out of an event 1057 * Drain the remaining delta count out of an event
1691 * that we are disabling: 1058 * that we are disabling:
1692 */ 1059 */
1693 x86_perf_event_update(event, hwc, idx); 1060 x86_perf_event_update(event, hwc, idx);
1694 1061
1695 /* Drain the remaining BTS records. */
1696 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS))
1697 intel_pmu_drain_bts_buffer(cpuc);
1698
1699 cpuc->events[idx] = NULL; 1062 cpuc->events[idx] = NULL;
1700 clear_bit(idx, cpuc->used_mask);
1701
1702 perf_event_update_userpage(event);
1703}
1704
1705/*
1706 * Save and restart an expired event. Called by NMI contexts,
1707 * so it has to be careful about preempting normal event ops:
1708 */
1709static int intel_pmu_save_and_restart(struct perf_event *event)
1710{
1711 struct hw_perf_event *hwc = &event->hw;
1712 int idx = hwc->idx;
1713 int ret;
1714
1715 x86_perf_event_update(event, hwc, idx);
1716 ret = x86_perf_event_set_period(event, hwc, idx);
1717
1718 if (event->state == PERF_EVENT_STATE_ACTIVE)
1719 intel_pmu_enable_event(hwc, idx);
1720
1721 return ret;
1722} 1063}
1723 1064
1724static void intel_pmu_reset(void) 1065static void x86_pmu_disable(struct perf_event *event)
1725{
1726 struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
1727 unsigned long flags;
1728 int idx;
1729
1730 if (!x86_pmu.num_events)
1731 return;
1732
1733 local_irq_save(flags);
1734
1735 printk("clearing PMU state on CPU#%d\n", smp_processor_id());
1736
1737 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1738 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
1739 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
1740 }
1741 for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
1742 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
1743 }
1744 if (ds)
1745 ds->bts_index = ds->bts_buffer_base;
1746
1747 local_irq_restore(flags);
1748}
1749
1750static int p6_pmu_handle_irq(struct pt_regs *regs)
1751{
1752 struct perf_sample_data data;
1753 struct cpu_hw_events *cpuc;
1754 struct perf_event *event;
1755 struct hw_perf_event *hwc;
1756 int idx, handled = 0;
1757 u64 val;
1758
1759 data.addr = 0;
1760 data.raw = NULL;
1761
1762 cpuc = &__get_cpu_var(cpu_hw_events);
1763
1764 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1765 if (!test_bit(idx, cpuc->active_mask))
1766 continue;
1767
1768 event = cpuc->events[idx];
1769 hwc = &event->hw;
1770
1771 val = x86_perf_event_update(event, hwc, idx);
1772 if (val & (1ULL << (x86_pmu.event_bits - 1)))
1773 continue;
1774
1775 /*
1776 * event overflow
1777 */
1778 handled = 1;
1779 data.period = event->hw.last_period;
1780
1781 if (!x86_perf_event_set_period(event, hwc, idx))
1782 continue;
1783
1784 if (perf_event_overflow(event, 1, &data, regs))
1785 p6_pmu_disable_event(hwc, idx);
1786 }
1787
1788 if (handled)
1789 inc_irq_stat(apic_perf_irqs);
1790
1791 return handled;
1792}
1793
1794/*
1795 * This handler is triggered by the local APIC, so the APIC IRQ handling
1796 * rules apply:
1797 */
1798static int intel_pmu_handle_irq(struct pt_regs *regs)
1799{ 1066{
1800 struct perf_sample_data data; 1067 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1801 struct cpu_hw_events *cpuc; 1068 int i;
1802 int bit, loops;
1803 u64 ack, status;
1804
1805 data.addr = 0;
1806 data.raw = NULL;
1807
1808 cpuc = &__get_cpu_var(cpu_hw_events);
1809
1810 perf_disable();
1811 intel_pmu_drain_bts_buffer(cpuc);
1812 status = intel_pmu_get_status();
1813 if (!status) {
1814 perf_enable();
1815 return 0;
1816 }
1817
1818 loops = 0;
1819again:
1820 if (++loops > 100) {
1821 WARN_ONCE(1, "perfevents: irq loop stuck!\n");
1822 perf_event_print_debug();
1823 intel_pmu_reset();
1824 perf_enable();
1825 return 1;
1826 }
1827 1069
1828 inc_irq_stat(apic_perf_irqs); 1070 x86_pmu_stop(event);
1829 ack = status;
1830 for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
1831 struct perf_event *event = cpuc->events[bit];
1832 1071
1833 clear_bit(bit, (unsigned long *) &status); 1072 for (i = 0; i < cpuc->n_events; i++) {
1834 if (!test_bit(bit, cpuc->active_mask)) 1073 if (event == cpuc->event_list[i]) {
1835 continue;
1836 1074
1837 if (!intel_pmu_save_and_restart(event)) 1075 if (x86_pmu.put_event_constraints)
1838 continue; 1076 x86_pmu.put_event_constraints(cpuc, event);
1839 1077
1840 data.period = event->hw.last_period; 1078 while (++i < cpuc->n_events)
1079 cpuc->event_list[i-1] = cpuc->event_list[i];
1841 1080
1842 if (perf_event_overflow(event, 1, &data, regs)) 1081 --cpuc->n_events;
1843 intel_pmu_disable_event(&event->hw, bit); 1082 break;
1083 }
1844 } 1084 }
1845 1085 perf_event_update_userpage(event);
1846 intel_pmu_ack_status(ack);
1847
1848 /*
1849 * Repeat if there is more work to be done:
1850 */
1851 status = intel_pmu_get_status();
1852 if (status)
1853 goto again;
1854
1855 perf_enable();
1856
1857 return 1;
1858} 1086}
1859 1087
1860static int amd_pmu_handle_irq(struct pt_regs *regs) 1088static int x86_pmu_handle_irq(struct pt_regs *regs)
1861{ 1089{
1862 struct perf_sample_data data; 1090 struct perf_sample_data data;
1863 struct cpu_hw_events *cpuc; 1091 struct cpu_hw_events *cpuc;
@@ -1892,7 +1120,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
1892 continue; 1120 continue;
1893 1121
1894 if (perf_event_overflow(event, 1, &data, regs)) 1122 if (perf_event_overflow(event, 1, &data, regs))
1895 amd_pmu_disable_event(hwc, idx); 1123 x86_pmu.disable(hwc, idx);
1896 } 1124 }
1897 1125
1898 if (handled) 1126 if (handled)
@@ -1975,194 +1203,137 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = {
1975 .priority = 1 1203 .priority = 1
1976}; 1204};
1977 1205
1978static __initconst struct x86_pmu p6_pmu = { 1206static struct event_constraint unconstrained;
1979 .name = "p6", 1207static struct event_constraint emptyconstraint;
1980 .handle_irq = p6_pmu_handle_irq,
1981 .disable_all = p6_pmu_disable_all,
1982 .enable_all = p6_pmu_enable_all,
1983 .enable = p6_pmu_enable_event,
1984 .disable = p6_pmu_disable_event,
1985 .eventsel = MSR_P6_EVNTSEL0,
1986 .perfctr = MSR_P6_PERFCTR0,
1987 .event_map = p6_pmu_event_map,
1988 .raw_event = p6_pmu_raw_event,
1989 .max_events = ARRAY_SIZE(p6_perfmon_event_map),
1990 .apic = 1,
1991 .max_period = (1ULL << 31) - 1,
1992 .version = 0,
1993 .num_events = 2,
1994 /*
1995 * Events have 40 bits implemented. However they are designed such
1996 * that bits [32-39] are sign extensions of bit 31. As such the
1997 * effective width of an event for P6-like PMU is 32 bits only.
1998 *
1999 * See IA-32 Intel Architecture Software developer manual Vol 3B
2000 */
2001 .event_bits = 32,
2002 .event_mask = (1ULL << 32) - 1,
2003 .get_event_idx = intel_get_event_idx,
2004};
2005 1208
2006static __initconst struct x86_pmu intel_pmu = { 1209static struct event_constraint *
2007 .name = "Intel", 1210x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
2008 .handle_irq = intel_pmu_handle_irq, 1211{
2009 .disable_all = intel_pmu_disable_all, 1212 struct event_constraint *c;
2010 .enable_all = intel_pmu_enable_all,
2011 .enable = intel_pmu_enable_event,
2012 .disable = intel_pmu_disable_event,
2013 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
2014 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
2015 .event_map = intel_pmu_event_map,
2016 .raw_event = intel_pmu_raw_event,
2017 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
2018 .apic = 1,
2019 /*
2020 * Intel PMCs cannot be accessed sanely above 32 bit width,
2021 * so we install an artificial 1<<31 period regardless of
2022 * the generic event period:
2023 */
2024 .max_period = (1ULL << 31) - 1,
2025 .enable_bts = intel_pmu_enable_bts,
2026 .disable_bts = intel_pmu_disable_bts,
2027 .get_event_idx = intel_get_event_idx,
2028};
2029 1213
2030static __initconst struct x86_pmu amd_pmu = { 1214 if (x86_pmu.event_constraints) {
2031 .name = "AMD", 1215 for_each_event_constraint(c, x86_pmu.event_constraints) {
2032 .handle_irq = amd_pmu_handle_irq, 1216 if ((event->hw.config & c->cmask) == c->code)
2033 .disable_all = amd_pmu_disable_all, 1217 return c;
2034 .enable_all = amd_pmu_enable_all, 1218 }
2035 .enable = amd_pmu_enable_event, 1219 }
2036 .disable = amd_pmu_disable_event, 1220
2037 .eventsel = MSR_K7_EVNTSEL0, 1221 return &unconstrained;
2038 .perfctr = MSR_K7_PERFCTR0, 1222}
2039 .event_map = amd_pmu_event_map,
2040 .raw_event = amd_pmu_raw_event,
2041 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
2042 .num_events = 4,
2043 .event_bits = 48,
2044 .event_mask = (1ULL << 48) - 1,
2045 .apic = 1,
2046 /* use highest bit to detect overflow */
2047 .max_period = (1ULL << 47) - 1,
2048 .get_event_idx = gen_get_event_idx,
2049};
2050 1223
2051static __init int p6_pmu_init(void) 1224static int x86_event_sched_in(struct perf_event *event,
1225 struct perf_cpu_context *cpuctx)
2052{ 1226{
2053 switch (boot_cpu_data.x86_model) { 1227 int ret = 0;
2054 case 1:
2055 case 3: /* Pentium Pro */
2056 case 5:
2057 case 6: /* Pentium II */
2058 case 7:
2059 case 8:
2060 case 11: /* Pentium III */
2061 event_constraints = intel_p6_event_constraints;
2062 break;
2063 case 9:
2064 case 13:
2065 /* Pentium M */
2066 event_constraints = intel_p6_event_constraints;
2067 break;
2068 default:
2069 pr_cont("unsupported p6 CPU model %d ",
2070 boot_cpu_data.x86_model);
2071 return -ENODEV;
2072 }
2073 1228
2074 x86_pmu = p6_pmu; 1229 event->state = PERF_EVENT_STATE_ACTIVE;
1230 event->oncpu = smp_processor_id();
1231 event->tstamp_running += event->ctx->time - event->tstamp_stopped;
2075 1232
2076 return 0; 1233 if (!is_x86_event(event))
1234 ret = event->pmu->enable(event);
1235
1236 if (!ret && !is_software_event(event))
1237 cpuctx->active_oncpu++;
1238
1239 if (!ret && event->attr.exclusive)
1240 cpuctx->exclusive = 1;
1241
1242 return ret;
2077} 1243}
2078 1244
2079static __init int intel_pmu_init(void) 1245static void x86_event_sched_out(struct perf_event *event,
1246 struct perf_cpu_context *cpuctx)
2080{ 1247{
2081 union cpuid10_edx edx; 1248 event->state = PERF_EVENT_STATE_INACTIVE;
2082 union cpuid10_eax eax; 1249 event->oncpu = -1;
2083 unsigned int unused;
2084 unsigned int ebx;
2085 int version;
2086
2087 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
2088 /* check for P6 processor family */
2089 if (boot_cpu_data.x86 == 6) {
2090 return p6_pmu_init();
2091 } else {
2092 return -ENODEV;
2093 }
2094 }
2095 1250
2096 /* 1251 if (!is_x86_event(event))
2097 * Check whether the Architectural PerfMon supports 1252 event->pmu->disable(event);
2098 * Branch Misses Retired hw_event or not.
2099 */
2100 cpuid(10, &eax.full, &ebx, &unused, &edx.full);
2101 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
2102 return -ENODEV;
2103 1253
2104 version = eax.split.version_id; 1254 event->tstamp_running -= event->ctx->time - event->tstamp_stopped;
2105 if (version < 2)
2106 return -ENODEV;
2107 1255
2108 x86_pmu = intel_pmu; 1256 if (!is_software_event(event))
2109 x86_pmu.version = version; 1257 cpuctx->active_oncpu--;
2110 x86_pmu.num_events = eax.split.num_events;
2111 x86_pmu.event_bits = eax.split.bit_width;
2112 x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1;
2113 1258
2114 /* 1259 if (event->attr.exclusive || !cpuctx->active_oncpu)
2115 * Quirk: v2 perfmon does not report fixed-purpose events, so 1260 cpuctx->exclusive = 0;
2116 * assume at least 3 events: 1261}
2117 */
2118 x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
2119 1262
1263/*
1264 * Called to enable a whole group of events.
1265 * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
1266 * Assumes the caller has disabled interrupts and has
1267 * frozen the PMU with hw_perf_save_disable.
1268 *
1269 * called with PMU disabled. If successful and return value 1,
1270 * then guaranteed to call perf_enable() and hw_perf_enable()
1271 */
1272int hw_perf_group_sched_in(struct perf_event *leader,
1273 struct perf_cpu_context *cpuctx,
1274 struct perf_event_context *ctx)
1275{
1276 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1277 struct perf_event *sub;
1278 int assign[X86_PMC_IDX_MAX];
1279 int n0, n1, ret;
1280
1281 /* n0 = total number of events */
1282 n0 = collect_events(cpuc, leader, true);
1283 if (n0 < 0)
1284 return n0;
1285
1286 ret = x86_schedule_events(cpuc, n0, assign);
1287 if (ret)
1288 return ret;
1289
1290 ret = x86_event_sched_in(leader, cpuctx);
1291 if (ret)
1292 return ret;
1293
1294 n1 = 1;
1295 list_for_each_entry(sub, &leader->sibling_list, group_entry) {
1296 if (sub->state > PERF_EVENT_STATE_OFF) {
1297 ret = x86_event_sched_in(sub, cpuctx);
1298 if (ret)
1299 goto undo;
1300 ++n1;
1301 }
1302 }
2120 /* 1303 /*
2121 * Install the hw-cache-events table: 1304 * copy new assignment, now we know it is possible
1305 * will be used by hw_perf_enable()
2122 */ 1306 */
2123 switch (boot_cpu_data.x86_model) { 1307 memcpy(cpuc->assign, assign, n0*sizeof(int));
2124 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
2125 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
2126 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
2127 case 29: /* six-core 45 nm xeon "Dunnington" */
2128 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
2129 sizeof(hw_cache_event_ids));
2130
2131 pr_cont("Core2 events, ");
2132 event_constraints = intel_core_event_constraints;
2133 break;
2134 default:
2135 case 26:
2136 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
2137 sizeof(hw_cache_event_ids));
2138 1308
2139 event_constraints = intel_nehalem_event_constraints; 1309 cpuc->n_events = n0;
2140 pr_cont("Nehalem/Corei7 events, "); 1310 cpuc->n_added = n1;
2141 break; 1311 ctx->nr_active += n1;
2142 case 28:
2143 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
2144 sizeof(hw_cache_event_ids));
2145 1312
2146 pr_cont("Atom events, "); 1313 /*
2147 break; 1314 * 1 means successful and events are active
1315 * This is not quite true because we defer
1316 * actual activation until hw_perf_enable() but
1317 * this way we ensure the caller won't try to enable
1318 * individual events
1319 */
1320 return 1;
1321undo:
1322 x86_event_sched_out(leader, cpuctx);
1323 n0 = 1;
1324 list_for_each_entry(sub, &leader->sibling_list, group_entry) {
1325 if (sub->state == PERF_EVENT_STATE_ACTIVE) {
1326 x86_event_sched_out(sub, cpuctx);
1327 if (++n0 == n1)
1328 break;
1329 }
2148 } 1330 }
2149 return 0; 1331 return ret;
2150} 1332}
2151 1333
2152static __init int amd_pmu_init(void) 1334#include "perf_event_amd.c"
2153{ 1335#include "perf_event_p6.c"
2154 /* Performance-monitoring supported from K7 and later: */ 1336#include "perf_event_intel.c"
2155 if (boot_cpu_data.x86 < 6)
2156 return -ENODEV;
2157
2158 x86_pmu = amd_pmu;
2159
2160 /* Events are common for all AMDs */
2161 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
2162 sizeof(hw_cache_event_ids));
2163
2164 return 0;
2165}
2166 1337
2167static void __init pmu_check_apic(void) 1338static void __init pmu_check_apic(void)
2168{ 1339{
@@ -2220,6 +1391,10 @@ void __init init_hw_perf_events(void)
2220 perf_events_lapic_init(); 1391 perf_events_lapic_init();
2221 register_die_notifier(&perf_event_nmi_notifier); 1392 register_die_notifier(&perf_event_nmi_notifier);
2222 1393
1394 unconstrained = (struct event_constraint)
1395 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1,
1396 0, x86_pmu.num_events);
1397
2223 pr_info("... version: %d\n", x86_pmu.version); 1398 pr_info("... version: %d\n", x86_pmu.version);
2224 pr_info("... bit width: %d\n", x86_pmu.event_bits); 1399 pr_info("... bit width: %d\n", x86_pmu.event_bits);
2225 pr_info("... generic registers: %d\n", x86_pmu.num_events); 1400 pr_info("... generic registers: %d\n", x86_pmu.num_events);
@@ -2237,50 +1412,79 @@ static inline void x86_pmu_read(struct perf_event *event)
2237static const struct pmu pmu = { 1412static const struct pmu pmu = {
2238 .enable = x86_pmu_enable, 1413 .enable = x86_pmu_enable,
2239 .disable = x86_pmu_disable, 1414 .disable = x86_pmu_disable,
1415 .start = x86_pmu_start,
1416 .stop = x86_pmu_stop,
2240 .read = x86_pmu_read, 1417 .read = x86_pmu_read,
2241 .unthrottle = x86_pmu_unthrottle, 1418 .unthrottle = x86_pmu_unthrottle,
2242}; 1419};
2243 1420
2244static int 1421/*
2245validate_event(struct cpu_hw_events *cpuc, struct perf_event *event) 1422 * validate a single event group
2246{ 1424 * validation includes:
2247 struct hw_perf_event fake_event = event->hw; 1424 * validation include:
2248 1425 * - check events are compatible which each other
2249 if (event->pmu && event->pmu != &pmu) 1426 * - events do not compete for the same counter
2250 return 0; 1427 * - number of events <= number of counters
2251 1428 *
2252 return x86_schedule_event(cpuc, &fake_event) >= 0; 1429 * validation ensures the group can be loaded onto the
2253} 1430 * PMU if it was the only group available.
2254 1431 */
2255static int validate_group(struct perf_event *event) 1432static int validate_group(struct perf_event *event)
2256{ 1433{
2257 struct perf_event *sibling, *leader = event->group_leader; 1434 struct perf_event *leader = event->group_leader;
2258 struct cpu_hw_events fake_pmu; 1435 struct cpu_hw_events *fake_cpuc;
1436 int ret, n;
2259 1437
2260 memset(&fake_pmu, 0, sizeof(fake_pmu)); 1438 ret = -ENOMEM;
1439 fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
1440 if (!fake_cpuc)
1441 goto out;
1442
1443 /*
1444 * the event is not yet connected with its
1445 * siblings therefore we must first collect
1446 * existing siblings, then add the new event
1447 * before we can simulate the scheduling
1448 */
1449 ret = -ENOSPC;
1450 n = collect_events(fake_cpuc, leader, true);
1451 if (n < 0)
1452 goto out_free;
2261 1453
2262 if (!validate_event(&fake_pmu, leader)) 1454 fake_cpuc->n_events = n;
2263 return -ENOSPC; 1455 n = collect_events(fake_cpuc, event, false);
1456 if (n < 0)
1457 goto out_free;
2264 1458
2265 list_for_each_entry(sibling, &leader->sibling_list, group_entry) { 1459 fake_cpuc->n_events = n;
2266 if (!validate_event(&fake_pmu, sibling))
2267 return -ENOSPC;
2268 }
2269 1460
2270 if (!validate_event(&fake_pmu, event)) 1461 ret = x86_schedule_events(fake_cpuc, n, NULL);
2271 return -ENOSPC;
2272 1462
2273 return 0; 1463out_free:
1464 kfree(fake_cpuc);
1465out:
1466 return ret;
2274} 1467}
2275 1468
2276const struct pmu *hw_perf_event_init(struct perf_event *event) 1469const struct pmu *hw_perf_event_init(struct perf_event *event)
2277{ 1470{
1471 const struct pmu *tmp;
2278 int err; 1472 int err;
2279 1473
2280 err = __hw_perf_event_init(event); 1474 err = __hw_perf_event_init(event);
2281 if (!err) { 1475 if (!err) {
1476 /*
1477 * we temporarily connect event to its pmu
1478 * such that validate_group() can classify
1479 * it as an x86 event using is_x86_event()
1480 */
1481 tmp = event->pmu;
1482 event->pmu = &pmu;
1483
2282 if (event->group_leader != event) 1484 if (event->group_leader != event)
2283 err = validate_group(event); 1485 err = validate_group(event);
1486
1487 event->pmu = tmp;
2284 } 1488 }
2285 if (err) { 1489 if (err) {
2286 if (event->destroy) 1490 if (event->destroy)
@@ -2304,7 +1508,6 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip)
2304 1508
2305static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); 1509static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
2306static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); 1510static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
2307static DEFINE_PER_CPU(int, in_ignored_frame);
2308 1511
2309 1512
2310static void 1513static void
@@ -2320,10 +1523,6 @@ static void backtrace_warning(void *data, char *msg)
2320 1523
2321static int backtrace_stack(void *data, char *name) 1524static int backtrace_stack(void *data, char *name)
2322{ 1525{
2323 per_cpu(in_ignored_frame, smp_processor_id()) =
2324 x86_is_stack_id(NMI_STACK, name) ||
2325 x86_is_stack_id(DEBUG_STACK, name);
2326
2327 return 0; 1526 return 0;
2328} 1527}
2329 1528
@@ -2331,9 +1530,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
2331{ 1530{
2332 struct perf_callchain_entry *entry = data; 1531 struct perf_callchain_entry *entry = data;
2333 1532
2334 if (per_cpu(in_ignored_frame, smp_processor_id()))
2335 return;
2336
2337 if (reliable) 1533 if (reliable)
2338 callchain_store(entry, addr); 1534 callchain_store(entry, addr);
2339} 1535}
@@ -2440,9 +1636,6 @@ perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
2440 1636
2441 is_user = user_mode(regs); 1637 is_user = user_mode(regs);
2442 1638
2443 if (!current || current->pid == 0)
2444 return;
2445
2446 if (is_user && current->state != TASK_RUNNING) 1639 if (is_user && current->state != TASK_RUNNING)
2447 return; 1640 return;
2448 1641
@@ -2472,4 +1665,25 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2472void hw_perf_event_setup_online(int cpu) 1665void hw_perf_event_setup_online(int cpu)
2473{ 1666{
2474 init_debug_store_on_cpu(cpu); 1667 init_debug_store_on_cpu(cpu);
1668
1669 switch (boot_cpu_data.x86_vendor) {
1670 case X86_VENDOR_AMD:
1671 amd_pmu_cpu_online(cpu);
1672 break;
1673 default:
1674 return;
1675 }
1676}
1677
1678void hw_perf_event_setup_offline(int cpu)
1679{
1680 init_debug_store_on_cpu(cpu);
1681
1682 switch (boot_cpu_data.x86_vendor) {
1683 case X86_VENDOR_AMD:
1684 amd_pmu_cpu_offline(cpu);
1685 break;
1686 default:
1687 return;
1688 }
2475} 1689}
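
Taken together, the reworked core splits activation into a dry-run scheduling pass (x86_pmu_enable) and a deferred programming pass (hw_perf_enable). A condensed sketch of that flow, simplified from the functions above and not actual kernel code:

	/* Sketch: how a new event reaches a hardware counter after this rework. */
	static int add_event_sketch(struct cpu_hw_events *cpuc, struct perf_event *event)
	{
		int assign[X86_PMC_IDX_MAX];
		int n, n0 = cpuc->n_events;
		int ret;

		n = collect_events(cpuc, event, false);		/* leader + siblings */
		if (n < 0)
			return n;

		ret = x86_schedule_events(cpuc, n, assign);	/* can the set be scheduled? */
		if (ret)
			return ret;

		memcpy(cpuc->assign, assign, n * sizeof(int));	/* remember the mapping */
		cpuc->n_events = n;
		cpuc->n_added  = n - n0;	/* hw_perf_enable() programs these later */
		return 0;
	}
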
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
new file mode 100644
index 000000000000..8f3dbfda3c4f
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -0,0 +1,416 @@
1#ifdef CONFIG_CPU_SUP_AMD
2
3static DEFINE_RAW_SPINLOCK(amd_nb_lock);
4
5static __initconst u64 amd_hw_cache_event_ids
6 [PERF_COUNT_HW_CACHE_MAX]
7 [PERF_COUNT_HW_CACHE_OP_MAX]
8 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
9{
10 [ C(L1D) ] = {
11 [ C(OP_READ) ] = {
12 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
13 [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
14 },
15 [ C(OP_WRITE) ] = {
16 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
17 [ C(RESULT_MISS) ] = 0,
18 },
19 [ C(OP_PREFETCH) ] = {
20 [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
21 [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
22 },
23 },
24 [ C(L1I ) ] = {
25 [ C(OP_READ) ] = {
26 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
27 [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
28 },
29 [ C(OP_WRITE) ] = {
30 [ C(RESULT_ACCESS) ] = -1,
31 [ C(RESULT_MISS) ] = -1,
32 },
33 [ C(OP_PREFETCH) ] = {
34 [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
35 [ C(RESULT_MISS) ] = 0,
36 },
37 },
38 [ C(LL ) ] = {
39 [ C(OP_READ) ] = {
40 [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
41 [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
42 },
43 [ C(OP_WRITE) ] = {
44 [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
45 [ C(RESULT_MISS) ] = 0,
46 },
47 [ C(OP_PREFETCH) ] = {
48 [ C(RESULT_ACCESS) ] = 0,
49 [ C(RESULT_MISS) ] = 0,
50 },
51 },
52 [ C(DTLB) ] = {
53 [ C(OP_READ) ] = {
54 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
55 [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */
56 },
57 [ C(OP_WRITE) ] = {
58 [ C(RESULT_ACCESS) ] = 0,
59 [ C(RESULT_MISS) ] = 0,
60 },
61 [ C(OP_PREFETCH) ] = {
62 [ C(RESULT_ACCESS) ] = 0,
63 [ C(RESULT_MISS) ] = 0,
64 },
65 },
66 [ C(ITLB) ] = {
67 [ C(OP_READ) ] = {
 68 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */
69 [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */
70 },
71 [ C(OP_WRITE) ] = {
72 [ C(RESULT_ACCESS) ] = -1,
73 [ C(RESULT_MISS) ] = -1,
74 },
75 [ C(OP_PREFETCH) ] = {
76 [ C(RESULT_ACCESS) ] = -1,
77 [ C(RESULT_MISS) ] = -1,
78 },
79 },
80 [ C(BPU ) ] = {
81 [ C(OP_READ) ] = {
82 [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
83 [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
84 },
85 [ C(OP_WRITE) ] = {
86 [ C(RESULT_ACCESS) ] = -1,
87 [ C(RESULT_MISS) ] = -1,
88 },
89 [ C(OP_PREFETCH) ] = {
90 [ C(RESULT_ACCESS) ] = -1,
91 [ C(RESULT_MISS) ] = -1,
92 },
93 },
94};
95
96/*
97 * AMD Performance Monitor K7 and later.
98 */
99static const u64 amd_perfmon_event_map[] =
100{
101 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
102 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
103 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
104 [PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
105 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
106 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
107};
108
109static u64 amd_pmu_event_map(int hw_event)
110{
111 return amd_perfmon_event_map[hw_event];
112}
113
114static u64 amd_pmu_raw_event(u64 hw_event)
115{
116#define K7_EVNTSEL_EVENT_MASK 0xF000000FFULL
117#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL
118#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL
119#define K7_EVNTSEL_INV_MASK 0x000800000ULL
120#define K7_EVNTSEL_REG_MASK 0x0FF000000ULL
121
122#define K7_EVNTSEL_MASK \
123 (K7_EVNTSEL_EVENT_MASK | \
124 K7_EVNTSEL_UNIT_MASK | \
125 K7_EVNTSEL_EDGE_MASK | \
126 K7_EVNTSEL_INV_MASK | \
127 K7_EVNTSEL_REG_MASK)
128
129 return hw_event & K7_EVNTSEL_MASK;
130}
131
132/*
133 * AMD64 events are detected based on their event codes.
134 */
135static inline int amd_is_nb_event(struct hw_perf_event *hwc)
136{
137 return (hwc->config & 0xe0) == 0xe0;
138}
139
140static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
141 struct perf_event *event)
142{
143 struct hw_perf_event *hwc = &event->hw;
144 struct amd_nb *nb = cpuc->amd_nb;
145 int i;
146
147 /*
148 * only care about NB events
149 */
150 if (!(nb && amd_is_nb_event(hwc)))
151 return;
152
153 /*
154 * need to scan whole list because event may not have
155 * been assigned during scheduling
156 *
157 * no race condition possible because event can only
158 * be removed on one CPU at a time AND PMU is disabled
159 * when we come here
160 */
161 for (i = 0; i < x86_pmu.num_events; i++) {
162 if (nb->owners[i] == event) {
163 cmpxchg(nb->owners+i, event, NULL);
164 break;
165 }
166 }
167}
168
169 /*
170 * AMD64 NorthBridge events need special treatment because
171 * counter access needs to be synchronized across all cores
172 * of a package. Refer to BKDG section 3.12
173 *
174 * NB events are events measuring L3 cache, Hypertransport
175 * traffic. They are identified by an event code >= 0xe00.
 176 * They measure events on the NorthBridge which is shared
177 * by all cores on a package. NB events are counted on a
178 * shared set of counters. When a NB event is programmed
179 * in a counter, the data actually comes from a shared
180 * counter. Thus, access to those counters needs to be
181 * synchronized.
182 *
183 * We implement the synchronization such that no two cores
184 * can be measuring NB events using the same counters. Thus,
185 * we maintain a per-NB allocation table. The available slot
186 * is propagated using the event_constraint structure.
187 *
188 * We provide only one choice for each NB event based on
189 * the fact that only NB events have restrictions. Consequently,
190 * if a counter is available, there is a guarantee the NB event
191 * will be assigned to it. If no slot is available, an empty
192 * constraint is returned and scheduling will eventually fail
193 * for this event.
194 *
 195 * Note that all cores attached to the same NB compete for the same
196 * counters to host NB events, this is why we use atomic ops. Some
197 * multi-chip CPUs may have more than one NB.
198 *
199 * Given that resources are allocated (cmpxchg), they must be
200 * eventually freed for others to use. This is accomplished by
201 * calling amd_put_event_constraints().
202 *
203 * Non NB events are not impacted by this restriction.
204 */
205static struct event_constraint *
206amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
207{
208 struct hw_perf_event *hwc = &event->hw;
209 struct amd_nb *nb = cpuc->amd_nb;
210 struct perf_event *old = NULL;
211 int max = x86_pmu.num_events;
212 int i, j, k = -1;
213
214 /*
215 * if not NB event or no NB, then no constraints
216 */
217 if (!(nb && amd_is_nb_event(hwc)))
218 return &unconstrained;
219
220 /*
221 * detect if already present, if so reuse
222 *
223 * cannot merge with actual allocation
224 * because of possible holes
225 *
226 * event can already be present yet not assigned (in hwc->idx)
227 * because of successive calls to x86_schedule_events() from
228 * hw_perf_group_sched_in() without hw_perf_enable()
229 */
230 for (i = 0; i < max; i++) {
231 /*
232 * keep track of first free slot
233 */
234 if (k == -1 && !nb->owners[i])
235 k = i;
236
237 /* already present, reuse */
238 if (nb->owners[i] == event)
239 goto done;
240 }
241 /*
242 * not present, so grab a new slot
243 * starting either at:
244 */
245 if (hwc->idx != -1) {
246 /* previous assignment */
247 i = hwc->idx;
248 } else if (k != -1) {
249 /* start from free slot found */
250 i = k;
251 } else {
252 /*
253 * event not found, no slot found in
254 * first pass, try again from the
255 * beginning
256 */
257 i = 0;
258 }
259 j = i;
260 do {
261 old = cmpxchg(nb->owners+i, NULL, event);
262 if (!old)
263 break;
264 if (++i == max)
265 i = 0;
266 } while (i != j);
267done:
268 if (!old)
269 return &nb->event_constraints[i];
270
271 return &emptyconstraint;
272}
273
274static __initconst struct x86_pmu amd_pmu = {
275 .name = "AMD",
276 .handle_irq = x86_pmu_handle_irq,
277 .disable_all = x86_pmu_disable_all,
278 .enable_all = x86_pmu_enable_all,
279 .enable = x86_pmu_enable_event,
280 .disable = x86_pmu_disable_event,
281 .eventsel = MSR_K7_EVNTSEL0,
282 .perfctr = MSR_K7_PERFCTR0,
283 .event_map = amd_pmu_event_map,
284 .raw_event = amd_pmu_raw_event,
285 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
286 .num_events = 4,
287 .event_bits = 48,
288 .event_mask = (1ULL << 48) - 1,
289 .apic = 1,
290 /* use highest bit to detect overflow */
291 .max_period = (1ULL << 47) - 1,
292 .get_event_constraints = amd_get_event_constraints,
293 .put_event_constraints = amd_put_event_constraints
294};
295
296static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
297{
298 struct amd_nb *nb;
299 int i;
300
301 nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL);
302 if (!nb)
303 return NULL;
304
305 memset(nb, 0, sizeof(*nb));
306 nb->nb_id = nb_id;
307
308 /*
309 * initialize all possible NB constraints
310 */
311 for (i = 0; i < x86_pmu.num_events; i++) {
312 set_bit(i, nb->event_constraints[i].idxmsk);
313 nb->event_constraints[i].weight = 1;
314 }
315 return nb;
316}
317
318static void amd_pmu_cpu_online(int cpu)
319{
320 struct cpu_hw_events *cpu1, *cpu2;
321 struct amd_nb *nb = NULL;
322 int i, nb_id;
323
324 if (boot_cpu_data.x86_max_cores < 2)
325 return;
326
327 /*
328 * function may be called too early in the
329 * boot process, in which case nb_id is bogus
330 */
331 nb_id = amd_get_nb_id(cpu);
332 if (nb_id == BAD_APICID)
333 return;
334
335 cpu1 = &per_cpu(cpu_hw_events, cpu);
336 cpu1->amd_nb = NULL;
337
338 raw_spin_lock(&amd_nb_lock);
339
340 for_each_online_cpu(i) {
341 cpu2 = &per_cpu(cpu_hw_events, i);
342 nb = cpu2->amd_nb;
343 if (!nb)
344 continue;
345 if (nb->nb_id == nb_id)
346 goto found;
347 }
348
349 nb = amd_alloc_nb(cpu, nb_id);
350 if (!nb) {
351 pr_err("perf_events: failed NB allocation for CPU%d\n", cpu);
352 raw_spin_unlock(&amd_nb_lock);
353 return;
354 }
355found:
356 nb->refcnt++;
357 cpu1->amd_nb = nb;
358
359 raw_spin_unlock(&amd_nb_lock);
360}
361
362static void amd_pmu_cpu_offline(int cpu)
363{
364 struct cpu_hw_events *cpuhw;
365
366 if (boot_cpu_data.x86_max_cores < 2)
367 return;
368
369 cpuhw = &per_cpu(cpu_hw_events, cpu);
370
371 raw_spin_lock(&amd_nb_lock);
372
373 if (--cpuhw->amd_nb->refcnt == 0)
374 kfree(cpuhw->amd_nb);
375
376 cpuhw->amd_nb = NULL;
377
378 raw_spin_unlock(&amd_nb_lock);
379}
380
381static __init int amd_pmu_init(void)
382{
383 /* Performance-monitoring supported from K7 and later: */
384 if (boot_cpu_data.x86 < 6)
385 return -ENODEV;
386
387 x86_pmu = amd_pmu;
388
389 /* Events are common for all AMDs */
390 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
391 sizeof(hw_cache_event_ids));
392
393 /*
394 * explicitly initialize the boot cpu, other cpus will get
395 * the cpu hotplug callbacks from smp_init()
396 */
397 amd_pmu_cpu_online(smp_processor_id());
398 return 0;
399}
400
401#else /* CONFIG_CPU_SUP_AMD */
402
403static int amd_pmu_init(void)
404{
405 return 0;
406}
407
408static void amd_pmu_cpu_online(int cpu)
409{
410}
411
412static void amd_pmu_cpu_offline(int cpu)
413{
414}
415
416#endif
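
The NB constraint logic above serializes counter ownership with nothing but cmpxchg on the shared owners[] array. A minimal userspace analogue of that round-robin claim, using C11 atomics instead of the kernel's cmpxchg (illustrative only, not part of this file):

	#include <stdatomic.h>

	/* Sketch: claim one of 'max' shared slots for 'me', starting near 'start'. */
	static int claim_slot(_Atomic(void *) *owners, int max, int start, void *me)
	{
		int i = (start >= 0 && start < max) ? start : 0;
		int j = i;

		do {
			void *expected = NULL;
			/* atomically take the slot if it is still free */
			if (atomic_compare_exchange_strong(&owners[i], &expected, me))
				return i;	/* this slot now belongs to 'me' */
			if (++i == max)
				i = 0;		/* wrap around, like the kernel loop */
		} while (i != j);

		return -1;	/* no free slot: caller gets the empty constraint */
	}
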
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
new file mode 100644
index 000000000000..cf6590cf4a5f
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -0,0 +1,971 @@
1#ifdef CONFIG_CPU_SUP_INTEL
2
3/*
4 * Intel PerfMon v3. Used on Core2 and later.
5 */
6static const u64 intel_perfmon_event_map[] =
7{
8 [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
9 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
10 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
11 [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
12 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
13 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
14 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
15};
16
17static struct event_constraint intel_core_event_constraints[] =
18{
19 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
20 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
21 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
22 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
23 INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
24 INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
25 EVENT_CONSTRAINT_END
26};
27
28static struct event_constraint intel_core2_event_constraints[] =
29{
30 FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
31 FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
32 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
33 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
34 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
35 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
36 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
37 INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
38 INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
39 INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
40 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
41 EVENT_CONSTRAINT_END
42};
43
44static struct event_constraint intel_nehalem_event_constraints[] =
45{
46 FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
47 FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
48 INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
49 INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
50 INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
51 INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
52 INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
53 INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
54 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
55 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
56 EVENT_CONSTRAINT_END
57};
58
59static struct event_constraint intel_westmere_event_constraints[] =
60{
61 FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
62 FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
63 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
64 INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
65 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
66 EVENT_CONSTRAINT_END
67};
68
69static struct event_constraint intel_gen_event_constraints[] =
70{
71 FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
72 FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
73 EVENT_CONSTRAINT_END
74};
75
76static u64 intel_pmu_event_map(int hw_event)
77{
78 return intel_perfmon_event_map[hw_event];
79}
80
81static __initconst u64 westmere_hw_cache_event_ids
82 [PERF_COUNT_HW_CACHE_MAX]
83 [PERF_COUNT_HW_CACHE_OP_MAX]
84 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
85{
86 [ C(L1D) ] = {
87 [ C(OP_READ) ] = {
88 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
89 [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
90 },
91 [ C(OP_WRITE) ] = {
 92 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
93 [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
94 },
95 [ C(OP_PREFETCH) ] = {
96 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
97 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
98 },
99 },
100 [ C(L1I ) ] = {
101 [ C(OP_READ) ] = {
102 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
103 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
104 },
105 [ C(OP_WRITE) ] = {
106 [ C(RESULT_ACCESS) ] = -1,
107 [ C(RESULT_MISS) ] = -1,
108 },
109 [ C(OP_PREFETCH) ] = {
110 [ C(RESULT_ACCESS) ] = 0x0,
111 [ C(RESULT_MISS) ] = 0x0,
112 },
113 },
114 [ C(LL ) ] = {
115 [ C(OP_READ) ] = {
116 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
117 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
118 },
119 [ C(OP_WRITE) ] = {
120 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
121 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
122 },
123 [ C(OP_PREFETCH) ] = {
124 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
125 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
126 },
127 },
128 [ C(DTLB) ] = {
129 [ C(OP_READ) ] = {
130 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
131 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
132 },
133 [ C(OP_WRITE) ] = {
 134 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
135 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
136 },
137 [ C(OP_PREFETCH) ] = {
138 [ C(RESULT_ACCESS) ] = 0x0,
139 [ C(RESULT_MISS) ] = 0x0,
140 },
141 },
142 [ C(ITLB) ] = {
143 [ C(OP_READ) ] = {
144 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
145 [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */
146 },
147 [ C(OP_WRITE) ] = {
148 [ C(RESULT_ACCESS) ] = -1,
149 [ C(RESULT_MISS) ] = -1,
150 },
151 [ C(OP_PREFETCH) ] = {
152 [ C(RESULT_ACCESS) ] = -1,
153 [ C(RESULT_MISS) ] = -1,
154 },
155 },
156 [ C(BPU ) ] = {
157 [ C(OP_READ) ] = {
158 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
159 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
160 },
161 [ C(OP_WRITE) ] = {
162 [ C(RESULT_ACCESS) ] = -1,
163 [ C(RESULT_MISS) ] = -1,
164 },
165 [ C(OP_PREFETCH) ] = {
166 [ C(RESULT_ACCESS) ] = -1,
167 [ C(RESULT_MISS) ] = -1,
168 },
169 },
170};
171
172static __initconst u64 nehalem_hw_cache_event_ids
173 [PERF_COUNT_HW_CACHE_MAX]
174 [PERF_COUNT_HW_CACHE_OP_MAX]
175 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
176{
177 [ C(L1D) ] = {
178 [ C(OP_READ) ] = {
179 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
180 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
181 },
182 [ C(OP_WRITE) ] = {
183 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
184 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
185 },
186 [ C(OP_PREFETCH) ] = {
187 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
188 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
189 },
190 },
191 [ C(L1I ) ] = {
192 [ C(OP_READ) ] = {
193 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
194 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
195 },
196 [ C(OP_WRITE) ] = {
197 [ C(RESULT_ACCESS) ] = -1,
198 [ C(RESULT_MISS) ] = -1,
199 },
200 [ C(OP_PREFETCH) ] = {
201 [ C(RESULT_ACCESS) ] = 0x0,
202 [ C(RESULT_MISS) ] = 0x0,
203 },
204 },
205 [ C(LL ) ] = {
206 [ C(OP_READ) ] = {
207 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
208 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
209 },
210 [ C(OP_WRITE) ] = {
211 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
212 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
213 },
214 [ C(OP_PREFETCH) ] = {
215 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
216 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
217 },
218 },
219 [ C(DTLB) ] = {
220 [ C(OP_READ) ] = {
221 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
222 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
223 },
224 [ C(OP_WRITE) ] = {
225 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
226 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
227 },
228 [ C(OP_PREFETCH) ] = {
229 [ C(RESULT_ACCESS) ] = 0x0,
230 [ C(RESULT_MISS) ] = 0x0,
231 },
232 },
233 [ C(ITLB) ] = {
234 [ C(OP_READ) ] = {
235 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
236 [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
237 },
238 [ C(OP_WRITE) ] = {
239 [ C(RESULT_ACCESS) ] = -1,
240 [ C(RESULT_MISS) ] = -1,
241 },
242 [ C(OP_PREFETCH) ] = {
243 [ C(RESULT_ACCESS) ] = -1,
244 [ C(RESULT_MISS) ] = -1,
245 },
246 },
247 [ C(BPU ) ] = {
248 [ C(OP_READ) ] = {
249 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
250 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
251 },
252 [ C(OP_WRITE) ] = {
253 [ C(RESULT_ACCESS) ] = -1,
254 [ C(RESULT_MISS) ] = -1,
255 },
256 [ C(OP_PREFETCH) ] = {
257 [ C(RESULT_ACCESS) ] = -1,
258 [ C(RESULT_MISS) ] = -1,
259 },
260 },
261};
262
263static __initconst u64 core2_hw_cache_event_ids
264 [PERF_COUNT_HW_CACHE_MAX]
265 [PERF_COUNT_HW_CACHE_OP_MAX]
266 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
267{
268 [ C(L1D) ] = {
269 [ C(OP_READ) ] = {
270 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
271 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
272 },
273 [ C(OP_WRITE) ] = {
274 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
275 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
276 },
277 [ C(OP_PREFETCH) ] = {
278 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
279 [ C(RESULT_MISS) ] = 0,
280 },
281 },
282 [ C(L1I ) ] = {
283 [ C(OP_READ) ] = {
284 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
285 [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
286 },
287 [ C(OP_WRITE) ] = {
288 [ C(RESULT_ACCESS) ] = -1,
289 [ C(RESULT_MISS) ] = -1,
290 },
291 [ C(OP_PREFETCH) ] = {
292 [ C(RESULT_ACCESS) ] = 0,
293 [ C(RESULT_MISS) ] = 0,
294 },
295 },
296 [ C(LL ) ] = {
297 [ C(OP_READ) ] = {
298 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
299 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
300 },
301 [ C(OP_WRITE) ] = {
302 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
303 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
304 },
305 [ C(OP_PREFETCH) ] = {
306 [ C(RESULT_ACCESS) ] = 0,
307 [ C(RESULT_MISS) ] = 0,
308 },
309 },
310 [ C(DTLB) ] = {
311 [ C(OP_READ) ] = {
312 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
313 [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
314 },
315 [ C(OP_WRITE) ] = {
316 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
317 [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
318 },
319 [ C(OP_PREFETCH) ] = {
320 [ C(RESULT_ACCESS) ] = 0,
321 [ C(RESULT_MISS) ] = 0,
322 },
323 },
324 [ C(ITLB) ] = {
325 [ C(OP_READ) ] = {
326 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
327 [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
328 },
329 [ C(OP_WRITE) ] = {
330 [ C(RESULT_ACCESS) ] = -1,
331 [ C(RESULT_MISS) ] = -1,
332 },
333 [ C(OP_PREFETCH) ] = {
334 [ C(RESULT_ACCESS) ] = -1,
335 [ C(RESULT_MISS) ] = -1,
336 },
337 },
338 [ C(BPU ) ] = {
339 [ C(OP_READ) ] = {
340 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
341 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
342 },
343 [ C(OP_WRITE) ] = {
344 [ C(RESULT_ACCESS) ] = -1,
345 [ C(RESULT_MISS) ] = -1,
346 },
347 [ C(OP_PREFETCH) ] = {
348 [ C(RESULT_ACCESS) ] = -1,
349 [ C(RESULT_MISS) ] = -1,
350 },
351 },
352};
353
354static __initconst u64 atom_hw_cache_event_ids
355 [PERF_COUNT_HW_CACHE_MAX]
356 [PERF_COUNT_HW_CACHE_OP_MAX]
357 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
358{
359 [ C(L1D) ] = {
360 [ C(OP_READ) ] = {
361 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
362 [ C(RESULT_MISS) ] = 0,
363 },
364 [ C(OP_WRITE) ] = {
365 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
366 [ C(RESULT_MISS) ] = 0,
367 },
368 [ C(OP_PREFETCH) ] = {
369 [ C(RESULT_ACCESS) ] = 0x0,
370 [ C(RESULT_MISS) ] = 0,
371 },
372 },
373 [ C(L1I ) ] = {
374 [ C(OP_READ) ] = {
375 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
376 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
377 },
378 [ C(OP_WRITE) ] = {
379 [ C(RESULT_ACCESS) ] = -1,
380 [ C(RESULT_MISS) ] = -1,
381 },
382 [ C(OP_PREFETCH) ] = {
383 [ C(RESULT_ACCESS) ] = 0,
384 [ C(RESULT_MISS) ] = 0,
385 },
386 },
387 [ C(LL ) ] = {
388 [ C(OP_READ) ] = {
389 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
390 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
391 },
392 [ C(OP_WRITE) ] = {
393 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
394 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
395 },
396 [ C(OP_PREFETCH) ] = {
397 [ C(RESULT_ACCESS) ] = 0,
398 [ C(RESULT_MISS) ] = 0,
399 },
400 },
401 [ C(DTLB) ] = {
402 [ C(OP_READ) ] = {
403 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
404 [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
405 },
406 [ C(OP_WRITE) ] = {
407 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
408 [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
409 },
410 [ C(OP_PREFETCH) ] = {
411 [ C(RESULT_ACCESS) ] = 0,
412 [ C(RESULT_MISS) ] = 0,
413 },
414 },
415 [ C(ITLB) ] = {
416 [ C(OP_READ) ] = {
417 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
418 [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
419 },
420 [ C(OP_WRITE) ] = {
421 [ C(RESULT_ACCESS) ] = -1,
422 [ C(RESULT_MISS) ] = -1,
423 },
424 [ C(OP_PREFETCH) ] = {
425 [ C(RESULT_ACCESS) ] = -1,
426 [ C(RESULT_MISS) ] = -1,
427 },
428 },
429 [ C(BPU ) ] = {
430 [ C(OP_READ) ] = {
431 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
432 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
433 },
434 [ C(OP_WRITE) ] = {
435 [ C(RESULT_ACCESS) ] = -1,
436 [ C(RESULT_MISS) ] = -1,
437 },
438 [ C(OP_PREFETCH) ] = {
439 [ C(RESULT_ACCESS) ] = -1,
440 [ C(RESULT_MISS) ] = -1,
441 },
442 },
443};
444
445static u64 intel_pmu_raw_event(u64 hw_event)
446{
447#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
448#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
449#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
450#define CORE_EVNTSEL_INV_MASK 0x00800000ULL
451#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL
452
453#define CORE_EVNTSEL_MASK \
454 (INTEL_ARCH_EVTSEL_MASK | \
455 INTEL_ARCH_UNIT_MASK | \
456 INTEL_ARCH_EDGE_MASK | \
457 INTEL_ARCH_INV_MASK | \
458 INTEL_ARCH_CNT_MASK)
459
460 return hw_event & CORE_EVNTSEL_MASK;
461}
462
463static void intel_pmu_enable_bts(u64 config)
464{
465 unsigned long debugctlmsr;
466
467 debugctlmsr = get_debugctlmsr();
468
469 debugctlmsr |= X86_DEBUGCTL_TR;
470 debugctlmsr |= X86_DEBUGCTL_BTS;
471 debugctlmsr |= X86_DEBUGCTL_BTINT;
472
473 if (!(config & ARCH_PERFMON_EVENTSEL_OS))
474 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
475
476 if (!(config & ARCH_PERFMON_EVENTSEL_USR))
477 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
478
479 update_debugctlmsr(debugctlmsr);
480}
481
482static void intel_pmu_disable_bts(void)
483{
484 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
485 unsigned long debugctlmsr;
486
487 if (!cpuc->ds)
488 return;
489
490 debugctlmsr = get_debugctlmsr();
491
492 debugctlmsr &=
493 ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
494 X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
495
496 update_debugctlmsr(debugctlmsr);
497}
498
499static void intel_pmu_disable_all(void)
500{
501 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
502
503 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
504
505 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
506 intel_pmu_disable_bts();
507}
508
509static void intel_pmu_enable_all(void)
510{
511 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
512
513 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
514
515 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
516 struct perf_event *event =
517 cpuc->events[X86_PMC_IDX_FIXED_BTS];
518
519 if (WARN_ON_ONCE(!event))
520 return;
521
522 intel_pmu_enable_bts(event->hw.config);
523 }
524}
525
526static inline u64 intel_pmu_get_status(void)
527{
528 u64 status;
529
530 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
531
532 return status;
533}
534
535static inline void intel_pmu_ack_status(u64 ack)
536{
537 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
538}
539
540static inline void
541intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx)
542{
543 int idx = __idx - X86_PMC_IDX_FIXED;
544 u64 ctrl_val, mask;
545
546 mask = 0xfULL << (idx * 4);
547
548 rdmsrl(hwc->config_base, ctrl_val);
549 ctrl_val &= ~mask;
550 (void)checking_wrmsrl(hwc->config_base, ctrl_val);
551}
552
553static void intel_pmu_drain_bts_buffer(void)
554{
555 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
556 struct debug_store *ds = cpuc->ds;
557 struct bts_record {
558 u64 from;
559 u64 to;
560 u64 flags;
561 };
562 struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
563 struct bts_record *at, *top;
564 struct perf_output_handle handle;
565 struct perf_event_header header;
566 struct perf_sample_data data;
567 struct pt_regs regs;
568
569 if (!event)
570 return;
571
572 if (!ds)
573 return;
574
575 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
576 top = (struct bts_record *)(unsigned long)ds->bts_index;
577
578 if (top <= at)
579 return;
580
581 ds->bts_index = ds->bts_buffer_base;
582
583
584 data.period = event->hw.last_period;
585 data.addr = 0;
586 data.raw = NULL;
587 regs.ip = 0;
588
589 /*
590 * Prepare a generic sample, i.e. fill in the invariant fields.
591 * We will overwrite the from and to address before we output
592 * the sample.
593 */
594 perf_prepare_sample(&header, &data, event, &regs);
595
596 if (perf_output_begin(&handle, event,
597 header.size * (top - at), 1, 1))
598 return;
599
600 for (; at < top; at++) {
601 data.ip = at->from;
602 data.addr = at->to;
603
604 perf_output_sample(&handle, &header, &data, event);
605 }
606
607 perf_output_end(&handle);
608
609 /* There's new data available. */
610 event->hw.interrupts++;
611 event->pending_kill = POLL_IN;
612}
613
614static inline void
615intel_pmu_disable_event(struct hw_perf_event *hwc, int idx)
616{
617 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
618 intel_pmu_disable_bts();
619 intel_pmu_drain_bts_buffer();
620 return;
621 }
622
623 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
624 intel_pmu_disable_fixed(hwc, idx);
625 return;
626 }
627
628 x86_pmu_disable_event(hwc, idx);
629}
630
631static inline void
632intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
633{
634 int idx = __idx - X86_PMC_IDX_FIXED;
635 u64 ctrl_val, bits, mask;
636 int err;
637
638 /*
639 * Enable IRQ generation (0x8),
640 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
641 * if requested:
642 */
643 bits = 0x8ULL;
644 if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
645 bits |= 0x2;
646 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
647 bits |= 0x1;
648
649 /*
650 * ANY bit is supported in v3 and up
651 */
652 if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
653 bits |= 0x4;
654
655 bits <<= (idx * 4);
656 mask = 0xfULL << (idx * 4);
657
658 rdmsrl(hwc->config_base, ctrl_val);
659 ctrl_val &= ~mask;
660 ctrl_val |= bits;
661 err = checking_wrmsrl(hwc->config_base, ctrl_val);
662}
663
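A short worked example of the nibble arithmetic above (derived directly from the code, nothing new is introduced): for fixed counter 1, i.e. __idx = X86_PMC_IDX_FIXED + 1, with both ring 0 and ring 3 counting requested:

/*
 *   bits = 0x8 | 0x2 | 0x1  = 0xb
 *   bits <<= (1 * 4)        = 0xb0
 *   mask = 0xf << (1 * 4)   = 0xf0
 *
 * so only the second nibble of MSR_ARCH_PERFMON_FIXED_CTR_CTRL is
 * rewritten; the nibbles controlling the other fixed counters keep
 * their previous values.
 */
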
664static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
665{
666 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
667 if (!__get_cpu_var(cpu_hw_events).enabled)
668 return;
669
670 intel_pmu_enable_bts(hwc->config);
671 return;
672 }
673
674 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
675 intel_pmu_enable_fixed(hwc, idx);
676 return;
677 }
678
679 __x86_pmu_enable_event(hwc, idx);
680}
681
682/*
683 * Save and restart an expired event. Called by NMI contexts,
684 * so it has to be careful about preempting normal event ops:
685 */
686static int intel_pmu_save_and_restart(struct perf_event *event)
687{
688 struct hw_perf_event *hwc = &event->hw;
689 int idx = hwc->idx;
690 int ret;
691
692 x86_perf_event_update(event, hwc, idx);
693 ret = x86_perf_event_set_period(event, hwc, idx);
694
695 return ret;
696}
697
698static void intel_pmu_reset(void)
699{
700 struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
701 unsigned long flags;
702 int idx;
703
704 if (!x86_pmu.num_events)
705 return;
706
707 local_irq_save(flags);
708
709 printk("clearing PMU state on CPU#%d\n", smp_processor_id());
710
711 for (idx = 0; idx < x86_pmu.num_events; idx++) {
712 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
713 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
714 }
715 for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
716 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
717 }
718 if (ds)
719 ds->bts_index = ds->bts_buffer_base;
720
721 local_irq_restore(flags);
722}
723
724/*
725 * This handler is triggered by the local APIC, so the APIC IRQ handling
726 * rules apply:
727 */
728static int intel_pmu_handle_irq(struct pt_regs *regs)
729{
730 struct perf_sample_data data;
731 struct cpu_hw_events *cpuc;
732 int bit, loops;
733 u64 ack, status;
734
735 data.addr = 0;
736 data.raw = NULL;
737
738 cpuc = &__get_cpu_var(cpu_hw_events);
739
740 perf_disable();
741 intel_pmu_drain_bts_buffer();
742 status = intel_pmu_get_status();
743 if (!status) {
744 perf_enable();
745 return 0;
746 }
747
748 loops = 0;
749again:
750 if (++loops > 100) {
751 WARN_ONCE(1, "perfevents: irq loop stuck!\n");
752 perf_event_print_debug();
753 intel_pmu_reset();
754 perf_enable();
755 return 1;
756 }
757
758 inc_irq_stat(apic_perf_irqs);
759 ack = status;
760 for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
761 struct perf_event *event = cpuc->events[bit];
762
763 clear_bit(bit, (unsigned long *) &status);
764 if (!test_bit(bit, cpuc->active_mask))
765 continue;
766
767 if (!intel_pmu_save_and_restart(event))
768 continue;
769
770 data.period = event->hw.last_period;
771
772 if (perf_event_overflow(event, 1, &data, regs))
773 intel_pmu_disable_event(&event->hw, bit);
774 }
775
776 intel_pmu_ack_status(ack);
777
778 /*
779 * Repeat if there is more work to be done:
780 */
781 status = intel_pmu_get_status();
782 if (status)
783 goto again;
784
785 perf_enable();
786
787 return 1;
788}
789
790static struct event_constraint bts_constraint =
791 EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
792
793static struct event_constraint *
794intel_special_constraints(struct perf_event *event)
795{
796 unsigned int hw_event;
797
798 hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
799
800 if (unlikely((hw_event ==
801 x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
802 (event->hw.sample_period == 1))) {
803
804 return &bts_constraint;
805 }
806 return NULL;
807}
808
809static struct event_constraint *
810intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
811{
812 struct event_constraint *c;
813
814 c = intel_special_constraints(event);
815 if (c)
816 return c;
817
818 return x86_get_event_constraints(cpuc, event);
819}
820
821static __initconst struct x86_pmu core_pmu = {
822 .name = "core",
823 .handle_irq = x86_pmu_handle_irq,
824 .disable_all = x86_pmu_disable_all,
825 .enable_all = x86_pmu_enable_all,
826 .enable = x86_pmu_enable_event,
827 .disable = x86_pmu_disable_event,
828 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
829 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
830 .event_map = intel_pmu_event_map,
831 .raw_event = intel_pmu_raw_event,
832 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
833 .apic = 1,
834 /*
835 * Intel PMCs cannot be accessed sanely above 32 bit width,
836 * so we install an artificial 1<<31 period regardless of
837 * the generic event period:
838 */
839 .max_period = (1ULL << 31) - 1,
840 .get_event_constraints = intel_get_event_constraints,
841 .event_constraints = intel_core_event_constraints,
842};
843
844static __initconst struct x86_pmu intel_pmu = {
845 .name = "Intel",
846 .handle_irq = intel_pmu_handle_irq,
847 .disable_all = intel_pmu_disable_all,
848 .enable_all = intel_pmu_enable_all,
849 .enable = intel_pmu_enable_event,
850 .disable = intel_pmu_disable_event,
851 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
852 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
853 .event_map = intel_pmu_event_map,
854 .raw_event = intel_pmu_raw_event,
855 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
856 .apic = 1,
857 /*
858 * Intel PMCs cannot be accessed sanely above 32 bit width,
859 * so we install an artificial 1<<31 period regardless of
860 * the generic event period:
861 */
862 .max_period = (1ULL << 31) - 1,
863 .enable_bts = intel_pmu_enable_bts,
864 .disable_bts = intel_pmu_disable_bts,
865 .get_event_constraints = intel_get_event_constraints
866};
867
868static __init int intel_pmu_init(void)
869{
870 union cpuid10_edx edx;
871 union cpuid10_eax eax;
872 unsigned int unused;
873 unsigned int ebx;
874 int version;
875
876 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
877 /* check for P6 processor family */
878 if (boot_cpu_data.x86 == 6) {
879 return p6_pmu_init();
880 } else {
881 return -ENODEV;
882 }
883 }
884
885 /*
886 * Check whether the Architectural PerfMon supports
887 * Branch Misses Retired hw_event or not.
888 */
889 cpuid(10, &eax.full, &ebx, &unused, &edx.full);
890 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
891 return -ENODEV;
892
893 version = eax.split.version_id;
894 if (version < 2)
895 x86_pmu = core_pmu;
896 else
897 x86_pmu = intel_pmu;
898
899 x86_pmu.version = version;
900 x86_pmu.num_events = eax.split.num_events;
901 x86_pmu.event_bits = eax.split.bit_width;
902 x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1;
903
904 /*
905 * Quirk: v2 perfmon does not report fixed-purpose events, so
906 * assume at least 3 events:
907 */
908 if (version > 1)
909 x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
910
911 /*
912 * Install the hw-cache-events table:
913 */
914 switch (boot_cpu_data.x86_model) {
915 case 14: /* 65 nm core solo/duo, "Yonah" */
916 pr_cont("Core events, ");
917 break;
918
919 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
920 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
921 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
922 case 29: /* six-core 45 nm xeon "Dunnington" */
923 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
924 sizeof(hw_cache_event_ids));
925
926 x86_pmu.event_constraints = intel_core2_event_constraints;
927 pr_cont("Core2 events, ");
928 break;
929
930 case 26: /* 45 nm nehalem, "Bloomfield" */
931 case 30: /* 45 nm nehalem, "Lynnfield" */
932 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
933 sizeof(hw_cache_event_ids));
934
935 x86_pmu.event_constraints = intel_nehalem_event_constraints;
936 pr_cont("Nehalem/Corei7 events, ");
937 break;
938 case 28:
939 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
940 sizeof(hw_cache_event_ids));
941
942 x86_pmu.event_constraints = intel_gen_event_constraints;
943 pr_cont("Atom events, ");
944 break;
945
946 case 37: /* 32 nm nehalem, "Clarkdale" */
947 case 44: /* 32 nm nehalem, "Gulftown" */
948 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
949 sizeof(hw_cache_event_ids));
950
951 x86_pmu.event_constraints = intel_westmere_event_constraints;
952 pr_cont("Westmere events, ");
953 break;
954 default:
955 /*
956 * default constraints for v2 and up
957 */
958 x86_pmu.event_constraints = intel_gen_event_constraints;
959 pr_cont("generic architected perfmon, ");
960 }
961 return 0;
962}
963
964#else /* CONFIG_CPU_SUP_INTEL */
965
966static int intel_pmu_init(void)
967{
968 return 0;
969}
970
971#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
new file mode 100644
index 000000000000..1ca5ba078afd
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -0,0 +1,157 @@
1#ifdef CONFIG_CPU_SUP_INTEL
2
3/*
4 * Not sure about some of these
5 */
6static const u64 p6_perfmon_event_map[] =
7{
8 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
9 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
10 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
11 [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
12 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
13 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
14 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
15};
16
17static u64 p6_pmu_event_map(int hw_event)
18{
19 return p6_perfmon_event_map[hw_event];
20}
21
22/*
23 * Event setting that is specified not to count anything.
24 * We use this to effectively disable a counter.
25 *
26 * L2_RQSTS with 0 MESI unit mask.
27 */
28#define P6_NOP_EVENT 0x0000002EULL
29
30static u64 p6_pmu_raw_event(u64 hw_event)
31{
32#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
33#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL
34#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL
35#define P6_EVNTSEL_INV_MASK 0x00800000ULL
36#define P6_EVNTSEL_REG_MASK 0xFF000000ULL
37
38#define P6_EVNTSEL_MASK \
39 (P6_EVNTSEL_EVENT_MASK | \
40 P6_EVNTSEL_UNIT_MASK | \
41 P6_EVNTSEL_EDGE_MASK | \
42 P6_EVNTSEL_INV_MASK | \
43 P6_EVNTSEL_REG_MASK)
44
45 return hw_event & P6_EVNTSEL_MASK;
46}
47
48static struct event_constraint p6_event_constraints[] =
49{
50 INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
51 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
52 INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */
53 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
54 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
55 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
56 EVENT_CONSTRAINT_END
57};
58
59static void p6_pmu_disable_all(void)
60{
61 u64 val;
62
63 /* p6 only has one enable register */
64 rdmsrl(MSR_P6_EVNTSEL0, val);
65 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
66 wrmsrl(MSR_P6_EVNTSEL0, val);
67}
68
69static void p6_pmu_enable_all(void)
70{
71 unsigned long val;
72
73 /* p6 only has one enable register */
74 rdmsrl(MSR_P6_EVNTSEL0, val);
75 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
76 wrmsrl(MSR_P6_EVNTSEL0, val);
77}
78
79static inline void
80p6_pmu_disable_event(struct hw_perf_event *hwc, int idx)
81{
82 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
83 u64 val = P6_NOP_EVENT;
84
85 if (cpuc->enabled)
86 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
87
88 (void)checking_wrmsrl(hwc->config_base + idx, val);
89}
90
91static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
92{
93 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
94 u64 val;
95
96 val = hwc->config;
97 if (cpuc->enabled)
98 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
99
100 (void)checking_wrmsrl(hwc->config_base + idx, val);
101}
102
103static __initconst struct x86_pmu p6_pmu = {
104 .name = "p6",
105 .handle_irq = x86_pmu_handle_irq,
106 .disable_all = p6_pmu_disable_all,
107 .enable_all = p6_pmu_enable_all,
108 .enable = p6_pmu_enable_event,
109 .disable = p6_pmu_disable_event,
110 .eventsel = MSR_P6_EVNTSEL0,
111 .perfctr = MSR_P6_PERFCTR0,
112 .event_map = p6_pmu_event_map,
113 .raw_event = p6_pmu_raw_event,
114 .max_events = ARRAY_SIZE(p6_perfmon_event_map),
115 .apic = 1,
116 .max_period = (1ULL << 31) - 1,
117 .version = 0,
118 .num_events = 2,
119 /*
120 * Events have 40 bits implemented. However they are designed such
121 * that bits [32-39] are sign extensions of bit 31. As such the
 122 * effective width of an event for a P6-like PMU is 32 bits only.
123 *
124 * See IA-32 Intel Architecture Software developer manual Vol 3B
125 */
126 .event_bits = 32,
127 .event_mask = (1ULL << 32) - 1,
128 .get_event_constraints = x86_get_event_constraints,
129 .event_constraints = p6_event_constraints,
130};
131
132static __init int p6_pmu_init(void)
133{
134 switch (boot_cpu_data.x86_model) {
135 case 1:
136 case 3: /* Pentium Pro */
137 case 5:
138 case 6: /* Pentium II */
139 case 7:
140 case 8:
141 case 11: /* Pentium III */
142 case 9:
143 case 13:
144 /* Pentium M */
145 break;
146 default:
147 pr_cont("unsupported p6 CPU model %d ",
148 boot_cpu_data.x86_model);
149 return -ENODEV;
150 }
151
152 x86_pmu = p6_pmu;
153
154 return 0;
155}
156
157#endif /* CONFIG_CPU_SUP_INTEL */
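To illustrate the .event_bits/.event_mask choice above (a sketch; the helper name is made up for this example): because bits [32-39] only mirror bit 31, masking a raw counter read with (1ULL << 32) - 1 keeps every bit that actually carries information.

#include <linux/types.h>

static inline u64 p6_usable_count(u64 raw)
{
	return raw & ((1ULL << 32) - 1);	/* same value as .event_mask above */
}
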
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 898df9719afb..74f4e85a5727 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -115,17 +115,6 @@ int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
115 115
116 return !test_bit(counter, perfctr_nmi_owner); 116 return !test_bit(counter, perfctr_nmi_owner);
117} 117}
118
119/* checks the an msr for availability */
120int avail_to_resrv_perfctr_nmi(unsigned int msr)
121{
122 unsigned int counter;
123
124 counter = nmi_perfctr_msr_to_bit(msr);
125 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
126
127 return !test_bit(counter, perfctr_nmi_owner);
128}
129EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); 118EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
130 119
131int reserve_perfctr_nmi(unsigned int msr) 120int reserve_perfctr_nmi(unsigned int msr)
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index ae775ca47b25..11540a189d93 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -18,11 +18,6 @@
18 18
19#include "dumpstack.h" 19#include "dumpstack.h"
20 20
21/* Just a stub for now */
22int x86_is_stack_id(int id, char *name)
23{
24 return 0;
25}
26 21
27void dump_trace(struct task_struct *task, struct pt_regs *regs, 22void dump_trace(struct task_struct *task, struct pt_regs *regs,
28 unsigned long *stack, unsigned long bp, 23 unsigned long *stack, unsigned long bp,
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 907a90e2901c..dce99abb4496 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -33,11 +33,6 @@ static char x86_stack_ids[][8] = {
33#endif 33#endif
34}; 34};
35 35
36int x86_is_stack_id(int id, char *name)
37{
38 return x86_stack_ids[id - 1] == name;
39}
40
41static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, 36static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
42 unsigned *usedp, char **idp) 37 unsigned *usedp, char **idp)
43{ 38{
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index bb6006e3e295..dca2802c666f 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -486,8 +486,6 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
486 rcu_read_lock(); 486 rcu_read_lock();
487 487
488 bp = per_cpu(bp_per_reg[i], cpu); 488 bp = per_cpu(bp_per_reg[i], cpu);
489 if (bp)
490 rc = NOTIFY_DONE;
491 /* 489 /*
492 * Reset the 'i'th TRAP bit in dr6 to denote completion of 490 * Reset the 'i'th TRAP bit in dr6 to denote completion of
493 * exception handling 491 * exception handling
@@ -506,7 +504,13 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
506 504
507 rcu_read_unlock(); 505 rcu_read_unlock();
508 } 506 }
509 if (dr6 & (~DR_TRAP_BITS)) 507 /*
508 * Further processing in do_debug() is needed for a) user-space
509 * breakpoints (to generate signals) and b) when the system has
510 * taken exception due to multiple causes
511 */
512 if ((current->thread.debugreg6 & DR_TRAP_BITS) ||
513 (dr6 & (~DR_TRAP_BITS)))
510 rc = NOTIFY_DONE; 514 rc = NOTIFY_DONE;
511 515
512 set_debugreg(dr7, 7); 516 set_debugreg(dr7, 7);
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 5b8c7505b3bc..5de9f4a9c3fd 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -337,6 +337,9 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p)
337 337
338int __kprobes arch_prepare_kprobe(struct kprobe *p) 338int __kprobes arch_prepare_kprobe(struct kprobe *p)
339{ 339{
340 if (alternatives_text_reserved(p->addr, p->addr))
341 return -EINVAL;
342
340 if (!can_probe((unsigned long)p->addr)) 343 if (!can_probe((unsigned long)p->addr))
341 return -EILSEQ; 344 return -EILSEQ;
342 /* insn: must be on special executable page on x86. */ 345 /* insn: must be on special executable page on x86. */
@@ -429,7 +432,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
429static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, 432static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
430 struct kprobe_ctlblk *kcb) 433 struct kprobe_ctlblk *kcb)
431{ 434{
432#if !defined(CONFIG_PREEMPT) || defined(CONFIG_FREEZER) 435#if !defined(CONFIG_PREEMPT)
433 if (p->ainsn.boostable == 1 && !p->post_handler) { 436 if (p->ainsn.boostable == 1 && !p->post_handler) {
434 /* Boost up -- we can execute copied instructions directly */ 437 /* Boost up -- we can execute copied instructions directly */
435 reset_current_kprobe(); 438 reset_current_kprobe();
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0c1033d61e59..d03146f71b2f 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -140,30 +140,6 @@ static const int arg_offs_table[] = {
140#endif 140#endif
141}; 141};
142 142
143/**
144 * regs_get_argument_nth() - get Nth argument at function call
145 * @regs: pt_regs which contains registers at function entry.
146 * @n: argument number.
147 *
148 * regs_get_argument_nth() returns @n th argument of a function call.
149 * Since usually the kernel stack will be changed right after function entry,
150 * you must use this at function entry. If the @n th entry is NOT in the
151 * kernel stack or pt_regs, this returns 0.
152 */
153unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n)
154{
155 if (n < ARRAY_SIZE(arg_offs_table))
156 return *(unsigned long *)((char *)regs + arg_offs_table[n]);
157 else {
158 /*
159 * The typical case: arg n is on the stack.
160 * (Note: stack[0] = return address, so skip it)
161 */
162 n -= ARRAY_SIZE(arg_offs_table);
163 return regs_get_kernel_stack_nth(regs, 1 + n);
164 }
165}
166
167/* 143/*
168 * does not yet catch signals sent when the child dies. 144 * does not yet catch signals sent when the child dies.
169 * in exit.c or in signal.c. 145 * in exit.c or in signal.c.
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 33399176512a..1168e4454188 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -534,6 +534,9 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
534 534
535 get_debugreg(dr6, 6); 535 get_debugreg(dr6, 6);
536 536
537 /* Filter out all the reserved bits which are preset to 1 */
538 dr6 &= ~DR6_RESERVED;
539
537 /* Catch kmemcheck conditions first of all! */ 540 /* Catch kmemcheck conditions first of all! */
538 if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) 541 if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
539 return; 542 return;
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index c05a29cb9bb2..25b8b2f33ae9 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -25,7 +25,7 @@
25static __inline__ int get_bitmask_order(unsigned int count) 25static __inline__ int get_bitmask_order(unsigned int count)
26{ 26{
27 int order; 27 int order;
28 28
29 order = fls(count); 29 order = fls(count);
30 return order; /* We could be slightly more clever with -1 here... */ 30 return order; /* We could be slightly more clever with -1 here... */
31} 31}
@@ -33,7 +33,7 @@ static __inline__ int get_bitmask_order(unsigned int count)
33static __inline__ int get_count_order(unsigned int count) 33static __inline__ int get_count_order(unsigned int count)
34{ 34{
35 int order; 35 int order;
36 36
37 order = fls(count) - 1; 37 order = fls(count) - 1;
38 if (count & (count - 1)) 38 if (count & (count - 1))
39 order++; 39 order++;
@@ -45,6 +45,31 @@ static inline unsigned long hweight_long(unsigned long w)
45 return sizeof(w) == 4 ? hweight32(w) : hweight64(w); 45 return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
46} 46}
47 47
48/*
49 * Clearly slow versions of the hweightN() functions, their benefit is
50 * of course compile time evaluation of constant arguments.
51 */
52#define HWEIGHT8(w) \
53 ( BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + \
54 (!!((w) & (1ULL << 0))) + \
55 (!!((w) & (1ULL << 1))) + \
56 (!!((w) & (1ULL << 2))) + \
57 (!!((w) & (1ULL << 3))) + \
58 (!!((w) & (1ULL << 4))) + \
59 (!!((w) & (1ULL << 5))) + \
60 (!!((w) & (1ULL << 6))) + \
61 (!!((w) & (1ULL << 7))) )
62
63#define HWEIGHT16(w) (HWEIGHT8(w) + HWEIGHT8((w) >> 8))
64#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16((w) >> 16))
65#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32((w) >> 32))
66
67/*
68 * Type invariant version that simply casts things to the
69 * largest type.
70 */
71#define HWEIGHT(w) HWEIGHT64((u64)(w))
72
48/** 73/**
49 * rol32 - rotate a 32-bit value left 74 * rol32 - rotate a 32-bit value left
50 * @word: value to rotate 75 * @word: value to rotate
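As a quick check of the new compile-time helpers (illustrative values, not from the patch): 0xA3 is 1010 0011 in binary, so HWEIGHT8(0xA3) folds to the constant 4, and because every term is guarded by BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)), a non-constant argument fails the build instead of silently expanding into a slow runtime computation. Since the result is an integer constant expression, it can size arrays:

#define MY_PIN_MASK	0xF0F0u			/* hypothetical driver mask     */
static int pin_state[HWEIGHT(MY_PIN_MASK)];	/* == 8, a constant expression  */
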
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 1cbb36f2759c..01e6adea07ec 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -134,6 +134,8 @@ extern void
134unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops); 134unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops);
135extern void unregister_ftrace_function_probe_all(char *glob); 135extern void unregister_ftrace_function_probe_all(char *glob);
136 136
137extern int ftrace_text_reserved(void *start, void *end);
138
137enum { 139enum {
138 FTRACE_FL_FREE = (1 << 0), 140 FTRACE_FL_FREE = (1 << 0),
139 FTRACE_FL_FAILED = (1 << 1), 141 FTRACE_FL_FAILED = (1 << 1),
@@ -141,7 +143,6 @@ enum {
141 FTRACE_FL_ENABLED = (1 << 3), 143 FTRACE_FL_ENABLED = (1 << 3),
142 FTRACE_FL_NOTRACE = (1 << 4), 144 FTRACE_FL_NOTRACE = (1 << 4),
143 FTRACE_FL_CONVERTED = (1 << 5), 145 FTRACE_FL_CONVERTED = (1 << 5),
144 FTRACE_FL_FROZEN = (1 << 6),
145}; 146};
146 147
147struct dyn_ftrace { 148struct dyn_ftrace {
@@ -250,6 +251,10 @@ static inline int unregister_ftrace_command(char *cmd_name)
250{ 251{
251 return -EINVAL; 252 return -EINVAL;
252} 253}
254static inline int ftrace_text_reserved(void *start, void *end)
255{
256 return 0;
257}
253#endif /* CONFIG_DYNAMIC_FTRACE */ 258#endif /* CONFIG_DYNAMIC_FTRACE */
254 259
255/* totally disable ftrace - can not re-enable after this */ 260/* totally disable ftrace - can not re-enable after this */
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 84a5629adfd8..6b7c444ab8f6 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -5,6 +5,7 @@
5#include <linux/trace_seq.h> 5#include <linux/trace_seq.h>
6#include <linux/percpu.h> 6#include <linux/percpu.h>
7#include <linux/hardirq.h> 7#include <linux/hardirq.h>
8#include <linux/perf_event.h>
8 9
9struct trace_array; 10struct trace_array;
10struct tracer; 11struct tracer;
@@ -137,9 +138,6 @@ struct ftrace_event_call {
137 138
138#define FTRACE_MAX_PROFILE_SIZE 2048 139#define FTRACE_MAX_PROFILE_SIZE 2048
139 140
140extern char *perf_trace_buf;
141extern char *perf_trace_buf_nmi;
142
143#define MAX_FILTER_PRED 32 141#define MAX_FILTER_PRED 32
144#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ 142#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */
145 143
@@ -187,13 +185,27 @@ do { \
187 __trace_printk(ip, fmt, ##args); \ 185 __trace_printk(ip, fmt, ##args); \
188} while (0) 186} while (0)
189 187
190#ifdef CONFIG_EVENT_PROFILE 188#ifdef CONFIG_PERF_EVENTS
191struct perf_event; 189struct perf_event;
192extern int ftrace_profile_enable(int event_id); 190extern int ftrace_profile_enable(int event_id);
193extern void ftrace_profile_disable(int event_id); 191extern void ftrace_profile_disable(int event_id);
194extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, 192extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
195 char *filter_str); 193 char *filter_str);
196extern void ftrace_profile_free_filter(struct perf_event *event); 194extern void ftrace_profile_free_filter(struct perf_event *event);
195extern void *
196ftrace_perf_buf_prepare(int size, unsigned short type, int *rctxp,
197 unsigned long *irq_flags);
198
199static inline void
200ftrace_perf_buf_submit(void *raw_data, int size, int rctx, u64 addr,
201 u64 count, unsigned long irq_flags)
202{
203 struct trace_entry *entry = raw_data;
204
205 perf_tp_event(entry->type, addr, count, raw_data, size);
206 perf_swevent_put_recursion_context(rctx);
207 local_irq_restore(irq_flags);
208}
197#endif 209#endif
198 210
199#endif /* _LINUX_FTRACE_EVENT_H */ 211#endif /* _LINUX_FTRACE_EVENT_H */
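The calling pattern the two new helpers are built for (a condensed sketch of what the reworked ftrace_profile_templ_##call template later in this patch expands to; the probe name and sizing here are only illustrative):

static void example_profile_probe(struct ftrace_event_call *event_call)
{
	struct trace_entry *entry;
	unsigned long irq_flags;
	int size, rctx;

	/* room for the common header plus payload, u64-aligned */
	size = ALIGN(sizeof(*entry) + sizeof(u32), sizeof(u64));

	entry = ftrace_perf_buf_prepare(size, event_call->id,
					&rctx, &irq_flags);
	if (!entry)
		return;

	/* fill in the event-specific fields of *entry here */

	ftrace_perf_buf_submit(entry, size, rctx, 0 /* addr */,
			       1 /* count */, irq_flags);
}
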
diff --git a/include/linux/list.h b/include/linux/list.h
index 969f6e92d089..5d9c6558e8ab 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -206,6 +206,20 @@ static inline int list_empty_careful(const struct list_head *head)
206} 206}
207 207
208/** 208/**
209 * list_rotate_left - rotate the list to the left
210 * @head: the head of the list
211 */
212static inline void list_rotate_left(struct list_head *head)
213{
214 struct list_head *first;
215
216 if (!list_empty(head)) {
217 first = head->next;
218 list_move_tail(first, head);
219 }
220}
221
222/**
209 * list_is_singular - tests whether a list has just one entry. 223 * list_is_singular - tests whether a list has just one entry.
210 * @head: the list to test. 224 * @head: the list to test.
211 */ 225 */
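A small usage sketch of the new helper (hypothetical list, not from this patch):

static void demo_rotate(struct list_head *queue)
{
	/* with entries A, B, C queued in that order ...            */
	list_rotate_left(queue);
	/* ... the list now reads B, C, A; an empty list is a no-op */
}
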
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a177698d95e2..7b18b4fd5df7 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -288,7 +288,7 @@ struct perf_event_mmap_page {
288}; 288};
289 289
290#define PERF_RECORD_MISC_CPUMODE_MASK (3 << 0) 290#define PERF_RECORD_MISC_CPUMODE_MASK (3 << 0)
291#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) 291#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0)
292#define PERF_RECORD_MISC_KERNEL (1 << 0) 292#define PERF_RECORD_MISC_KERNEL (1 << 0)
293#define PERF_RECORD_MISC_USER (2 << 0) 293#define PERF_RECORD_MISC_USER (2 << 0)
294#define PERF_RECORD_MISC_HYPERVISOR (3 << 0) 294#define PERF_RECORD_MISC_HYPERVISOR (3 << 0)
@@ -354,8 +354,8 @@ enum perf_event_type {
354 * u64 stream_id; 354 * u64 stream_id;
355 * }; 355 * };
356 */ 356 */
357 PERF_RECORD_THROTTLE = 5, 357 PERF_RECORD_THROTTLE = 5,
358 PERF_RECORD_UNTHROTTLE = 6, 358 PERF_RECORD_UNTHROTTLE = 6,
359 359
360 /* 360 /*
361 * struct { 361 * struct {
@@ -369,10 +369,10 @@ enum perf_event_type {
369 369
370 /* 370 /*
371 * struct { 371 * struct {
372 * struct perf_event_header header; 372 * struct perf_event_header header;
373 * u32 pid, tid; 373 * u32 pid, tid;
374 * 374 *
375 * struct read_format values; 375 * struct read_format values;
376 * }; 376 * };
377 */ 377 */
378 PERF_RECORD_READ = 8, 378 PERF_RECORD_READ = 8,
@@ -410,7 +410,7 @@ enum perf_event_type {
410 * char data[size];}&& PERF_SAMPLE_RAW 410 * char data[size];}&& PERF_SAMPLE_RAW
411 * }; 411 * };
412 */ 412 */
413 PERF_RECORD_SAMPLE = 9, 413 PERF_RECORD_SAMPLE = 9,
414 414
415 PERF_RECORD_MAX, /* non-ABI */ 415 PERF_RECORD_MAX, /* non-ABI */
416}; 416};
@@ -476,9 +476,11 @@ struct hw_perf_event {
476 union { 476 union {
477 struct { /* hardware */ 477 struct { /* hardware */
478 u64 config; 478 u64 config;
479 u64 last_tag;
479 unsigned long config_base; 480 unsigned long config_base;
480 unsigned long event_base; 481 unsigned long event_base;
481 int idx; 482 int idx;
483 int last_cpu;
482 }; 484 };
483 struct { /* software */ 485 struct { /* software */
484 s64 remaining; 486 s64 remaining;
@@ -496,9 +498,8 @@ struct hw_perf_event {
496 atomic64_t period_left; 498 atomic64_t period_left;
497 u64 interrupts; 499 u64 interrupts;
498 500
499 u64 freq_count; 501 u64 freq_time_stamp;
500 u64 freq_interrupts; 502 u64 freq_count_stamp;
501 u64 freq_stamp;
502#endif 503#endif
503}; 504};
504 505
@@ -510,6 +511,8 @@ struct perf_event;
510struct pmu { 511struct pmu {
511 int (*enable) (struct perf_event *event); 512 int (*enable) (struct perf_event *event);
512 void (*disable) (struct perf_event *event); 513 void (*disable) (struct perf_event *event);
514 int (*start) (struct perf_event *event);
515 void (*stop) (struct perf_event *event);
513 void (*read) (struct perf_event *event); 516 void (*read) (struct perf_event *event);
514 void (*unthrottle) (struct perf_event *event); 517 void (*unthrottle) (struct perf_event *event);
515}; 518};
@@ -563,6 +566,10 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *, int,
563 struct perf_sample_data *, 566 struct perf_sample_data *,
564 struct pt_regs *regs); 567 struct pt_regs *regs);
565 568
569enum perf_group_flag {
570 PERF_GROUP_SOFTWARE = 0x1,
571};
572
566/** 573/**
567 * struct perf_event - performance event kernel representation: 574 * struct perf_event - performance event kernel representation:
568 */ 575 */
@@ -572,6 +579,7 @@ struct perf_event {
572 struct list_head event_entry; 579 struct list_head event_entry;
573 struct list_head sibling_list; 580 struct list_head sibling_list;
574 int nr_siblings; 581 int nr_siblings;
582 int group_flags;
575 struct perf_event *group_leader; 583 struct perf_event *group_leader;
576 struct perf_event *output; 584 struct perf_event *output;
577 const struct pmu *pmu; 585 const struct pmu *pmu;
@@ -656,7 +664,7 @@ struct perf_event {
656 664
657 perf_overflow_handler_t overflow_handler; 665 perf_overflow_handler_t overflow_handler;
658 666
659#ifdef CONFIG_EVENT_PROFILE 667#ifdef CONFIG_EVENT_TRACING
660 struct event_filter *filter; 668 struct event_filter *filter;
661#endif 669#endif
662 670
@@ -681,7 +689,8 @@ struct perf_event_context {
681 */ 689 */
682 struct mutex mutex; 690 struct mutex mutex;
683 691
684 struct list_head group_list; 692 struct list_head pinned_groups;
693 struct list_head flexible_groups;
685 struct list_head event_list; 694 struct list_head event_list;
686 int nr_events; 695 int nr_events;
687 int nr_active; 696 int nr_active;
@@ -744,10 +753,9 @@ extern int perf_max_events;
744 753
745extern const struct pmu *hw_perf_event_init(struct perf_event *event); 754extern const struct pmu *hw_perf_event_init(struct perf_event *event);
746 755
747extern void perf_event_task_sched_in(struct task_struct *task, int cpu); 756extern void perf_event_task_sched_in(struct task_struct *task);
748extern void perf_event_task_sched_out(struct task_struct *task, 757extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
749 struct task_struct *next, int cpu); 758extern void perf_event_task_tick(struct task_struct *task);
750extern void perf_event_task_tick(struct task_struct *task, int cpu);
751extern int perf_event_init_task(struct task_struct *child); 759extern int perf_event_init_task(struct task_struct *child);
752extern void perf_event_exit_task(struct task_struct *child); 760extern void perf_event_exit_task(struct task_struct *child);
753extern void perf_event_free_task(struct task_struct *task); 761extern void perf_event_free_task(struct task_struct *task);
@@ -762,7 +770,7 @@ extern int perf_event_task_disable(void);
762extern int perf_event_task_enable(void); 770extern int perf_event_task_enable(void);
763extern int hw_perf_group_sched_in(struct perf_event *group_leader, 771extern int hw_perf_group_sched_in(struct perf_event *group_leader,
764 struct perf_cpu_context *cpuctx, 772 struct perf_cpu_context *cpuctx,
765 struct perf_event_context *ctx, int cpu); 773 struct perf_event_context *ctx);
766extern void perf_event_update_userpage(struct perf_event *event); 774extern void perf_event_update_userpage(struct perf_event *event);
767extern int perf_event_release_kernel(struct perf_event *event); 775extern int perf_event_release_kernel(struct perf_event *event);
768extern struct perf_event * 776extern struct perf_event *
@@ -851,8 +859,7 @@ extern int sysctl_perf_event_mlock;
851extern int sysctl_perf_event_sample_rate; 859extern int sysctl_perf_event_sample_rate;
852 860
853extern void perf_event_init(void); 861extern void perf_event_init(void);
854extern void perf_tp_event(int event_id, u64 addr, u64 count, 862extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size);
855 void *record, int entry_size);
856extern void perf_bp_event(struct perf_event *event, void *data); 863extern void perf_bp_event(struct perf_event *event, void *data);
857 864
858#ifndef perf_misc_flags 865#ifndef perf_misc_flags
@@ -873,12 +880,12 @@ extern void perf_event_enable(struct perf_event *event);
873extern void perf_event_disable(struct perf_event *event); 880extern void perf_event_disable(struct perf_event *event);
874#else 881#else
875static inline void 882static inline void
876perf_event_task_sched_in(struct task_struct *task, int cpu) { } 883perf_event_task_sched_in(struct task_struct *task) { }
877static inline void 884static inline void
878perf_event_task_sched_out(struct task_struct *task, 885perf_event_task_sched_out(struct task_struct *task,
879 struct task_struct *next, int cpu) { } 886 struct task_struct *next) { }
880static inline void 887static inline void
881perf_event_task_tick(struct task_struct *task, int cpu) { } 888perf_event_task_tick(struct task_struct *task) { }
882static inline int perf_event_init_task(struct task_struct *child) { return 0; } 889static inline int perf_event_init_task(struct task_struct *child) { return 0; }
883static inline void perf_event_exit_task(struct task_struct *child) { } 890static inline void perf_event_exit_task(struct task_struct *child) { }
884static inline void perf_event_free_task(struct task_struct *task) { } 891static inline void perf_event_free_task(struct task_struct *task) { }
@@ -893,13 +900,13 @@ static inline void
893perf_sw_event(u32 event_id, u64 nr, int nmi, 900perf_sw_event(u32 event_id, u64 nr, int nmi,
894 struct pt_regs *regs, u64 addr) { } 901 struct pt_regs *regs, u64 addr) { }
895static inline void 902static inline void
896perf_bp_event(struct perf_event *event, void *data) { } 903perf_bp_event(struct perf_event *event, void *data) { }
897 904
898static inline void perf_event_mmap(struct vm_area_struct *vma) { } 905static inline void perf_event_mmap(struct vm_area_struct *vma) { }
899static inline void perf_event_comm(struct task_struct *tsk) { } 906static inline void perf_event_comm(struct task_struct *tsk) { }
900static inline void perf_event_fork(struct task_struct *tsk) { } 907static inline void perf_event_fork(struct task_struct *tsk) { }
901static inline void perf_event_init(void) { } 908static inline void perf_event_init(void) { }
902static inline int perf_swevent_get_recursion_context(void) { return -1; } 909static inline int perf_swevent_get_recursion_context(void) { return -1; }
903static inline void perf_swevent_put_recursion_context(int rctx) { } 910static inline void perf_swevent_put_recursion_context(int rctx) { }
904static inline void perf_event_enable(struct perf_event *event) { } 911static inline void perf_event_enable(struct perf_event *event) { }
905static inline void perf_event_disable(struct perf_event *event) { } 912static inline void perf_event_disable(struct perf_event *event) { }
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 91bd7d78a07d..8126f239edf0 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -99,7 +99,7 @@ struct perf_event_attr;
99#define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__) 99#define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__)
100#define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__) 100#define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__)
101 101
102#ifdef CONFIG_EVENT_PROFILE 102#ifdef CONFIG_PERF_EVENTS
103 103
104#define TRACE_SYS_ENTER_PROFILE_INIT(sname) \ 104#define TRACE_SYS_ENTER_PROFILE_INIT(sname) \
105 .profile_enable = prof_sysenter_enable, \ 105 .profile_enable = prof_sysenter_enable, \
@@ -113,7 +113,7 @@ struct perf_event_attr;
113#define TRACE_SYS_ENTER_PROFILE_INIT(sname) 113#define TRACE_SYS_ENTER_PROFILE_INIT(sname)
114#define TRACE_SYS_EXIT_PROFILE(sname) 114#define TRACE_SYS_EXIT_PROFILE(sname)
115#define TRACE_SYS_EXIT_PROFILE_INIT(sname) 115#define TRACE_SYS_EXIT_PROFILE_INIT(sname)
116#endif 116#endif /* CONFIG_PERF_EVENTS */
117 117
118#ifdef CONFIG_FTRACE_SYSCALLS 118#ifdef CONFIG_FTRACE_SYSCALLS
119#define __SC_STR_ADECL1(t, a) #a 119#define __SC_STR_ADECL1(t, a) #a
diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h
index a870ba125aa8..5c1dcfc16c60 100644
--- a/include/trace/events/lock.h
+++ b/include/trace/events/lock.h
@@ -20,14 +20,17 @@ TRACE_EVENT(lock_acquire,
20 TP_STRUCT__entry( 20 TP_STRUCT__entry(
21 __field(unsigned int, flags) 21 __field(unsigned int, flags)
22 __string(name, lock->name) 22 __string(name, lock->name)
23 __field(void *, lockdep_addr)
23 ), 24 ),
24 25
25 TP_fast_assign( 26 TP_fast_assign(
26 __entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0); 27 __entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0);
27 __assign_str(name, lock->name); 28 __assign_str(name, lock->name);
29 __entry->lockdep_addr = lock;
28 ), 30 ),
29 31
30 TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "", 32 TP_printk("%p %s%s%s", __entry->lockdep_addr,
33 (__entry->flags & 1) ? "try " : "",
31 (__entry->flags & 2) ? "read " : "", 34 (__entry->flags & 2) ? "read " : "",
32 __get_str(name)) 35 __get_str(name))
33); 36);
@@ -40,13 +43,16 @@ TRACE_EVENT(lock_release,
40 43
41 TP_STRUCT__entry( 44 TP_STRUCT__entry(
42 __string(name, lock->name) 45 __string(name, lock->name)
46 __field(void *, lockdep_addr)
43 ), 47 ),
44 48
45 TP_fast_assign( 49 TP_fast_assign(
46 __assign_str(name, lock->name); 50 __assign_str(name, lock->name);
51 __entry->lockdep_addr = lock;
47 ), 52 ),
48 53
49 TP_printk("%s", __get_str(name)) 54 TP_printk("%p %s",
55 __entry->lockdep_addr, __get_str(name))
50); 56);
51 57
52#ifdef CONFIG_LOCK_STAT 58#ifdef CONFIG_LOCK_STAT
@@ -59,13 +65,16 @@ TRACE_EVENT(lock_contended,
59 65
60 TP_STRUCT__entry( 66 TP_STRUCT__entry(
61 __string(name, lock->name) 67 __string(name, lock->name)
68 __field(void *, lockdep_addr)
62 ), 69 ),
63 70
64 TP_fast_assign( 71 TP_fast_assign(
65 __assign_str(name, lock->name); 72 __assign_str(name, lock->name);
73 __entry->lockdep_addr = lock;
66 ), 74 ),
67 75
68 TP_printk("%s", __get_str(name)) 76 TP_printk("%p %s",
77 __entry->lockdep_addr, __get_str(name))
69); 78);
70 79
71TRACE_EVENT(lock_acquired, 80TRACE_EVENT(lock_acquired,
@@ -75,16 +84,18 @@ TRACE_EVENT(lock_acquired,
75 84
76 TP_STRUCT__entry( 85 TP_STRUCT__entry(
77 __string(name, lock->name) 86 __string(name, lock->name)
78 __field(unsigned long, wait_usec) 87 __field(s64, wait_nsec)
79 __field(unsigned long, wait_nsec_rem) 88 __field(void *, lockdep_addr)
80 ), 89 ),
90
81 TP_fast_assign( 91 TP_fast_assign(
82 __assign_str(name, lock->name); 92 __assign_str(name, lock->name);
83 __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); 93 __entry->wait_nsec = waittime;
84 __entry->wait_usec = (unsigned long) waittime; 94 __entry->lockdep_addr = lock;
85 ), 95 ),
86 TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec, 96 TP_printk("%p %s (%llu ns)", __entry->lockdep_addr,
87 __entry->wait_nsec_rem) 97 __get_str(name),
98 __entry->wait_nsec)
88); 99);
89 100
90#endif 101#endif
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index f23a0ca6910a..0804cd594803 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -376,7 +376,7 @@ static inline notrace int ftrace_get_offsets_##call( \
376 376
377#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) 377#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
378 378
379#ifdef CONFIG_EVENT_PROFILE 379#ifdef CONFIG_PERF_EVENTS
380 380
381/* 381/*
382 * Generate the functions needed for tracepoint perf_event support. 382 * Generate the functions needed for tracepoint perf_event support.
@@ -421,7 +421,7 @@ ftrace_profile_disable_##name(struct ftrace_event_call *unused) \
421 421
422#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) 422#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
423 423
424#endif 424#endif /* CONFIG_PERF_EVENTS */
425 425
426/* 426/*
427 * Stage 4 of the trace events. 427 * Stage 4 of the trace events.
@@ -505,7 +505,7 @@ ftrace_profile_disable_##name(struct ftrace_event_call *unused) \
505 * 505 *
506 */ 506 */
507 507
508#ifdef CONFIG_EVENT_PROFILE 508#ifdef CONFIG_PERF_EVENTS
509 509
510#define _TRACE_PROFILE_INIT(call) \ 510#define _TRACE_PROFILE_INIT(call) \
511 .profile_enable = ftrace_profile_enable_##call, \ 511 .profile_enable = ftrace_profile_enable_##call, \
@@ -513,7 +513,7 @@ ftrace_profile_disable_##name(struct ftrace_event_call *unused) \
513 513
514#else 514#else
515#define _TRACE_PROFILE_INIT(call) 515#define _TRACE_PROFILE_INIT(call)
516#endif 516#endif /* CONFIG_PERF_EVENTS */
517 517
518#undef __entry 518#undef __entry
519#define __entry entry 519#define __entry entry
@@ -736,7 +736,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
736 * } 736 * }
737 */ 737 */
738 738
739#ifdef CONFIG_EVENT_PROFILE 739#ifdef CONFIG_PERF_EVENTS
740 740
741#undef __entry 741#undef __entry
742#define __entry entry 742#define __entry entry
@@ -761,22 +761,12 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \
761 proto) \ 761 proto) \
762{ \ 762{ \
763 struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ 763 struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
764 extern int perf_swevent_get_recursion_context(void); \
765 extern void perf_swevent_put_recursion_context(int rctx); \
766 extern void perf_tp_event(int, u64, u64, void *, int); \
767 struct ftrace_raw_##call *entry; \ 764 struct ftrace_raw_##call *entry; \
768 u64 __addr = 0, __count = 1; \ 765 u64 __addr = 0, __count = 1; \
769 unsigned long irq_flags; \ 766 unsigned long irq_flags; \
770 struct trace_entry *ent; \
771 int __entry_size; \ 767 int __entry_size; \
772 int __data_size; \ 768 int __data_size; \
773 char *trace_buf; \
774 char *raw_data; \
775 int __cpu; \
776 int rctx; \ 769 int rctx; \
777 int pc; \
778 \
779 pc = preempt_count(); \
780 \ 770 \
781 __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ 771 __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
782 __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\ 772 __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
@@ -786,42 +776,16 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \
786 if (WARN_ONCE(__entry_size > FTRACE_MAX_PROFILE_SIZE, \ 776 if (WARN_ONCE(__entry_size > FTRACE_MAX_PROFILE_SIZE, \
787 "profile buffer not large enough")) \ 777 "profile buffer not large enough")) \
788 return; \ 778 return; \
789 \ 779 entry = (struct ftrace_raw_##call *)ftrace_perf_buf_prepare( \
790 local_irq_save(irq_flags); \ 780 __entry_size, event_call->id, &rctx, &irq_flags); \
791 \ 781 if (!entry) \
792 rctx = perf_swevent_get_recursion_context(); \ 782 return; \
793 if (rctx < 0) \
794 goto end_recursion; \
795 \
796 __cpu = smp_processor_id(); \
797 \
798 if (in_nmi()) \
799 trace_buf = rcu_dereference(perf_trace_buf_nmi); \
800 else \
801 trace_buf = rcu_dereference(perf_trace_buf); \
802 \
803 if (!trace_buf) \
804 goto end; \
805 \
806 raw_data = per_cpu_ptr(trace_buf, __cpu); \
807 \
808 *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \
809 entry = (struct ftrace_raw_##call *)raw_data; \
810 ent = &entry->ent; \
811 tracing_generic_entry_update(ent, irq_flags, pc); \
812 ent->type = event_call->id; \
813 \
814 tstruct \ 783 tstruct \
815 \ 784 \
816 { assign; } \ 785 { assign; } \
817 \ 786 \
818 perf_tp_event(event_call->id, __addr, __count, entry, \ 787 ftrace_perf_buf_submit(entry, __entry_size, rctx, __addr, \
819 __entry_size); \ 788 __count, irq_flags); \
820 \
821end: \
822 perf_swevent_put_recursion_context(rctx); \
823end_recursion: \
824 local_irq_restore(irq_flags); \
825} 789}
826 790
827#undef DEFINE_EVENT 791#undef DEFINE_EVENT
@@ -838,7 +802,7 @@ static notrace void ftrace_profile_##call(proto) \
838 DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) 802 DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
839 803
840#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) 804#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
841#endif /* CONFIG_EVENT_PROFILE */ 805#endif /* CONFIG_PERF_EVENTS */
842 806
843#undef _TRACE_PROFILE_INIT 807#undef _TRACE_PROFILE_INIT
844 808
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index 8cd410254456..0387100752f0 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -45,12 +45,12 @@ ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s);
45enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); 45enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags);
46enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); 46enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags);
47#endif 47#endif
48#ifdef CONFIG_EVENT_PROFILE 48
49#ifdef CONFIG_PERF_EVENTS
49int prof_sysenter_enable(struct ftrace_event_call *call); 50int prof_sysenter_enable(struct ftrace_event_call *call);
50void prof_sysenter_disable(struct ftrace_event_call *call); 51void prof_sysenter_disable(struct ftrace_event_call *call);
51int prof_sysexit_enable(struct ftrace_event_call *call); 52int prof_sysexit_enable(struct ftrace_event_call *call);
52void prof_sysexit_disable(struct ftrace_event_call *call); 53void prof_sysexit_disable(struct ftrace_event_call *call);
53
54#endif 54#endif
55 55
56#endif /* _TRACE_SYSCALL_H */ 56#endif /* _TRACE_SYSCALL_H */
diff --git a/init/Kconfig b/init/Kconfig
index 207ae29354a3..c6d95f8ea055 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -992,19 +992,6 @@ config PERF_EVENTS
992 992
993 Say Y if unsure. 993 Say Y if unsure.
994 994
995config EVENT_PROFILE
996 bool "Tracepoint profiling sources"
997 depends on PERF_EVENTS && EVENT_TRACING
998 default y
999 help
1000 Allow the use of tracepoints as software performance events.
1001
1002 When this is enabled, you can create perf events based on
1003 tracepoints using PERF_TYPE_TRACEPOINT and the tracepoint ID
1004 found in debugfs://tracing/events/*/*/id. (The -e/--events
1005 option to the perf tool can parse and interpret symbolic
1006 tracepoints, in the subsystem:tracepoint_name format.)
1007
1008config PERF_COUNTERS 995config PERF_COUNTERS
1009 bool "Kernel performance counters (old config option)" 996 bool "Kernel performance counters (old config option)"
1010 depends on HAVE_PERF_EVENTS 997 depends on HAVE_PERF_EVENTS
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index c4b43430d393..ccec774c716d 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -44,6 +44,7 @@
44#include <linux/debugfs.h> 44#include <linux/debugfs.h>
45#include <linux/kdebug.h> 45#include <linux/kdebug.h>
46#include <linux/memory.h> 46#include <linux/memory.h>
47#include <linux/ftrace.h>
47 48
48#include <asm-generic/sections.h> 49#include <asm-generic/sections.h>
49#include <asm/cacheflush.h> 50#include <asm/cacheflush.h>
@@ -125,30 +126,6 @@ static LIST_HEAD(kprobe_insn_pages);
125static int kprobe_garbage_slots; 126static int kprobe_garbage_slots;
126static int collect_garbage_slots(void); 127static int collect_garbage_slots(void);
127 128
128static int __kprobes check_safety(void)
129{
130 int ret = 0;
131#if defined(CONFIG_PREEMPT) && defined(CONFIG_FREEZER)
132 ret = freeze_processes();
133 if (ret == 0) {
134 struct task_struct *p, *q;
135 do_each_thread(p, q) {
136 if (p != current && p->state == TASK_RUNNING &&
137 p->pid != 0) {
138 printk("Check failed: %s is running\n",p->comm);
139 ret = -1;
140 goto loop_end;
141 }
142 } while_each_thread(p, q);
143 }
144loop_end:
145 thaw_processes();
146#else
147 synchronize_sched();
148#endif
149 return ret;
150}
151
152/** 129/**
153 * __get_insn_slot() - Find a slot on an executable page for an instruction. 130 * __get_insn_slot() - Find a slot on an executable page for an instruction.
154 * We allocate an executable page if there's no room on existing ones. 131 * We allocate an executable page if there's no room on existing ones.
@@ -236,9 +213,8 @@ static int __kprobes collect_garbage_slots(void)
236{ 213{
237 struct kprobe_insn_page *kip, *next; 214 struct kprobe_insn_page *kip, *next;
238 215
239 /* Ensure no-one is preepmted on the garbages */ 216 /* Ensure no-one is interrupted on the garbages */
240 if (check_safety()) 217 synchronize_sched();
241 return -EAGAIN;
242 218
243 list_for_each_entry_safe(kip, next, &kprobe_insn_pages, list) { 219 list_for_each_entry_safe(kip, next, &kprobe_insn_pages, list) {
244 int i; 220 int i;
@@ -729,7 +705,8 @@ int __kprobes register_kprobe(struct kprobe *p)
729 705
730 preempt_disable(); 706 preempt_disable();
731 if (!kernel_text_address((unsigned long) p->addr) || 707 if (!kernel_text_address((unsigned long) p->addr) ||
732 in_kprobes_functions((unsigned long) p->addr)) { 708 in_kprobes_functions((unsigned long) p->addr) ||
709 ftrace_text_reserved(p->addr, p->addr)) {
733 preempt_enable(); 710 preempt_enable();
734 return -EINVAL; 711 return -EINVAL;
735 } 712 }
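With ftrace_text_reserved() wired into register_kprobe(), a probe whose address overlaps an mcount call site managed by ftrace is now rejected up front rather than racing with ftrace's code patching. A minimal module sketch (not from this patch; the probe target is only an example) showing where the new -EINVAL surfaces:

/* Sketch: registering a kprobe and handling the new rejection path. */
#include <linux/module.h>
#include <linux/kprobes.h>

static int handler_pre(struct kprobe *p, struct pt_regs *regs)
{
	pr_info("kprobe hit at %p\n", p->addr);
	return 0;
}

static struct kprobe kp = {
	.symbol_name = "do_fork",	/* hypothetical probe target */
	.pre_handler = handler_pre,
};

static int __init probe_init(void)
{
	int ret = register_kprobe(&kp);

	if (ret < 0)	/* -EINVAL if the address is reserved by ftrace */
		pr_err("register_kprobe failed: %d\n", ret);
	return ret;
}

static void __exit probe_exit(void)
{
	unregister_kprobe(&kp);
}

module_init(probe_init);
module_exit(probe_exit);
MODULE_LICENSE("GPL");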
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 2ae7409bf38f..a661e7991865 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -98,11 +98,12 @@ void __weak hw_perf_enable(void) { barrier(); }
98 98
99void __weak hw_perf_event_setup(int cpu) { barrier(); } 99void __weak hw_perf_event_setup(int cpu) { barrier(); }
100void __weak hw_perf_event_setup_online(int cpu) { barrier(); } 100void __weak hw_perf_event_setup_online(int cpu) { barrier(); }
101void __weak hw_perf_event_setup_offline(int cpu) { barrier(); }
101 102
102int __weak 103int __weak
103hw_perf_group_sched_in(struct perf_event *group_leader, 104hw_perf_group_sched_in(struct perf_event *group_leader,
104 struct perf_cpu_context *cpuctx, 105 struct perf_cpu_context *cpuctx,
105 struct perf_event_context *ctx, int cpu) 106 struct perf_event_context *ctx)
106{ 107{
107 return 0; 108 return 0;
108} 109}
@@ -248,7 +249,7 @@ static void perf_unpin_context(struct perf_event_context *ctx)
248 249
249static inline u64 perf_clock(void) 250static inline u64 perf_clock(void)
250{ 251{
251 return cpu_clock(smp_processor_id()); 252 return cpu_clock(raw_smp_processor_id());
252} 253}
253 254
254/* 255/*
@@ -289,6 +290,15 @@ static void update_event_times(struct perf_event *event)
289 event->total_time_running = run_end - event->tstamp_running; 290 event->total_time_running = run_end - event->tstamp_running;
290} 291}
291 292
293static struct list_head *
294ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
295{
296 if (event->attr.pinned)
297 return &ctx->pinned_groups;
298 else
299 return &ctx->flexible_groups;
300}
301
292/* 302/*
293 * Add a event from the lists for its context. 303 * Add a event from the lists for its context.
294 * Must be called with ctx->mutex and ctx->lock held. 304 * Must be called with ctx->mutex and ctx->lock held.
@@ -303,9 +313,19 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
303 * add it straight to the context's event list, or to the group 313 * add it straight to the context's event list, or to the group
304 * leader's sibling list: 314 * leader's sibling list:
305 */ 315 */
306 if (group_leader == event) 316 if (group_leader == event) {
307 list_add_tail(&event->group_entry, &ctx->group_list); 317 struct list_head *list;
308 else { 318
319 if (is_software_event(event))
320 event->group_flags |= PERF_GROUP_SOFTWARE;
321
322 list = ctx_group_list(event, ctx);
323 list_add_tail(&event->group_entry, list);
324 } else {
325 if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
326 !is_software_event(event))
327 group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
328
309 list_add_tail(&event->group_entry, &group_leader->sibling_list); 329 list_add_tail(&event->group_entry, &group_leader->sibling_list);
310 group_leader->nr_siblings++; 330 group_leader->nr_siblings++;
311 } 331 }
@@ -355,9 +375,14 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
355 * to the context list directly: 375 * to the context list directly:
356 */ 376 */
357 list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) { 377 list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) {
378 struct list_head *list;
358 379
359 list_move_tail(&sibling->group_entry, &ctx->group_list); 380 list = ctx_group_list(event, ctx);
381 list_move_tail(&sibling->group_entry, list);
360 sibling->group_leader = sibling; 382 sibling->group_leader = sibling;
383
384 /* Inherit group flags from the previous leader */
385 sibling->group_flags = event->group_flags;
361 } 386 }
362} 387}
363 388
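The ctx_group_list() helper above is driven purely by attr.pinned. A small userspace sketch (not part of the patch) of the attribute that routes a group onto the pinned list; as later hunks show, pinned groups are scheduled before flexible ones, are excluded from rotation, and go to PERF_EVENT_STATE_ERROR if they cannot get on the PMU:

/* Sketch: the attribute bit behind ctx->pinned_groups. */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <string.h>

int open_pinned_cycles(pid_t pid)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size   = sizeof(attr);
	attr.type   = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.pinned = 1;	/* -> pinned_groups, must always be on the PMU */

	return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
}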
@@ -608,14 +633,13 @@ void perf_event_disable(struct perf_event *event)
608static int 633static int
609event_sched_in(struct perf_event *event, 634event_sched_in(struct perf_event *event,
610 struct perf_cpu_context *cpuctx, 635 struct perf_cpu_context *cpuctx,
611 struct perf_event_context *ctx, 636 struct perf_event_context *ctx)
612 int cpu)
613{ 637{
614 if (event->state <= PERF_EVENT_STATE_OFF) 638 if (event->state <= PERF_EVENT_STATE_OFF)
615 return 0; 639 return 0;
616 640
617 event->state = PERF_EVENT_STATE_ACTIVE; 641 event->state = PERF_EVENT_STATE_ACTIVE;
618 event->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ 642 event->oncpu = smp_processor_id();
619 /* 643 /*
620 * The new state must be visible before we turn it on in the hardware: 644 * The new state must be visible before we turn it on in the hardware:
621 */ 645 */
@@ -642,8 +666,7 @@ event_sched_in(struct perf_event *event,
642static int 666static int
643group_sched_in(struct perf_event *group_event, 667group_sched_in(struct perf_event *group_event,
644 struct perf_cpu_context *cpuctx, 668 struct perf_cpu_context *cpuctx,
645 struct perf_event_context *ctx, 669 struct perf_event_context *ctx)
646 int cpu)
647{ 670{
648 struct perf_event *event, *partial_group; 671 struct perf_event *event, *partial_group;
649 int ret; 672 int ret;
@@ -651,18 +674,18 @@ group_sched_in(struct perf_event *group_event,
651 if (group_event->state == PERF_EVENT_STATE_OFF) 674 if (group_event->state == PERF_EVENT_STATE_OFF)
652 return 0; 675 return 0;
653 676
654 ret = hw_perf_group_sched_in(group_event, cpuctx, ctx, cpu); 677 ret = hw_perf_group_sched_in(group_event, cpuctx, ctx);
655 if (ret) 678 if (ret)
656 return ret < 0 ? ret : 0; 679 return ret < 0 ? ret : 0;
657 680
658 if (event_sched_in(group_event, cpuctx, ctx, cpu)) 681 if (event_sched_in(group_event, cpuctx, ctx))
659 return -EAGAIN; 682 return -EAGAIN;
660 683
661 /* 684 /*
662 * Schedule in siblings as one group (if any): 685 * Schedule in siblings as one group (if any):
663 */ 686 */
664 list_for_each_entry(event, &group_event->sibling_list, group_entry) { 687 list_for_each_entry(event, &group_event->sibling_list, group_entry) {
665 if (event_sched_in(event, cpuctx, ctx, cpu)) { 688 if (event_sched_in(event, cpuctx, ctx)) {
666 partial_group = event; 689 partial_group = event;
667 goto group_error; 690 goto group_error;
668 } 691 }
@@ -686,24 +709,6 @@ group_error:
686} 709}
687 710
688/* 711/*
689 * Return 1 for a group consisting entirely of software events,
690 * 0 if the group contains any hardware events.
691 */
692static int is_software_only_group(struct perf_event *leader)
693{
694 struct perf_event *event;
695
696 if (!is_software_event(leader))
697 return 0;
698
699 list_for_each_entry(event, &leader->sibling_list, group_entry)
700 if (!is_software_event(event))
701 return 0;
702
703 return 1;
704}
705
706/*
707 * Work out whether we can put this event group on the CPU now. 712 * Work out whether we can put this event group on the CPU now.
708 */ 713 */
709static int group_can_go_on(struct perf_event *event, 714static int group_can_go_on(struct perf_event *event,
@@ -713,7 +718,7 @@ static int group_can_go_on(struct perf_event *event,
713 /* 718 /*
714 * Groups consisting entirely of software events can always go on. 719 * Groups consisting entirely of software events can always go on.
715 */ 720 */
716 if (is_software_only_group(event)) 721 if (event->group_flags & PERF_GROUP_SOFTWARE)
717 return 1; 722 return 1;
718 /* 723 /*
719 * If an exclusive group is already on, no other hardware 724 * If an exclusive group is already on, no other hardware
@@ -754,7 +759,6 @@ static void __perf_install_in_context(void *info)
754 struct perf_event *event = info; 759 struct perf_event *event = info;
755 struct perf_event_context *ctx = event->ctx; 760 struct perf_event_context *ctx = event->ctx;
756 struct perf_event *leader = event->group_leader; 761 struct perf_event *leader = event->group_leader;
757 int cpu = smp_processor_id();
758 int err; 762 int err;
759 763
760 /* 764 /*
@@ -801,7 +805,7 @@ static void __perf_install_in_context(void *info)
801 if (!group_can_go_on(event, cpuctx, 1)) 805 if (!group_can_go_on(event, cpuctx, 1))
802 err = -EEXIST; 806 err = -EEXIST;
803 else 807 else
804 err = event_sched_in(event, cpuctx, ctx, cpu); 808 err = event_sched_in(event, cpuctx, ctx);
805 809
806 if (err) { 810 if (err) {
807 /* 811 /*
@@ -943,11 +947,9 @@ static void __perf_event_enable(void *info)
943 } else { 947 } else {
944 perf_disable(); 948 perf_disable();
945 if (event == leader) 949 if (event == leader)
946 err = group_sched_in(event, cpuctx, ctx, 950 err = group_sched_in(event, cpuctx, ctx);
947 smp_processor_id());
948 else 951 else
949 err = event_sched_in(event, cpuctx, ctx, 952 err = event_sched_in(event, cpuctx, ctx);
950 smp_processor_id());
951 perf_enable(); 953 perf_enable();
952 } 954 }
953 955
@@ -1043,8 +1045,15 @@ static int perf_event_refresh(struct perf_event *event, int refresh)
1043 return 0; 1045 return 0;
1044} 1046}
1045 1047
1046void __perf_event_sched_out(struct perf_event_context *ctx, 1048enum event_type_t {
1047 struct perf_cpu_context *cpuctx) 1049 EVENT_FLEXIBLE = 0x1,
1050 EVENT_PINNED = 0x2,
1051 EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
1052};
1053
1054static void ctx_sched_out(struct perf_event_context *ctx,
1055 struct perf_cpu_context *cpuctx,
1056 enum event_type_t event_type)
1048{ 1057{
1049 struct perf_event *event; 1058 struct perf_event *event;
1050 1059
@@ -1055,10 +1064,18 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
1055 update_context_time(ctx); 1064 update_context_time(ctx);
1056 1065
1057 perf_disable(); 1066 perf_disable();
1058 if (ctx->nr_active) { 1067 if (!ctx->nr_active)
1059 list_for_each_entry(event, &ctx->group_list, group_entry) 1068 goto out_enable;
1069
1070 if (event_type & EVENT_PINNED)
1071 list_for_each_entry(event, &ctx->pinned_groups, group_entry)
1060 group_sched_out(event, cpuctx, ctx); 1072 group_sched_out(event, cpuctx, ctx);
1061 } 1073
1074 if (event_type & EVENT_FLEXIBLE)
1075 list_for_each_entry(event, &ctx->flexible_groups, group_entry)
1076 group_sched_out(event, cpuctx, ctx);
1077
1078 out_enable:
1062 perf_enable(); 1079 perf_enable();
1063 out: 1080 out:
1064 raw_spin_unlock(&ctx->lock); 1081 raw_spin_unlock(&ctx->lock);
@@ -1170,9 +1187,9 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
1170 * not restart the event. 1187 * not restart the event.
1171 */ 1188 */
1172void perf_event_task_sched_out(struct task_struct *task, 1189void perf_event_task_sched_out(struct task_struct *task,
1173 struct task_struct *next, int cpu) 1190 struct task_struct *next)
1174{ 1191{
1175 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); 1192 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
1176 struct perf_event_context *ctx = task->perf_event_ctxp; 1193 struct perf_event_context *ctx = task->perf_event_ctxp;
1177 struct perf_event_context *next_ctx; 1194 struct perf_event_context *next_ctx;
1178 struct perf_event_context *parent; 1195 struct perf_event_context *parent;
@@ -1220,15 +1237,13 @@ void perf_event_task_sched_out(struct task_struct *task,
1220 rcu_read_unlock(); 1237 rcu_read_unlock();
1221 1238
1222 if (do_switch) { 1239 if (do_switch) {
1223 __perf_event_sched_out(ctx, cpuctx); 1240 ctx_sched_out(ctx, cpuctx, EVENT_ALL);
1224 cpuctx->task_ctx = NULL; 1241 cpuctx->task_ctx = NULL;
1225 } 1242 }
1226} 1243}
1227 1244
1228/* 1245static void task_ctx_sched_out(struct perf_event_context *ctx,
1229 * Called with IRQs disabled 1246 enum event_type_t event_type)
1230 */
1231static void __perf_event_task_sched_out(struct perf_event_context *ctx)
1232{ 1247{
1233 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 1248 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
1234 1249
@@ -1238,47 +1253,41 @@ static void __perf_event_task_sched_out(struct perf_event_context *ctx)
1238 if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) 1253 if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
1239 return; 1254 return;
1240 1255
1241 __perf_event_sched_out(ctx, cpuctx); 1256 ctx_sched_out(ctx, cpuctx, event_type);
1242 cpuctx->task_ctx = NULL; 1257 cpuctx->task_ctx = NULL;
1243} 1258}
1244 1259
1245/* 1260/*
1246 * Called with IRQs disabled 1261 * Called with IRQs disabled
1247 */ 1262 */
1248static void perf_event_cpu_sched_out(struct perf_cpu_context *cpuctx) 1263static void __perf_event_task_sched_out(struct perf_event_context *ctx)
1264{
1265 task_ctx_sched_out(ctx, EVENT_ALL);
1266}
1267
1268/*
1269 * Called with IRQs disabled
1270 */
1271static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
1272 enum event_type_t event_type)
1249{ 1273{
1250 __perf_event_sched_out(&cpuctx->ctx, cpuctx); 1274 ctx_sched_out(&cpuctx->ctx, cpuctx, event_type);
1251} 1275}
1252 1276
1253static void 1277static void
1254__perf_event_sched_in(struct perf_event_context *ctx, 1278ctx_pinned_sched_in(struct perf_event_context *ctx,
1255 struct perf_cpu_context *cpuctx, int cpu) 1279 struct perf_cpu_context *cpuctx)
1256{ 1280{
1257 struct perf_event *event; 1281 struct perf_event *event;
1258 int can_add_hw = 1;
1259
1260 raw_spin_lock(&ctx->lock);
1261 ctx->is_active = 1;
1262 if (likely(!ctx->nr_events))
1263 goto out;
1264 1282
1265 ctx->timestamp = perf_clock(); 1283 list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
1266 1284 if (event->state <= PERF_EVENT_STATE_OFF)
1267 perf_disable();
1268
1269 /*
1270 * First go through the list and put on any pinned groups
1271 * in order to give them the best chance of going on.
1272 */
1273 list_for_each_entry(event, &ctx->group_list, group_entry) {
1274 if (event->state <= PERF_EVENT_STATE_OFF ||
1275 !event->attr.pinned)
1276 continue; 1285 continue;
1277 if (event->cpu != -1 && event->cpu != cpu) 1286 if (event->cpu != -1 && event->cpu != smp_processor_id())
1278 continue; 1287 continue;
1279 1288
1280 if (group_can_go_on(event, cpuctx, 1)) 1289 if (group_can_go_on(event, cpuctx, 1))
1281 group_sched_in(event, cpuctx, ctx, cpu); 1290 group_sched_in(event, cpuctx, ctx);
1282 1291
1283 /* 1292 /*
1284 * If this pinned group hasn't been scheduled, 1293 * If this pinned group hasn't been scheduled,
@@ -1289,32 +1298,83 @@ __perf_event_sched_in(struct perf_event_context *ctx,
1289 event->state = PERF_EVENT_STATE_ERROR; 1298 event->state = PERF_EVENT_STATE_ERROR;
1290 } 1299 }
1291 } 1300 }
1301}
1292 1302
1293 list_for_each_entry(event, &ctx->group_list, group_entry) { 1303static void
1294 /* 1304ctx_flexible_sched_in(struct perf_event_context *ctx,
1295 * Ignore events in OFF or ERROR state, and 1305 struct perf_cpu_context *cpuctx)
1296 * ignore pinned events since we did them already. 1306{
1297 */ 1307 struct perf_event *event;
1298 if (event->state <= PERF_EVENT_STATE_OFF || 1308 int can_add_hw = 1;
1299 event->attr.pinned)
1300 continue;
1301 1309
1310 list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
1311 /* Ignore events in OFF or ERROR state */
1312 if (event->state <= PERF_EVENT_STATE_OFF)
1313 continue;
1302 /* 1314 /*
1303 * Listen to the 'cpu' scheduling filter constraint 1315 * Listen to the 'cpu' scheduling filter constraint
1304 * of events: 1316 * of events:
1305 */ 1317 */
1306 if (event->cpu != -1 && event->cpu != cpu) 1318 if (event->cpu != -1 && event->cpu != smp_processor_id())
1307 continue; 1319 continue;
1308 1320
1309 if (group_can_go_on(event, cpuctx, can_add_hw)) 1321 if (group_can_go_on(event, cpuctx, can_add_hw))
1310 if (group_sched_in(event, cpuctx, ctx, cpu)) 1322 if (group_sched_in(event, cpuctx, ctx))
1311 can_add_hw = 0; 1323 can_add_hw = 0;
1312 } 1324 }
1325}
1326
1327static void
1328ctx_sched_in(struct perf_event_context *ctx,
1329 struct perf_cpu_context *cpuctx,
1330 enum event_type_t event_type)
1331{
1332 raw_spin_lock(&ctx->lock);
1333 ctx->is_active = 1;
1334 if (likely(!ctx->nr_events))
1335 goto out;
1336
1337 ctx->timestamp = perf_clock();
1338
1339 perf_disable();
1340
1341 /*
1342 * First go through the list and put on any pinned groups
1343 * in order to give them the best chance of going on.
1344 */
1345 if (event_type & EVENT_PINNED)
1346 ctx_pinned_sched_in(ctx, cpuctx);
1347
1348 /* Then walk through the lower prio flexible groups */
1349 if (event_type & EVENT_FLEXIBLE)
1350 ctx_flexible_sched_in(ctx, cpuctx);
1351
1313 perf_enable(); 1352 perf_enable();
1314 out: 1353 out:
1315 raw_spin_unlock(&ctx->lock); 1354 raw_spin_unlock(&ctx->lock);
1316} 1355}
1317 1356
1357static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
1358 enum event_type_t event_type)
1359{
1360 struct perf_event_context *ctx = &cpuctx->ctx;
1361
1362 ctx_sched_in(ctx, cpuctx, event_type);
1363}
1364
1365static void task_ctx_sched_in(struct task_struct *task,
1366 enum event_type_t event_type)
1367{
1368 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
1369 struct perf_event_context *ctx = task->perf_event_ctxp;
1370
1371 if (likely(!ctx))
1372 return;
1373 if (cpuctx->task_ctx == ctx)
1374 return;
1375 ctx_sched_in(ctx, cpuctx, event_type);
1376 cpuctx->task_ctx = ctx;
1377}
1318/* 1378/*
1319 * Called from scheduler to add the events of the current task 1379 * Called from scheduler to add the events of the current task
1320 * with interrupts disabled. 1380 * with interrupts disabled.
@@ -1326,38 +1386,128 @@ __perf_event_sched_in(struct perf_event_context *ctx,
1326 * accessing the event control register. If a NMI hits, then it will 1386 * accessing the event control register. If a NMI hits, then it will
1327 * keep the event running. 1387 * keep the event running.
1328 */ 1388 */
1329void perf_event_task_sched_in(struct task_struct *task, int cpu) 1389void perf_event_task_sched_in(struct task_struct *task)
1330{ 1390{
1331 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); 1391 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
1332 struct perf_event_context *ctx = task->perf_event_ctxp; 1392 struct perf_event_context *ctx = task->perf_event_ctxp;
1333 1393
1334 if (likely(!ctx)) 1394 if (likely(!ctx))
1335 return; 1395 return;
1396
1336 if (cpuctx->task_ctx == ctx) 1397 if (cpuctx->task_ctx == ctx)
1337 return; 1398 return;
1338 __perf_event_sched_in(ctx, cpuctx, cpu); 1399
1400 /*
1401 * We want to keep the following priority order:
1402 * cpu pinned (that don't need to move), task pinned,
1403 * cpu flexible, task flexible.
1404 */
1405 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
1406
1407 ctx_sched_in(ctx, cpuctx, EVENT_PINNED);
1408 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
1409 ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE);
1410
1339 cpuctx->task_ctx = ctx; 1411 cpuctx->task_ctx = ctx;
1340} 1412}
1341 1413
1342static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) 1414#define MAX_INTERRUPTS (~0ULL)
1415
1416static void perf_log_throttle(struct perf_event *event, int enable);
1417
1418static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
1343{ 1419{
1344 struct perf_event_context *ctx = &cpuctx->ctx; 1420 u64 frequency = event->attr.sample_freq;
1421 u64 sec = NSEC_PER_SEC;
1422 u64 divisor, dividend;
1423
1424 int count_fls, nsec_fls, frequency_fls, sec_fls;
1425
1426 count_fls = fls64(count);
1427 nsec_fls = fls64(nsec);
1428 frequency_fls = fls64(frequency);
1429 sec_fls = 30;
1345 1430
1346 __perf_event_sched_in(ctx, cpuctx, cpu); 1431 /*
1432 * We got @count in @nsec, with a target of sample_freq HZ
1433 * the target period becomes:
1434 *
1435 * @count * 10^9
1436 * period = -------------------
1437 * @nsec * sample_freq
1438 *
1439 */
1440
1441 /*
1442 * Reduce accuracy by one bit such that @a and @b converge
1443 * to a similar magnitude.
1444 */
1445#define REDUCE_FLS(a, b) \
1446do { \
1447 if (a##_fls > b##_fls) { \
1448 a >>= 1; \
1449 a##_fls--; \
1450 } else { \
1451 b >>= 1; \
1452 b##_fls--; \
1453 } \
1454} while (0)
1455
1456 /*
1457 * Reduce accuracy until either term fits in a u64, then proceed with
1458 * the other, so that finally we can do a u64/u64 division.
1459 */
1460 while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) {
1461 REDUCE_FLS(nsec, frequency);
1462 REDUCE_FLS(sec, count);
1463 }
1464
1465 if (count_fls + sec_fls > 64) {
1466 divisor = nsec * frequency;
1467
1468 while (count_fls + sec_fls > 64) {
1469 REDUCE_FLS(count, sec);
1470 divisor >>= 1;
1471 }
1472
1473 dividend = count * sec;
1474 } else {
1475 dividend = count * sec;
1476
1477 while (nsec_fls + frequency_fls > 64) {
1478 REDUCE_FLS(nsec, frequency);
1479 dividend >>= 1;
1480 }
1481
1482 divisor = nsec * frequency;
1483 }
1484
1485 return div64_u64(dividend, divisor);
1347} 1486}
1348 1487
1349#define MAX_INTERRUPTS (~0ULL) 1488static void perf_event_stop(struct perf_event *event)
1489{
1490 if (!event->pmu->stop)
1491 return event->pmu->disable(event);
1350 1492
1351static void perf_log_throttle(struct perf_event *event, int enable); 1493 return event->pmu->stop(event);
1494}
1495
1496static int perf_event_start(struct perf_event *event)
1497{
1498 if (!event->pmu->start)
1499 return event->pmu->enable(event);
1352 1500
1353static void perf_adjust_period(struct perf_event *event, u64 events) 1501 return event->pmu->start(event);
1502}
1503
1504static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
1354{ 1505{
1355 struct hw_perf_event *hwc = &event->hw; 1506 struct hw_perf_event *hwc = &event->hw;
1356 u64 period, sample_period; 1507 u64 period, sample_period;
1357 s64 delta; 1508 s64 delta;
1358 1509
1359 events *= hwc->sample_period; 1510 period = perf_calculate_period(event, nsec, count);
1360 period = div64_u64(events, event->attr.sample_freq);
1361 1511
1362 delta = (s64)(period - hwc->sample_period); 1512 delta = (s64)(period - hwc->sample_period);
1363 delta = (delta + 7) / 8; /* low pass filter */ 1513 delta = (delta + 7) / 8; /* low pass filter */
@@ -1368,13 +1518,22 @@ static void perf_adjust_period(struct perf_event *event, u64 events)
1368 sample_period = 1; 1518 sample_period = 1;
1369 1519
1370 hwc->sample_period = sample_period; 1520 hwc->sample_period = sample_period;
1521
1522 if (atomic64_read(&hwc->period_left) > 8*sample_period) {
1523 perf_disable();
1524 perf_event_stop(event);
1525 atomic64_set(&hwc->period_left, 0);
1526 perf_event_start(event);
1527 perf_enable();
1528 }
1371} 1529}
1372 1530
1373static void perf_ctx_adjust_freq(struct perf_event_context *ctx) 1531static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1374{ 1532{
1375 struct perf_event *event; 1533 struct perf_event *event;
1376 struct hw_perf_event *hwc; 1534 struct hw_perf_event *hwc;
1377 u64 interrupts, freq; 1535 u64 interrupts, now;
1536 s64 delta;
1378 1537
1379 raw_spin_lock(&ctx->lock); 1538 raw_spin_lock(&ctx->lock);
1380 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 1539 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
@@ -1395,44 +1554,18 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1395 if (interrupts == MAX_INTERRUPTS) { 1554 if (interrupts == MAX_INTERRUPTS) {
1396 perf_log_throttle(event, 1); 1555 perf_log_throttle(event, 1);
1397 event->pmu->unthrottle(event); 1556 event->pmu->unthrottle(event);
1398 interrupts = 2*sysctl_perf_event_sample_rate/HZ;
1399 } 1557 }
1400 1558
1401 if (!event->attr.freq || !event->attr.sample_freq) 1559 if (!event->attr.freq || !event->attr.sample_freq)
1402 continue; 1560 continue;
1403 1561
1404 /* 1562 event->pmu->read(event);
1405 * if the specified freq < HZ then we need to skip ticks 1563 now = atomic64_read(&event->count);
1406 */ 1564 delta = now - hwc->freq_count_stamp;
1407 if (event->attr.sample_freq < HZ) { 1565 hwc->freq_count_stamp = now;
1408 freq = event->attr.sample_freq;
1409
1410 hwc->freq_count += freq;
1411 hwc->freq_interrupts += interrupts;
1412
1413 if (hwc->freq_count < HZ)
1414 continue;
1415
1416 interrupts = hwc->freq_interrupts;
1417 hwc->freq_interrupts = 0;
1418 hwc->freq_count -= HZ;
1419 } else
1420 freq = HZ;
1421
1422 perf_adjust_period(event, freq * interrupts);
1423 1566
1424 /* 1567 if (delta > 0)
1425 * In order to avoid being stalled by an (accidental) huge 1568 perf_adjust_period(event, TICK_NSEC, delta);
1426 * sample period, force reset the sample period if we didn't
1427 * get any events in this freq period.
1428 */
1429 if (!interrupts) {
1430 perf_disable();
1431 event->pmu->disable(event);
1432 atomic64_set(&hwc->period_left, 0);
1433 event->pmu->enable(event);
1434 perf_enable();
1435 }
1436 } 1569 }
1437 raw_spin_unlock(&ctx->lock); 1570 raw_spin_unlock(&ctx->lock);
1438} 1571}
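The new frequency path feeds perf_adjust_period() the event-count delta accumulated over one tick, and perf_calculate_period() turns that into a target period of count * 10^9 / (nsec * sample_freq); the REDUCE_FLS shifting exists only to keep that arithmetic within u64. A standalone sketch of the same calculation (not kernel code; it simply uses GCC's 128-bit integers instead of the bit-width reduction):

/* Sketch: the ideal period perf_calculate_period() approximates. */
#include <stdio.h>
#include <stdint.h>

static uint64_t ideal_period(uint64_t count, uint64_t nsec, uint64_t freq)
{
	unsigned __int128 dividend = (unsigned __int128)count * 1000000000ULL;
	unsigned __int128 divisor  = (unsigned __int128)nsec * freq;

	return divisor ? (uint64_t)(dividend / divisor) : 0;
}

int main(void)
{
	/* e.g. 2,000,000 events in one 4 ms tick, target 1000 samples/sec:
	 * the period becomes 500,000 events per sample. */
	printf("%llu\n",
	       (unsigned long long)ideal_period(2000000, 4000000, 1000));
	return 0;
}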
@@ -1442,26 +1575,18 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1442 */ 1575 */
1443static void rotate_ctx(struct perf_event_context *ctx) 1576static void rotate_ctx(struct perf_event_context *ctx)
1444{ 1577{
1445 struct perf_event *event;
1446
1447 if (!ctx->nr_events) 1578 if (!ctx->nr_events)
1448 return; 1579 return;
1449 1580
1450 raw_spin_lock(&ctx->lock); 1581 raw_spin_lock(&ctx->lock);
1451 /* 1582
1452 * Rotate the first entry last (works just fine for group events too): 1583 /* Rotate the first entry last of non-pinned groups */
1453 */ 1584 list_rotate_left(&ctx->flexible_groups);
1454 perf_disable();
1455 list_for_each_entry(event, &ctx->group_list, group_entry) {
1456 list_move_tail(&event->group_entry, &ctx->group_list);
1457 break;
1458 }
1459 perf_enable();
1460 1585
1461 raw_spin_unlock(&ctx->lock); 1586 raw_spin_unlock(&ctx->lock);
1462} 1587}
1463 1588
1464void perf_event_task_tick(struct task_struct *curr, int cpu) 1589void perf_event_task_tick(struct task_struct *curr)
1465{ 1590{
1466 struct perf_cpu_context *cpuctx; 1591 struct perf_cpu_context *cpuctx;
1467 struct perf_event_context *ctx; 1592 struct perf_event_context *ctx;
@@ -1469,24 +1594,43 @@ void perf_event_task_tick(struct task_struct *curr, int cpu)
1469 if (!atomic_read(&nr_events)) 1594 if (!atomic_read(&nr_events))
1470 return; 1595 return;
1471 1596
1472 cpuctx = &per_cpu(perf_cpu_context, cpu); 1597 cpuctx = &__get_cpu_var(perf_cpu_context);
1473 ctx = curr->perf_event_ctxp; 1598 ctx = curr->perf_event_ctxp;
1474 1599
1600 perf_disable();
1601
1475 perf_ctx_adjust_freq(&cpuctx->ctx); 1602 perf_ctx_adjust_freq(&cpuctx->ctx);
1476 if (ctx) 1603 if (ctx)
1477 perf_ctx_adjust_freq(ctx); 1604 perf_ctx_adjust_freq(ctx);
1478 1605
1479 perf_event_cpu_sched_out(cpuctx); 1606 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
1480 if (ctx) 1607 if (ctx)
1481 __perf_event_task_sched_out(ctx); 1608 task_ctx_sched_out(ctx, EVENT_FLEXIBLE);
1482 1609
1483 rotate_ctx(&cpuctx->ctx); 1610 rotate_ctx(&cpuctx->ctx);
1484 if (ctx) 1611 if (ctx)
1485 rotate_ctx(ctx); 1612 rotate_ctx(ctx);
1486 1613
1487 perf_event_cpu_sched_in(cpuctx, cpu); 1614 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
1488 if (ctx) 1615 if (ctx)
1489 perf_event_task_sched_in(curr, cpu); 1616 task_ctx_sched_in(curr, EVENT_FLEXIBLE);
1617
1618 perf_enable();
1619}
1620
1621static int event_enable_on_exec(struct perf_event *event,
1622 struct perf_event_context *ctx)
1623{
1624 if (!event->attr.enable_on_exec)
1625 return 0;
1626
1627 event->attr.enable_on_exec = 0;
1628 if (event->state >= PERF_EVENT_STATE_INACTIVE)
1629 return 0;
1630
1631 __perf_event_mark_enabled(event, ctx);
1632
1633 return 1;
1490} 1634}
1491 1635
1492/* 1636/*
@@ -1499,6 +1643,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
1499 struct perf_event *event; 1643 struct perf_event *event;
1500 unsigned long flags; 1644 unsigned long flags;
1501 int enabled = 0; 1645 int enabled = 0;
1646 int ret;
1502 1647
1503 local_irq_save(flags); 1648 local_irq_save(flags);
1504 ctx = task->perf_event_ctxp; 1649 ctx = task->perf_event_ctxp;
@@ -1509,14 +1654,16 @@ static void perf_event_enable_on_exec(struct task_struct *task)
1509 1654
1510 raw_spin_lock(&ctx->lock); 1655 raw_spin_lock(&ctx->lock);
1511 1656
1512 list_for_each_entry(event, &ctx->group_list, group_entry) { 1657 list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
1513 if (!event->attr.enable_on_exec) 1658 ret = event_enable_on_exec(event, ctx);
1514 continue; 1659 if (ret)
1515 event->attr.enable_on_exec = 0; 1660 enabled = 1;
1516 if (event->state >= PERF_EVENT_STATE_INACTIVE) 1661 }
1517 continue; 1662
1518 __perf_event_mark_enabled(event, ctx); 1663 list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
1519 enabled = 1; 1664 ret = event_enable_on_exec(event, ctx);
1665 if (ret)
1666 enabled = 1;
1520 } 1667 }
1521 1668
1522 /* 1669 /*
@@ -1527,7 +1674,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
1527 1674
1528 raw_spin_unlock(&ctx->lock); 1675 raw_spin_unlock(&ctx->lock);
1529 1676
1530 perf_event_task_sched_in(task, smp_processor_id()); 1677 perf_event_task_sched_in(task);
1531 out: 1678 out:
1532 local_irq_restore(flags); 1679 local_irq_restore(flags);
1533} 1680}
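perf_event_enable_on_exec() is what makes attr.enable_on_exec useful: a counter can be created disabled on a child and switched on automatically at exec(), so none of the fork/exec setup is counted (this is essentially how perf stat measures a command). A rough userspace sketch, not from the patch, with the stop/continue synchronisation simplified:

/* Sketch: counting only from exec() onwards via enable_on_exec. */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <signal.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count = 0;
	pid_t child;
	int fd;

	child = fork();
	if (child == 0) {
		raise(SIGSTOP);				/* wait for the counter */
		execlp("true", "true", (char *)NULL);	/* hypothetical workload */
		_exit(127);
	}

	memset(&attr, 0, sizeof(attr));
	attr.size	    = sizeof(attr);
	attr.type	    = PERF_TYPE_HARDWARE;
	attr.config	    = PERF_COUNT_HW_INSTRUCTIONS;
	attr.disabled	    = 1;
	attr.enable_on_exec = 1;	/* flipped on by perf_event_enable_on_exec() */

	fd = syscall(__NR_perf_event_open, &attr, child, -1, -1, 0);
	kill(child, SIGCONT);		/* a real tool would wait for the stop first */
	waitpid(child, NULL, 0);

	read(fd, &count, sizeof(count));
	printf("instructions after exec: %llu\n", (unsigned long long)count);
	close(fd);
	return 0;
}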
@@ -1590,7 +1737,8 @@ __perf_event_init_context(struct perf_event_context *ctx,
1590{ 1737{
1591 raw_spin_lock_init(&ctx->lock); 1738 raw_spin_lock_init(&ctx->lock);
1592 mutex_init(&ctx->mutex); 1739 mutex_init(&ctx->mutex);
1593 INIT_LIST_HEAD(&ctx->group_list); 1740 INIT_LIST_HEAD(&ctx->pinned_groups);
1741 INIT_LIST_HEAD(&ctx->flexible_groups);
1594 INIT_LIST_HEAD(&ctx->event_list); 1742 INIT_LIST_HEAD(&ctx->event_list);
1595 atomic_set(&ctx->refcount, 1); 1743 atomic_set(&ctx->refcount, 1);
1596 ctx->task = task; 1744 ctx->task = task;
@@ -3608,7 +3756,7 @@ void __perf_event_mmap(struct vm_area_struct *vma)
3608 /* .tid */ 3756 /* .tid */
3609 .start = vma->vm_start, 3757 .start = vma->vm_start,
3610 .len = vma->vm_end - vma->vm_start, 3758 .len = vma->vm_end - vma->vm_start,
3611 .pgoff = vma->vm_pgoff, 3759 .pgoff = (u64)vma->vm_pgoff << PAGE_SHIFT,
3612 }, 3760 },
3613 }; 3761 };
3614 3762
@@ -3688,12 +3836,12 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
3688 3836
3689 if (event->attr.freq) { 3837 if (event->attr.freq) {
3690 u64 now = perf_clock(); 3838 u64 now = perf_clock();
3691 s64 delta = now - hwc->freq_stamp; 3839 s64 delta = now - hwc->freq_time_stamp;
3692 3840
3693 hwc->freq_stamp = now; 3841 hwc->freq_time_stamp = now;
3694 3842
3695 if (delta > 0 && delta < TICK_NSEC) 3843 if (delta > 0 && delta < 2*TICK_NSEC)
3696 perf_adjust_period(event, NSEC_PER_SEC / (int)delta); 3844 perf_adjust_period(event, delta, hwc->last_period);
3697 } 3845 }
3698 3846
3699 /* 3847 /*
@@ -4184,7 +4332,7 @@ static const struct pmu perf_ops_task_clock = {
4184 .read = task_clock_perf_event_read, 4332 .read = task_clock_perf_event_read,
4185}; 4333};
4186 4334
4187#ifdef CONFIG_EVENT_PROFILE 4335#ifdef CONFIG_EVENT_TRACING
4188 4336
4189void perf_tp_event(int event_id, u64 addr, u64 count, void *record, 4337void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
4190 int entry_size) 4338 int entry_size)
@@ -4289,7 +4437,7 @@ static void perf_event_free_filter(struct perf_event *event)
4289{ 4437{
4290} 4438}
4291 4439
4292#endif /* CONFIG_EVENT_PROFILE */ 4440#endif /* CONFIG_EVENT_TRACING */
4293 4441
4294#ifdef CONFIG_HAVE_HW_BREAKPOINT 4442#ifdef CONFIG_HAVE_HW_BREAKPOINT
4295static void bp_perf_event_destroy(struct perf_event *event) 4443static void bp_perf_event_destroy(struct perf_event *event)
@@ -4870,8 +5018,15 @@ inherit_event(struct perf_event *parent_event,
4870 else 5018 else
4871 child_event->state = PERF_EVENT_STATE_OFF; 5019 child_event->state = PERF_EVENT_STATE_OFF;
4872 5020
4873 if (parent_event->attr.freq) 5021 if (parent_event->attr.freq) {
4874 child_event->hw.sample_period = parent_event->hw.sample_period; 5022 u64 sample_period = parent_event->hw.sample_period;
5023 struct hw_perf_event *hwc = &child_event->hw;
5024
5025 hwc->sample_period = sample_period;
5026 hwc->last_period = sample_period;
5027
5028 atomic64_set(&hwc->period_left, sample_period);
5029 }
4875 5030
4876 child_event->overflow_handler = parent_event->overflow_handler; 5031 child_event->overflow_handler = parent_event->overflow_handler;
4877 5032
@@ -5039,7 +5194,11 @@ void perf_event_exit_task(struct task_struct *child)
5039 mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING); 5194 mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING);
5040 5195
5041again: 5196again:
5042 list_for_each_entry_safe(child_event, tmp, &child_ctx->group_list, 5197 list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
5198 group_entry)
5199 __perf_event_exit_task(child_event, child_ctx, child);
5200
5201 list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups,
5043 group_entry) 5202 group_entry)
5044 __perf_event_exit_task(child_event, child_ctx, child); 5203 __perf_event_exit_task(child_event, child_ctx, child);
5045 5204
@@ -5048,7 +5207,8 @@ again:
5048 * its siblings to the list, but we obtained 'tmp' before that which 5207 * its siblings to the list, but we obtained 'tmp' before that which
5049 * will still point to the list head terminating the iteration. 5208 * will still point to the list head terminating the iteration.
5050 */ 5209 */
5051 if (!list_empty(&child_ctx->group_list)) 5210 if (!list_empty(&child_ctx->pinned_groups) ||
5211 !list_empty(&child_ctx->flexible_groups))
5052 goto again; 5212 goto again;
5053 5213
5054 mutex_unlock(&child_ctx->mutex); 5214 mutex_unlock(&child_ctx->mutex);
@@ -5056,6 +5216,24 @@ again:
5056 put_ctx(child_ctx); 5216 put_ctx(child_ctx);
5057} 5217}
5058 5218
5219static void perf_free_event(struct perf_event *event,
5220 struct perf_event_context *ctx)
5221{
5222 struct perf_event *parent = event->parent;
5223
5224 if (WARN_ON_ONCE(!parent))
5225 return;
5226
5227 mutex_lock(&parent->child_mutex);
5228 list_del_init(&event->child_list);
5229 mutex_unlock(&parent->child_mutex);
5230
5231 fput(parent->filp);
5232
5233 list_del_event(event, ctx);
5234 free_event(event);
5235}
5236
5059/* 5237/*
5060 * free an unexposed, unused context as created by inheritance by 5238 * free an unexposed, unused context as created by inheritance by
5061 * init_task below, used by fork() in case of fail. 5239 * init_task below, used by fork() in case of fail.
@@ -5070,36 +5248,70 @@ void perf_event_free_task(struct task_struct *task)
5070 5248
5071 mutex_lock(&ctx->mutex); 5249 mutex_lock(&ctx->mutex);
5072again: 5250again:
5073 list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) { 5251 list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
5074 struct perf_event *parent = event->parent; 5252 perf_free_event(event, ctx);
5075 5253
5076 if (WARN_ON_ONCE(!parent)) 5254 list_for_each_entry_safe(event, tmp, &ctx->flexible_groups,
5077 continue; 5255 group_entry)
5256 perf_free_event(event, ctx);
5078 5257
5079 mutex_lock(&parent->child_mutex); 5258 if (!list_empty(&ctx->pinned_groups) ||
5080 list_del_init(&event->child_list); 5259 !list_empty(&ctx->flexible_groups))
5081 mutex_unlock(&parent->child_mutex); 5260 goto again;
5082 5261
5083 fput(parent->filp); 5262 mutex_unlock(&ctx->mutex);
5084 5263
5085 list_del_event(event, ctx); 5264 put_ctx(ctx);
5086 free_event(event); 5265}
5266
5267static int
5268inherit_task_group(struct perf_event *event, struct task_struct *parent,
5269 struct perf_event_context *parent_ctx,
5270 struct task_struct *child,
5271 int *inherited_all)
5272{
5273 int ret;
5274 struct perf_event_context *child_ctx = child->perf_event_ctxp;
5275
5276 if (!event->attr.inherit) {
5277 *inherited_all = 0;
5278 return 0;
5087 } 5279 }
5088 5280
5089 if (!list_empty(&ctx->group_list)) 5281 if (!child_ctx) {
5090 goto again; 5282 /*
5283 * This is executed from the parent task context, so
5284 * inherit events that have been marked for cloning.
5285 * First allocate and initialize a context for the
5286 * child.
5287 */
5091 5288
5092 mutex_unlock(&ctx->mutex); 5289 child_ctx = kzalloc(sizeof(struct perf_event_context),
5290 GFP_KERNEL);
5291 if (!child_ctx)
5292 return -ENOMEM;
5093 5293
5094 put_ctx(ctx); 5294 __perf_event_init_context(child_ctx, child);
5295 child->perf_event_ctxp = child_ctx;
5296 get_task_struct(child);
5297 }
5298
5299 ret = inherit_group(event, parent, parent_ctx,
5300 child, child_ctx);
5301
5302 if (ret)
5303 *inherited_all = 0;
5304
5305 return ret;
5095} 5306}
5096 5307
5308
5097/* 5309/*
5098 * Initialize the perf_event context in task_struct 5310 * Initialize the perf_event context in task_struct
5099 */ 5311 */
5100int perf_event_init_task(struct task_struct *child) 5312int perf_event_init_task(struct task_struct *child)
5101{ 5313{
5102 struct perf_event_context *child_ctx = NULL, *parent_ctx; 5314 struct perf_event_context *child_ctx, *parent_ctx;
5103 struct perf_event_context *cloned_ctx; 5315 struct perf_event_context *cloned_ctx;
5104 struct perf_event *event; 5316 struct perf_event *event;
5105 struct task_struct *parent = current; 5317 struct task_struct *parent = current;
@@ -5137,41 +5349,22 @@ int perf_event_init_task(struct task_struct *child)
5137 * We dont have to disable NMIs - we are only looking at 5349 * We dont have to disable NMIs - we are only looking at
5138 * the list, not manipulating it: 5350 * the list, not manipulating it:
5139 */ 5351 */
5140 list_for_each_entry(event, &parent_ctx->group_list, group_entry) { 5352 list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) {
5141 5353 ret = inherit_task_group(event, parent, parent_ctx, child,
5142 if (!event->attr.inherit) { 5354 &inherited_all);
5143 inherited_all = 0; 5355 if (ret)
5144 continue; 5356 break;
5145 } 5357 }
5146
5147 if (!child->perf_event_ctxp) {
5148 /*
5149 * This is executed from the parent task context, so
5150 * inherit events that have been marked for cloning.
5151 * First allocate and initialize a context for the
5152 * child.
5153 */
5154
5155 child_ctx = kzalloc(sizeof(struct perf_event_context),
5156 GFP_KERNEL);
5157 if (!child_ctx) {
5158 ret = -ENOMEM;
5159 break;
5160 }
5161
5162 __perf_event_init_context(child_ctx, child);
5163 child->perf_event_ctxp = child_ctx;
5164 get_task_struct(child);
5165 }
5166 5358
5167 ret = inherit_group(event, parent, parent_ctx, 5359 list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
5168 child, child_ctx); 5360 ret = inherit_task_group(event, parent, parent_ctx, child,
5169 if (ret) { 5361 &inherited_all);
5170 inherited_all = 0; 5362 if (ret)
5171 break; 5363 break;
5172 }
5173 } 5364 }
5174 5365
5366 child_ctx = child->perf_event_ctxp;
5367
5175 if (child_ctx && inherited_all) { 5368 if (child_ctx && inherited_all) {
5176 /* 5369 /*
5177 * Mark the child context as a clone of the parent 5370 * Mark the child context as a clone of the parent
@@ -5220,7 +5413,9 @@ static void __perf_event_exit_cpu(void *info)
5220 struct perf_event_context *ctx = &cpuctx->ctx; 5413 struct perf_event_context *ctx = &cpuctx->ctx;
5221 struct perf_event *event, *tmp; 5414 struct perf_event *event, *tmp;
5222 5415
5223 list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) 5416 list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
5417 __perf_event_remove_from_context(event);
5418 list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
5224 __perf_event_remove_from_context(event); 5419 __perf_event_remove_from_context(event);
5225} 5420}
5226static void perf_event_exit_cpu(int cpu) 5421static void perf_event_exit_cpu(int cpu)
@@ -5258,6 +5453,10 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
5258 perf_event_exit_cpu(cpu); 5453 perf_event_exit_cpu(cpu);
5259 break; 5454 break;
5260 5455
5456 case CPU_DEAD:
5457 hw_perf_event_setup_offline(cpu);
5458 break;
5459
5261 default: 5460 default:
5262 break; 5461 break;
5263 } 5462 }
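The CPU_DEAD case calls the new hw_perf_event_setup_offline() weak hook added near the top of this file; the default is an empty barrier, and an architecture overrides it with a strong definition to tear down per-CPU PMU state. An illustrative, purely hypothetical override:

/* Illustrative only (not in this diff): a strong override of the weak hook. */
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/slab.h>

struct pmu_cpu_state {
	void *constraint_mem;		/* hypothetical per-CPU PMU state */
};
static DEFINE_PER_CPU(struct pmu_cpu_state, pmu_cpu_state);

void hw_perf_event_setup_offline(int cpu)
{
	struct pmu_cpu_state *st = &per_cpu(pmu_cpu_state, cpu);

	kfree(st->constraint_mem);	/* free what setup(cpu) allocated */
	st->constraint_mem = NULL;
}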
diff --git a/kernel/sched.c b/kernel/sched.c
index 3218f5213717..9d163f83e5c3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2799,7 +2799,13 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
2799 */ 2799 */
2800 prev_state = prev->state; 2800 prev_state = prev->state;
2801 finish_arch_switch(prev); 2801 finish_arch_switch(prev);
2802 perf_event_task_sched_in(current, cpu_of(rq)); 2802#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
2803 local_irq_disable();
2804#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
2805 perf_event_task_sched_in(current);
2806#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
2807 local_irq_enable();
2808#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
2803 finish_lock_switch(rq, prev); 2809 finish_lock_switch(rq, prev);
2804 2810
2805 fire_sched_in_preempt_notifiers(current); 2811 fire_sched_in_preempt_notifiers(current);
@@ -5314,7 +5320,7 @@ void scheduler_tick(void)
5314 curr->sched_class->task_tick(rq, curr, 0); 5320 curr->sched_class->task_tick(rq, curr, 0);
5315 raw_spin_unlock(&rq->lock); 5321 raw_spin_unlock(&rq->lock);
5316 5322
5317 perf_event_task_tick(curr, cpu); 5323 perf_event_task_tick(curr);
5318 5324
5319#ifdef CONFIG_SMP 5325#ifdef CONFIG_SMP
5320 rq->idle_at_tick = idle_cpu(cpu); 5326 rq->idle_at_tick = idle_cpu(cpu);
@@ -5528,7 +5534,7 @@ need_resched_nonpreemptible:
5528 5534
5529 if (likely(prev != next)) { 5535 if (likely(prev != next)) {
5530 sched_info_switch(prev, next); 5536 sched_info_switch(prev, next);
5531 perf_event_task_sched_out(prev, next, cpu); 5537 perf_event_task_sched_out(prev, next);
5532 5538
5533 rq->nr_switches++; 5539 rq->nr_switches++;
5534 rq->curr = next; 5540 rq->curr = next;
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index cd9ecd89ec77..d00c6fe23f54 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -51,7 +51,9 @@ endif
51obj-$(CONFIG_EVENT_TRACING) += trace_events.o 51obj-$(CONFIG_EVENT_TRACING) += trace_events.o
52obj-$(CONFIG_EVENT_TRACING) += trace_export.o 52obj-$(CONFIG_EVENT_TRACING) += trace_export.o
53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o 53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
54obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o 54ifeq ($(CONFIG_PERF_EVENTS),y)
55obj-$(CONFIG_EVENT_TRACING) += trace_event_profile.o
56endif
55obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 57obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
56obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o 58obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
57obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o 59obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index d996353473fd..83783579378f 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -22,7 +22,6 @@
22#include <linux/hardirq.h> 22#include <linux/hardirq.h>
23#include <linux/kthread.h> 23#include <linux/kthread.h>
24#include <linux/uaccess.h> 24#include <linux/uaccess.h>
25#include <linux/kprobes.h>
26#include <linux/ftrace.h> 25#include <linux/ftrace.h>
27#include <linux/sysctl.h> 26#include <linux/sysctl.h>
28#include <linux/ctype.h> 27#include <linux/ctype.h>
@@ -898,36 +897,6 @@ static struct dyn_ftrace *ftrace_free_records;
898 } \ 897 } \
899 } 898 }
900 899
901#ifdef CONFIG_KPROBES
902
903static int frozen_record_count;
904
905static inline void freeze_record(struct dyn_ftrace *rec)
906{
907 if (!(rec->flags & FTRACE_FL_FROZEN)) {
908 rec->flags |= FTRACE_FL_FROZEN;
909 frozen_record_count++;
910 }
911}
912
913static inline void unfreeze_record(struct dyn_ftrace *rec)
914{
915 if (rec->flags & FTRACE_FL_FROZEN) {
916 rec->flags &= ~FTRACE_FL_FROZEN;
917 frozen_record_count--;
918 }
919}
920
921static inline int record_frozen(struct dyn_ftrace *rec)
922{
923 return rec->flags & FTRACE_FL_FROZEN;
924}
925#else
926# define freeze_record(rec) ({ 0; })
927# define unfreeze_record(rec) ({ 0; })
928# define record_frozen(rec) ({ 0; })
929#endif /* CONFIG_KPROBES */
930
931static void ftrace_free_rec(struct dyn_ftrace *rec) 900static void ftrace_free_rec(struct dyn_ftrace *rec)
932{ 901{
933 rec->freelist = ftrace_free_records; 902 rec->freelist = ftrace_free_records;
@@ -1025,6 +994,21 @@ static void ftrace_bug(int failed, unsigned long ip)
1025} 994}
1026 995
1027 996
997/* Return 1 if the address range is reserved for ftrace */
998int ftrace_text_reserved(void *start, void *end)
999{
1000 struct dyn_ftrace *rec;
1001 struct ftrace_page *pg;
1002
1003 do_for_each_ftrace_rec(pg, rec) {
1004 if (rec->ip <= (unsigned long)end &&
1005 rec->ip + MCOUNT_INSN_SIZE > (unsigned long)start)
1006 return 1;
1007 } while_for_each_ftrace_rec();
1008 return 0;
1009}
1010
1011
1028static int 1012static int
1029__ftrace_replace_code(struct dyn_ftrace *rec, int enable) 1013__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
1030{ 1014{
@@ -1076,14 +1060,6 @@ static void ftrace_replace_code(int enable)
1076 !(rec->flags & FTRACE_FL_CONVERTED)) 1060 !(rec->flags & FTRACE_FL_CONVERTED))
1077 continue; 1061 continue;
1078 1062
1079 /* ignore updates to this record's mcount site */
1080 if (get_kprobe((void *)rec->ip)) {
1081 freeze_record(rec);
1082 continue;
1083 } else {
1084 unfreeze_record(rec);
1085 }
1086
1087 failed = __ftrace_replace_code(rec, enable); 1063 failed = __ftrace_replace_code(rec, enable);
1088 if (failed) { 1064 if (failed) {
1089 rec->flags |= FTRACE_FL_FAILED; 1065 rec->flags |= FTRACE_FL_FAILED;
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 9e25573242cf..f0d693005075 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -6,14 +6,12 @@
6 */ 6 */
7 7
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/kprobes.h>
9#include "trace.h" 10#include "trace.h"
10 11
11 12
12char *perf_trace_buf; 13static char *perf_trace_buf;
13EXPORT_SYMBOL_GPL(perf_trace_buf); 14static char *perf_trace_buf_nmi;
14
15char *perf_trace_buf_nmi;
16EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
17 15
18typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ; 16typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ;
19 17
@@ -120,3 +118,47 @@ void ftrace_profile_disable(int event_id)
120 } 118 }
121 mutex_unlock(&event_mutex); 119 mutex_unlock(&event_mutex);
122} 120}
121
122__kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type,
123 int *rctxp, unsigned long *irq_flags)
124{
125 struct trace_entry *entry;
126 char *trace_buf, *raw_data;
127 int pc, cpu;
128
129 pc = preempt_count();
130
131 /* Protect the per cpu buffer, begin the rcu read side */
132 local_irq_save(*irq_flags);
133
134 *rctxp = perf_swevent_get_recursion_context();
135 if (*rctxp < 0)
136 goto err_recursion;
137
138 cpu = smp_processor_id();
139
140 if (in_nmi())
141 trace_buf = rcu_dereference(perf_trace_buf_nmi);
142 else
143 trace_buf = rcu_dereference(perf_trace_buf);
144
145 if (!trace_buf)
146 goto err;
147
148 raw_data = per_cpu_ptr(trace_buf, cpu);
149
150 /* zero the dead bytes from align to not leak stack to user */
151 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
152
153 entry = (struct trace_entry *)raw_data;
154 tracing_generic_entry_update(entry, *irq_flags, pc);
155 entry->type = type;
156
157 return raw_data;
158err:
159 perf_swevent_put_recursion_context(*rctxp);
160err_recursion:
161 local_irq_restore(*irq_flags);
162 return NULL;
163}
164EXPORT_SYMBOL_GPL(ftrace_perf_buf_prepare);
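ftrace_perf_buf_prepare() pairs with ftrace_perf_buf_submit(), the header-side counterpart the kprobe handlers below switch to: prepare reserves a record in the per-CPU perf trace buffer and takes the recursion/irq state, the caller fills the record, and submit hands it to perf and releases that state. A hedged sketch of a caller, with a hypothetical handler and entry layout:

/* Sketch: the prepare/fill/submit pattern introduced here. */
#include <linux/ftrace_event.h>
#include <linux/kernel.h>

struct sample_trace_entry {
	struct trace_entry	ent;
	unsigned long		ip;
};

static void sample_profile_handler(struct ftrace_event_call *call,
				   unsigned long ip)
{
	struct sample_trace_entry *entry;
	unsigned long irq_flags;
	int size, rctx;

	/* round the record up to u64, minus the u32 perf prepends for the size */
	size = ALIGN(sizeof(*entry) + sizeof(u32), sizeof(u64)) - sizeof(u32);

	entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
	if (!entry)
		return;			/* recursion, or no buffer allocated */

	entry->ip = ip;

	ftrace_perf_buf_submit(entry, size, rctx, ip, 1, irq_flags);
}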
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index e42af9aad69f..4615f62a04f1 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1371,7 +1371,7 @@ out_unlock:
1371 return err; 1371 return err;
1372} 1372}
1373 1373
1374#ifdef CONFIG_EVENT_PROFILE 1374#ifdef CONFIG_PERF_EVENTS
1375 1375
1376void ftrace_profile_free_filter(struct perf_event *event) 1376void ftrace_profile_free_filter(struct perf_event *event)
1377{ 1377{
@@ -1439,5 +1439,5 @@ out_unlock:
1439 return err; 1439 return err;
1440} 1440}
1441 1441
1442#endif /* CONFIG_EVENT_PROFILE */ 1442#endif /* CONFIG_PERF_EVENTS */
1443 1443
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 465b36bef4ca..505c92273b1a 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -91,11 +91,6 @@ static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
91 return retval; 91 return retval;
92} 92}
93 93
94static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
95{
96 return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num));
97}
98
99static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs, 94static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
100 void *dummy) 95 void *dummy)
101{ 96{
@@ -231,9 +226,7 @@ static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
231{ 226{
232 int ret = -EINVAL; 227 int ret = -EINVAL;
233 228
234 if (ff->func == fetch_argument) 229 if (ff->func == fetch_register) {
235 ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data);
236 else if (ff->func == fetch_register) {
237 const char *name; 230 const char *name;
238 name = regs_query_register_name((unsigned int)((long)ff->data)); 231 name = regs_query_register_name((unsigned int)((long)ff->data));
239 ret = snprintf(buf, n, "%%%s", name); 232 ret = snprintf(buf, n, "%%%s", name);
@@ -489,14 +482,6 @@ static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)
489 } 482 }
490 } else 483 } else
491 ret = -EINVAL; 484 ret = -EINVAL;
492 } else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) {
493 ret = strict_strtoul(arg + 3, 10, &param);
494 if (ret || param > PARAM_MAX_ARGS)
495 ret = -EINVAL;
496 else {
497 ff->func = fetch_argument;
498 ff->data = (void *)param;
499 }
500 } else 485 } else
501 ret = -EINVAL; 486 ret = -EINVAL;
502 return ret; 487 return ret;
@@ -611,7 +596,6 @@ static int create_trace_probe(int argc, char **argv)
611 * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS] 596 * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
612 * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS] 597 * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
613 * Fetch args: 598 * Fetch args:
614 * $argN : fetch Nth of function argument. (N:0-)
615 * $retval : fetch return value 599 * $retval : fetch return value
616 * $stack : fetch stack address 600 * $stack : fetch stack address
617 * $stackN : fetch Nth of stack (N:0-) 601 * $stackN : fetch Nth of stack (N:0-)
@@ -958,7 +942,7 @@ static const struct file_operations kprobe_profile_ops = {
958}; 942};
959 943
960/* Kprobe handler */ 944/* Kprobe handler */
961static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) 945static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
962{ 946{
963 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 947 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
964 struct kprobe_trace_entry *entry; 948 struct kprobe_trace_entry *entry;
@@ -978,7 +962,7 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
978 event = trace_current_buffer_lock_reserve(&buffer, call->id, size, 962 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
979 irq_flags, pc); 963 irq_flags, pc);
980 if (!event) 964 if (!event)
981 return 0; 965 return;
982 966
983 entry = ring_buffer_event_data(event); 967 entry = ring_buffer_event_data(event);
984 entry->nargs = tp->nr_args; 968 entry->nargs = tp->nr_args;
@@ -988,11 +972,10 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
988 972
989 if (!filter_current_check_discard(buffer, call, entry, event)) 973 if (!filter_current_check_discard(buffer, call, entry, event))
990 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); 974 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
991 return 0;
992} 975}
993 976
994/* Kretprobe handler */ 977/* Kretprobe handler */
995static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri, 978static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
996 struct pt_regs *regs) 979 struct pt_regs *regs)
997{ 980{
998 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 981 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
@@ -1011,7 +994,7 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
1011 event = trace_current_buffer_lock_reserve(&buffer, call->id, size, 994 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
1012 irq_flags, pc); 995 irq_flags, pc);
1013 if (!event) 996 if (!event)
1014 return 0; 997 return;
1015 998
1016 entry = ring_buffer_event_data(event); 999 entry = ring_buffer_event_data(event);
1017 entry->nargs = tp->nr_args; 1000 entry->nargs = tp->nr_args;
@@ -1022,8 +1005,6 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
1022 1005
1023 if (!filter_current_check_discard(buffer, call, entry, event)) 1006 if (!filter_current_check_discard(buffer, call, entry, event))
1024 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); 1007 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
1025
1026 return 0;
1027} 1008}
1028 1009
1029/* Event entry printers */ 1010/* Event entry printers */
@@ -1230,137 +1211,67 @@ static int set_print_fmt(struct trace_probe *tp)
1230 return 0; 1211 return 0;
1231} 1212}
1232 1213
1233#ifdef CONFIG_EVENT_PROFILE 1214#ifdef CONFIG_PERF_EVENTS
1234 1215
1235/* Kprobe profile handler */ 1216/* Kprobe profile handler */
1236static __kprobes int kprobe_profile_func(struct kprobe *kp, 1217static __kprobes void kprobe_profile_func(struct kprobe *kp,
1237 struct pt_regs *regs) 1218 struct pt_regs *regs)
1238{ 1219{
1239 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 1220 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1240 struct ftrace_event_call *call = &tp->call; 1221 struct ftrace_event_call *call = &tp->call;
1241 struct kprobe_trace_entry *entry; 1222 struct kprobe_trace_entry *entry;
1242 struct trace_entry *ent; 1223 int size, __size, i;
1243 int size, __size, i, pc, __cpu;
1244 unsigned long irq_flags; 1224 unsigned long irq_flags;
1245 char *trace_buf;
1246 char *raw_data;
1247 int rctx; 1225 int rctx;
1248 1226
1249 pc = preempt_count();
1250 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); 1227 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
1251 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1228 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1252 size -= sizeof(u32); 1229 size -= sizeof(u32);
1253 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 1230 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
1254 "profile buffer not large enough")) 1231 "profile buffer not large enough"))
1255 return 0; 1232 return;
1256
1257 /*
1258 * Protect the non nmi buffer
1259 * This also protects the rcu read side
1260 */
1261 local_irq_save(irq_flags);
1262 1233
1263 rctx = perf_swevent_get_recursion_context(); 1234 entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
1264 if (rctx < 0) 1235 if (!entry)
1265 goto end_recursion; 1236 return;
1266
1267 __cpu = smp_processor_id();
1268
1269 if (in_nmi())
1270 trace_buf = rcu_dereference(perf_trace_buf_nmi);
1271 else
1272 trace_buf = rcu_dereference(perf_trace_buf);
1273
1274 if (!trace_buf)
1275 goto end;
1276
1277 raw_data = per_cpu_ptr(trace_buf, __cpu);
1278
1279 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1280 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1281 entry = (struct kprobe_trace_entry *)raw_data;
1282 ent = &entry->ent;
1283 1237
1284 tracing_generic_entry_update(ent, irq_flags, pc);
1285 ent->type = call->id;
1286 entry->nargs = tp->nr_args; 1238 entry->nargs = tp->nr_args;
1287 entry->ip = (unsigned long)kp->addr; 1239 entry->ip = (unsigned long)kp->addr;
1288 for (i = 0; i < tp->nr_args; i++) 1240 for (i = 0; i < tp->nr_args; i++)
1289 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1241 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1290 perf_tp_event(call->id, entry->ip, 1, entry, size);
1291 1242
1292end: 1243 ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags);
1293 perf_swevent_put_recursion_context(rctx);
1294end_recursion:
1295 local_irq_restore(irq_flags);
1296
1297 return 0;
1298} 1244}
1299 1245
1300/* Kretprobe profile handler */ 1246/* Kretprobe profile handler */
1301static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, 1247static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri,
1302 struct pt_regs *regs) 1248 struct pt_regs *regs)
1303{ 1249{
1304 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 1250 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1305 struct ftrace_event_call *call = &tp->call; 1251 struct ftrace_event_call *call = &tp->call;
1306 struct kretprobe_trace_entry *entry; 1252 struct kretprobe_trace_entry *entry;
1307 struct trace_entry *ent; 1253 int size, __size, i;
1308 int size, __size, i, pc, __cpu;
1309 unsigned long irq_flags; 1254 unsigned long irq_flags;
1310 char *trace_buf;
1311 char *raw_data;
1312 int rctx; 1255 int rctx;
1313 1256
1314 pc = preempt_count();
1315 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); 1257 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
1316 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1258 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1317 size -= sizeof(u32); 1259 size -= sizeof(u32);
1318 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 1260 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
1319 "profile buffer not large enough")) 1261 "profile buffer not large enough"))
1320 return 0; 1262 return;
1321
1322 /*
1323 * Protect the non nmi buffer
1324 * This also protects the rcu read side
1325 */
1326 local_irq_save(irq_flags);
1327
1328 rctx = perf_swevent_get_recursion_context();
1329 if (rctx < 0)
1330 goto end_recursion;
1331
1332 __cpu = smp_processor_id();
1333
1334 if (in_nmi())
1335 trace_buf = rcu_dereference(perf_trace_buf_nmi);
1336 else
1337 trace_buf = rcu_dereference(perf_trace_buf);
1338
1339 if (!trace_buf)
1340 goto end;
1341
1342 raw_data = per_cpu_ptr(trace_buf, __cpu);
1343 1263
1344 /* Zero dead bytes from alignment to avoid buffer leak to userspace */ 1264 entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
1345 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; 1265 if (!entry)
1346 entry = (struct kretprobe_trace_entry *)raw_data; 1266 return;
1347 ent = &entry->ent;
1348 1267
1349 tracing_generic_entry_update(ent, irq_flags, pc);
1350 ent->type = call->id;
1351 entry->nargs = tp->nr_args; 1268 entry->nargs = tp->nr_args;
1352 entry->func = (unsigned long)tp->rp.kp.addr; 1269 entry->func = (unsigned long)tp->rp.kp.addr;
1353 entry->ret_ip = (unsigned long)ri->ret_addr; 1270 entry->ret_ip = (unsigned long)ri->ret_addr;
1354 for (i = 0; i < tp->nr_args; i++) 1271 for (i = 0; i < tp->nr_args; i++)
1355 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1272 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1356 perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
1357
1358end:
1359 perf_swevent_put_recursion_context(rctx);
1360end_recursion:
1361 local_irq_restore(irq_flags);
1362 1273
1363 return 0; 1274 ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags);
1364} 1275}
1365 1276
1366static int probe_profile_enable(struct ftrace_event_call *call) 1277static int probe_profile_enable(struct ftrace_event_call *call)
@@ -1388,7 +1299,7 @@ static void probe_profile_disable(struct ftrace_event_call *call)
1388 disable_kprobe(&tp->rp.kp); 1299 disable_kprobe(&tp->rp.kp);
1389 } 1300 }
1390} 1301}
1391#endif /* CONFIG_EVENT_PROFILE */ 1302#endif /* CONFIG_PERF_EVENTS */
1392 1303
1393 1304
1394static __kprobes 1305static __kprobes
@@ -1398,10 +1309,10 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1398 1309
1399 if (tp->flags & TP_FLAG_TRACE) 1310 if (tp->flags & TP_FLAG_TRACE)
1400 kprobe_trace_func(kp, regs); 1311 kprobe_trace_func(kp, regs);
1401#ifdef CONFIG_EVENT_PROFILE 1312#ifdef CONFIG_PERF_EVENTS
1402 if (tp->flags & TP_FLAG_PROFILE) 1313 if (tp->flags & TP_FLAG_PROFILE)
1403 kprobe_profile_func(kp, regs); 1314 kprobe_profile_func(kp, regs);
1404#endif /* CONFIG_EVENT_PROFILE */ 1315#endif
1405 return 0; /* We don't tweek kernel, so just return 0 */ 1316 return 0; /* We don't tweek kernel, so just return 0 */
1406} 1317}
1407 1318
@@ -1412,10 +1323,10 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1412 1323
1413 if (tp->flags & TP_FLAG_TRACE) 1324 if (tp->flags & TP_FLAG_TRACE)
1414 kretprobe_trace_func(ri, regs); 1325 kretprobe_trace_func(ri, regs);
1415#ifdef CONFIG_EVENT_PROFILE 1326#ifdef CONFIG_PERF_EVENTS
1416 if (tp->flags & TP_FLAG_PROFILE) 1327 if (tp->flags & TP_FLAG_PROFILE)
1417 kretprobe_profile_func(ri, regs); 1328 kretprobe_profile_func(ri, regs);
1418#endif /* CONFIG_EVENT_PROFILE */ 1329#endif
1419 return 0; /* We don't tweek kernel, so just return 0 */ 1330 return 0; /* We don't tweek kernel, so just return 0 */
1420} 1331}
1421 1332
@@ -1446,7 +1357,7 @@ static int register_probe_event(struct trace_probe *tp)
1446 call->regfunc = probe_event_enable; 1357 call->regfunc = probe_event_enable;
1447 call->unregfunc = probe_event_disable; 1358 call->unregfunc = probe_event_disable;
1448 1359
1449#ifdef CONFIG_EVENT_PROFILE 1360#ifdef CONFIG_PERF_EVENTS
1450 call->profile_enable = probe_profile_enable; 1361 call->profile_enable = probe_profile_enable;
1451 call->profile_disable = probe_profile_disable; 1362 call->profile_disable = probe_profile_disable;
1452#endif 1363#endif
@@ -1507,28 +1418,67 @@ static int kprobe_trace_selftest_target(int a1, int a2, int a3,
1507 1418
1508static __init int kprobe_trace_self_tests_init(void) 1419static __init int kprobe_trace_self_tests_init(void)
1509{ 1420{
1510 int ret; 1421 int ret, warn = 0;
1511 int (*target)(int, int, int, int, int, int); 1422 int (*target)(int, int, int, int, int, int);
1423 struct trace_probe *tp;
1512 1424
1513 target = kprobe_trace_selftest_target; 1425 target = kprobe_trace_selftest_target;
1514 1426
1515 pr_info("Testing kprobe tracing: "); 1427 pr_info("Testing kprobe tracing: ");
1516 1428
1517 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target " 1429 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
1518 "$arg1 $arg2 $arg3 $arg4 $stack $stack0"); 1430 "$stack $stack0 +0($stack)");
1519 if (WARN_ON_ONCE(ret)) 1431 if (WARN_ON_ONCE(ret)) {
1520 pr_warning("error enabling function entry\n"); 1432 pr_warning("error on probing function entry.\n");
1433 warn++;
1434 } else {
1435 /* Enable trace point */
1436 tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM);
1437 if (WARN_ON_ONCE(tp == NULL)) {
1438 pr_warning("error on getting new probe.\n");
1439 warn++;
1440 } else
1441 probe_event_enable(&tp->call);
1442 }
1521 1443
1522 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " 1444 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
1523 "$retval"); 1445 "$retval");
1524 if (WARN_ON_ONCE(ret)) 1446 if (WARN_ON_ONCE(ret)) {
1525 pr_warning("error enabling function return\n"); 1447 pr_warning("error on probing function return.\n");
1448 warn++;
1449 } else {
1450 /* Enable trace point */
1451 tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM);
1452 if (WARN_ON_ONCE(tp == NULL)) {
1453 pr_warning("error on getting new probe.\n");
1454 warn++;
1455 } else
1456 probe_event_enable(&tp->call);
1457 }
1458
1459 if (warn)
1460 goto end;
1526 1461
1527 ret = target(1, 2, 3, 4, 5, 6); 1462 ret = target(1, 2, 3, 4, 5, 6);
1528 1463
1529 cleanup_all_probes(); 1464 ret = command_trace_probe("-:testprobe");
1465 if (WARN_ON_ONCE(ret)) {
1466 pr_warning("error on deleting a probe.\n");
1467 warn++;
1468 }
1469
1470 ret = command_trace_probe("-:testprobe2");
1471 if (WARN_ON_ONCE(ret)) {
1472 pr_warning("error on deleting a probe.\n");
1473 warn++;
1474 }
1530 1475
1531 pr_cont("OK\n"); 1476end:
1477 cleanup_all_probes();
1478 if (warn)
1479 pr_cont("NG: Some tests are failed. Please check them.\n");
1480 else
1481 pr_cont("OK\n");
1532 return 0; 1482 return 0;
1533} 1483}
1534 1484
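Both the kprobe and kretprobe profile handlers now delegate recursion protection, per-CPU buffer lookup, header setup and zeroing of the alignment padding to ftrace_perf_buf_prepare(), and hand the filled record to ftrace_perf_buf_submit(). A minimal sketch of the resulting handler shape, assembled only from the new-side code in the hunks above (the function name is illustrative, not part of the patch):

----
/*
 * Sketch of the pattern the kprobe/kretprobe profile handlers converge
 * on after this change; all calls and fields are taken from the
 * new-side code above, only the function name is illustrative.
 */
static __kprobes void kprobe_profile_func_sketch(struct kprobe *kp,
						 struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
	struct kprobe_trace_entry *entry;
	unsigned long irq_flags;
	int size, __size, i, rctx;

	/* Pad the record so the perf buffer stays u64-aligned. */
	__size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
	size = ALIGN(__size + sizeof(u32), sizeof(u64)) - sizeof(u32);

	/*
	 * One helper now covers what the old code open-coded: recursion
	 * context, per-cpu buffer lookup, zeroing the alignment pad and
	 * filling in the common trace_entry header.
	 */
	entry = ftrace_perf_buf_prepare(size, tp->call.id, &rctx, &irq_flags);
	if (!entry)
		return;

	entry->nargs = tp->nr_args;
	entry->ip = (unsigned long)kp->addr;
	for (i = 0; i < tp->nr_args; i++)
		entry->args[i] = call_fetch(&tp->args[i].fetch, regs);

	/* Counterpart helper hands the filled record to perf. */
	ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags);
}
----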
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index a1834dda85f4..cba47d7935cc 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -426,7 +426,7 @@ int __init init_ftrace_syscalls(void)
426} 426}
427core_initcall(init_ftrace_syscalls); 427core_initcall(init_ftrace_syscalls);
428 428
429#ifdef CONFIG_EVENT_PROFILE 429#ifdef CONFIG_PERF_EVENTS
430 430
431static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); 431static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
432static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls); 432static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
@@ -438,12 +438,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
438 struct syscall_metadata *sys_data; 438 struct syscall_metadata *sys_data;
439 struct syscall_trace_enter *rec; 439 struct syscall_trace_enter *rec;
440 unsigned long flags; 440 unsigned long flags;
441 char *trace_buf;
442 char *raw_data;
443 int syscall_nr; 441 int syscall_nr;
444 int rctx; 442 int rctx;
445 int size; 443 int size;
446 int cpu;
447 444
448 syscall_nr = syscall_get_nr(current, regs); 445 syscall_nr = syscall_get_nr(current, regs);
449 if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) 446 if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
@@ -462,37 +459,15 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
462 "profile buffer not large enough")) 459 "profile buffer not large enough"))
463 return; 460 return;
464 461
465 /* Protect the per cpu buffer, begin the rcu read side */ 462 rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size,
466 local_irq_save(flags); 463 sys_data->enter_event->id, &rctx, &flags);
467 464 if (!rec)
468 rctx = perf_swevent_get_recursion_context(); 465 return;
469 if (rctx < 0)
470 goto end_recursion;
471
472 cpu = smp_processor_id();
473
474 trace_buf = rcu_dereference(perf_trace_buf);
475
476 if (!trace_buf)
477 goto end;
478
479 raw_data = per_cpu_ptr(trace_buf, cpu);
480
481 /* zero the dead bytes from align to not leak stack to user */
482 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
483 466
484 rec = (struct syscall_trace_enter *) raw_data;
485 tracing_generic_entry_update(&rec->ent, 0, 0);
486 rec->ent.type = sys_data->enter_event->id;
487 rec->nr = syscall_nr; 467 rec->nr = syscall_nr;
488 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 468 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
489 (unsigned long *)&rec->args); 469 (unsigned long *)&rec->args);
490 perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size); 470 ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
491
492end:
493 perf_swevent_put_recursion_context(rctx);
494end_recursion:
495 local_irq_restore(flags);
496} 471}
497 472
498int prof_sysenter_enable(struct ftrace_event_call *call) 473int prof_sysenter_enable(struct ftrace_event_call *call)
@@ -536,11 +511,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
536 struct syscall_trace_exit *rec; 511 struct syscall_trace_exit *rec;
537 unsigned long flags; 512 unsigned long flags;
538 int syscall_nr; 513 int syscall_nr;
539 char *trace_buf;
540 char *raw_data;
541 int rctx; 514 int rctx;
542 int size; 515 int size;
543 int cpu;
544 516
545 syscall_nr = syscall_get_nr(current, regs); 517 syscall_nr = syscall_get_nr(current, regs);
546 if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) 518 if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
@@ -562,38 +534,15 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
562 "exit event has grown above profile buffer size")) 534 "exit event has grown above profile buffer size"))
563 return; 535 return;
564 536
565 /* Protect the per cpu buffer, begin the rcu read side */ 537 rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size,
566 local_irq_save(flags); 538 sys_data->exit_event->id, &rctx, &flags);
567 539 if (!rec)
568 rctx = perf_swevent_get_recursion_context(); 540 return;
569 if (rctx < 0)
570 goto end_recursion;
571
572 cpu = smp_processor_id();
573
574 trace_buf = rcu_dereference(perf_trace_buf);
575
576 if (!trace_buf)
577 goto end;
578
579 raw_data = per_cpu_ptr(trace_buf, cpu);
580
581 /* zero the dead bytes from align to not leak stack to user */
582 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
583
584 rec = (struct syscall_trace_exit *)raw_data;
585 541
586 tracing_generic_entry_update(&rec->ent, 0, 0);
587 rec->ent.type = sys_data->exit_event->id;
588 rec->nr = syscall_nr; 542 rec->nr = syscall_nr;
589 rec->ret = syscall_get_return_value(current, regs); 543 rec->ret = syscall_get_return_value(current, regs);
590 544
591 perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size); 545 ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
592
593end:
594 perf_swevent_put_recursion_context(rctx);
595end_recursion:
596 local_irq_restore(flags);
597} 546}
598 547
599int prof_sysexit_enable(struct ftrace_event_call *call) 548int prof_sysexit_enable(struct ftrace_event_call *call)
@@ -631,6 +580,5 @@ void prof_sysexit_disable(struct ftrace_event_call *call)
631 mutex_unlock(&syscall_trace_lock); 580 mutex_unlock(&syscall_trace_lock);
632} 581}
633 582
634#endif 583#endif /* CONFIG_PERF_EVENTS */
635
636 584
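The syscall profile handlers reduce to the same shape: ftrace_perf_buf_prepare() returns a generic buffer that the caller casts to its own record type, and syscall events submit with an address of 0. A sketch condensed from the new-side code above; the function signature is illustrative, and the size computation and enabled-bitmap checks remain in the unchanged surrounding code:

----
/*
 * Condensed from the new-side code in the trace_syscalls.c hunk above;
 * the signature is illustrative only. 'size' is assumed to have been
 * computed and checked as in the surrounding (unchanged) code.
 */
static void prof_syscall_enter_sketch(struct syscall_metadata *sys_data,
				      struct pt_regs *regs,
				      int syscall_nr, int size)
{
	struct syscall_trace_enter *rec;
	unsigned long flags;
	int rctx;

	/* The helper returns a generic buffer; cast it to the record type. */
	rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size,
			sys_data->enter_event->id, &rctx, &flags);
	if (!rec)
		return;

	rec->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
			      (unsigned long *)&rec->args);

	/* Syscall events pass 0 as the address when submitting to perf. */
	ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
}
----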
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index 124760bb37b5..e1d60d780784 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -14,6 +14,7 @@ perf*.html
14common-cmds.h 14common-cmds.h
15perf.data 15perf.data
16perf.data.old 16perf.data.old
17perf-archive
17tags 18tags
18TAGS 19TAGS
19cscope* 20cscope*
diff --git a/tools/perf/Documentation/perf-archive.txt b/tools/perf/Documentation/perf-archive.txt
new file mode 100644
index 000000000000..fae174dc7d01
--- /dev/null
+++ b/tools/perf/Documentation/perf-archive.txt
@@ -0,0 +1,22 @@
1perf-archive(1)
2===============
3
4NAME
5----
6perf-archive - Create archive with object files with build-ids found in perf.data file
7
8SYNOPSIS
9--------
10[verse]
11'perf archive' [file]
12
13DESCRIPTION
14-----------
15This command runs perf-buildid-list --with-hits, and collects the files
16with the build-ids found so that analysis of the perf.data contents can be
17done on another machine.
18
19
20SEE ALSO
21--------
22linkperf:perf-record[1], linkperf:perf-buildid-list[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt
new file mode 100644
index 000000000000..88bc3b519746
--- /dev/null
+++ b/tools/perf/Documentation/perf-buildid-cache.txt
@@ -0,0 +1,33 @@
1perf-buildid-cache(1)
2=====================
3
4NAME
5----
6perf-buildid-cache - Manage build-id cache.
7
8SYNOPSIS
9--------
10[verse]
11'perf buildid-cache <options>'
12
13DESCRIPTION
14-----------
15This command manages the build-id cache. It can add files to and remove files
16from the cache. In the future it should also purge older entries, set upper
17limits for the space used by the cache, etc.
18
19OPTIONS
20-------
21-a::
22--add=::
23 Add specified file to the cache.
24-r::
25--remove=::
26 Remove specified file from the cache.
27-v::
28--verbose::
29 Be more verbose.
30
31SEE ALSO
32--------
33linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 250e391b4bc8..2de34075f6a4 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -15,6 +15,8 @@ or
15'perf probe' [options] --del='[GROUP:]EVENT' [...] 15'perf probe' [options] --del='[GROUP:]EVENT' [...]
16or 16or
17'perf probe' --list 17'perf probe' --list
18or
19'perf probe' --line='FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]'
18 20
19DESCRIPTION 21DESCRIPTION
20----------- 22-----------
@@ -45,6 +47,11 @@ OPTIONS
45--list:: 47--list::
46 List up current probe events. 48 List up current probe events.
47 49
50-L::
51--line=::
52 Show source code lines which can be probed. This needs an argument
53 which specifies a range of the source code.
54
48PROBE SYNTAX 55PROBE SYNTAX
49------------ 56------------
50Probe points are defined by following syntax. 57Probe points are defined by following syntax.
@@ -56,6 +63,19 @@ Probe points are defined by following syntax.
56It is also possible to specify a probe point by the source line number by using 'SRC:ALN' syntax, where 'SRC' is the source file path and 'ALN' is the line number. 63It is also possible to specify a probe point by the source line number by using 'SRC:ALN' syntax, where 'SRC' is the source file path and 'ALN' is the line number.
57'ARG' specifies the arguments of this probe point. You can use the name of local variable, or kprobe-tracer argument format (e.g. $retval, %ax, etc). 64'ARG' specifies the arguments of this probe point. You can use the name of local variable, or kprobe-tracer argument format (e.g. $retval, %ax, etc).
58 65
66LINE SYNTAX
67-----------
68A line range is described by the following syntax.
69
70 "FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]"
71
72FUNC specifies the function whose lines should be shown. 'RLN' is the start line
73number relative to the function entry line, and 'RLN2' is the end line number. As in the
74probe syntax, 'SRC' means the source file path, 'ALN' is the start line number,
75and 'ALN2' is the end line number in the file. It is also possible to specify how
76many lines to show by using 'NUM'.
77So, "source.c:100-120" shows lines from the 100th to the 120th in the source.c file, and "func:10+20" shows 20 lines starting from the 10th line of the func function.
78
59SEE ALSO 79SEE ALSO
60-------- 80--------
61linkperf:perf-trace[1], linkperf:perf-record[1] 81linkperf:perf-trace[1], linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 4a7d558dc309..785b9fc32a46 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -74,7 +74,7 @@ OPTIONS
74 74
75-s <symbol>:: 75-s <symbol>::
76--sym-annotate=<symbol>:: 76--sym-annotate=<symbol>::
77 Annotate this symbol. Requires -k option. 77 Annotate this symbol.
78 78
79-v:: 79-v::
80--verbose:: 80--verbose::
diff --git a/tools/perf/Documentation/perf-trace-perl.txt b/tools/perf/Documentation/perf-trace-perl.txt
index c5f55f439091..d729cee8d987 100644
--- a/tools/perf/Documentation/perf-trace-perl.txt
+++ b/tools/perf/Documentation/perf-trace-perl.txt
@@ -8,7 +8,7 @@ perf-trace-perl - Process trace data with a Perl script
8SYNOPSIS 8SYNOPSIS
9-------- 9--------
10[verse] 10[verse]
11'perf trace' [-s [lang]:script[.ext] ] 11'perf trace' [-s [Perl]:script[.pl] ]
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
diff --git a/tools/perf/Documentation/perf-trace-python.txt b/tools/perf/Documentation/perf-trace-python.txt
new file mode 100644
index 000000000000..a241aca77184
--- /dev/null
+++ b/tools/perf/Documentation/perf-trace-python.txt
@@ -0,0 +1,625 @@
1perf-trace-python(1)
2====================
3
4NAME
5----
6perf-trace-python - Process trace data with a Python script
7
8SYNOPSIS
9--------
10[verse]
11'perf trace' [-s [Python]:script[.py] ]
12
13DESCRIPTION
14-----------
15
16This perf trace option is used to process perf trace data using perf's
17built-in Python interpreter. It reads and processes the input file and
18displays the results of the trace analysis implemented in the given
19Python script, if any.
20
21A QUICK EXAMPLE
22---------------
23
24This section shows the process, start to finish, of creating a working
25Python script that aggregates and extracts useful information from a
26raw perf trace stream. You can avoid reading the rest of this
27document if an example is enough for you; the rest of the document
28provides more details on each step and lists the library functions
29available to script writers.
30
31This example actually details the steps that were used to create the
32'syscall-counts' script you see when you list the available perf trace
33scripts via 'perf trace -l'. As such, this script also shows how to
34integrate your script into the list of general-purpose 'perf trace'
35scripts listed by that command.
36
37The syscall-counts script is a simple script, but demonstrates all the
38basic ideas necessary to create a useful script. Here's an example
39of its output (syscall names are not yet supported, they will appear
40as numbers):
41
42----
43syscall events:
44
45event count
46---------------------------------------- -----------
47sys_write 455067
48sys_getdents 4072
49sys_close 3037
50sys_swapoff 1769
51sys_read 923
52sys_sched_setparam 826
53sys_open 331
54sys_newfstat 326
55sys_mmap 217
56sys_munmap 216
57sys_futex 141
58sys_select 102
59sys_poll 84
60sys_setitimer 12
61sys_writev 8
6215 8
63sys_lseek 7
64sys_rt_sigprocmask 6
65sys_wait4 3
66sys_ioctl 3
67sys_set_robust_list 1
68sys_exit 1
6956 1
70sys_access 1
71----
72
73Basically our task is to keep a per-syscall tally that gets updated
74every time a system call occurs in the system. Our script will do
75that, but first we need to record the data that will be processed by
76that script. Theoretically, there are a couple of ways we could do
77that:
78
79- we could enable every event under the tracing/events/syscalls
80 directory, but this is over 600 syscalls, well beyond the number
81 allowable by perf. These individual syscall events will however be
82 useful if we want to later use the guidance we get from the
83 general-purpose scripts to drill down and get more detail about
84 individual syscalls of interest.
85
86- we can enable the sys_enter and/or sys_exit syscalls found under
87 tracing/events/raw_syscalls. These are called for all syscalls; the
88 'id' field can be used to distinguish between individual syscall
89 numbers.
90
91For this script, we only need to know that a syscall was entered; we
92don't care how it exited, so we'll use 'perf record' to record only
93the sys_enter events:
94
95----
96# perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter
97
98^C[ perf record: Woken up 1 times to write data ]
99[ perf record: Captured and wrote 56.545 MB perf.data (~2470503 samples) ]
100----
101
102The options basically say to collect data for every syscall event
103system-wide and multiplex the per-cpu output into a single stream.
104That single stream will be recorded in a file in the current directory
105called perf.data.
106
107Once we have a perf.data file containing our data, we can use the -g
108'perf trace' option to generate a Python script that will contain a
109callback handler for each event type found in the perf.data trace
110stream (for more details, see the STARTER SCRIPTS section).
111
112----
113# perf trace -g python
114generated Python script: perf-trace.py
115
116The output file created also in the current directory is named
117perf-trace.py. Here's the file in its entirety:
118
119# perf trace event handlers, generated by perf trace -g python
120# Licensed under the terms of the GNU GPL License version 2
121
122# The common_* event handler fields are the most useful fields common to
123# all events. They don't necessarily correspond to the 'common_*' fields
124# in the format files. Those fields not available as handler params can
125# be retrieved using Python functions of the form common_*(context).
126# See the perf-trace-python Documentation for the list of available functions.
127
128import os
129import sys
130
131sys.path.append(os.environ['PERF_EXEC_PATH'] + \
132 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
133
134from perf_trace_context import *
135from Core import *
136
137def trace_begin():
138 print "in trace_begin"
139
140def trace_end():
141 print "in trace_end"
142
143def raw_syscalls__sys_enter(event_name, context, common_cpu,
144 common_secs, common_nsecs, common_pid, common_comm,
145 id, args):
146 print_header(event_name, common_cpu, common_secs, common_nsecs,
147 common_pid, common_comm)
148
149 print "id=%d, args=%s\n" % \
150 (id, args),
151
152def trace_unhandled(event_name, context, common_cpu, common_secs, common_nsecs,
153 common_pid, common_comm):
154 print_header(event_name, common_cpu, common_secs, common_nsecs,
155 common_pid, common_comm)
156
157def print_header(event_name, cpu, secs, nsecs, pid, comm):
158 print "%-20s %5u %05u.%09u %8u %-20s " % \
159 (event_name, cpu, secs, nsecs, pid, comm),
160----
161
162At the top is a comment block followed by some import statements and a
163path append which every perf trace script should include.
164
165Following that are a couple generated functions, trace_begin() and
166trace_end(), which are called at the beginning and the end of the
167script respectively (for more details, see the SCRIPT LAYOUT section
168below).
169
170Following those are the 'event handler' functions, one generated for
171each event type in the 'perf record' output. The handler functions take
172the form subsystem__event_name, and contain named parameters, one for
173each field in the event; in this case, there's only one event,
174raw_syscalls__sys_enter(). (see the EVENT HANDLERS section below for
175more info on event handlers).
176
177The final couple of functions are, like the begin and end functions,
178generated for every script. The first, trace_unhandled(), is called
179every time the script finds an event in the perf.data file that
180doesn't correspond to any event handler in the script. This could
181mean either that the record step recorded event types that it wasn't
182really interested in, or the script was run against a trace file that
183doesn't correspond to the script.
184
185The script generated by the -g option simply prints a line for each
186event found in the trace stream i.e. it basically just dumps the event
187and its parameter values to stdout. The print_header() function is
188simply a utility function used for that purpose. Let's rename the
189script and run it to see the default output:
190
191----
192# mv perf-trace.py syscall-counts.py
193# perf trace -s syscall-counts.py
194
195raw_syscalls__sys_enter 1 00840.847582083 7506 perf id=1, args=
196raw_syscalls__sys_enter 1 00840.847595764 7506 perf id=1, args=
197raw_syscalls__sys_enter 1 00840.847620860 7506 perf id=1, args=
198raw_syscalls__sys_enter 1 00840.847710478 6533 npviewer.bin id=78, args=
199raw_syscalls__sys_enter 1 00840.847719204 6533 npviewer.bin id=142, args=
200raw_syscalls__sys_enter 1 00840.847755445 6533 npviewer.bin id=3, args=
201raw_syscalls__sys_enter 1 00840.847775601 6533 npviewer.bin id=3, args=
202raw_syscalls__sys_enter 1 00840.847781820 6533 npviewer.bin id=3, args=
203.
204.
205.
206----
207
208Of course, for this script, we're not interested in printing every
209trace event, but rather aggregating it in a useful way. So we'll get
210rid of everything to do with printing as well as the trace_begin() and
211trace_unhandled() functions, which we won't be using. That leaves us
212with this minimalistic skeleton:
213
214----
215import os
216import sys
217
218sys.path.append(os.environ['PERF_EXEC_PATH'] + \
219 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
220
221from perf_trace_context import *
222from Core import *
223
224def trace_end():
225 print "in trace_end"
226
227def raw_syscalls__sys_enter(event_name, context, common_cpu,
228 common_secs, common_nsecs, common_pid, common_comm,
229 id, args):
230----
231
232In trace_end(), we'll simply print the results, but first we need to
233generate some results to print. To do that we need to have our
234sys_enter() handler do the necessary tallying until all events have
235been counted. A hash table indexed by syscall id is a good way to
236store that information; every time the sys_enter() handler is called,
237we simply increment a count associated with that hash entry indexed by
238that syscall id:
239
240----
241 syscalls = autodict()
242
243 try:
244 syscalls[id] += 1
245 except TypeError:
246 syscalls[id] = 1
247----
248
249The syscalls 'autodict' object is a special kind of Python dictionary
250(implemented in Core.py) that implements Perl's 'autovivifying' hashes
251in Python i.e. with autovivifying hashes, you can assign nested hash
252values without having to go to the trouble of creating intermediate
253levels if they don't exist, e.g. syscalls[comm][pid][id] = 1 will create
254the intermediate hash levels and finally assign the value 1 to the
255hash entry for 'id' (because the value being assigned isn't a hash
256object itself, the initial value is assigned in the TypeError
257exception handler; there may be a better way to do this in Python, but
258this works for now).
259
260Putting that code into the raw_syscalls__sys_enter() handler, we
261effectively end up with a single-level dictionary keyed on syscall id
262and having the counts we've tallied as values.
263
264The print_syscall_totals() function iterates over the entries in the
265dictionary and displays a line for each entry containing the syscall
266name (the dictionary keys contain the syscall ids, which are passed to
267the Util function syscall_name(), which translates the raw syscall
268numbers to the corresponding syscall name strings). The output is
269displayed after all the events in the trace have been processed, by
270calling the print_syscall_totals() function from the trace_end()
271handler called at the end of script processing.
272
273The final script producing the output shown above is shown in its
274entirety below (the syscall_name() helper is not yet available, so you
275can only deal with ids for now):
276
277----
278import os
279import sys
280
281sys.path.append(os.environ['PERF_EXEC_PATH'] + \
282 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
283
284from perf_trace_context import *
285from Core import *
286from Util import *
287
288syscalls = autodict()
289
290def trace_end():
291 print_syscall_totals()
292
293def raw_syscalls__sys_enter(event_name, context, common_cpu,
294 common_secs, common_nsecs, common_pid, common_comm,
295 id, args):
296 try:
297 syscalls[id] += 1
298 except TypeError:
299 syscalls[id] = 1
300
301def print_syscall_totals():
302 if for_comm is not None:
303 print "\nsyscall events for %s:\n\n" % (for_comm),
304 else:
305 print "\nsyscall events:\n\n",
306
307 print "%-40s %10s\n" % ("event", "count"),
308 print "%-40s %10s\n" % ("----------------------------------------", \
309 "-----------"),
310
311 for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \
312 reverse = True):
313 print "%-40s %10d\n" % (syscall_name(id), val),
314----
315
316The script can be run just as before:
317
318 # perf trace -s syscall-counts.py
319
320So those are the essential steps in writing and running a script. The
321process can be generalized to any tracepoint or set of tracepoints
322you're interested in - basically find the tracepoint(s) you're
323interested in by looking at the list of available events shown by
324'perf list' and/or look in /sys/kernel/debug/tracing/events for
325detailed event and field info, record the corresponding trace data
326using 'perf record', passing it the list of interesting events,
327generate a skeleton script using 'perf trace -g python' and modify the
328code to aggregate and display it for your particular needs.
329
330After you've done that you may end up with a general-purpose script
331that you want to keep around and have available for future use. By
332writing a couple of very simple shell scripts and putting them in the
333right place, you can have your script listed alongside the other
334scripts listed by the 'perf trace -l' command e.g.:
335
336----
337root@tropicana:~# perf trace -l
338List of available trace scripts:
339 workqueue-stats workqueue stats (ins/exe/create/destroy)
340 wakeup-latency system-wide min/max/avg wakeup latency
341 rw-by-file <comm> r/w activity for a program, by file
342 rw-by-pid system-wide r/w activity
343----
344
345A nice side effect of doing this is that you also then capture the
346probably lengthy 'perf record' command needed to record the events for
347the script.
348
349To have the script appear as a 'built-in' script, you write two simple
350scripts, one for recording and one for 'reporting'.
351
352The 'record' script is a shell script with the same base name as your
353script, but with -record appended. The shell script should be put
354into the perf/scripts/python/bin directory in the kernel source tree.
355In that script, you write the 'perf record' command-line needed for
356your script:
357
358----
359# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-record
360
361#!/bin/bash
362perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter
363----
364
365The 'report' script is also a shell script with the same base name as
366your script, but with -report appended. It should also be located in
367the perf/scripts/python/bin directory. In that script, you write the
368'perf trace -s' command-line needed for running your script:
369
370----
371# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-report
372
373#!/bin/bash
374# description: system-wide syscall counts
375perf trace -s ~/libexec/perf-core/scripts/python/syscall-counts.py
376----
377
378Note that the location of the Python script given in the shell script
379is in the libexec/perf-core/scripts/python directory - this is where
380the script will be copied by 'make install' when you install perf.
381For the installation to install your script there, your script needs
382to be located in the perf/scripts/python directory in the kernel
383source tree:
384
385----
386# ls -al kernel-source/tools/perf/scripts/python
387
388root@tropicana:/home/trz/src/tip# ls -al tools/perf/scripts/python
389total 32
390drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 .
391drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 ..
392drwxr-xr-x 2 trz trz 4096 2010-01-26 22:29 bin
393-rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-trace.py
394drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 Perf-Trace-Util
395-rw-r--r-- 1 trz trz 1462 2010-01-26 22:30 syscall-counts.py
396----
397
398Once you've done that (don't forget to do a new 'make install',
399otherwise your script won't show up at run-time), 'perf trace -l'
400should show a new entry for your script:
401
402----
403root@tropicana:~# perf trace -l
404List of available trace scripts:
405 workqueue-stats workqueue stats (ins/exe/create/destroy)
406 wakeup-latency system-wide min/max/avg wakeup latency
407 rw-by-file <comm> r/w activity for a program, by file
408 rw-by-pid system-wide r/w activity
409 syscall-counts system-wide syscall counts
410----
411
412You can now perform the record step via 'perf trace record':
413
414 # perf trace record syscall-counts
415
416and display the output using 'perf trace report':
417
418 # perf trace report syscall-counts
419
420STARTER SCRIPTS
421---------------
422
423You can quickly get started writing a script for a particular set of
424trace data by generating a skeleton script using 'perf trace -g
425python' in the same directory as an existing perf.data trace file.
426That will generate a starter script containing a handler for each of
427the event types in the trace file; it simply prints every available
428field for each event in the trace file.
429
430You can also look at the existing scripts in
431~/libexec/perf-core/scripts/python for typical examples showing how to
432do basic things like aggregate event data, print results, etc. Also,
433the check-perf-trace.py script, while not interesting for its results,
434attempts to exercise all of the main scripting features.
435
436EVENT HANDLERS
437--------------
438
439When perf trace is invoked using a trace script, a user-defined
440'handler function' is called for each event in the trace. If there's
441no handler function defined for a given event type, the event is
442ignored (or passed to a 'trace_unhandled' function, see below) and the
443next event is processed.
444
445Most of the event's field values are passed as arguments to the
446handler function; some of the less common ones aren't - those are
447available as calls back into the perf executable (see below).
448
449As an example, the following perf record command can be used to record
450all sched_wakeup events in the system:
451
452 # perf record -c 1 -f -a -M -R -e sched:sched_wakeup
453
454Traces meant to be processed using a script should be recorded with
455the above options: -c 1 says to sample every event, -a to enable
456system-wide collection, -M to multiplex the output, and -R to collect
457raw samples.
458
459The format file for the sched_wakeup event defines the following fields
460(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
461
462----
463 format:
464 field:unsigned short common_type;
465 field:unsigned char common_flags;
466 field:unsigned char common_preempt_count;
467 field:int common_pid;
468 field:int common_lock_depth;
469
470 field:char comm[TASK_COMM_LEN];
471 field:pid_t pid;
472 field:int prio;
473 field:int success;
474 field:int target_cpu;
475----
476
477The handler function for this event would be defined as:
478
479----
480def sched__sched_wakeup(event_name, context, common_cpu, common_secs,
481 common_nsecs, common_pid, common_comm,
482 comm, pid, prio, success, target_cpu):
483 pass
484----
485
486The handler function takes the form subsystem__event_name.
487
488The common_* arguments in the handler's argument list are the set of
489arguments passed to all event handlers; some of the fields correspond
490to the common_* fields in the format file, but some are synthesized,
491and some of the common_* fields aren't common enough to be passed
492to every event as arguments but are available as library functions.
493
494Here's a brief description of each of the invariant event args:
495
496 event_name the name of the event as text
497 context an opaque 'cookie' used in calls back into perf
498 common_cpu the cpu the event occurred on
499 common_secs the secs portion of the event timestamp
500 common_nsecs the nsecs portion of the event timestamp
501 common_pid the pid of the current task
502 common_comm the name of the current process
503
504All of the remaining fields in the event's format file have
505counterparts as handler function arguments of the same name, as can be
506seen in the example above.
507
508The above provides the basics needed to directly access every field of
509every event in a trace, which covers 90% of what you need to know to
510write a useful trace script. The sections below cover the rest.
511
512SCRIPT LAYOUT
513-------------
514
515Every perf trace Python script should start by setting up a Python
516module search path and 'import'ing a few support modules (see module
517descriptions below):
518
519----
520 import os
521 import sys
522
523 sys.path.append(os.environ['PERF_EXEC_PATH'] + \
524 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
525
526 from perf_trace_context import *
527 from Core import *
528----
529
530The rest of the script can contain handler functions and support
531functions in any order.
532
533Aside from the event handler functions discussed above, every script
534can implement a set of optional functions:
535
536*trace_begin*, if defined, is called before any event is processed and
537gives scripts a chance to do setup tasks:
538
539----
540def trace_begin():
541 pass
542----
543
544*trace_end*, if defined, is called after all events have been
545 processed and gives scripts a chance to do end-of-script tasks, such
546 as display results:
547
548----
549def trace_end():
550 pass
551----
552
553*trace_unhandled*, if defined, is called for any event that
554 doesn't have a handler explicitly defined for it. The standard set
555 of common arguments are passed into it:
556
557----
558def trace_unhandled(event_name, context, common_cpu, common_secs,
559 common_nsecs, common_pid, common_comm):
560 pass
561----
562
563The remaining sections provide descriptions of each of the available
564built-in perf trace Python modules and their associated functions.
565
566AVAILABLE MODULES AND FUNCTIONS
567-------------------------------
568
569The following sections describe the functions and variables available
570via the various perf trace Python modules. To use the functions and
571variables from the given module, add the corresponding 'from XXXX
572import' line to your perf trace script.
573
574Core.py Module
575~~~~~~~~~~~~~~
576
577These functions provide some essential functions to user scripts.
578
579The *flag_str* and *symbol_str* functions provide human-readable
580strings for flag and symbolic fields. These correspond to the strings
581and values parsed from the 'print fmt' fields of the event format
582files:
583
584 flag_str(event_name, field_name, field_value) - returns the string representation corresponding to field_value for the flag field field_name of event event_name
585 symbol_str(event_name, field_name, field_value) - returns the string representation corresponding to field_value for the symbolic field field_name of event event_name
586
587The *autodict* function returns a special kind of Python
588dictionary that implements Perl's 'autovivifying' hashes in Python
589i.e. with autovivifying hashes, you can assign nested hash values
590without having to go to the trouble of creating intermediate levels if
591they don't exist.
592
593 autodict() - returns an autovivifying dictionary instance
594
595
596perf_trace_context Module
597~~~~~~~~~~~~~~~~~~~~~~~~~
598
599Some of the 'common' fields in the event format file aren't all that
600common, but need to be made accessible to user scripts nonetheless.
601
602perf_trace_context defines a set of functions that can be used to
603access this data in the context of the current event. Each of these
604functions expects a context variable, which is the same as the
605context variable passed into every event handler as the second
606argument.
607
608 common_pc(context) - returns common_preempt_count for the current event
609 common_flags(context) - returns common_flags for the current event
610 common_lock_depth(context) - returns common_lock_depth for the current event
611
612Util.py Module
613~~~~~~~~~~~~~~
614
615Various utility functions for use with perf trace:
616
617 nsecs(secs, nsecs) - returns total nsecs given secs/nsecs pair
618 nsecs_secs(nsecs) - returns whole secs portion given nsecs
619 nsecs_nsecs(nsecs) - returns nsecs remainder given nsecs
620 nsecs_str(nsecs) - returns printable string in the form secs.nsecs
621 avg(total, n) - returns average given a sum and a total number of values
622
623SEE ALSO
624--------
625linkperf:perf-trace[1]
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 60e5900da483..8879299cd9df 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -19,6 +19,11 @@ There are several variants of perf trace:
19 'perf trace' to see a detailed trace of the workload that was 19 'perf trace' to see a detailed trace of the workload that was
20 recorded. 20 recorded.
21 21
22 You can also run a set of pre-canned scripts that aggregate and
23 summarize the raw trace data in various ways (the list of scripts is
24 available via 'perf trace -l'). The following variants allow you to
25 record and run those scripts:
26
22 'perf trace record <script>' to record the events required for 'perf 27 'perf trace record <script>' to record the events required for 'perf
23 trace report'. <script> is the name displayed in the output of 28 trace report'. <script> is the name displayed in the output of
24 'perf trace --list' i.e. the actual script name minus any language 29 'perf trace --list' i.e. the actual script name minus any language
@@ -31,6 +36,9 @@ There are several variants of perf trace:
31 record <script>' is used and should be present for this command to 36 record <script>' is used and should be present for this command to
32 succeed. 37 succeed.
33 38
39 See the 'SEE ALSO' section for links to language-specific
40 information on how to write and run your own trace scripts.
41
34OPTIONS 42OPTIONS
35------- 43-------
36-D:: 44-D::
@@ -45,9 +53,11 @@ OPTIONS
45--list=:: 53--list=::
46 Display a list of available trace scripts. 54 Display a list of available trace scripts.
47 55
48-s:: 56-s ['lang']::
49--script=:: 57--script=::
50 Process trace data with the given script ([lang]:script[.ext]). 58 Process trace data with the given script ([lang]:script[.ext]).
59 If the string 'lang' is specified in place of a script name, a
60 list of supported languages will be displayed instead.
51 61
52-g:: 62-g::
53--gen-script=:: 63--gen-script=::
@@ -56,4 +66,5 @@ OPTIONS
56 66
57SEE ALSO 67SEE ALSO
58-------- 68--------
59linkperf:perf-record[1], linkperf:perf-trace-perl[1] 69linkperf:perf-record[1], linkperf:perf-trace-perl[1],
70linkperf:perf-trace-python[1]
diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt
index 69c832557199..0eeb247dc7d2 100644
--- a/tools/perf/Documentation/perf.txt
+++ b/tools/perf/Documentation/perf.txt
@@ -12,7 +12,7 @@ SYNOPSIS
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
15Performance counters for Linux are are a new kernel-based subsystem 15Performance counters for Linux are a new kernel-based subsystem
16that provide a framework for all things performance analysis. It 16that provide a framework for all things performance analysis. It
17covers hardware level (CPU/PMU, Performance Monitoring Unit) features 17covers hardware level (CPU/PMU, Performance Monitoring Unit) features
18and software features (software counters, tracepoints) as well. 18and software features (software counters, tracepoints) as well.
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 2e7fa3a06806..54a5b50ff312 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -286,11 +286,7 @@ SCRIPT_PERL =
286SCRIPT_SH = 286SCRIPT_SH =
287TEST_PROGRAMS = 287TEST_PROGRAMS =
288 288
289# 289SCRIPT_SH += perf-archive.sh
290# No scripts right now:
291#
292
293# SCRIPT_SH += perf-am.sh
294 290
295# 291#
296# No Perl scripts right now: 292# No Perl scripts right now:
@@ -315,9 +311,6 @@ PROGRAMS += perf
315# List built-in command $C whose implementation cmd_$C() is not in 311# List built-in command $C whose implementation cmd_$C() is not in
316# builtin-$C.o but is linked in as part of some other command. 312# builtin-$C.o but is linked in as part of some other command.
317# 313#
318# None right now:
319#
320# BUILT_INS += perf-init $X
321 314
322# what 'all' will build and 'install' will install, in perfexecdir 315# what 'all' will build and 'install' will install, in perfexecdir
323ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) 316ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
@@ -340,6 +333,7 @@ LIB_FILE=libperf.a
340LIB_H += ../../include/linux/perf_event.h 333LIB_H += ../../include/linux/perf_event.h
341LIB_H += ../../include/linux/rbtree.h 334LIB_H += ../../include/linux/rbtree.h
342LIB_H += ../../include/linux/list.h 335LIB_H += ../../include/linux/list.h
336LIB_H += ../../include/linux/hash.h
343LIB_H += ../../include/linux/stringify.h 337LIB_H += ../../include/linux/stringify.h
344LIB_H += util/include/linux/bitmap.h 338LIB_H += util/include/linux/bitmap.h
345LIB_H += util/include/linux/bitops.h 339LIB_H += util/include/linux/bitops.h
@@ -363,12 +357,14 @@ LIB_H += util/include/asm/uaccess.h
363LIB_H += perf.h 357LIB_H += perf.h
364LIB_H += util/cache.h 358LIB_H += util/cache.h
365LIB_H += util/callchain.h 359LIB_H += util/callchain.h
360LIB_H += util/build-id.h
366LIB_H += util/debug.h 361LIB_H += util/debug.h
367LIB_H += util/debugfs.h 362LIB_H += util/debugfs.h
368LIB_H += util/event.h 363LIB_H += util/event.h
369LIB_H += util/exec_cmd.h 364LIB_H += util/exec_cmd.h
370LIB_H += util/types.h 365LIB_H += util/types.h
371LIB_H += util/levenshtein.h 366LIB_H += util/levenshtein.h
367LIB_H += util/map.h
372LIB_H += util/parse-options.h 368LIB_H += util/parse-options.h
373LIB_H += util/parse-events.h 369LIB_H += util/parse-events.h
374LIB_H += util/quote.h 370LIB_H += util/quote.h
@@ -389,12 +385,12 @@ LIB_H += util/sort.h
389LIB_H += util/hist.h 385LIB_H += util/hist.h
390LIB_H += util/thread.h 386LIB_H += util/thread.h
391LIB_H += util/trace-event.h 387LIB_H += util/trace-event.h
392LIB_H += util/trace-event-perl.h
393LIB_H += util/probe-finder.h 388LIB_H += util/probe-finder.h
394LIB_H += util/probe-event.h 389LIB_H += util/probe-event.h
395 390
396LIB_OBJS += util/abspath.o 391LIB_OBJS += util/abspath.o
397LIB_OBJS += util/alias.o 392LIB_OBJS += util/alias.o
393LIB_OBJS += util/build-id.o
398LIB_OBJS += util/config.o 394LIB_OBJS += util/config.o
399LIB_OBJS += util/ctype.o 395LIB_OBJS += util/ctype.o
400LIB_OBJS += util/debugfs.o 396LIB_OBJS += util/debugfs.o
@@ -431,12 +427,12 @@ LIB_OBJS += util/thread.o
431LIB_OBJS += util/trace-event-parse.o 427LIB_OBJS += util/trace-event-parse.o
432LIB_OBJS += util/trace-event-read.o 428LIB_OBJS += util/trace-event-read.o
433LIB_OBJS += util/trace-event-info.o 429LIB_OBJS += util/trace-event-info.o
434LIB_OBJS += util/trace-event-perl.o 430LIB_OBJS += util/trace-event-scripting.o
435LIB_OBJS += util/svghelper.o 431LIB_OBJS += util/svghelper.o
436LIB_OBJS += util/sort.o 432LIB_OBJS += util/sort.o
437LIB_OBJS += util/hist.o 433LIB_OBJS += util/hist.o
438LIB_OBJS += util/data_map.o
439LIB_OBJS += util/probe-event.o 434LIB_OBJS += util/probe-event.o
435LIB_OBJS += util/util.o
440 436
441BUILTIN_OBJS += builtin-annotate.o 437BUILTIN_OBJS += builtin-annotate.o
442 438
@@ -451,6 +447,7 @@ BUILTIN_OBJS += builtin-diff.o
451BUILTIN_OBJS += builtin-help.o 447BUILTIN_OBJS += builtin-help.o
452BUILTIN_OBJS += builtin-sched.o 448BUILTIN_OBJS += builtin-sched.o
453BUILTIN_OBJS += builtin-buildid-list.o 449BUILTIN_OBJS += builtin-buildid-list.o
450BUILTIN_OBJS += builtin-buildid-cache.o
454BUILTIN_OBJS += builtin-list.o 451BUILTIN_OBJS += builtin-list.o
455BUILTIN_OBJS += builtin-record.o 452BUILTIN_OBJS += builtin-record.o
456BUILTIN_OBJS += builtin-report.o 453BUILTIN_OBJS += builtin-report.o
@@ -460,6 +457,7 @@ BUILTIN_OBJS += builtin-top.o
460BUILTIN_OBJS += builtin-trace.o 457BUILTIN_OBJS += builtin-trace.o
461BUILTIN_OBJS += builtin-probe.o 458BUILTIN_OBJS += builtin-probe.o
462BUILTIN_OBJS += builtin-kmem.o 459BUILTIN_OBJS += builtin-kmem.o
460BUILTIN_OBJS += builtin-lock.o
463 461
464PERFLIBS = $(LIB_FILE) 462PERFLIBS = $(LIB_FILE)
465 463
@@ -520,9 +518,23 @@ ifneq ($(shell sh -c "(echo '\#include <EXTERN.h>'; echo '\#include <perl.h>'; e
520 BASIC_CFLAGS += -DNO_LIBPERL 518 BASIC_CFLAGS += -DNO_LIBPERL
521else 519else
522 ALL_LDFLAGS += $(PERL_EMBED_LDOPTS) 520 ALL_LDFLAGS += $(PERL_EMBED_LDOPTS)
521 LIB_OBJS += util/scripting-engines/trace-event-perl.o
523 LIB_OBJS += scripts/perl/Perf-Trace-Util/Context.o 522 LIB_OBJS += scripts/perl/Perf-Trace-Util/Context.o
524endif 523endif
525 524
525ifndef NO_LIBPYTHON
526PYTHON_EMBED_LDOPTS = `python-config --ldflags 2>/dev/null`
527PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null`
528endif
529
530ifneq ($(shell sh -c "(echo '\#include <Python.h>'; echo 'int main(void) { Py_Initialize(); return 0; }') | $(CC) -x c - $(PYTHON_EMBED_CCOPTS) -o /dev/null $(PYTHON_EMBED_LDOPTS) > /dev/null 2>&1 && echo y"), y)
531 BASIC_CFLAGS += -DNO_LIBPYTHON
532else
533 ALL_LDFLAGS += $(PYTHON_EMBED_LDOPTS)
534 LIB_OBJS += util/scripting-engines/trace-event-python.o
535 LIB_OBJS += scripts/python/Perf-Trace-Util/Context.o
536endif
537
526ifdef NO_DEMANGLE 538ifdef NO_DEMANGLE
527 BASIC_CFLAGS += -DNO_DEMANGLE 539 BASIC_CFLAGS += -DNO_DEMANGLE
528else 540else
@@ -894,12 +906,18 @@ util/hweight.o: ../../lib/hweight.c PERF-CFLAGS
894util/find_next_bit.o: ../../lib/find_next_bit.c PERF-CFLAGS 906util/find_next_bit.o: ../../lib/find_next_bit.c PERF-CFLAGS
895 $(QUIET_CC)$(CC) -o util/find_next_bit.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< 907 $(QUIET_CC)$(CC) -o util/find_next_bit.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
896 908
897util/trace-event-perl.o: util/trace-event-perl.c PERF-CFLAGS 909util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c PERF-CFLAGS
898 $(QUIET_CC)$(CC) -o util/trace-event-perl.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $< 910 $(QUIET_CC)$(CC) -o util/scripting-engines/trace-event-perl.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
899 911
900scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c PERF-CFLAGS 912scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c PERF-CFLAGS
901 $(QUIET_CC)$(CC) -o scripts/perl/Perf-Trace-Util/Context.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $< 913 $(QUIET_CC)$(CC) -o scripts/perl/Perf-Trace-Util/Context.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
902 914
915util/scripting-engines/trace-event-python.o: util/scripting-engines/trace-event-python.c PERF-CFLAGS
916 $(QUIET_CC)$(CC) -o util/scripting-engines/trace-event-python.o -c $(ALL_CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
917
918scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c PERF-CFLAGS
919 $(QUIET_CC)$(CC) -o scripts/python/Perf-Trace-Util/Context.o -c $(ALL_CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
920
903perf-%$X: %.o $(PERFLIBS) 921perf-%$X: %.o $(PERFLIBS)
904 $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) 922 $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
905 923
@@ -1009,9 +1027,16 @@ install: all
1009 $(INSTALL) perf$X '$(DESTDIR_SQ)$(bindir_SQ)' 1027 $(INSTALL) perf$X '$(DESTDIR_SQ)$(bindir_SQ)'
1010 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' 1028 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
1011 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' 1029 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
1030 $(INSTALL) perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
1012 $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' 1031 $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
1013 $(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl' 1032 $(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'
1014 $(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' 1033 $(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
1034 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'
1035 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
1036 $(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'
1037 $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'
1038 $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
1039
1015ifdef BUILT_INS 1040ifdef BUILT_INS
1016 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' 1041 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
1017 $(INSTALL) $(BUILT_INS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' 1042 $(INSTALL) $(BUILT_INS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 593ff25006de..5ec5de995872 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -53,32 +53,20 @@ struct sym_priv {
53 53
54static const char *sym_hist_filter; 54static const char *sym_hist_filter;
55 55
56static int symbol_filter(struct map *map __used, struct symbol *sym) 56static int sym__alloc_hist(struct symbol *self)
57{ 57{
58 if (sym_hist_filter == NULL || 58 struct sym_priv *priv = symbol__priv(self);
59 strcmp(sym->name, sym_hist_filter) == 0) { 59 const int size = (sizeof(*priv->hist) +
60 struct sym_priv *priv = symbol__priv(sym); 60 (self->end - self->start) * sizeof(u64));
61 const int size = (sizeof(*priv->hist) +
62 (sym->end - sym->start) * sizeof(u64));
63 61
64 priv->hist = malloc(size); 62 priv->hist = zalloc(size);
65 if (priv->hist) 63 return priv->hist == NULL ? -1 : 0;
66 memset(priv->hist, 0, size);
67 return 0;
68 }
69 /*
70 * FIXME: We should really filter it out, as we don't want to go thru symbols
71 * we're not interested, and if a DSO ends up with no symbols, delete it too,
72 * but right now the kernel loading routines in symbol.c bail out if no symbols
73 * are found, fix it later.
74 */
75 return 0;
76} 64}
77 65
78/* 66/*
79 * collect histogram counts 67 * collect histogram counts
80 */ 68 */
81static void hist_hit(struct hist_entry *he, u64 ip) 69static int annotate__hist_hit(struct hist_entry *he, u64 ip)
82{ 70{
83 unsigned int sym_size, offset; 71 unsigned int sym_size, offset;
84 struct symbol *sym = he->sym; 72 struct symbol *sym = he->sym;
@@ -88,83 +76,127 @@ static void hist_hit(struct hist_entry *he, u64 ip)
88 he->count++; 76 he->count++;
89 77
90 if (!sym || !he->map) 78 if (!sym || !he->map)
91 return; 79 return 0;
92 80
93 priv = symbol__priv(sym); 81 priv = symbol__priv(sym);
94 if (!priv->hist) 82 if (priv->hist == NULL && sym__alloc_hist(sym) < 0)
95 return; 83 return -ENOMEM;
96 84
97 sym_size = sym->end - sym->start; 85 sym_size = sym->end - sym->start;
98 offset = ip - sym->start; 86 offset = ip - sym->start;
99 87
100 if (verbose) 88 pr_debug3("%s: ip=%#Lx\n", __func__, he->map->unmap_ip(he->map, ip));
101 fprintf(stderr, "%s: ip=%Lx\n", __func__,
102 he->map->unmap_ip(he->map, ip));
103 89
104 if (offset >= sym_size) 90 if (offset >= sym_size)
105 return; 91 return 0;
106 92
107 h = priv->hist; 93 h = priv->hist;
108 h->sum++; 94 h->sum++;
109 h->ip[offset]++; 95 h->ip[offset]++;
110 96
111 if (verbose >= 3) 97 pr_debug3("%#Lx %s: count++ [ip: %#Lx, %#Lx] => %Ld\n", he->sym->start,
112 printf("%p %s: count++ [ip: %p, %08Lx] => %Ld\n", 98 he->sym->name, ip, ip - he->sym->start, h->ip[offset]);
113 (void *)(unsigned long)he->sym->start, 99 return 0;
114 he->sym->name,
115 (void *)(unsigned long)ip, ip - he->sym->start,
116 h->ip[offset]);
117} 100}
118 101
119static int perf_session__add_hist_entry(struct perf_session *self, 102static int perf_session__add_hist_entry(struct perf_session *self,
120 struct addr_location *al, u64 count) 103 struct addr_location *al, u64 count)
121{ 104{
122 bool hit; 105 bool hit;
123 struct hist_entry *he = __perf_session__add_hist_entry(self, al, NULL, 106 struct hist_entry *he;
124 count, &hit); 107
108 if (sym_hist_filter != NULL &&
109 (al->sym == NULL || strcmp(sym_hist_filter, al->sym->name) != 0)) {
110 /* We're only interested in a symbol named sym_hist_filter */
111 if (al->sym != NULL) {
112 rb_erase(&al->sym->rb_node,
113 &al->map->dso->symbols[al->map->type]);
114 symbol__delete(al->sym);
115 }
116 return 0;
117 }
118
119 he = __perf_session__add_hist_entry(self, al, NULL, count, &hit);
125 if (he == NULL) 120 if (he == NULL)
126 return -ENOMEM; 121 return -ENOMEM;
127 hist_hit(he, al->addr); 122
128 return 0; 123 return annotate__hist_hit(he, al->addr);
129} 124}
130 125
131static int process_sample_event(event_t *event, struct perf_session *session) 126static int process_sample_event(event_t *event, struct perf_session *session)
132{ 127{
133 struct addr_location al; 128 struct addr_location al;
134 129
135 dump_printf("(IP, %d): %d: %p\n", event->header.misc, 130 dump_printf("(IP, %d): %d: %#Lx\n", event->header.misc,
136 event->ip.pid, (void *)(long)event->ip.ip); 131 event->ip.pid, event->ip.ip);
137 132
138 if (event__preprocess_sample(event, session, &al, symbol_filter) < 0) { 133 if (event__preprocess_sample(event, session, &al, NULL) < 0) {
139 fprintf(stderr, "problem processing %d event, skipping it.\n", 134 pr_warning("problem processing %d event, skipping it.\n",
140 event->header.type); 135 event->header.type);
141 return -1; 136 return -1;
142 } 137 }
143 138
144 if (!al.filtered && perf_session__add_hist_entry(session, &al, 1)) { 139 if (!al.filtered && perf_session__add_hist_entry(session, &al, 1)) {
145 fprintf(stderr, "problem incrementing symbol count, " 140 pr_warning("problem incrementing symbol count, "
146 "skipping event\n"); 141 "skipping event\n");
147 return -1; 142 return -1;
148 } 143 }
149 144
150 return 0; 145 return 0;
151} 146}
152 147
153static int parse_line(FILE *file, struct hist_entry *he, u64 len) 148struct objdump_line {
149 struct list_head node;
150 s64 offset;
151 char *line;
152};
153
154static struct objdump_line *objdump_line__new(s64 offset, char *line)
155{
156 struct objdump_line *self = malloc(sizeof(*self));
157
158 if (self != NULL) {
159 self->offset = offset;
160 self->line = line;
161 }
162
163 return self;
164}
165
166static void objdump_line__free(struct objdump_line *self)
167{
168 free(self->line);
169 free(self);
170}
171
172static void objdump__add_line(struct list_head *head, struct objdump_line *line)
173{
174 list_add_tail(&line->node, head);
175}
176
177static struct objdump_line *objdump__get_next_ip_line(struct list_head *head,
178 struct objdump_line *pos)
179{
180 list_for_each_entry_continue(pos, head, node)
181 if (pos->offset >= 0)
182 return pos;
183
184 return NULL;
185}
186
187static int parse_line(FILE *file, struct hist_entry *he,
188 struct list_head *head)
154{ 189{
155 struct symbol *sym = he->sym; 190 struct symbol *sym = he->sym;
191 struct objdump_line *objdump_line;
156 char *line = NULL, *tmp, *tmp2; 192 char *line = NULL, *tmp, *tmp2;
157 static const char *prev_line;
158 static const char *prev_color;
159 unsigned int offset;
160 size_t line_len; 193 size_t line_len;
161 u64 start; 194 s64 line_ip, offset = -1;
162 s64 line_ip;
163 int ret;
164 char *c; 195 char *c;
165 196
166 if (getline(&line, &line_len, file) < 0) 197 if (getline(&line, &line_len, file) < 0)
167 return -1; 198 return -1;
199
168 if (!line) 200 if (!line)
169 return -1; 201 return -1;
170 202
@@ -173,8 +205,6 @@ static int parse_line(FILE *file, struct hist_entry *he, u64 len)
173 *c = 0; 205 *c = 0;
174 206
175 line_ip = -1; 207 line_ip = -1;
176 offset = 0;
177 ret = -2;
178 208
179 /* 209 /*
180 * Strip leading spaces: 210 * Strip leading spaces:
@@ -195,9 +225,30 @@ static int parse_line(FILE *file, struct hist_entry *he, u64 len)
195 line_ip = -1; 225 line_ip = -1;
196 } 226 }
197 227
198 start = he->map->unmap_ip(he->map, sym->start);
199
200 if (line_ip != -1) { 228 if (line_ip != -1) {
229 u64 start = map__rip_2objdump(he->map, sym->start);
230 offset = line_ip - start;
231 }
232
233 objdump_line = objdump_line__new(offset, line);
234 if (objdump_line == NULL) {
235 free(line);
236 return -1;
237 }
238 objdump__add_line(head, objdump_line);
239
240 return 0;
241}
242
243static int objdump_line__print(struct objdump_line *self,
244 struct list_head *head,
245 struct hist_entry *he, u64 len)
246{
247 struct symbol *sym = he->sym;
248 static const char *prev_line;
249 static const char *prev_color;
250
251 if (self->offset != -1) {
201 const char *path = NULL; 252 const char *path = NULL;
202 unsigned int hits = 0; 253 unsigned int hits = 0;
203 double percent = 0.0; 254 double percent = 0.0;
@@ -205,15 +256,22 @@ static int parse_line(FILE *file, struct hist_entry *he, u64 len)
205 struct sym_priv *priv = symbol__priv(sym); 256 struct sym_priv *priv = symbol__priv(sym);
206 struct sym_ext *sym_ext = priv->ext; 257 struct sym_ext *sym_ext = priv->ext;
207 struct sym_hist *h = priv->hist; 258 struct sym_hist *h = priv->hist;
259 s64 offset = self->offset;
260 struct objdump_line *next = objdump__get_next_ip_line(head, self);
261
262 while (offset < (s64)len &&
263 (next == NULL || offset < next->offset)) {
264 if (sym_ext) {
265 if (path == NULL)
266 path = sym_ext[offset].path;
267 percent += sym_ext[offset].percent;
268 } else
269 hits += h->ip[offset];
270
271 ++offset;
272 }
208 273
209 offset = line_ip - start; 274 if (sym_ext == NULL && h->sum)
210 if (offset < len)
211 hits = h->ip[offset];
212
213 if (offset < len && sym_ext) {
214 path = sym_ext[offset].path;
215 percent = sym_ext[offset].percent;
216 } else if (h->sum)
217 percent = 100.0 * hits / h->sum; 275 percent = 100.0 * hits / h->sum;
218 276
219 color = get_percent_color(percent); 277 color = get_percent_color(percent);
@@ -234,12 +292,12 @@ static int parse_line(FILE *file, struct hist_entry *he, u64 len)
234 292
235 color_fprintf(stdout, color, " %7.2f", percent); 293 color_fprintf(stdout, color, " %7.2f", percent);
236 printf(" : "); 294 printf(" : ");
237 color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", line); 295 color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", self->line);
238 } else { 296 } else {
239 if (!*line) 297 if (!*self->line)
240 printf(" :\n"); 298 printf(" :\n");
241 else 299 else
242 printf(" : %s\n", line); 300 printf(" : %s\n", self->line);
243 } 301 }
244 302
245 return 0; 303 return 0;
@@ -365,6 +423,20 @@ static void print_summary(const char *filename)
365 } 423 }
366} 424}
367 425
426static void hist_entry__print_hits(struct hist_entry *self)
427{
428 struct symbol *sym = self->sym;
429 struct sym_priv *priv = symbol__priv(sym);
430 struct sym_hist *h = priv->hist;
431 u64 len = sym->end - sym->start, offset;
432
433 for (offset = 0; offset < len; ++offset)
434 if (h->ip[offset] != 0)
435 printf("%*Lx: %Lu\n", BITS_PER_LONG / 2,
436 sym->start + offset, h->ip[offset]);
437 printf("%*s: %Lu\n", BITS_PER_LONG / 2, "h->sum", h->sum);
438}
439
368static void annotate_sym(struct hist_entry *he) 440static void annotate_sym(struct hist_entry *he)
369{ 441{
370 struct map *map = he->map; 442 struct map *map = he->map;
@@ -374,15 +446,15 @@ static void annotate_sym(struct hist_entry *he)
374 u64 len; 446 u64 len;
375 char command[PATH_MAX*2]; 447 char command[PATH_MAX*2];
376 FILE *file; 448 FILE *file;
449 LIST_HEAD(head);
450 struct objdump_line *pos, *n;
377 451
378 if (!filename) 452 if (!filename)
379 return; 453 return;
380 454
381 if (verbose) 455 pr_debug("%s: filename=%s, sym=%s, start=%#Lx, end=%#Lx\n", __func__,
382 fprintf(stderr, "%s: filename=%s, sym=%s, start=%Lx, end=%Lx\n", 456 filename, sym->name, map->unmap_ip(map, sym->start),
383 __func__, filename, sym->name, 457 map->unmap_ip(map, sym->end));
384 map->unmap_ip(map, sym->start),
385 map->unmap_ip(map, sym->end));
386 458
387 if (full_paths) 459 if (full_paths)
388 d_filename = filename; 460 d_filename = filename;
@@ -405,7 +477,8 @@ static void annotate_sym(struct hist_entry *he)
405 dso, dso->long_name, sym, sym->name); 477 dso, dso->long_name, sym, sym->name);
406 478
407 sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s|grep -v %s", 479 sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s|grep -v %s",
408 map->unmap_ip(map, sym->start), map->unmap_ip(map, sym->end), 480 map__rip_2objdump(map, sym->start),
481 map__rip_2objdump(map, sym->end),
409 filename, filename); 482 filename, filename);
410 483
411 if (verbose >= 3) 484 if (verbose >= 3)
@@ -416,11 +489,21 @@ static void annotate_sym(struct hist_entry *he)
416 return; 489 return;
417 490
418 while (!feof(file)) { 491 while (!feof(file)) {
419 if (parse_line(file, he, len) < 0) 492 if (parse_line(file, he, &head) < 0)
420 break; 493 break;
421 } 494 }
422 495
423 pclose(file); 496 pclose(file);
497
498 if (verbose)
499 hist_entry__print_hits(he);
500
501 list_for_each_entry_safe(pos, n, &head, node) {
502 objdump_line__print(pos, &head, he, len);
503 list_del(&pos->node);
504 objdump_line__free(pos);
505 }
506
424 if (print_line) 507 if (print_line)
425 free_source_line(he, len); 508 free_source_line(he, len);
426} 509}
@@ -451,10 +534,10 @@ static void perf_session__find_annotations(struct perf_session *self)
451} 534}
452 535
453static struct perf_event_ops event_ops = { 536static struct perf_event_ops event_ops = {
454 .process_sample_event = process_sample_event, 537 .sample = process_sample_event,
455 .process_mmap_event = event__process_mmap, 538 .mmap = event__process_mmap,
456 .process_comm_event = event__process_comm, 539 .comm = event__process_comm,
457 .process_fork_event = event__process_task, 540 .fork = event__process_task,
458}; 541};
459 542
460static int __cmd_annotate(void) 543static int __cmd_annotate(void)
@@ -542,9 +625,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used)
542 setup_pager(); 625 setup_pager();
543 626
544 if (field_sep && *field_sep == '.') { 627 if (field_sep && *field_sep == '.') {
545 fputs("'.' is the only non valid --field-separator argument\n", 628 pr_err("'.' is the only non valid --field-separator argument\n");
546 stderr); 629 return -1;
547 exit(129);
548 } 630 }
549 631
550 return __cmd_annotate(); 632 return __cmd_annotate();
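
The builtin-annotate.c changes above replace parse-and-print-per-line with a two-pass scheme: objdump output is first buffered as a list of (offset, text) entries, then each line that carries an address is charged the hits falling between its offset and the next line that has one. A minimal standalone sketch of that accounting (plain C; struct line, hits[] and the sample disassembly are illustrative, not part of the patch):

#include <stdio.h>

struct line { long offset; const char *text; };	/* offset < 0: no address on this line */

static long next_ip_offset(const struct line *l, size_t i, size_t n)
{
	for (i = i + 1; i < n; i++)
		if (l[i].offset >= 0)
			return l[i].offset;
	return -1;
}

int main(void)
{
	unsigned int hits[8] = { 0, 3, 0, 5, 1, 0, 0, 2 };	/* per-offset histogram */
	struct line lines[] = {
		{ -1, "<foo>:"           },
		{  0, "push   %rbp"      },
		{  3, "mov    %rdi,%rax" },
		{  6, "retq"             },
	};
	size_t i, n = sizeof(lines) / sizeof(lines[0]);

	for (i = 0; i < n; i++) {
		unsigned int sum = 0;
		long off = lines[i].offset;

		if (off >= 0) {
			long end = next_ip_offset(lines, i, n);

			if (end < 0 || end > 8)
				end = 8;	/* last line: run to the symbol end */
			for (; off < end; off++)
				sum += hits[off];
		}
		printf("%6u : %s\n", sum, lines[i].text);
	}
	return 0;
}

Buffering first is what lets objdump_line__print() look ahead with objdump__get_next_ip_line() instead of assuming one instruction per printed line.
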
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
new file mode 100644
index 000000000000..30a05f552c96
--- /dev/null
+++ b/tools/perf/builtin-buildid-cache.c
@@ -0,0 +1,133 @@
1/*
2 * builtin-buildid-cache.c
3 *
4 * Builtin buildid-cache command: Manages build-id cache
5 *
6 * Copyright (C) 2010, Red Hat Inc.
7 * Copyright (C) 2010, Arnaldo Carvalho de Melo <acme@redhat.com>
8 */
9#include "builtin.h"
10#include "perf.h"
11#include "util/cache.h"
12#include "util/debug.h"
13#include "util/header.h"
14#include "util/parse-options.h"
15#include "util/strlist.h"
16#include "util/symbol.h"
17
18static char const *add_name_list_str, *remove_name_list_str;
19
20static const char * const buildid_cache_usage[] = {
21 "perf buildid-cache [<options>]",
22 NULL
23};
24
25static const struct option buildid_cache_options[] = {
26 OPT_STRING('a', "add", &add_name_list_str,
27 "file list", "file(s) to add"),
28 OPT_STRING('r', "remove", &remove_name_list_str, "file list",
29 "file(s) to remove"),
30 OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose"),
31 OPT_END()
32};
33
34static int build_id_cache__add_file(const char *filename, const char *debugdir)
35{
36 char sbuild_id[BUILD_ID_SIZE * 2 + 1];
37 u8 build_id[BUILD_ID_SIZE];
38 int err;
39
40 if (filename__read_build_id(filename, &build_id, sizeof(build_id)) < 0) {
41 pr_debug("Couldn't read a build-id in %s\n", filename);
42 return -1;
43 }
44
45 build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
46 err = build_id_cache__add_s(sbuild_id, debugdir, filename, false);
47 if (verbose)
48 pr_info("Adding %s %s: %s\n", sbuild_id, filename,
49 err ? "FAIL" : "Ok");
50 return err;
51}
52
53static int build_id_cache__remove_file(const char *filename,
54 const char *debugdir)
55{
56 u8 build_id[BUILD_ID_SIZE];
57 char sbuild_id[BUILD_ID_SIZE * 2 + 1];
58
59 int err;
60
61 if (filename__read_build_id(filename, &build_id, sizeof(build_id)) < 0) {
62 pr_debug("Couldn't read a build-id in %s\n", filename);
63 return -1;
64 }
65
66 build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
67 err = build_id_cache__remove_s(sbuild_id, debugdir);
68 if (verbose)
69 pr_info("Removing %s %s: %s\n", sbuild_id, filename,
70 err ? "FAIL" : "Ok");
71
72 return err;
73}
74
75static int __cmd_buildid_cache(void)
76{
77 struct strlist *list;
78 struct str_node *pos;
79 char debugdir[PATH_MAX];
80
81 snprintf(debugdir, sizeof(debugdir), "%s/%s", getenv("HOME"),
82 DEBUG_CACHE_DIR);
83
84 if (add_name_list_str) {
85 list = strlist__new(true, add_name_list_str);
86 if (list) {
87 strlist__for_each(pos, list)
88 if (build_id_cache__add_file(pos->s, debugdir)) {
89 if (errno == EEXIST) {
90 pr_debug("%s already in the cache\n",
91 pos->s);
92 continue;
93 }
94 pr_warning("Couldn't add %s: %s\n",
95 pos->s, strerror(errno));
96 }
97
98 strlist__delete(list);
99 }
100 }
101
102 if (remove_name_list_str) {
103 list = strlist__new(true, remove_name_list_str);
104 if (list) {
105 strlist__for_each(pos, list)
106 if (build_id_cache__remove_file(pos->s, debugdir)) {
107 if (errno == ENOENT) {
108 pr_debug("%s wasn't in the cache\n",
109 pos->s);
110 continue;
111 }
112 pr_warning("Couldn't remove %s: %s\n",
113 pos->s, strerror(errno));
114 }
115
116 strlist__delete(list);
117 }
118 }
119
120 return 0;
121}
122
123int cmd_buildid_cache(int argc, const char **argv, const char *prefix __used)
124{
125 argc = parse_options(argc, argv, buildid_cache_options,
126 buildid_cache_usage, 0);
127
128 if (symbol__init() < 0)
129 return -1;
130
131 setup_pager();
132 return __cmd_buildid_cache();
133}
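
The new command above boils down to: read the build-id note out of an ELF file, render it as hex, and use that string as the key under ~/.debug. A self-contained sketch of the hex-formatting step (build_id_to_hex() is an illustrative stand-in for perf's build_id__sprintf(), not its real signature):

#include <stdio.h>

#define BUILD_ID_SIZE 20

static void build_id_to_hex(const unsigned char *bid, size_t len, char *out)
{
	size_t i;

	for (i = 0; i < len; i++)
		sprintf(out + i * 2, "%02x", (unsigned int)bid[i]);	/* two hex chars per byte */
	out[len * 2] = '\0';
}

int main(void)
{
	unsigned char bid[BUILD_ID_SIZE] = { 0xde, 0xad, 0xbe, 0xef };
	char sbuild_id[BUILD_ID_SIZE * 2 + 1];

	build_id_to_hex(bid, sizeof(bid), sbuild_id);
	printf("%s\n", sbuild_id);	/* "deadbeef" followed by zeros, 40 chars */
	return 0;
}

This is why the sbuild_id buffers in the patch are sized BUILD_ID_SIZE * 2 + 1.
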
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index 1e99ac806913..d0675c02f81e 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -8,6 +8,7 @@
8 */ 8 */
9#include "builtin.h" 9#include "builtin.h"
10#include "perf.h" 10#include "perf.h"
11#include "util/build-id.h"
11#include "util/cache.h" 12#include "util/cache.h"
12#include "util/debug.h" 13#include "util/debug.h"
13#include "util/parse-options.h" 14#include "util/parse-options.h"
@@ -16,6 +17,7 @@
16 17
17static char const *input_name = "perf.data"; 18static char const *input_name = "perf.data";
18static int force; 19static int force;
20static bool with_hits;
19 21
20static const char * const buildid_list_usage[] = { 22static const char * const buildid_list_usage[] = {
21 "perf buildid-list [<options>]", 23 "perf buildid-list [<options>]",
@@ -23,6 +25,7 @@ static const char * const buildid_list_usage[] = {
23}; 25};
24 26
25static const struct option options[] = { 27static const struct option options[] = {
28 OPT_BOOLEAN('H', "with-hits", &with_hits, "Show only DSOs with hits"),
26 OPT_STRING('i', "input", &input_name, "file", 29 OPT_STRING('i', "input", &input_name, "file",
27 "input file name"), 30 "input file name"),
28 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), 31 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
@@ -31,26 +34,6 @@ static const struct option options[] = {
31 OPT_END() 34 OPT_END()
32}; 35};
33 36
34static int perf_file_section__process_buildids(struct perf_file_section *self,
35 int feat, int fd)
36{
37 if (feat != HEADER_BUILD_ID)
38 return 0;
39
40 if (lseek(fd, self->offset, SEEK_SET) < 0) {
41 pr_warning("Failed to lseek to %Ld offset for buildids!\n",
42 self->offset);
43 return -1;
44 }
45
46 if (perf_header__read_build_ids(fd, self->offset, self->size)) {
47 pr_warning("Failed to read buildids!\n");
48 return -1;
49 }
50
51 return 0;
52}
53
54static int __cmd_buildid_list(void) 37static int __cmd_buildid_list(void)
55{ 38{
56 int err = -1; 39 int err = -1;
@@ -60,10 +43,10 @@ static int __cmd_buildid_list(void)
60 if (session == NULL) 43 if (session == NULL)
61 return -1; 44 return -1;
62 45
63 err = perf_header__process_sections(&session->header, session->fd, 46 if (with_hits)
64 perf_file_section__process_buildids); 47 perf_session__process_events(session, &build_id__mark_dso_hit_ops);
65 if (err >= 0) 48
66 dsos__fprintf_buildid(stdout); 49 dsos__fprintf_buildid(stdout, with_hits);
67 50
68 perf_session__delete(session); 51 perf_session__delete(session);
69 return err; 52 return err;
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index bd71b8ceafb7..18b3f505f9db 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -42,8 +42,8 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi
42 struct addr_location al; 42 struct addr_location al;
43 struct sample_data data = { .period = 1, }; 43 struct sample_data data = { .period = 1, };
44 44
45 dump_printf("(IP, %d): %d: %p\n", event->header.misc, 45 dump_printf("(IP, %d): %d: %#Lx\n", event->header.misc,
46 event->ip.pid, (void *)(long)event->ip.ip); 46 event->ip.pid, event->ip.ip);
47 47
48 if (event__preprocess_sample(event, session, &al, NULL) < 0) { 48 if (event__preprocess_sample(event, session, &al, NULL) < 0) {
49 pr_warning("problem processing %d event, skipping it.\n", 49 pr_warning("problem processing %d event, skipping it.\n",
@@ -51,12 +51,12 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi
51 return -1; 51 return -1;
52 } 52 }
53 53
54 if (al.filtered) 54 if (al.filtered || al.sym == NULL)
55 return 0; 55 return 0;
56 56
57 event__parse_sample(event, session->sample_type, &data); 57 event__parse_sample(event, session->sample_type, &data);
58 58
59 if (al.sym && perf_session__add_hist_entry(session, &al, data.period)) { 59 if (perf_session__add_hist_entry(session, &al, data.period)) {
60 pr_warning("problem incrementing symbol count, skipping event\n"); 60 pr_warning("problem incrementing symbol count, skipping event\n");
61 return -1; 61 return -1;
62 } 62 }
@@ -66,12 +66,12 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi
66} 66}
67 67
68static struct perf_event_ops event_ops = { 68static struct perf_event_ops event_ops = {
69 .process_sample_event = diff__process_sample_event, 69 .sample = diff__process_sample_event,
70 .process_mmap_event = event__process_mmap, 70 .mmap = event__process_mmap,
71 .process_comm_event = event__process_comm, 71 .comm = event__process_comm,
72 .process_exit_event = event__process_task, 72 .exit = event__process_task,
73 .process_fork_event = event__process_task, 73 .fork = event__process_task,
74 .process_lost_event = event__process_lost, 74 .lost = event__process_lost,
75}; 75};
76 76
77static void perf_session__insert_hist_entry_by_name(struct rb_root *root, 77static void perf_session__insert_hist_entry_by_name(struct rb_root *root,
@@ -82,29 +82,19 @@ static void perf_session__insert_hist_entry_by_name(struct rb_root *root,
82 struct hist_entry *iter; 82 struct hist_entry *iter;
83 83
84 while (*p != NULL) { 84 while (*p != NULL) {
85 int cmp;
86 parent = *p; 85 parent = *p;
87 iter = rb_entry(parent, struct hist_entry, rb_node); 86 iter = rb_entry(parent, struct hist_entry, rb_node);
88 87 if (hist_entry__cmp(he, iter) < 0)
89 cmp = strcmp(he->map->dso->name, iter->map->dso->name);
90 if (cmp > 0)
91 p = &(*p)->rb_left; 88 p = &(*p)->rb_left;
92 else if (cmp < 0) 89 else
93 p = &(*p)->rb_right; 90 p = &(*p)->rb_right;
94 else {
95 cmp = strcmp(he->sym->name, iter->sym->name);
96 if (cmp > 0)
97 p = &(*p)->rb_left;
98 else
99 p = &(*p)->rb_right;
100 }
101 } 91 }
102 92
103 rb_link_node(&he->rb_node, parent, p); 93 rb_link_node(&he->rb_node, parent, p);
104 rb_insert_color(&he->rb_node, root); 94 rb_insert_color(&he->rb_node, root);
105} 95}
106 96
107static void perf_session__resort_by_name(struct perf_session *self) 97static void perf_session__resort_hist_entries(struct perf_session *self)
108{ 98{
109 unsigned long position = 1; 99 unsigned long position = 1;
110 struct rb_root tmp = RB_ROOT; 100 struct rb_root tmp = RB_ROOT;
@@ -122,29 +112,28 @@ static void perf_session__resort_by_name(struct perf_session *self)
122 self->hists = tmp; 112 self->hists = tmp;
123} 113}
124 114
115static void perf_session__set_hist_entries_positions(struct perf_session *self)
116{
117 perf_session__output_resort(self, self->events_stats.total);
118 perf_session__resort_hist_entries(self);
119}
120
125static struct hist_entry * 121static struct hist_entry *
126perf_session__find_hist_entry_by_name(struct perf_session *self, 122perf_session__find_hist_entry(struct perf_session *self,
127 struct hist_entry *he) 123 struct hist_entry *he)
128{ 124{
129 struct rb_node *n = self->hists.rb_node; 125 struct rb_node *n = self->hists.rb_node;
130 126
131 while (n) { 127 while (n) {
132 struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node); 128 struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node);
133 int cmp = strcmp(he->map->dso->name, iter->map->dso->name); 129 int64_t cmp = hist_entry__cmp(he, iter);
134 130
135 if (cmp > 0) 131 if (cmp < 0)
136 n = n->rb_left; 132 n = n->rb_left;
137 else if (cmp < 0) 133 else if (cmp > 0)
138 n = n->rb_right; 134 n = n->rb_right;
139 else { 135 else
140 cmp = strcmp(he->sym->name, iter->sym->name); 136 return iter;
141 if (cmp > 0)
142 n = n->rb_left;
143 else if (cmp < 0)
144 n = n->rb_right;
145 else
146 return iter;
147 }
148 } 137 }
149 138
150 return NULL; 139 return NULL;
@@ -155,11 +144,9 @@ static void perf_session__match_hists(struct perf_session *old_session,
155{ 144{
156 struct rb_node *nd; 145 struct rb_node *nd;
157 146
158 perf_session__resort_by_name(old_session);
159
160 for (nd = rb_first(&new_session->hists); nd; nd = rb_next(nd)) { 147 for (nd = rb_first(&new_session->hists); nd; nd = rb_next(nd)) {
161 struct hist_entry *pos = rb_entry(nd, struct hist_entry, rb_node); 148 struct hist_entry *pos = rb_entry(nd, struct hist_entry, rb_node);
162 pos->pair = perf_session__find_hist_entry_by_name(old_session, pos); 149 pos->pair = perf_session__find_hist_entry(old_session, pos);
163 } 150 }
164} 151}
165 152
@@ -177,9 +164,12 @@ static int __cmd_diff(void)
177 ret = perf_session__process_events(session[i], &event_ops); 164 ret = perf_session__process_events(session[i], &event_ops);
178 if (ret) 165 if (ret)
179 goto out_delete; 166 goto out_delete;
180 perf_session__output_resort(session[i], session[i]->events_stats.total);
181 } 167 }
182 168
169 perf_session__output_resort(session[1], session[1]->events_stats.total);
170 if (show_displacement)
171 perf_session__set_hist_entries_positions(session[0]);
172
183 perf_session__match_hists(session[0], session[1]); 173 perf_session__match_hists(session[0], session[1]);
184 perf_session__fprintf_hists(session[1], session[0], 174 perf_session__fprintf_hists(session[1], session[0],
185 show_displacement, stdout); 175 show_displacement, stdout);
@@ -204,7 +194,7 @@ static const struct option options[] = {
204 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), 194 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
205 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, 195 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
206 "load module symbols - WARNING: use only with -k and LIVE kernel"), 196 "load module symbols - WARNING: use only with -k and LIVE kernel"),
207 OPT_BOOLEAN('P', "full-paths", &event_ops.full_paths, 197 OPT_BOOLEAN('P', "full-paths", &symbol_conf.full_paths,
208 "Don't shorten the pathnames taking into account the cwd"), 198 "Don't shorten the pathnames taking into account the cwd"),
209 OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", 199 OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
210 "only consider symbols in these dsos"), 200 "only consider symbols in these dsos"),
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index 9f810b17c25c..215b584007b1 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -286,8 +286,7 @@ void list_common_cmds_help(void)
286 286
287 puts(" The most commonly used perf commands are:"); 287 puts(" The most commonly used perf commands are:");
288 for (i = 0; i < ARRAY_SIZE(common_cmds); i++) { 288 for (i = 0; i < ARRAY_SIZE(common_cmds); i++) {
289 printf(" %s ", common_cmds[i].name); 289 printf(" %-*s ", longest, common_cmds[i].name);
290 mput_char(' ', longest - strlen(common_cmds[i].name));
291 puts(common_cmds[i].help); 290 puts(common_cmds[i].help);
292 } 291 }
293} 292}
@@ -314,8 +313,6 @@ static const char *cmd_to_page(const char *perf_cmd)
314 return "perf"; 313 return "perf";
315 else if (!prefixcmp(perf_cmd, "perf")) 314 else if (!prefixcmp(perf_cmd, "perf"))
316 return perf_cmd; 315 return perf_cmd;
317 else if (is_perf_command(perf_cmd))
318 return prepend("perf-", perf_cmd);
319 else 316 else
320 return prepend("perf-", perf_cmd); 317 return prepend("perf-", perf_cmd);
321} 318}
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 93c67bf53d2c..924a9518931a 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -92,23 +92,18 @@ static void setup_cpunode_map(void)
92 if (!dir1) 92 if (!dir1)
93 return; 93 return;
94 94
95 while (true) { 95 while ((dent1 = readdir(dir1)) != NULL) {
96 dent1 = readdir(dir1); 96 if (dent1->d_type != DT_DIR ||
97 if (!dent1) 97 sscanf(dent1->d_name, "node%u", &mem) < 1)
98 break;
99
100 if (sscanf(dent1->d_name, "node%u", &mem) < 1)
101 continue; 98 continue;
102 99
103 snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name); 100 snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
104 dir2 = opendir(buf); 101 dir2 = opendir(buf);
105 if (!dir2) 102 if (!dir2)
106 continue; 103 continue;
107 while (true) { 104 while ((dent2 = readdir(dir2)) != NULL) {
108 dent2 = readdir(dir2); 105 if (dent2->d_type != DT_LNK ||
109 if (!dent2) 106 sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
110 break;
111 if (sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
112 continue; 107 continue;
113 cpunode_map[cpu] = mem; 108 cpunode_map[cpu] = mem;
114 } 109 }
@@ -321,11 +316,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
321 316
322 event__parse_sample(event, session->sample_type, &data); 317 event__parse_sample(event, session->sample_type, &data);
323 318
324 dump_printf("(IP, %d): %d/%d: %p period: %Ld\n", 319 dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
325 event->header.misc, 320 data.pid, data.tid, data.ip, data.period);
326 data.pid, data.tid,
327 (void *)(long)data.ip,
328 (long long)data.period);
329 321
330 thread = perf_session__findnew(session, event->ip.pid); 322 thread = perf_session__findnew(session, event->ip.pid);
331 if (thread == NULL) { 323 if (thread == NULL) {
@@ -342,22 +334,9 @@ static int process_sample_event(event_t *event, struct perf_session *session)
342 return 0; 334 return 0;
343} 335}
344 336
345static int sample_type_check(struct perf_session *session)
346{
347 if (!(session->sample_type & PERF_SAMPLE_RAW)) {
348 fprintf(stderr,
349 "No trace sample to read. Did you call perf record "
350 "without -R?");
351 return -1;
352 }
353
354 return 0;
355}
356
357static struct perf_event_ops event_ops = { 337static struct perf_event_ops event_ops = {
358 .process_sample_event = process_sample_event, 338 .sample = process_sample_event,
359 .process_comm_event = event__process_comm, 339 .comm = event__process_comm,
360 .sample_type_check = sample_type_check,
361}; 340};
362 341
363static double fragmentation(unsigned long n_req, unsigned long n_alloc) 342static double fragmentation(unsigned long n_req, unsigned long n_alloc)
@@ -390,7 +369,7 @@ static void __print_result(struct rb_root *root, struct perf_session *session,
390 if (is_caller) { 369 if (is_caller) {
391 addr = data->call_site; 370 addr = data->call_site;
392 if (!raw_ip) 371 if (!raw_ip)
393 sym = map_groups__find_function(&session->kmaps, session, addr, NULL); 372 sym = map_groups__find_function(&session->kmaps, addr, NULL);
394 } else 373 } else
395 addr = data->ptr; 374 addr = data->ptr;
396 375
@@ -504,11 +483,14 @@ static void sort_result(void)
504 483
505static int __cmd_kmem(void) 484static int __cmd_kmem(void)
506{ 485{
507 int err; 486 int err = -EINVAL;
508 struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0); 487 struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0);
509 if (session == NULL) 488 if (session == NULL)
510 return -ENOMEM; 489 return -ENOMEM;
511 490
491 if (!perf_session__has_traces(session, "kmem record"))
492 goto out_delete;
493
512 setup_pager(); 494 setup_pager();
513 err = perf_session__process_events(session, &event_ops); 495 err = perf_session__process_events(session, &event_ops);
514 if (err != 0) 496 if (err != 0)
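
Earlier in this file, setup_cpunode_map() is tightened so the readdir() result is consumed directly in the loop condition and non-matching entries are filtered by d_type before the sscanf(). A standalone sketch of that pattern against the real sysfs node directory (error handling kept minimal):

#include <stdio.h>
#include <dirent.h>

int main(void)
{
	DIR *dir = opendir("/sys/devices/system/node");
	struct dirent *dent;
	unsigned int node;

	if (!dir)
		return 1;

	while ((dent = readdir(dir)) != NULL) {
		if (dent->d_type != DT_DIR ||
		    sscanf(dent->d_name, "node%u", &node) < 1)
			continue;	/* skip files and entries that are not node<N> */
		printf("found node%u\n", node);
	}
	closedir(dir);
	return 0;
}
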
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
new file mode 100644
index 000000000000..fb9ab2ad3f92
--- /dev/null
+++ b/tools/perf/builtin-lock.c
@@ -0,0 +1,678 @@
1#include "builtin.h"
2#include "perf.h"
3
4#include "util/util.h"
5#include "util/cache.h"
6#include "util/symbol.h"
7#include "util/thread.h"
8#include "util/header.h"
9
10#include "util/parse-options.h"
11#include "util/trace-event.h"
12
13#include "util/debug.h"
14#include "util/session.h"
15
16#include <sys/types.h>
17#include <sys/prctl.h>
18#include <semaphore.h>
19#include <pthread.h>
20#include <math.h>
21#include <limits.h>
22
23#include <linux/list.h>
24#include <linux/hash.h>
25
26/* based on kernel/lockdep.c */
27#define LOCKHASH_BITS 12
28#define LOCKHASH_SIZE (1UL << LOCKHASH_BITS)
29
30static struct list_head lockhash_table[LOCKHASH_SIZE];
31
32#define __lockhashfn(key) hash_long((unsigned long)key, LOCKHASH_BITS)
33#define lockhashentry(key) (lockhash_table + __lockhashfn((key)))
34
35#define LOCK_STATE_UNLOCKED 0 /* initial state */
36#define LOCK_STATE_LOCKED 1
37
38struct lock_stat {
39 struct list_head hash_entry;
40 struct rb_node rb; /* used for sorting */
41
42 /*
43 * FIXME: raw_field_value() returns unsigned long long,
44 * so the address of lockdep_map has to be handled as a 64 bit value.
45 * Is there a better solution?
46 */
47 void *addr; /* address of lockdep_map, used as ID */
48 char *name; /* for strcpy(), we cannot use const */
49
50 int state;
51 u64 prev_event_time; /* timestamp of previous event */
52
53 unsigned int nr_acquired;
54 unsigned int nr_acquire;
55 unsigned int nr_contended;
56 unsigned int nr_release;
57
58 /* these times are in nano sec. */
59 u64 wait_time_total;
60 u64 wait_time_min;
61 u64 wait_time_max;
62};
63
64/* build a simple key function that tells whether one is bigger than two */
65#define SINGLE_KEY(member) \
66 static int lock_stat_key_ ## member(struct lock_stat *one, \
67 struct lock_stat *two) \
68 { \
69 return one->member > two->member; \
70 }
71
72SINGLE_KEY(nr_acquired)
73SINGLE_KEY(nr_contended)
74SINGLE_KEY(wait_time_total)
75SINGLE_KEY(wait_time_min)
76SINGLE_KEY(wait_time_max)
77
78struct lock_key {
79 /*
80 * name: the value specified by the user;
81 * this should be simpler than the raw member name,
82 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
83 */
84 const char *name;
85 int (*key)(struct lock_stat*, struct lock_stat*);
86};
87
88static const char *sort_key = "acquired";
89
90static int (*compare)(struct lock_stat *, struct lock_stat *);
91
92static struct rb_root result; /* place to store sorted data */
93
94#define DEF_KEY_LOCK(name, fn_suffix) \
95 { #name, lock_stat_key_ ## fn_suffix }
96struct lock_key keys[] = {
97 DEF_KEY_LOCK(acquired, nr_acquired),
98 DEF_KEY_LOCK(contended, nr_contended),
99 DEF_KEY_LOCK(wait_total, wait_time_total),
100 DEF_KEY_LOCK(wait_min, wait_time_min),
101 DEF_KEY_LOCK(wait_max, wait_time_max),
102
103 /* more complicated comparison keys should go here */
104
105 { NULL, NULL }
106};
107
108static void select_key(void)
109{
110 int i;
111
112 for (i = 0; keys[i].name; i++) {
113 if (!strcmp(keys[i].name, sort_key)) {
114 compare = keys[i].key;
115 return;
116 }
117 }
118
119 die("Unknown compare key:%s\n", sort_key);
120}
121
122static void insert_to_result(struct lock_stat *st,
123 int (*bigger)(struct lock_stat *, struct lock_stat *))
124{
125 struct rb_node **rb = &result.rb_node;
126 struct rb_node *parent = NULL;
127 struct lock_stat *p;
128
129 while (*rb) {
130 p = container_of(*rb, struct lock_stat, rb);
131 parent = *rb;
132
133 if (bigger(st, p))
134 rb = &(*rb)->rb_left;
135 else
136 rb = &(*rb)->rb_right;
137 }
138
139 rb_link_node(&st->rb, parent, rb);
140 rb_insert_color(&st->rb, &result);
141}
142
143/* returns the leftmost element of result, and erases it */
144static struct lock_stat *pop_from_result(void)
145{
146 struct rb_node *node = result.rb_node;
147
148 if (!node)
149 return NULL;
150
151 while (node->rb_left)
152 node = node->rb_left;
153
154 rb_erase(node, &result);
155 return container_of(node, struct lock_stat, rb);
156}
157
158static struct lock_stat *lock_stat_findnew(void *addr, const char *name)
159{
160 struct list_head *entry = lockhashentry(addr);
161 struct lock_stat *ret, *new;
162
163 list_for_each_entry(ret, entry, hash_entry) {
164 if (ret->addr == addr)
165 return ret;
166 }
167
168 new = zalloc(sizeof(struct lock_stat));
169 if (!new)
170 goto alloc_failed;
171
172 new->addr = addr;
173 new->name = zalloc(sizeof(char) * strlen(name) + 1);
174 if (!new->name)
175 goto alloc_failed;
176 strcpy(new->name, name);
177
178 /* LOCK_STATE_UNLOCKED == 0 isn't guaranteed forever */
179 new->state = LOCK_STATE_UNLOCKED;
180 new->wait_time_min = ULLONG_MAX;
181
182 list_add(&new->hash_entry, entry);
183 return new;
184
185alloc_failed:
186 die("memory allocation failed\n");
187}
188
189static char const *input_name = "perf.data";
190
191static int profile_cpu = -1;
192
193struct raw_event_sample {
194 u32 size;
195 char data[0];
196};
197
198struct trace_acquire_event {
199 void *addr;
200 const char *name;
201};
202
203struct trace_acquired_event {
204 void *addr;
205 const char *name;
206};
207
208struct trace_contended_event {
209 void *addr;
210 const char *name;
211};
212
213struct trace_release_event {
214 void *addr;
215 const char *name;
216};
217
218struct trace_lock_handler {
219 void (*acquire_event)(struct trace_acquire_event *,
220 struct event *,
221 int cpu,
222 u64 timestamp,
223 struct thread *thread);
224
225 void (*acquired_event)(struct trace_acquired_event *,
226 struct event *,
227 int cpu,
228 u64 timestamp,
229 struct thread *thread);
230
231 void (*contended_event)(struct trace_contended_event *,
232 struct event *,
233 int cpu,
234 u64 timestamp,
235 struct thread *thread);
236
237 void (*release_event)(struct trace_release_event *,
238 struct event *,
239 int cpu,
240 u64 timestamp,
241 struct thread *thread);
242};
243
244static void
245report_lock_acquire_event(struct trace_acquire_event *acquire_event,
246 struct event *__event __used,
247 int cpu __used,
248 u64 timestamp,
249 struct thread *thread __used)
250{
251 struct lock_stat *st;
252
253 st = lock_stat_findnew(acquire_event->addr, acquire_event->name);
254
255 switch (st->state) {
256 case LOCK_STATE_UNLOCKED:
257 break;
258 case LOCK_STATE_LOCKED:
259 break;
260 default:
261 BUG_ON(1);
262 break;
263 }
264
265 st->prev_event_time = timestamp;
266}
267
268static void
269report_lock_acquired_event(struct trace_acquired_event *acquired_event,
270 struct event *__event __used,
271 int cpu __used,
272 u64 timestamp,
273 struct thread *thread __used)
274{
275 struct lock_stat *st;
276
277 st = lock_stat_findnew(acquired_event->addr, acquired_event->name);
278
279 switch (st->state) {
280 case LOCK_STATE_UNLOCKED:
281 st->state = LOCK_STATE_LOCKED;
282 st->nr_acquired++;
283 break;
284 case LOCK_STATE_LOCKED:
285 break;
286 default:
287 BUG_ON(1);
288 break;
289 }
290
291 st->prev_event_time = timestamp;
292}
293
294static void
295report_lock_contended_event(struct trace_contended_event *contended_event,
296 struct event *__event __used,
297 int cpu __used,
298 u64 timestamp,
299 struct thread *thread __used)
300{
301 struct lock_stat *st;
302
303 st = lock_stat_findnew(contended_event->addr, contended_event->name);
304
305 switch (st->state) {
306 case LOCK_STATE_UNLOCKED:
307 break;
308 case LOCK_STATE_LOCKED:
309 st->nr_contended++;
310 break;
311 default:
312 BUG_ON(1);
313 break;
314 }
315
316 st->prev_event_time = timestamp;
317}
318
319static void
320report_lock_release_event(struct trace_release_event *release_event,
321 struct event *__event __used,
322 int cpu __used,
323 u64 timestamp,
324 struct thread *thread __used)
325{
326 struct lock_stat *st;
327 u64 hold_time;
328
329 st = lock_stat_findnew(release_event->addr, release_event->name);
330
331 switch (st->state) {
332 case LOCK_STATE_UNLOCKED:
333 break;
334 case LOCK_STATE_LOCKED:
335 st->state = LOCK_STATE_UNLOCKED;
336 hold_time = timestamp - st->prev_event_time;
337
338 if (timestamp < st->prev_event_time) {
339 /* unfortunately, this can happen... */
340 goto end;
341 }
342
343 if (st->wait_time_min > hold_time)
344 st->wait_time_min = hold_time;
345 if (st->wait_time_max < hold_time)
346 st->wait_time_max = hold_time;
347 st->wait_time_total += hold_time;
348
349 st->nr_release++;
350 break;
351 default:
352 BUG_ON(1);
353 break;
354 }
355
356end:
357 st->prev_event_time = timestamp;
358}
359
360/* lock oriented handlers */
361/* TODO: handlers for CPU oriented, thread oriented */
362static struct trace_lock_handler report_lock_ops = {
363 .acquire_event = report_lock_acquire_event,
364 .acquired_event = report_lock_acquired_event,
365 .contended_event = report_lock_contended_event,
366 .release_event = report_lock_release_event,
367};
368
369static struct trace_lock_handler *trace_handler;
370
371static void
372process_lock_acquire_event(void *data,
373 struct event *event __used,
374 int cpu __used,
375 u64 timestamp __used,
376 struct thread *thread __used)
377{
378 struct trace_acquire_event acquire_event;
379 u64 tmp; /* this is required for casting... */
380
381 tmp = raw_field_value(event, "lockdep_addr", data);
382 memcpy(&acquire_event.addr, &tmp, sizeof(void *));
383 acquire_event.name = (char *)raw_field_ptr(event, "name", data);
384
385 if (trace_handler->acquire_event)
386 trace_handler->acquire_event(&acquire_event, event, cpu, timestamp, thread);
387}
388
389static void
390process_lock_acquired_event(void *data,
391 struct event *event __used,
392 int cpu __used,
393 u64 timestamp __used,
394 struct thread *thread __used)
395{
396 struct trace_acquired_event acquired_event;
397 u64 tmp; /* this is required for casting... */
398
399 tmp = raw_field_value(event, "lockdep_addr", data);
400 memcpy(&acquired_event.addr, &tmp, sizeof(void *));
401 acquired_event.name = (char *)raw_field_ptr(event, "name", data);
402
403 if (trace_handler->acquired_event)
404 trace_handler->acquired_event(&acquired_event, event, cpu, timestamp, thread);
405}
406
407static void
408process_lock_contended_event(void *data,
409 struct event *event __used,
410 int cpu __used,
411 u64 timestamp __used,
412 struct thread *thread __used)
413{
414 struct trace_contended_event contended_event;
415 u64 tmp; /* this is required for casting... */
416
417 tmp = raw_field_value(event, "lockdep_addr", data);
418 memcpy(&contended_event.addr, &tmp, sizeof(void *));
419 contended_event.name = (char *)raw_field_ptr(event, "name", data);
420
421 if (trace_handler->contended_event)
422 trace_handler->contended_event(&contended_event, event, cpu, timestamp, thread);
423}
424
425static void
426process_lock_release_event(void *data,
427 struct event *event __used,
428 int cpu __used,
429 u64 timestamp __used,
430 struct thread *thread __used)
431{
432 struct trace_release_event release_event;
433 u64 tmp; /* this is required for casting... */
434
435 tmp = raw_field_value(event, "lockdep_addr", data);
436 memcpy(&release_event.addr, &tmp, sizeof(void *));
437 release_event.name = (char *)raw_field_ptr(event, "name", data);
438
439 if (trace_handler->release_event)
440 trace_handler->release_event(&release_event, event, cpu, timestamp, thread);
441}
442
443static void
444process_raw_event(void *data, int cpu,
445 u64 timestamp, struct thread *thread)
446{
447 struct event *event;
448 int type;
449
450 type = trace_parse_common_type(data);
451 event = trace_find_event(type);
452
453 if (!strcmp(event->name, "lock_acquire"))
454 process_lock_acquire_event(data, event, cpu, timestamp, thread);
455 if (!strcmp(event->name, "lock_acquired"))
456 process_lock_acquired_event(data, event, cpu, timestamp, thread);
457 if (!strcmp(event->name, "lock_contended"))
458 process_lock_contended_event(data, event, cpu, timestamp, thread);
459 if (!strcmp(event->name, "lock_release"))
460 process_lock_release_event(data, event, cpu, timestamp, thread);
461}
462
463static int process_sample_event(event_t *event, struct perf_session *session)
464{
465 struct thread *thread;
466 struct sample_data data;
467
468 bzero(&data, sizeof(struct sample_data));
469 event__parse_sample(event, session->sample_type, &data);
470 thread = perf_session__findnew(session, data.pid);
471
472 if (thread == NULL) {
473 pr_debug("problem processing %d event, skipping it.\n",
474 event->header.type);
475 return -1;
476 }
477
478 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
479
480 if (profile_cpu != -1 && profile_cpu != (int) data.cpu)
481 return 0;
482
483 process_raw_event(data.raw_data, data.cpu, data.time, thread);
484
485 return 0;
486}
487
488/* TODO: various ways to print, coloring, nano or milli sec */
489static void print_result(void)
490{
491 struct lock_stat *st;
492 char cut_name[20];
493
494 printf("%18s ", "ID");
495 printf("%20s ", "Name");
496 printf("%10s ", "acquired");
497 printf("%10s ", "contended");
498
499 printf("%15s ", "total wait (ns)");
500 printf("%15s ", "max wait (ns)");
501 printf("%15s ", "min wait (ns)");
502
503 printf("\n\n");
504
505 while ((st = pop_from_result())) {
506 bzero(cut_name, 20);
507
508 printf("%p ", st->addr);
509
510 if (strlen(st->name) < 16) {
511 /* output raw name */
512 printf("%20s ", st->name);
513 } else {
514 strncpy(cut_name, st->name, 16);
515 cut_name[16] = '.';
516 cut_name[17] = '.';
517 cut_name[18] = '.';
518 cut_name[19] = '\0';
519 /* cut off name for saving output style */
520 printf("%20s ", cut_name);
521 }
522
523 printf("%10u ", st->nr_acquired);
524 printf("%10u ", st->nr_contended);
525
526 printf("%15llu ", st->wait_time_total);
527 printf("%15llu ", st->wait_time_max);
528 printf("%15llu ", st->wait_time_min == ULLONG_MAX ?
529 0 : st->wait_time_min);
530 printf("\n");
531 }
532}
533
534static void dump_map(void)
535{
536 unsigned int i;
537 struct lock_stat *st;
538
539 for (i = 0; i < LOCKHASH_SIZE; i++) {
540 list_for_each_entry(st, &lockhash_table[i], hash_entry) {
541 printf("%p: %s\n", st->addr, st->name);
542 }
543 }
544}
545
546static struct perf_event_ops eops = {
547 .sample = process_sample_event,
548 .comm = event__process_comm,
549};
550
551static struct perf_session *session;
552
553static int read_events(void)
554{
555 session = perf_session__new(input_name, O_RDONLY, 0);
556 if (!session)
557 die("Initializing perf session failed\n");
558
559 return perf_session__process_events(session, &eops);
560}
561
562static void sort_result(void)
563{
564 unsigned int i;
565 struct lock_stat *st;
566
567 for (i = 0; i < LOCKHASH_SIZE; i++) {
568 list_for_each_entry(st, &lockhash_table[i], hash_entry) {
569 insert_to_result(st, compare);
570 }
571 }
572}
573
574static void __cmd_report(void)
575{
576 setup_pager();
577 select_key();
578 read_events();
579 sort_result();
580 print_result();
581}
582
583static const char * const report_usage[] = {
584 "perf lock report [<options>]",
585 NULL
586};
587
588static const struct option report_options[] = {
589 OPT_STRING('k', "key", &sort_key, "acquired",
590 "key for sorting"),
591 /* TODO: type */
592 OPT_END()
593};
594
595static const char * const lock_usage[] = {
596 "perf lock [<options>] {record|trace|report}",
597 NULL
598};
599
600static const struct option lock_options[] = {
601 OPT_STRING('i', "input", &input_name, "file", "input file name"),
602 OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
603 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
604 OPT_END()
605};
606
607static const char *record_args[] = {
608 "record",
609 "-a",
610 "-R",
611 "-M",
612 "-f",
613 "-m", "1024",
614 "-c", "1",
615 "-e", "lock:lock_acquire:r",
616 "-e", "lock:lock_acquired:r",
617 "-e", "lock:lock_contended:r",
618 "-e", "lock:lock_release:r",
619};
620
621static int __cmd_record(int argc, const char **argv)
622{
623 unsigned int rec_argc, i, j;
624 const char **rec_argv;
625
626 rec_argc = ARRAY_SIZE(record_args) + argc - 1;
627 rec_argv = calloc(rec_argc + 1, sizeof(char *));
628
629 for (i = 0; i < ARRAY_SIZE(record_args); i++)
630 rec_argv[i] = strdup(record_args[i]);
631
632 for (j = 1; j < (unsigned int)argc; j++, i++)
633 rec_argv[i] = argv[j];
634
635 BUG_ON(i != rec_argc);
636
637 return cmd_record(i, rec_argv, NULL);
638}
639
640int cmd_lock(int argc, const char **argv, const char *prefix __used)
641{
642 unsigned int i;
643
644 symbol__init();
645 for (i = 0; i < LOCKHASH_SIZE; i++)
646 INIT_LIST_HEAD(lockhash_table + i);
647
648 argc = parse_options(argc, argv, lock_options, lock_usage,
649 PARSE_OPT_STOP_AT_NON_OPTION);
650 if (!argc)
651 usage_with_options(lock_usage, lock_options);
652
653 if (!strncmp(argv[0], "rec", 3)) {
654 return __cmd_record(argc, argv);
655 } else if (!strncmp(argv[0], "report", 6)) {
656 trace_handler = &report_lock_ops;
657 if (argc) {
658 argc = parse_options(argc, argv,
659 report_options, report_usage, 0);
660 if (argc)
661 usage_with_options(report_usage, report_options);
662 }
663 __cmd_report();
664 } else if (!strcmp(argv[0], "trace")) {
665 /* Aliased to 'perf trace' */
666 return cmd_trace(argc, argv, prefix);
667 } else if (!strcmp(argv[0], "map")) {
668 /* recycling report_lock_ops */
669 trace_handler = &report_lock_ops;
670 setup_pager();
671 read_events();
672 dump_map();
673 } else {
674 usage_with_options(lock_usage, lock_options);
675 }
676
677 return 0;
678}
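
perf lock above keys every lock_stat on the lockdep_map address: hash_long() picks one of 2^12 bucket lists, which is searched before a new entry is allocated and chained in. A self-contained sketch of that find-or-create flow (singly linked buckets and a simple multiplicative hash stand in for the kernel list/hash helpers; all names are illustrative):

#include <stdio.h>
#include <stdlib.h>

#define HASH_BITS 12
#define HASH_SIZE (1UL << HASH_BITS)

struct lock_stat {
	struct lock_stat *next;		/* bucket chain */
	void *addr;
	char name[32];
	unsigned int nr_acquired;
};

static struct lock_stat *table[HASH_SIZE];

static unsigned long hashfn(void *addr)
{
	/* cheap multiplicative hash over the pointer bits */
	return (((unsigned long)addr >> 4) * 2654435761UL) & (HASH_SIZE - 1);
}

static struct lock_stat *lock_stat_findnew(void *addr, const char *name)
{
	unsigned long h = hashfn(addr);
	struct lock_stat *st;

	for (st = table[h]; st; st = st->next)
		if (st->addr == addr)
			return st;	/* already tracked */

	st = calloc(1, sizeof(*st));
	if (!st)
		return NULL;
	st->addr = addr;
	snprintf(st->name, sizeof(st->name), "%s", name);
	st->next = table[h];
	table[h] = st;
	return st;
}

int main(void)
{
	int dummy;
	struct lock_stat *st = lock_stat_findnew(&dummy, "rq->lock");

	if (!st)
		return 1;
	st->nr_acquired++;
	printf("%p %s acquired=%u\n", st->addr, st->name, st->nr_acquired);
	return 0;
}

The per-lock counters (nr_acquired, wait times, ...) then accumulate on whatever lock_stat_findnew() returns, exactly as the report_lock_*_event() handlers do.
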
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index c1e6774fd3ed..ad47bd4c50ef 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -41,7 +41,6 @@
41#include "util/debugfs.h" 41#include "util/debugfs.h"
42#include "util/symbol.h" 42#include "util/symbol.h"
43#include "util/thread.h" 43#include "util/thread.h"
44#include "util/session.h"
45#include "util/parse-options.h" 44#include "util/parse-options.h"
46#include "util/parse-events.h" /* For debugfs_path */ 45#include "util/parse-events.h" /* For debugfs_path */
47#include "util/probe-finder.h" 46#include "util/probe-finder.h"
@@ -55,11 +54,13 @@ static struct {
55 bool need_dwarf; 54 bool need_dwarf;
56 bool list_events; 55 bool list_events;
57 bool force_add; 56 bool force_add;
57 bool show_lines;
58 int nr_probe; 58 int nr_probe;
59 struct probe_point probes[MAX_PROBES]; 59 struct probe_point probes[MAX_PROBES];
60 struct strlist *dellist; 60 struct strlist *dellist;
61 struct perf_session *psession; 61 struct map_groups kmap_groups;
62 struct map *kmap; 62 struct map *kmaps[MAP__NR_TYPES];
63 struct line_range line_range;
63} session; 64} session;
64 65
65 66
@@ -120,8 +121,8 @@ static int opt_del_probe_event(const struct option *opt __used,
120static void evaluate_probe_point(struct probe_point *pp) 121static void evaluate_probe_point(struct probe_point *pp)
121{ 122{
122 struct symbol *sym; 123 struct symbol *sym;
123 sym = map__find_symbol_by_name(session.kmap, pp->function, 124 sym = map__find_symbol_by_name(session.kmaps[MAP__FUNCTION],
124 session.psession, NULL); 125 pp->function, NULL);
125 if (!sym) 126 if (!sym)
126 die("Kernel symbol \'%s\' not found - probe not added.", 127 die("Kernel symbol \'%s\' not found - probe not added.",
127 pp->function); 128 pp->function);
@@ -130,12 +131,23 @@ static void evaluate_probe_point(struct probe_point *pp)
130#ifndef NO_LIBDWARF 131#ifndef NO_LIBDWARF
131static int open_vmlinux(void) 132static int open_vmlinux(void)
132{ 133{
133 if (map__load(session.kmap, session.psession, NULL) < 0) { 134 if (map__load(session.kmaps[MAP__FUNCTION], NULL) < 0) {
134 pr_debug("Failed to load kernel map.\n"); 135 pr_debug("Failed to load kernel map.\n");
135 return -EINVAL; 136 return -EINVAL;
136 } 137 }
137 pr_debug("Try to open %s\n", session.kmap->dso->long_name); 138 pr_debug("Try to open %s\n",
138 return open(session.kmap->dso->long_name, O_RDONLY); 139 session.kmaps[MAP__FUNCTION]->dso->long_name);
140 return open(session.kmaps[MAP__FUNCTION]->dso->long_name, O_RDONLY);
141}
142
143static int opt_show_lines(const struct option *opt __used,
144 const char *str, int unset __used)
145{
146 if (str)
147 parse_line_range_desc(str, &session.line_range);
148 INIT_LIST_HEAD(&session.line_range.line_list);
149 session.show_lines = true;
150 return 0;
139} 151}
140#endif 152#endif
141 153
@@ -144,6 +156,7 @@ static const char * const probe_usage[] = {
144 "perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]", 156 "perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
145 "perf probe [<options>] --del '[GROUP:]EVENT' ...", 157 "perf probe [<options>] --del '[GROUP:]EVENT' ...",
146 "perf probe --list", 158 "perf probe --list",
159 "perf probe --line 'LINEDESC'",
147 NULL 160 NULL
148}; 161};
149 162
@@ -182,9 +195,31 @@ static const struct option options[] = {
182 opt_add_probe_event), 195 opt_add_probe_event),
183 OPT_BOOLEAN('f', "force", &session.force_add, "forcibly add events" 196 OPT_BOOLEAN('f', "force", &session.force_add, "forcibly add events"
184 " with existing name"), 197 " with existing name"),
198#ifndef NO_LIBDWARF
199 OPT_CALLBACK('L', "line", NULL,
200 "FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]",
201 "Show source code lines.", opt_show_lines),
202#endif
185 OPT_END() 203 OPT_END()
186}; 204};
187 205
206/* Initialize symbol maps for vmlinux */
207static void init_vmlinux(void)
208{
209 symbol_conf.sort_by_name = true;
210 if (symbol_conf.vmlinux_name == NULL)
211 symbol_conf.try_vmlinux_path = true;
212 else
213 pr_debug("Use vmlinux: %s\n", symbol_conf.vmlinux_name);
214 if (symbol__init() < 0)
215 die("Failed to init symbol map.");
216
217 map_groups__init(&session.kmap_groups);
218 if (map_groups__create_kernel_maps(&session.kmap_groups,
219 session.kmaps) < 0)
220 die("Failed to create kernel maps.");
221}
222
188int cmd_probe(int argc, const char **argv, const char *prefix __used) 223int cmd_probe(int argc, const char **argv, const char *prefix __used)
189{ 224{
190 int i, ret; 225 int i, ret;
@@ -203,7 +238,8 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
203 parse_probe_event_argv(argc, argv); 238 parse_probe_event_argv(argc, argv);
204 } 239 }
205 240
206 if ((!session.nr_probe && !session.dellist && !session.list_events)) 241 if ((!session.nr_probe && !session.dellist && !session.list_events &&
242 !session.show_lines))
207 usage_with_options(probe_usage, options); 243 usage_with_options(probe_usage, options);
208 244
209 if (debugfs_valid_mountpoint(debugfs_path) < 0) 245 if (debugfs_valid_mountpoint(debugfs_path) < 0)
@@ -215,10 +251,34 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
215 " --add/--del.\n"); 251 " --add/--del.\n");
216 usage_with_options(probe_usage, options); 252 usage_with_options(probe_usage, options);
217 } 253 }
254 if (session.show_lines) {
255 pr_warning(" Error: Don't use --list with --line.\n");
256 usage_with_options(probe_usage, options);
257 }
218 show_perf_probe_events(); 258 show_perf_probe_events();
219 return 0; 259 return 0;
220 } 260 }
221 261
262#ifndef NO_LIBDWARF
263 if (session.show_lines) {
264 if (session.nr_probe != 0 || session.dellist) {
265 pr_warning(" Error: Don't use --line with"
266 " --add/--del.\n");
267 usage_with_options(probe_usage, options);
268 }
269 init_vmlinux();
270 fd = open_vmlinux();
271 if (fd < 0)
272 die("Could not open debuginfo file.");
273 ret = find_line_range(fd, &session.line_range);
274 if (ret <= 0)
275 die("Source line is not found.\n");
276 close(fd);
277 show_line_range(&session.line_range);
278 return 0;
279 }
280#endif
281
222 if (session.dellist) { 282 if (session.dellist) {
223 del_trace_kprobe_events(session.dellist); 283 del_trace_kprobe_events(session.dellist);
224 strlist__delete(session.dellist); 284 strlist__delete(session.dellist);
@@ -226,20 +286,8 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
226 return 0; 286 return 0;
227 } 287 }
228 288
229 /* Initialize symbol maps for vmlinux */ 289 /* Add probes */
230 symbol_conf.sort_by_name = true; 290 init_vmlinux();
231 if (symbol_conf.vmlinux_name == NULL)
232 symbol_conf.try_vmlinux_path = true;
233 if (symbol__init() < 0)
234 die("Failed to init symbol map.");
235 session.psession = perf_session__new(NULL, O_WRONLY, false);
236 if (session.psession == NULL)
237 die("Failed to init perf_session.");
238 session.kmap = map_groups__find_by_name(&session.psession->kmaps,
239 MAP__FUNCTION,
240 "[kernel.kallsyms]");
241 if (!session.kmap)
242 die("Could not find kernel map.\n");
243 291
244 if (session.need_dwarf) 292 if (session.need_dwarf)
245#ifdef NO_LIBDWARF 293#ifdef NO_LIBDWARF
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 265425322734..771533ced6a8 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -5,10 +5,13 @@
5 * (or a CPU, or a PID) into the perf.data output file - for 5 * (or a CPU, or a PID) into the perf.data output file - for
6 * later analysis via perf report. 6 * later analysis via perf report.
7 */ 7 */
8#define _FILE_OFFSET_BITS 64
9
8#include "builtin.h" 10#include "builtin.h"
9 11
10#include "perf.h" 12#include "perf.h"
11 13
14#include "util/build-id.h"
12#include "util/util.h" 15#include "util/util.h"
13#include "util/parse-options.h" 16#include "util/parse-options.h"
14#include "util/parse-events.h" 17#include "util/parse-events.h"
@@ -62,6 +65,7 @@ static int nr_poll = 0;
62static int nr_cpu = 0; 65static int nr_cpu = 0;
63 66
64static int file_new = 1; 67static int file_new = 1;
68static off_t post_processing_offset;
65 69
66static struct perf_session *session; 70static struct perf_session *session;
67 71
@@ -111,22 +115,10 @@ static void write_output(void *buf, size_t size)
111 } 115 }
112} 116}
113 117
114static void write_event(event_t *buf, size_t size)
115{
116 /*
117 * Add it to the list of DSOs, so that when we finish this
118 * record session we can pick the available build-ids.
119 */
120 if (buf->header.type == PERF_RECORD_MMAP)
121 dsos__findnew(buf->mmap.filename);
122
123 write_output(buf, size);
124}
125
126static int process_synthesized_event(event_t *event, 118static int process_synthesized_event(event_t *event,
127 struct perf_session *self __used) 119 struct perf_session *self __used)
128{ 120{
129 write_event(event, event->header.size); 121 write_output(event, event->header.size);
130 return 0; 122 return 0;
131} 123}
132 124
@@ -178,14 +170,14 @@ static void mmap_read(struct mmap_data *md)
178 size = md->mask + 1 - (old & md->mask); 170 size = md->mask + 1 - (old & md->mask);
179 old += size; 171 old += size;
180 172
181 write_event(buf, size); 173 write_output(buf, size);
182 } 174 }
183 175
184 buf = &data[old & md->mask]; 176 buf = &data[old & md->mask];
185 size = head - old; 177 size = head - old;
186 old += size; 178 old += size;
187 179
188 write_event(buf, size); 180 write_output(buf, size);
189 181
190 md->prev = old; 182 md->prev = old;
191 mmap_write_tail(md, old); 183 mmap_write_tail(md, old);
@@ -395,10 +387,21 @@ static void open_counters(int cpu, pid_t pid)
395 nr_cpu++; 387 nr_cpu++;
396} 388}
397 389
390static int process_buildids(void)
391{
392 u64 size = lseek(output, 0, SEEK_CUR);
393
394 session->fd = output;
395 return __perf_session__process_events(session, post_processing_offset,
396 size - post_processing_offset,
397 size, &build_id__mark_dso_hit_ops);
398}
399
398static void atexit_header(void) 400static void atexit_header(void)
399{ 401{
400 session->header.data_size += bytes_written; 402 session->header.data_size += bytes_written;
401 403
404 process_buildids();
402 perf_header__write(&session->header, output, true); 405 perf_header__write(&session->header, output, true);
403} 406}
404 407
@@ -551,8 +554,23 @@ static int __cmd_record(int argc, const char **argv)
551 return err; 554 return err;
552 } 555 }
553 556
557 post_processing_offset = lseek(output, 0, SEEK_CUR);
558
559 err = event__synthesize_kernel_mmap(process_synthesized_event,
560 session, "_text");
561 if (err < 0) {
562 pr_err("Couldn't record kernel reference relocation symbol.\n");
563 return err;
564 }
565
566 err = event__synthesize_modules(process_synthesized_event, session);
567 if (err < 0) {
568 pr_err("Couldn't record kernel reference relocation symbol.\n");
569 return err;
570 }
571
554 if (!system_wide && profile_cpu == -1) 572 if (!system_wide && profile_cpu == -1)
555 event__synthesize_thread(pid, process_synthesized_event, 573 event__synthesize_thread(target_pid, process_synthesized_event,
556 session); 574 session);
557 else 575 else
558 event__synthesize_threads(process_synthesized_event, session); 576 event__synthesize_threads(process_synthesized_event, session);
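
With write_event() removed, record no longer collects DSOs while events stream out; instead it remembers where the event data starts (post_processing_offset) and, at exit, re-reads only that range so build_id__mark_dso_hit_ops can flag the DSOs that actually got hits before the header is written. A rough, hypothetical Python sketch of that offset bookkeeping (file name and event payloads are made up, not perf's real I/O code):

import os

def record_then_postprocess(path, events):
    # Mirrors the flow of __cmd_record()/atexit_header(): note the current
    # file offset, append the event stream, then read back only the bytes
    # written after that offset for the build-id pass.
    with open(path, "ab+") as out:
        post_processing_offset = out.tell()      # lseek(output, 0, SEEK_CUR)
        for ev in events:                         # write_output() per event
            out.write(ev)
        size = out.tell()
        out.seek(post_processing_offset)
        tail = out.read(size - post_processing_offset)
    return tail   # perf hands this byte range to build_id__mark_dso_hit_ops

if __name__ == "__main__":
    print(len(record_then_postprocess("/tmp/sketch-perf.data", [b"mmap", b"sample"])))
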
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 860f1eeeea7d..cfc655d40bb7 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -34,6 +34,8 @@
34static char const *input_name = "perf.data"; 34static char const *input_name = "perf.data";
35 35
36static int force; 36static int force;
37static bool hide_unresolved;
38static bool dont_use_callchains;
37 39
38static int show_threads; 40static int show_threads;
39static struct perf_read_values show_threads_values; 41static struct perf_read_values show_threads_values;
@@ -91,11 +93,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
91 93
92 event__parse_sample(event, session->sample_type, &data); 94 event__parse_sample(event, session->sample_type, &data);
93 95
94 dump_printf("(IP, %d): %d/%d: %p period: %Ld\n", 96 dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
95 event->header.misc, 97 data.pid, data.tid, data.ip, data.period);
96 data.pid, data.tid,
97 (void *)(long)data.ip,
98 (long long)data.period);
99 98
100 if (session->sample_type & PERF_SAMPLE_CALLCHAIN) { 99 if (session->sample_type & PERF_SAMPLE_CALLCHAIN) {
101 unsigned int i; 100 unsigned int i;
@@ -121,7 +120,7 @@ static int process_sample_event(event_t *event, struct perf_session *session)
121 return -1; 120 return -1;
122 } 121 }
123 122
124 if (al.filtered) 123 if (al.filtered || (hide_unresolved && al.sym == NULL))
125 return 0; 124 return 0;
126 125
127 if (perf_session__add_hist_entry(session, &al, data.callchain, data.period)) { 126 if (perf_session__add_hist_entry(session, &al, data.callchain, data.period)) {
@@ -156,14 +155,14 @@ static int process_read_event(event_t *event, struct perf_session *session __use
156 return 0; 155 return 0;
157} 156}
158 157
159static int sample_type_check(struct perf_session *session) 158static int perf_session__setup_sample_type(struct perf_session *self)
160{ 159{
161 if (!(session->sample_type & PERF_SAMPLE_CALLCHAIN)) { 160 if (!(self->sample_type & PERF_SAMPLE_CALLCHAIN)) {
162 if (sort__has_parent) { 161 if (sort__has_parent) {
163 fprintf(stderr, "selected --sort parent, but no" 162 fprintf(stderr, "selected --sort parent, but no"
164 " callchain data. Did you call" 163 " callchain data. Did you call"
165 " perf record without -g?\n"); 164 " perf record without -g?\n");
166 return -1; 165 return -EINVAL;
167 } 166 }
168 if (symbol_conf.use_callchain) { 167 if (symbol_conf.use_callchain) {
169 fprintf(stderr, "selected -g but no callchain data." 168 fprintf(stderr, "selected -g but no callchain data."
@@ -171,12 +170,13 @@ static int sample_type_check(struct perf_session *session)
171 " -g?\n"); 170 " -g?\n");
172 return -1; 171 return -1;
173 } 172 }
174 } else if (callchain_param.mode != CHAIN_NONE && !symbol_conf.use_callchain) { 173 } else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE &&
174 !symbol_conf.use_callchain) {
175 symbol_conf.use_callchain = true; 175 symbol_conf.use_callchain = true;
176 if (register_callchain_param(&callchain_param) < 0) { 176 if (register_callchain_param(&callchain_param) < 0) {
177 fprintf(stderr, "Can't register callchain" 177 fprintf(stderr, "Can't register callchain"
178 " params\n"); 178 " params\n");
179 return -1; 179 return -EINVAL;
180 } 180 }
181 } 181 }
182 182
@@ -184,20 +184,18 @@ static int sample_type_check(struct perf_session *session)
184} 184}
185 185
186static struct perf_event_ops event_ops = { 186static struct perf_event_ops event_ops = {
187 .process_sample_event = process_sample_event, 187 .sample = process_sample_event,
188 .process_mmap_event = event__process_mmap, 188 .mmap = event__process_mmap,
189 .process_comm_event = event__process_comm, 189 .comm = event__process_comm,
190 .process_exit_event = event__process_task, 190 .exit = event__process_task,
191 .process_fork_event = event__process_task, 191 .fork = event__process_task,
192 .process_lost_event = event__process_lost, 192 .lost = event__process_lost,
193 .process_read_event = process_read_event, 193 .read = process_read_event,
194 .sample_type_check = sample_type_check,
195}; 194};
196 195
197
198static int __cmd_report(void) 196static int __cmd_report(void)
199{ 197{
200 int ret; 198 int ret = -EINVAL;
201 struct perf_session *session; 199 struct perf_session *session;
202 200
203 session = perf_session__new(input_name, O_RDONLY, force); 201 session = perf_session__new(input_name, O_RDONLY, force);
@@ -207,6 +205,10 @@ static int __cmd_report(void)
207 if (show_threads) 205 if (show_threads)
208 perf_read_values_init(&show_threads_values); 206 perf_read_values_init(&show_threads_values);
209 207
208 ret = perf_session__setup_sample_type(session);
209 if (ret)
210 goto out_delete;
211
210 ret = perf_session__process_events(session, &event_ops); 212 ret = perf_session__process_events(session, &event_ops);
211 if (ret) 213 if (ret)
212 goto out_delete; 214 goto out_delete;
@@ -243,11 +245,19 @@ out_delete:
243 245
244static int 246static int
245parse_callchain_opt(const struct option *opt __used, const char *arg, 247parse_callchain_opt(const struct option *opt __used, const char *arg,
246 int unset __used) 248 int unset)
247{ 249{
248 char *tok; 250 char *tok;
249 char *endptr; 251 char *endptr;
250 252
253 /*
254 * --no-call-graph
255 */
256 if (unset) {
257 dont_use_callchains = true;
258 return 0;
259 }
260
251 symbol_conf.use_callchain = true; 261 symbol_conf.use_callchain = true;
252 262
253 if (!arg) 263 if (!arg)
@@ -319,7 +329,7 @@ static const struct option options[] = {
319 "pretty printing style key: normal raw"), 329 "pretty printing style key: normal raw"),
320 OPT_STRING('s', "sort", &sort_order, "key[,key2...]", 330 OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
321 "sort by key(s): pid, comm, dso, symbol, parent"), 331 "sort by key(s): pid, comm, dso, symbol, parent"),
322 OPT_BOOLEAN('P', "full-paths", &event_ops.full_paths, 332 OPT_BOOLEAN('P', "full-paths", &symbol_conf.full_paths,
323 "Don't shorten the pathnames taking into account the cwd"), 333 "Don't shorten the pathnames taking into account the cwd"),
324 OPT_STRING('p', "parent", &parent_pattern, "regex", 334 OPT_STRING('p', "parent", &parent_pattern, "regex",
325 "regex filter to identify parent, see: '--sort parent'"), 335 "regex filter to identify parent, see: '--sort parent'"),
@@ -340,6 +350,8 @@ static const struct option options[] = {
340 OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator", 350 OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
341 "separator for columns, no spaces will be added between " 351 "separator for columns, no spaces will be added between "
342 "columns '.' is reserved."), 352 "columns '.' is reserved."),
353 OPT_BOOLEAN('U', "hide-unresolved", &hide_unresolved,
354 "Only display entries resolved to a symbol"),
343 OPT_END() 355 OPT_END()
344}; 356};
345 357
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 80209df6cfe8..4f5a03e43444 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1621,11 +1621,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
1621 1621
1622 event__parse_sample(event, session->sample_type, &data); 1622 event__parse_sample(event, session->sample_type, &data);
1623 1623
1624 dump_printf("(IP, %d): %d/%d: %p period: %Ld\n", 1624 dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
1625 event->header.misc, 1625 data.pid, data.tid, data.ip, data.period);
1626 data.pid, data.tid,
1627 (void *)(long)data.ip,
1628 (long long)data.period);
1629 1626
1630 thread = perf_session__findnew(session, data.pid); 1627 thread = perf_session__findnew(session, data.pid);
1631 if (thread == NULL) { 1628 if (thread == NULL) {
@@ -1653,33 +1650,22 @@ static int process_lost_event(event_t *event __used,
1653 return 0; 1650 return 0;
1654} 1651}
1655 1652
1656static int sample_type_check(struct perf_session *session __used)
1657{
1658 if (!(session->sample_type & PERF_SAMPLE_RAW)) {
1659 fprintf(stderr,
1660 "No trace sample to read. Did you call perf record "
1661 "without -R?");
1662 return -1;
1663 }
1664
1665 return 0;
1666}
1667
1668static struct perf_event_ops event_ops = { 1653static struct perf_event_ops event_ops = {
1669 .process_sample_event = process_sample_event, 1654 .sample = process_sample_event,
1670 .process_comm_event = event__process_comm, 1655 .comm = event__process_comm,
1671 .process_lost_event = process_lost_event, 1656 .lost = process_lost_event,
1672 .sample_type_check = sample_type_check,
1673}; 1657};
1674 1658
1675static int read_events(void) 1659static int read_events(void)
1676{ 1660{
1677 int err; 1661 int err = -EINVAL;
1678 struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0); 1662 struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0);
1679 if (session == NULL) 1663 if (session == NULL)
1680 return -ENOMEM; 1664 return -ENOMEM;
1681 1665
1682 err = perf_session__process_events(session, &event_ops); 1666 if (perf_session__has_traces(session, "record -R"))
1667 err = perf_session__process_events(session, &event_ops);
1668
1683 perf_session__delete(session); 1669 perf_session__delete(session);
1684 return err; 1670 return err;
1685} 1671}
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index c70d72003557..e8c85d5aec41 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -44,6 +44,7 @@
44#include "util/parse-events.h" 44#include "util/parse-events.h"
45#include "util/event.h" 45#include "util/event.h"
46#include "util/debug.h" 46#include "util/debug.h"
47#include "util/header.h"
47 48
48#include <sys/prctl.h> 49#include <sys/prctl.h>
49#include <math.h> 50#include <math.h>
@@ -79,6 +80,8 @@ static int fd[MAX_NR_CPUS][MAX_COUNTERS];
79 80
80static int event_scaled[MAX_COUNTERS]; 81static int event_scaled[MAX_COUNTERS];
81 82
83static volatile int done = 0;
84
82struct stats 85struct stats
83{ 86{
84 double n, mean, M2; 87 double n, mean, M2;
@@ -247,61 +250,64 @@ static int run_perf_stat(int argc __used, const char **argv)
247 unsigned long long t0, t1; 250 unsigned long long t0, t1;
248 int status = 0; 251 int status = 0;
249 int counter; 252 int counter;
250 int pid; 253 int pid = target_pid;
251 int child_ready_pipe[2], go_pipe[2]; 254 int child_ready_pipe[2], go_pipe[2];
255 const bool forks = (target_pid == -1 && argc > 0);
252 char buf; 256 char buf;
253 257
254 if (!system_wide) 258 if (!system_wide)
255 nr_cpus = 1; 259 nr_cpus = 1;
256 260
257 if (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0) { 261 if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
258 perror("failed to create pipes"); 262 perror("failed to create pipes");
259 exit(1); 263 exit(1);
260 } 264 }
261 265
262 if ((pid = fork()) < 0) 266 if (forks) {
263 perror("failed to fork"); 267 if ((pid = fork()) < 0)
268 perror("failed to fork");
269
270 if (!pid) {
271 close(child_ready_pipe[0]);
272 close(go_pipe[1]);
273 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
274
275 /*
276 * Do a dummy execvp to get the PLT entry resolved,
277 * so we avoid the resolver overhead on the real
278 * execvp call.
279 */
280 execvp("", (char **)argv);
281
282 /*
283 * Tell the parent we're ready to go
284 */
285 close(child_ready_pipe[1]);
286
287 /*
288 * Wait until the parent tells us to go.
289 */
290 if (read(go_pipe[0], &buf, 1) == -1)
291 perror("unable to read pipe");
292
293 execvp(argv[0], (char **)argv);
294
295 perror(argv[0]);
296 exit(-1);
297 }
264 298
265 if (!pid) { 299 child_pid = pid;
266 close(child_ready_pipe[0]);
267 close(go_pipe[1]);
268 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
269 300
270 /* 301 /*
271 * Do a dummy execvp to get the PLT entry resolved, 302 * Wait for the child to be ready to exec.
272 * so we avoid the resolver overhead on the real
273 * execvp call.
274 */
275 execvp("", (char **)argv);
276
277 /*
278 * Tell the parent we're ready to go
279 */ 303 */
280 close(child_ready_pipe[1]); 304 close(child_ready_pipe[1]);
281 305 close(go_pipe[0]);
282 /* 306 if (read(child_ready_pipe[0], &buf, 1) == -1)
283 * Wait until the parent tells us to go.
284 */
285 if (read(go_pipe[0], &buf, 1) == -1)
286 perror("unable to read pipe"); 307 perror("unable to read pipe");
287 308 close(child_ready_pipe[0]);
288 execvp(argv[0], (char **)argv);
289
290 perror(argv[0]);
291 exit(-1);
292 } 309 }
293 310
294 child_pid = pid;
295
296 /*
297 * Wait for the child to be ready to exec.
298 */
299 close(child_ready_pipe[1]);
300 close(go_pipe[0]);
301 if (read(child_ready_pipe[0], &buf, 1) == -1)
302 perror("unable to read pipe");
303 close(child_ready_pipe[0]);
304
305 for (counter = 0; counter < nr_counters; counter++) 311 for (counter = 0; counter < nr_counters; counter++)
306 create_perf_stat_counter(counter, pid); 312 create_perf_stat_counter(counter, pid);
307 313
@@ -310,8 +316,12 @@ static int run_perf_stat(int argc __used, const char **argv)
310 */ 316 */
311 t0 = rdclock(); 317 t0 = rdclock();
312 318
313 close(go_pipe[1]); 319 if (forks) {
314 wait(&status); 320 close(go_pipe[1]);
321 wait(&status);
322 } else {
323 while(!done);
324 }
315 325
316 t1 = rdclock(); 326 t1 = rdclock();
317 327
@@ -417,10 +427,13 @@ static void print_stat(int argc, const char **argv)
417 fflush(stdout); 427 fflush(stdout);
418 428
419 fprintf(stderr, "\n"); 429 fprintf(stderr, "\n");
420 fprintf(stderr, " Performance counter stats for \'%s", argv[0]); 430 fprintf(stderr, " Performance counter stats for ");
421 431 if(target_pid == -1) {
422 for (i = 1; i < argc; i++) 432 fprintf(stderr, "\'%s", argv[0]);
423 fprintf(stderr, " %s", argv[i]); 433 for (i = 1; i < argc; i++)
434 fprintf(stderr, " %s", argv[i]);
435 }else
436 fprintf(stderr, "task pid \'%d", target_pid);
424 437
425 fprintf(stderr, "\'"); 438 fprintf(stderr, "\'");
426 if (run_count > 1) 439 if (run_count > 1)
@@ -445,6 +458,9 @@ static volatile int signr = -1;
445 458
446static void skip_signal(int signo) 459static void skip_signal(int signo)
447{ 460{
461 if(target_pid != -1)
462 done = 1;
463
448 signr = signo; 464 signr = signo;
449} 465}
450 466
@@ -461,7 +477,7 @@ static void sig_atexit(void)
461} 477}
462 478
463static const char * const stat_usage[] = { 479static const char * const stat_usage[] = {
464 "perf stat [<options>] <command>", 480 "perf stat [<options>] [<command>]",
465 NULL 481 NULL
466}; 482};
467 483
@@ -492,7 +508,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
492 508
493 argc = parse_options(argc, argv, options, stat_usage, 509 argc = parse_options(argc, argv, options, stat_usage,
494 PARSE_OPT_STOP_AT_NON_OPTION); 510 PARSE_OPT_STOP_AT_NON_OPTION);
495 if (!argc) 511 if (!argc && target_pid == -1)
496 usage_with_options(stat_usage, options); 512 usage_with_options(stat_usage, options);
497 if (run_count <= 0) 513 if (run_count <= 0)
498 usage_with_options(stat_usage, options); 514 usage_with_options(stat_usage, options);
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 3f8bbcfb1e9b..0d4d8ff7914b 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -1029,33 +1029,24 @@ static void process_samples(struct perf_session *session)
1029 } 1029 }
1030} 1030}
1031 1031
1032static int sample_type_check(struct perf_session *session)
1033{
1034 if (!(session->sample_type & PERF_SAMPLE_RAW)) {
1035 fprintf(stderr, "No trace samples found in the file.\n"
1036 "Have you used 'perf timechart record' to record it?\n");
1037 return -1;
1038 }
1039
1040 return 0;
1041}
1042
1043static struct perf_event_ops event_ops = { 1032static struct perf_event_ops event_ops = {
1044 .process_comm_event = process_comm_event, 1033 .comm = process_comm_event,
1045 .process_fork_event = process_fork_event, 1034 .fork = process_fork_event,
1046 .process_exit_event = process_exit_event, 1035 .exit = process_exit_event,
1047 .process_sample_event = queue_sample_event, 1036 .sample = queue_sample_event,
1048 .sample_type_check = sample_type_check,
1049}; 1037};
1050 1038
1051static int __cmd_timechart(void) 1039static int __cmd_timechart(void)
1052{ 1040{
1053 struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0); 1041 struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0);
1054 int ret; 1042 int ret = -EINVAL;
1055 1043
1056 if (session == NULL) 1044 if (session == NULL)
1057 return -ENOMEM; 1045 return -ENOMEM;
1058 1046
1047 if (!perf_session__has_traces(session, "timechart record"))
1048 goto out_delete;
1049
1059 ret = perf_session__process_events(session, &event_ops); 1050 ret = perf_session__process_events(session, &event_ops);
1060 if (ret) 1051 if (ret)
1061 goto out_delete; 1052 goto out_delete;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 4b91d8cf00ec..31f2e597800c 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -94,6 +94,7 @@ struct source_line {
94 94
95static char *sym_filter = NULL; 95static char *sym_filter = NULL;
96struct sym_entry *sym_filter_entry = NULL; 96struct sym_entry *sym_filter_entry = NULL;
97struct sym_entry *sym_filter_entry_sched = NULL;
97static int sym_pcnt_filter = 5; 98static int sym_pcnt_filter = 5;
98static int sym_counter = 0; 99static int sym_counter = 0;
99static int display_weighted = -1; 100static int display_weighted = -1;
@@ -201,10 +202,9 @@ static void parse_source(struct sym_entry *syme)
201 len = sym->end - sym->start; 202 len = sym->end - sym->start;
202 203
203 sprintf(command, 204 sprintf(command,
204 "objdump --start-address=0x%016Lx " 205 "objdump --start-address=%#0*Lx --stop-address=%#0*Lx -dS %s",
205 "--stop-address=0x%016Lx -dS %s", 206 BITS_PER_LONG / 4, map__rip_2objdump(map, sym->start),
206 map->unmap_ip(map, sym->start), 207 BITS_PER_LONG / 4, map__rip_2objdump(map, sym->end), path);
207 map->unmap_ip(map, sym->end), path);
208 208
209 file = popen(command, "r"); 209 file = popen(command, "r");
210 if (!file) 210 if (!file)
@@ -215,7 +215,7 @@ static void parse_source(struct sym_entry *syme)
215 while (!feof(file)) { 215 while (!feof(file)) {
216 struct source_line *src; 216 struct source_line *src;
217 size_t dummy = 0; 217 size_t dummy = 0;
218 char *c; 218 char *c, *sep;
219 219
220 src = malloc(sizeof(struct source_line)); 220 src = malloc(sizeof(struct source_line));
221 assert(src != NULL); 221 assert(src != NULL);
@@ -234,14 +234,11 @@ static void parse_source(struct sym_entry *syme)
234 *source->lines_tail = src; 234 *source->lines_tail = src;
235 source->lines_tail = &src->next; 235 source->lines_tail = &src->next;
236 236
237 if (strlen(src->line)>8 && src->line[8] == ':') { 237 src->eip = strtoull(src->line, &sep, 16);
238 src->eip = strtoull(src->line, NULL, 16); 238 if (*sep == ':')
239 src->eip = map->unmap_ip(map, src->eip); 239 src->eip = map__objdump_2ip(map, src->eip);
240 } 240 else /* this line has no ip info (e.g. source line) */
241 if (strlen(src->line)>8 && src->line[16] == ':') { 241 src->eip = 0;
242 src->eip = strtoull(src->line, NULL, 16);
243 src->eip = map->unmap_ip(map, src->eip);
244 }
245 } 242 }
246 pclose(file); 243 pclose(file);
247out_assign: 244out_assign:
@@ -276,6 +273,9 @@ static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
276 goto out_unlock; 273 goto out_unlock;
277 274
278 for (line = syme->src->lines; line; line = line->next) { 275 for (line = syme->src->lines; line; line = line->next) {
276 /* skip lines without IP info */
277 if (line->eip == 0)
278 continue;
279 if (line->eip == ip) { 279 if (line->eip == ip) {
280 line->count[counter]++; 280 line->count[counter]++;
281 break; 281 break;
@@ -287,17 +287,20 @@ out_unlock:
287 pthread_mutex_unlock(&syme->src->lock); 287 pthread_mutex_unlock(&syme->src->lock);
288} 288}
289 289
290#define PATTERN_LEN (BITS_PER_LONG / 4 + 2)
291
290static void lookup_sym_source(struct sym_entry *syme) 292static void lookup_sym_source(struct sym_entry *syme)
291{ 293{
292 struct symbol *symbol = sym_entry__symbol(syme); 294 struct symbol *symbol = sym_entry__symbol(syme);
293 struct source_line *line; 295 struct source_line *line;
294 char pattern[PATH_MAX]; 296 char pattern[PATTERN_LEN + 1];
295 297
296 sprintf(pattern, "<%s>:", symbol->name); 298 sprintf(pattern, "%0*Lx <", BITS_PER_LONG / 4,
299 map__rip_2objdump(syme->map, symbol->start));
297 300
298 pthread_mutex_lock(&syme->src->lock); 301 pthread_mutex_lock(&syme->src->lock);
299 for (line = syme->src->lines; line; line = line->next) { 302 for (line = syme->src->lines; line; line = line->next) {
300 if (strstr(line->line, pattern)) { 303 if (memcmp(line->line, pattern, PATTERN_LEN) == 0) {
301 syme->src->source = line; 304 syme->src->source = line;
302 break; 305 break;
303 } 306 }
@@ -667,7 +670,7 @@ static void prompt_symbol(struct sym_entry **target, const char *msg)
667 } 670 }
668 671
669 if (!found) { 672 if (!found) {
670 fprintf(stderr, "Sorry, %s is not active.\n", sym_filter); 673 fprintf(stderr, "Sorry, %s is not active.\n", buf);
671 sleep(1); 674 sleep(1);
672 return; 675 return;
673 } else 676 } else
@@ -695,11 +698,9 @@ static void print_mapped_keys(void)
695 698
696 fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter); 699 fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter);
697 700
698 if (symbol_conf.vmlinux_name) { 701 fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
699 fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter); 702 fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL");
700 fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); 703 fprintf(stdout, "\t[S] stop annotation.\n");
701 fprintf(stdout, "\t[S] stop annotation.\n");
702 }
703 704
704 if (nr_counters > 1) 705 if (nr_counters > 1)
705 fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0); 706 fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0);
@@ -725,14 +726,13 @@ static int key_mapped(int c)
725 case 'Q': 726 case 'Q':
726 case 'K': 727 case 'K':
727 case 'U': 728 case 'U':
729 case 'F':
730 case 's':
731 case 'S':
728 return 1; 732 return 1;
729 case 'E': 733 case 'E':
730 case 'w': 734 case 'w':
731 return nr_counters > 1 ? 1 : 0; 735 return nr_counters > 1 ? 1 : 0;
732 case 'F':
733 case 's':
734 case 'S':
735 return symbol_conf.vmlinux_name ? 1 : 0;
736 default: 736 default:
737 break; 737 break;
738 } 738 }
@@ -910,8 +910,12 @@ static int symbol_filter(struct map *map, struct symbol *sym)
910 syme = symbol__priv(sym); 910 syme = symbol__priv(sym);
911 syme->map = map; 911 syme->map = map;
912 syme->src = NULL; 912 syme->src = NULL;
913 if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) 913
914 sym_filter_entry = syme; 914 if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) {
915 /* schedule initial sym_filter_entry setup */
916 sym_filter_entry_sched = syme;
917 sym_filter = NULL;
918 }
915 919
916 for (i = 0; skip_symbols[i]; i++) { 920 for (i = 0; skip_symbols[i]; i++) {
917 if (!strcmp(skip_symbols[i], name)) { 921 if (!strcmp(skip_symbols[i], name)) {
@@ -934,8 +938,11 @@ static void event__process_sample(const event_t *self,
934 struct addr_location al; 938 struct addr_location al;
935 u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 939 u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
936 940
941 ++samples;
942
937 switch (origin) { 943 switch (origin) {
938 case PERF_RECORD_MISC_USER: 944 case PERF_RECORD_MISC_USER:
945 ++userspace_samples;
939 if (hide_user_symbols) 946 if (hide_user_symbols)
940 return; 947 return;
941 break; 948 break;
@@ -948,9 +955,38 @@ static void event__process_sample(const event_t *self,
948 } 955 }
949 956
950 if (event__preprocess_sample(self, session, &al, symbol_filter) < 0 || 957 if (event__preprocess_sample(self, session, &al, symbol_filter) < 0 ||
951 al.sym == NULL || al.filtered) 958 al.filtered)
952 return; 959 return;
953 960
961 if (al.sym == NULL) {
962 /*
963 * As we do lazy loading of symtabs we only will know if the
964 * specified vmlinux file is invalid when we actually have a
965 * hit in kernel space and then try to load it. So if we get
966 * here and there are _no_ symbols in the DSO backing the
967 * kernel map, bail out.
968 *
969 * We may never get here, for instance, if we use -K/
970 * --hide-kernel-symbols, even if the user specifies an
971 * invalid --vmlinux ;-)
972 */
973 if (al.map == session->vmlinux_maps[MAP__FUNCTION] &&
974 RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
975 pr_err("The %s file can't be used\n",
976 symbol_conf.vmlinux_name);
977 exit(1);
978 }
979
980 return;
981 }
982
983 /* let's see, whether we need to install initial sym_filter_entry */
984 if (sym_filter_entry_sched) {
985 sym_filter_entry = sym_filter_entry_sched;
986 sym_filter_entry_sched = NULL;
987 parse_source(sym_filter_entry);
988 }
989
954 syme = symbol__priv(al.sym); 990 syme = symbol__priv(al.sym);
955 if (!syme->skip) { 991 if (!syme->skip) {
956 syme->count[counter]++; 992 syme->count[counter]++;
@@ -960,9 +996,6 @@ static void event__process_sample(const event_t *self,
960 if (list_empty(&syme->node) || !syme->node.next) 996 if (list_empty(&syme->node) || !syme->node.next)
961 __list_insert_active_sym(syme); 997 __list_insert_active_sym(syme);
962 pthread_mutex_unlock(&active_symbols_lock); 998 pthread_mutex_unlock(&active_symbols_lock);
963 if (origin == PERF_RECORD_MISC_USER)
964 ++userspace_samples;
965 ++samples;
966 } 999 }
967} 1000}
968 1001
@@ -975,6 +1008,10 @@ static int event__process(event_t *event, struct perf_session *session)
975 case PERF_RECORD_MMAP: 1008 case PERF_RECORD_MMAP:
976 event__process_mmap(event, session); 1009 event__process_mmap(event, session);
977 break; 1010 break;
1011 case PERF_RECORD_FORK:
1012 case PERF_RECORD_EXIT:
1013 event__process_task(event, session);
1014 break;
978 default: 1015 default:
979 break; 1016 break;
980 } 1017 }
@@ -1244,7 +1281,7 @@ static const struct option options[] = {
1244 OPT_BOOLEAN('i', "inherit", &inherit, 1281 OPT_BOOLEAN('i', "inherit", &inherit,
1245 "child tasks inherit counters"), 1282 "child tasks inherit counters"),
1246 OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name", 1283 OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name",
1247 "symbol to annotate - requires -k option"), 1284 "symbol to annotate"),
1248 OPT_BOOLEAN('z', "zero", &zero, 1285 OPT_BOOLEAN('z', "zero", &zero,
1249 "zero history across updates"), 1286 "zero history across updates"),
1250 OPT_INTEGER('F', "freq", &freq, 1287 OPT_INTEGER('F', "freq", &freq,
@@ -1280,16 +1317,14 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1280 1317
1281 symbol_conf.priv_size = (sizeof(struct sym_entry) + 1318 symbol_conf.priv_size = (sizeof(struct sym_entry) +
1282 (nr_counters + 1) * sizeof(unsigned long)); 1319 (nr_counters + 1) * sizeof(unsigned long));
1283 if (symbol_conf.vmlinux_name == NULL) 1320
1284 symbol_conf.try_vmlinux_path = true; 1321 symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
1285 if (symbol__init() < 0) 1322 if (symbol__init() < 0)
1286 return -1; 1323 return -1;
1287 1324
1288 if (delay_secs < 1) 1325 if (delay_secs < 1)
1289 delay_secs = 1; 1326 delay_secs = 1;
1290 1327
1291 parse_source(sym_filter_entry);
1292
1293 /* 1328 /*
1294 * User specified count overrides default frequency. 1329 * User specified count overrides default frequency.
1295 */ 1330 */
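
parse_source() now feeds objdump the map__rip_2objdump()-translated start/stop addresses and classifies each output line by its leading hex run: if the run ends in ':' it is an instruction line whose address is mapped back via map__objdump_2ip(), otherwise it is interleaved -dS source with no IP, and record_precise_ip() skips it. A small Python sketch of that classification (the demo addresses are made up):

def objdump_line_ip(line):
    # Equivalent of: eip = strtoull(line, &sep, 16); address valid iff *sep == ':'
    s = line.lstrip()
    i = 0
    while i < len(s) and s[i] in "0123456789abcdefABCDEF":
        i += 1
    if i and i < len(s) and s[i] == ':':
        return int(s[:i], 16)   # perf then converts this objdump address to an ip
    return 0                    # source-only line, no IP info

if __name__ == "__main__":
    print(hex(objdump_line_ip("ffffffff8104b3a5:\t55\tpush   %rbp")))
    print(objdump_line_ip("static void example(void)   /* no address */"))
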
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 574a215e800b..5db687fc13de 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -44,6 +44,7 @@ static void setup_scripting(void)
44 perf_set_argv_exec_path(perf_exec_path()); 44 perf_set_argv_exec_path(perf_exec_path());
45 45
46 setup_perl_scripting(); 46 setup_perl_scripting();
47 setup_python_scripting();
47 48
48 scripting_ops = &default_scripting_ops; 49 scripting_ops = &default_scripting_ops;
49} 50}
@@ -75,11 +76,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
75 76
76 event__parse_sample(event, session->sample_type, &data); 77 event__parse_sample(event, session->sample_type, &data);
77 78
78 dump_printf("(IP, %d): %d/%d: %p period: %Ld\n", 79 dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
79 event->header.misc, 80 data.pid, data.tid, data.ip, data.period);
80 data.pid, data.tid,
81 (void *)(long)data.ip,
82 (long long)data.period);
83 81
84 thread = perf_session__findnew(session, event->ip.pid); 82 thread = perf_session__findnew(session, event->ip.pid);
85 if (thread == NULL) { 83 if (thread == NULL) {
@@ -103,22 +101,9 @@ static int process_sample_event(event_t *event, struct perf_session *session)
103 return 0; 101 return 0;
104} 102}
105 103
106static int sample_type_check(struct perf_session *session)
107{
108 if (!(session->sample_type & PERF_SAMPLE_RAW)) {
109 fprintf(stderr,
110 "No trace sample to read. Did you call perf record "
111 "without -R?");
112 return -1;
113 }
114
115 return 0;
116}
117
118static struct perf_event_ops event_ops = { 104static struct perf_event_ops event_ops = {
119 .process_sample_event = process_sample_event, 105 .sample = process_sample_event,
120 .process_comm_event = event__process_comm, 106 .comm = event__process_comm,
121 .sample_type_check = sample_type_check,
122}; 107};
123 108
124static int __cmd_trace(struct perf_session *session) 109static int __cmd_trace(struct perf_session *session)
@@ -235,9 +220,9 @@ static int parse_scriptname(const struct option *opt __used,
235 const char *script, *ext; 220 const char *script, *ext;
236 int len; 221 int len;
237 222
238 if (strcmp(str, "list") == 0) { 223 if (strcmp(str, "lang") == 0) {
239 list_available_languages(); 224 list_available_languages();
240 return 0; 225 exit(0);
241 } 226 }
242 227
243 script = strchr(str, ':'); 228 script = strchr(str, ':');
@@ -531,6 +516,8 @@ static const struct option options[] = {
531 parse_scriptname), 516 parse_scriptname),
532 OPT_STRING('g', "gen-script", &generate_script_lang, "lang", 517 OPT_STRING('g', "gen-script", &generate_script_lang, "lang",
533 "generate perf-trace.xx script in specified language"), 518 "generate perf-trace.xx script in specified language"),
519 OPT_STRING('i', "input", &input_name, "file",
520 "input file name"),
534 521
535 OPT_END() 522 OPT_END()
536}; 523};
@@ -592,6 +579,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
592 if (session == NULL) 579 if (session == NULL)
593 return -ENOMEM; 580 return -ENOMEM;
594 581
582 if (!perf_session__has_traces(session, "record -R"))
583 return -EINVAL;
584
595 if (generate_script_lang) { 585 if (generate_script_lang) {
596 struct stat perf_stat; 586 struct stat perf_stat;
597 587
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 18035b1f16c7..10fe49e7048a 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -16,6 +16,7 @@ extern int check_pager_config(const char *cmd);
16 16
17extern int cmd_annotate(int argc, const char **argv, const char *prefix); 17extern int cmd_annotate(int argc, const char **argv, const char *prefix);
18extern int cmd_bench(int argc, const char **argv, const char *prefix); 18extern int cmd_bench(int argc, const char **argv, const char *prefix);
19extern int cmd_buildid_cache(int argc, const char **argv, const char *prefix);
19extern int cmd_buildid_list(int argc, const char **argv, const char *prefix); 20extern int cmd_buildid_list(int argc, const char **argv, const char *prefix);
20extern int cmd_diff(int argc, const char **argv, const char *prefix); 21extern int cmd_diff(int argc, const char **argv, const char *prefix);
21extern int cmd_help(int argc, const char **argv, const char *prefix); 22extern int cmd_help(int argc, const char **argv, const char *prefix);
@@ -30,5 +31,6 @@ extern int cmd_trace(int argc, const char **argv, const char *prefix);
30extern int cmd_version(int argc, const char **argv, const char *prefix); 31extern int cmd_version(int argc, const char **argv, const char *prefix);
31extern int cmd_probe(int argc, const char **argv, const char *prefix); 32extern int cmd_probe(int argc, const char **argv, const char *prefix);
32extern int cmd_kmem(int argc, const char **argv, const char *prefix); 33extern int cmd_kmem(int argc, const char **argv, const char *prefix);
34extern int cmd_lock(int argc, const char **argv, const char *prefix);
33 35
34#endif 36#endif
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index 71dc7c3fe7b2..9afcff2e3ae5 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -3,7 +3,9 @@
3# command name category [deprecated] [common] 3# command name category [deprecated] [common]
4# 4#
5perf-annotate mainporcelain common 5perf-annotate mainporcelain common
6perf-archive mainporcelain common
6perf-bench mainporcelain common 7perf-bench mainporcelain common
8perf-buildid-cache mainporcelain common
7perf-buildid-list mainporcelain common 9perf-buildid-list mainporcelain common
8perf-diff mainporcelain common 10perf-diff mainporcelain common
9perf-list mainporcelain common 11perf-list mainporcelain common
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
index 8d0de5130db3..bd0bb1b1279b 100644
--- a/tools/perf/design.txt
+++ b/tools/perf/design.txt
@@ -101,10 +101,10 @@ enum hw_event_ids {
101 */ 101 */
102 PERF_COUNT_HW_CPU_CYCLES = 0, 102 PERF_COUNT_HW_CPU_CYCLES = 0,
103 PERF_COUNT_HW_INSTRUCTIONS = 1, 103 PERF_COUNT_HW_INSTRUCTIONS = 1,
104 PERF_COUNT_HW_CACHE_REFERENCES = 2, 104 PERF_COUNT_HW_CACHE_REFERENCES = 2,
105 PERF_COUNT_HW_CACHE_MISSES = 3, 105 PERF_COUNT_HW_CACHE_MISSES = 3,
106 PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, 106 PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4,
107 PERF_COUNT_HW_BRANCH_MISSES = 5, 107 PERF_COUNT_HW_BRANCH_MISSES = 5,
108 PERF_COUNT_HW_BUS_CYCLES = 6, 108 PERF_COUNT_HW_BUS_CYCLES = 6,
109}; 109};
110 110
@@ -131,8 +131,8 @@ software events, selected by 'event_id':
131 */ 131 */
132enum sw_event_ids { 132enum sw_event_ids {
133 PERF_COUNT_SW_CPU_CLOCK = 0, 133 PERF_COUNT_SW_CPU_CLOCK = 0,
134 PERF_COUNT_SW_TASK_CLOCK = 1, 134 PERF_COUNT_SW_TASK_CLOCK = 1,
135 PERF_COUNT_SW_PAGE_FAULTS = 2, 135 PERF_COUNT_SW_PAGE_FAULTS = 2,
136 PERF_COUNT_SW_CONTEXT_SWITCHES = 3, 136 PERF_COUNT_SW_CONTEXT_SWITCHES = 3,
137 PERF_COUNT_SW_CPU_MIGRATIONS = 4, 137 PERF_COUNT_SW_CPU_MIGRATIONS = 4,
138 PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, 138 PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh
new file mode 100644
index 000000000000..45fbe2f07b15
--- /dev/null
+++ b/tools/perf/perf-archive.sh
@@ -0,0 +1,32 @@
1#!/bin/bash
2# perf archive
3# Arnaldo Carvalho de Melo <acme@redhat.com>
4
5PERF_DATA=perf.data
6if [ $# -ne 0 ] ; then
7 PERF_DATA=$1
8fi
9
10DEBUGDIR=~/.debug/
11BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX)
12
13perf buildid-list -i $PERF_DATA --with-hits > $BUILDIDS
14if [ ! -s $BUILDIDS ] ; then
15 echo "perf archive: no build-ids found"
16 rm -f $BUILDIDS
17 exit 1
18fi
19
20MANIFEST=$(mktemp /tmp/perf-archive-manifest.XXXXXX)
21
22cut -d ' ' -f 1 $BUILDIDS | \
23while read build_id ; do
24 linkname=$DEBUGDIR.build-id/${build_id:0:2}/${build_id:2}
25 filename=$(readlink -f $linkname)
26 echo ${linkname#$DEBUGDIR} >> $MANIFEST
27 echo ${filename#$DEBUGDIR} >> $MANIFEST
28done
29
30tar cfj $PERF_DATA.tar.bz2 -C $DEBUGDIR -T $MANIFEST
31rm -f $MANIFEST $BUILDIDS
32exit 0
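
perf archive relies on the build-id cache under ~/.debug that perf buildid-cache populates: one symlink per build-id, which the script above resolves and feeds to tar. A tiny Python sketch of the link-name layout it walks (the sample build-id is made up):

import os

def buildid_linkname(build_id, debugdir=os.path.expanduser("~/.debug")):
    # DEBUGDIR.build-id/${build_id:0:2}/${build_id:2} from perf-archive.sh
    return os.path.join(debugdir, ".build-id", build_id[:2], build_id[2:])

if __name__ == "__main__":
    print(buildid_linkname("74f9db7aa94d4af304b6edb11d8f56f1886ddf4a"))
    # -> ~/.debug/.build-id/74/f9db7aa94d4af304b6edb11d8f56f1886ddf4a
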
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 873e55fab375..57cb107c1f13 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -48,7 +48,8 @@ int check_pager_config(const char *cmd)
48 return c.val; 48 return c.val;
49} 49}
50 50
51static void commit_pager_choice(void) { 51static void commit_pager_choice(void)
52{
52 switch (use_pager) { 53 switch (use_pager) {
53 case 0: 54 case 0:
54 setenv("PERF_PAGER", "cat", 1); 55 setenv("PERF_PAGER", "cat", 1);
@@ -70,7 +71,7 @@ static void set_debugfs_path(void)
70 "tracing/events"); 71 "tracing/events");
71} 72}
72 73
73static int handle_options(const char*** argv, int* argc, int* envchanged) 74static int handle_options(const char ***argv, int *argc, int *envchanged)
74{ 75{
75 int handled = 0; 76 int handled = 0;
76 77
@@ -109,7 +110,7 @@ static int handle_options(const char*** argv, int* argc, int* envchanged)
109 *envchanged = 1; 110 *envchanged = 1;
110 } else if (!strcmp(cmd, "--perf-dir")) { 111 } else if (!strcmp(cmd, "--perf-dir")) {
111 if (*argc < 2) { 112 if (*argc < 2) {
112 fprintf(stderr, "No directory given for --perf-dir.\n" ); 113 fprintf(stderr, "No directory given for --perf-dir.\n");
113 usage(perf_usage_string); 114 usage(perf_usage_string);
114 } 115 }
115 setenv(PERF_DIR_ENVIRONMENT, (*argv)[1], 1); 116 setenv(PERF_DIR_ENVIRONMENT, (*argv)[1], 1);
@@ -124,7 +125,7 @@ static int handle_options(const char*** argv, int* argc, int* envchanged)
124 *envchanged = 1; 125 *envchanged = 1;
125 } else if (!strcmp(cmd, "--work-tree")) { 126 } else if (!strcmp(cmd, "--work-tree")) {
126 if (*argc < 2) { 127 if (*argc < 2) {
127 fprintf(stderr, "No directory given for --work-tree.\n" ); 128 fprintf(stderr, "No directory given for --work-tree.\n");
128 usage(perf_usage_string); 129 usage(perf_usage_string);
129 } 130 }
130 setenv(PERF_WORK_TREE_ENVIRONMENT, (*argv)[1], 1); 131 setenv(PERF_WORK_TREE_ENVIRONMENT, (*argv)[1], 1);
@@ -168,7 +169,7 @@ static int handle_alias(int *argcp, const char ***argv)
168{ 169{
169 int envchanged = 0, ret = 0, saved_errno = errno; 170 int envchanged = 0, ret = 0, saved_errno = errno;
170 int count, option_count; 171 int count, option_count;
171 const char** new_argv; 172 const char **new_argv;
172 const char *alias_command; 173 const char *alias_command;
173 char *alias_string; 174 char *alias_string;
174 175
@@ -210,11 +211,11 @@ static int handle_alias(int *argcp, const char ***argv)
210 if (!strcmp(alias_command, new_argv[0])) 211 if (!strcmp(alias_command, new_argv[0]))
211 die("recursive alias: %s", alias_command); 212 die("recursive alias: %s", alias_command);
212 213
213 new_argv = realloc(new_argv, sizeof(char*) * 214 new_argv = realloc(new_argv, sizeof(char *) *
214 (count + *argcp + 1)); 215 (count + *argcp + 1));
215 /* insert after command name */ 216 /* insert after command name */
216 memcpy(new_argv + count, *argv + 1, sizeof(char*) * *argcp); 217 memcpy(new_argv + count, *argv + 1, sizeof(char *) * *argcp);
217 new_argv[count+*argcp] = NULL; 218 new_argv[count + *argcp] = NULL;
218 219
219 *argv = new_argv; 220 *argv = new_argv;
220 *argcp += count - 1; 221 *argcp += count - 1;
@@ -285,6 +286,7 @@ static void handle_internal_command(int argc, const char **argv)
285{ 286{
286 const char *cmd = argv[0]; 287 const char *cmd = argv[0];
287 static struct cmd_struct commands[] = { 288 static struct cmd_struct commands[] = {
289 { "buildid-cache", cmd_buildid_cache, 0 },
288 { "buildid-list", cmd_buildid_list, 0 }, 290 { "buildid-list", cmd_buildid_list, 0 },
289 { "diff", cmd_diff, 0 }, 291 { "diff", cmd_diff, 0 },
290 { "help", cmd_help, 0 }, 292 { "help", cmd_help, 0 },
@@ -301,6 +303,7 @@ static void handle_internal_command(int argc, const char **argv)
301 { "sched", cmd_sched, 0 }, 303 { "sched", cmd_sched, 0 },
302 { "probe", cmd_probe, 0 }, 304 { "probe", cmd_probe, 0 },
303 { "kmem", cmd_kmem, 0 }, 305 { "kmem", cmd_kmem, 0 },
306 { "lock", cmd_lock, 0 },
304 }; 307 };
305 unsigned int i; 308 unsigned int i;
306 static const char ext[] = STRIP_EXTENSION; 309 static const char ext[] = STRIP_EXTENSION;
@@ -388,7 +391,7 @@ static int run_argv(int *argcp, const char ***argv)
388/* mini /proc/mounts parser: searching for "^blah /mount/point debugfs" */ 391/* mini /proc/mounts parser: searching for "^blah /mount/point debugfs" */
389static void get_debugfs_mntpt(void) 392static void get_debugfs_mntpt(void)
390{ 393{
391 const char *path = debugfs_find_mountpoint(); 394 const char *path = debugfs_mount(NULL);
392 395
393 if (path) 396 if (path)
394 strncpy(debugfs_mntpt, path, sizeof(debugfs_mntpt)); 397 strncpy(debugfs_mntpt, path, sizeof(debugfs_mntpt));
@@ -449,8 +452,8 @@ int main(int argc, const char **argv)
449 setup_path(); 452 setup_path();
450 453
451 while (1) { 454 while (1) {
452 static int done_help = 0; 455 static int done_help;
453 static int was_alias = 0; 456 static int was_alias;
454 457
455 was_alias = run_argv(&argc, &argv); 458 was_alias = run_argv(&argc, &argv);
456 if (errno != ENOENT) 459 if (errno != ENOENT)
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c
index af78d9a52a7d..01a64ad693f2 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c
@@ -31,13 +31,14 @@
31#include "EXTERN.h" 31#include "EXTERN.h"
32#include "perl.h" 32#include "perl.h"
33#include "XSUB.h" 33#include "XSUB.h"
34#include "../../../util/trace-event-perl.h" 34#include "../../../perf.h"
35#include "../../../util/trace-event.h"
35 36
36#ifndef PERL_UNUSED_VAR 37#ifndef PERL_UNUSED_VAR
37# define PERL_UNUSED_VAR(var) if (0) var = var 38# define PERL_UNUSED_VAR(var) if (0) var = var
38#endif 39#endif
39 40
40#line 41 "Context.c" 41#line 42 "Context.c"
41 42
42XS(XS_Perf__Trace__Context_common_pc); /* prototype to pass -Wmissing-prototypes */ 43XS(XS_Perf__Trace__Context_common_pc); /* prototype to pass -Wmissing-prototypes */
43XS(XS_Perf__Trace__Context_common_pc) 44XS(XS_Perf__Trace__Context_common_pc)
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs
index fb78006c165e..549cf0467d30 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs
@@ -22,7 +22,8 @@
22#include "EXTERN.h" 22#include "EXTERN.h"
23#include "perl.h" 23#include "perl.h"
24#include "XSUB.h" 24#include "XSUB.h"
25#include "../../../util/trace-event-perl.h" 25#include "../../../perf.h"
26#include "../../../util/trace-event.h"
26 27
27MODULE = Perf::Trace::Context PACKAGE = Perf::Trace::Context 28MODULE = Perf::Trace::Context PACKAGE = Perf::Trace::Context
28PROTOTYPES: ENABLE 29PROTOTYPES: ENABLE
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
index 052f132ced24..f869c48dc9b0 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
@@ -44,7 +44,7 @@ sub nsecs_secs {
44sub nsecs_nsecs { 44sub nsecs_nsecs {
45 my ($nsecs) = @_; 45 my ($nsecs) = @_;
46 46
47 return $nsecs - nsecs_secs($nsecs); 47 return $nsecs % $NSECS_PER_SEC;
48} 48}
49 49
50sub nsecs_str { 50sub nsecs_str {
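
This one-line Util.pm fix matters because nsecs_secs() returns whole seconds, not nanoseconds, so subtracting it from a nanosecond timestamp left the value almost unchanged; the remainder is what was wanted. A quick Python check of the before/after behaviour:

NSECS_PER_SEC = 1000000000

def nsecs_secs(ns):
    return ns // NSECS_PER_SEC

def nsecs_nsecs_old(ns):            # the buggy expression: ns minus *seconds*
    return ns - nsecs_secs(ns)

def nsecs_nsecs_fixed(ns):          # the new expression
    return ns % NSECS_PER_SEC

if __name__ == "__main__":
    t = 3 * NSECS_PER_SEC + 123456789
    print(nsecs_nsecs_old(t))       # 3123456786, clearly not a sub-second part
    print(nsecs_nsecs_fixed(t))     # 123456789
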
diff --git a/tools/perf/scripts/perl/bin/check-perf-trace-record b/tools/perf/scripts/perl/bin/check-perf-trace-record
index c7ec5de2f535..e6cb1474f8e8 100644
--- a/tools/perf/scripts/perl/bin/check-perf-trace-record
+++ b/tools/perf/scripts/perl/bin/check-perf-trace-record
@@ -1,7 +1,2 @@
1#!/bin/bash 1#!/bin/bash
2perf record -c 1 -f -a -M -R -e kmem:kmalloc -e irq:softirq_entry 2perf record -c 1 -f -a -M -R -e kmem:kmalloc -e irq:softirq_entry -e kmem:kfree
3
4
5
6
7
diff --git a/tools/perf/scripts/perl/bin/check-perf-trace-report b/tools/perf/scripts/perl/bin/check-perf-trace-report
deleted file mode 100644
index 7fc4a033dd49..000000000000
--- a/tools/perf/scripts/perl/bin/check-perf-trace-report
+++ /dev/null
@@ -1,6 +0,0 @@
1#!/bin/bash
2# description: useless but exhaustive test script
3perf trace -s ~/libexec/perf-core/scripts/perl/check-perf-trace.pl
4
5
6
diff --git a/tools/perf/scripts/perl/bin/failed-syscalls-record b/tools/perf/scripts/perl/bin/failed-syscalls-record
new file mode 100644
index 000000000000..f8885d389e6f
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/failed-syscalls-record
@@ -0,0 +1,2 @@
1#!/bin/bash
2perf record -c 1 -f -a -M -R -e raw_syscalls:sys_exit
diff --git a/tools/perf/scripts/perl/bin/failed-syscalls-report b/tools/perf/scripts/perl/bin/failed-syscalls-report
new file mode 100644
index 000000000000..8bfc660e5056
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/failed-syscalls-report
@@ -0,0 +1,4 @@
1#!/bin/bash
2# description: system-wide failed syscalls
3# args: [comm]
4perf trace -s ~/libexec/perf-core/scripts/perl/failed-syscalls.pl $1
diff --git a/tools/perf/scripts/perl/failed-syscalls.pl b/tools/perf/scripts/perl/failed-syscalls.pl
new file mode 100644
index 000000000000..c18e7e27a84b
--- /dev/null
+++ b/tools/perf/scripts/perl/failed-syscalls.pl
@@ -0,0 +1,38 @@
1# failed system call counts
2# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
3# Licensed under the terms of the GNU GPL License version 2
4#
5# Displays system-wide failed system call totals
6# If a [comm] arg is specified, only syscalls called by [comm] are displayed.
7
8use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
9use lib "./Perf-Trace-Util/lib";
10use Perf::Trace::Core;
11use Perf::Trace::Context;
12use Perf::Trace::Util;
13
14my %failed_syscalls;
15
16sub raw_syscalls::sys_exit
17{
18 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
19 $common_pid, $common_comm,
20 $id, $ret) = @_;
21
22 if ($ret < 0) {
23 $failed_syscalls{$common_comm}++;
24 }
25}
26
27sub trace_end
28{
29 printf("\nfailed syscalls by comm:\n\n");
30
31 printf("%-20s %10s\n", "comm", "# errors");
32	    printf("%-20s %10s\n", "--------------------", "----------");
33
34 foreach my $comm (sort {$failed_syscalls{$b} <=> $failed_syscalls{$a}}
35 keys %failed_syscalls) {
36 printf("%-20s %10s\n", $comm, $failed_syscalls{$comm});
37 }
38}
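
The same counting pattern is what the new Python scripting support generates (failed-syscalls-by-pid.py at the end of this diff is its Python counterpart). A standalone sketch of the handler logic, fed with made-up events so it runs outside perf:

from collections import defaultdict

failed_syscalls = defaultdict(int)

def raw_syscalls__sys_exit(common_comm, ret):
    # body of the sys_exit handler: count returns < 0 per command name
    if ret < 0:
        failed_syscalls[common_comm] += 1

def trace_end():
    print("\nfailed syscalls by comm:\n")
    print("%-20s %10s" % ("comm", "# errors"))
    print("%-20s %10s" % ("-" * 20, "-" * 10))
    for comm in sorted(failed_syscalls, key=failed_syscalls.get, reverse=True):
        print("%-20s %10d" % (comm, failed_syscalls[comm]))

if __name__ == "__main__":
    for comm, ret in [("sshd", -13), ("firefox", -2), ("firefox", -11), ("bash", 0)]:
        raw_syscalls__sys_exit(comm, ret)
    trace_end()
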
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Context.c b/tools/perf/scripts/python/Perf-Trace-Util/Context.c
new file mode 100644
index 000000000000..957085dd5d8d
--- /dev/null
+++ b/tools/perf/scripts/python/Perf-Trace-Util/Context.c
@@ -0,0 +1,88 @@
1/*
2 * Context.c. Python interfaces for perf trace.
3 *
4 * Copyright (C) 2010 Tom Zanussi <tzanussi@gmail.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
21
22#include <Python.h>
23#include "../../../perf.h"
24#include "../../../util/trace-event.h"
25
26PyMODINIT_FUNC initperf_trace_context(void);
27
28static PyObject *perf_trace_context_common_pc(PyObject *self, PyObject *args)
29{
30 static struct scripting_context *scripting_context;
31 PyObject *context;
32 int retval;
33
34 if (!PyArg_ParseTuple(args, "O", &context))
35 return NULL;
36
37 scripting_context = PyCObject_AsVoidPtr(context);
38 retval = common_pc(scripting_context);
39
40 return Py_BuildValue("i", retval);
41}
42
43static PyObject *perf_trace_context_common_flags(PyObject *self,
44 PyObject *args)
45{
46 static struct scripting_context *scripting_context;
47 PyObject *context;
48 int retval;
49
50 if (!PyArg_ParseTuple(args, "O", &context))
51 return NULL;
52
53 scripting_context = PyCObject_AsVoidPtr(context);
54 retval = common_flags(scripting_context);
55
56 return Py_BuildValue("i", retval);
57}
58
59static PyObject *perf_trace_context_common_lock_depth(PyObject *self,
60 PyObject *args)
61{
62 static struct scripting_context *scripting_context;
63 PyObject *context;
64 int retval;
65
66 if (!PyArg_ParseTuple(args, "O", &context))
67 return NULL;
68
69 scripting_context = PyCObject_AsVoidPtr(context);
70 retval = common_lock_depth(scripting_context);
71
72 return Py_BuildValue("i", retval);
73}
74
75static PyMethodDef ContextMethods[] = {
76 { "common_pc", perf_trace_context_common_pc, METH_VARARGS,
77 "Get the common preempt count event field value."},
78 { "common_flags", perf_trace_context_common_flags, METH_VARARGS,
79 "Get the common flags event field value."},
80 { "common_lock_depth", perf_trace_context_common_lock_depth,
81 METH_VARARGS, "Get the common lock depth event field value."},
82 { NULL, NULL, 0, NULL}
83};
84
85PyMODINIT_FUNC initperf_trace_context(void)
86{
87 (void) Py_InitModule("perf_trace_context", ContextMethods);
88}
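
A handler only receives an opaque 'context' object; the module above is how it reaches the common fields that are not passed as arguments. A hypothetical handler body showing the intended use; it only makes sense under 'perf trace -s', which sets PERF_EXEC_PATH and passes real contexts in:

import os, sys
sys.path.append(os.environ['PERF_EXEC_PATH'] +
        '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')

from perf_trace_context import *     # common_pc, common_flags, common_lock_depth
from Core import *

def irq__softirq_entry(event_name, context, common_cpu,
        common_secs, common_nsecs, common_pid, common_comm,
        vec):
    # 'context' is the PyCObject that the C module above unwraps on each call
    print("pc=%d flags=%s lock_depth=%d" % (
        common_pc(context),
        trace_flag_str(common_flags(context)),
        common_lock_depth(context)))
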
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
new file mode 100644
index 000000000000..1dc464ee2ca8
--- /dev/null
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
@@ -0,0 +1,91 @@
1# Core.py - Python extension for perf trace, core functions
2#
3# Copyright (C) 2010 by Tom Zanussi <tzanussi@gmail.com>
4#
5# This software may be distributed under the terms of the GNU General
6# Public License ("GPL") version 2 as published by the Free Software
7# Foundation.
8
9from collections import defaultdict
10
11def autodict():
12 return defaultdict(autodict)
13
14flag_fields = autodict()
15symbolic_fields = autodict()
16
17def define_flag_field(event_name, field_name, delim):
18 flag_fields[event_name][field_name]['delim'] = delim
19
20def define_flag_value(event_name, field_name, value, field_str):
21 flag_fields[event_name][field_name]['values'][value] = field_str
22
23def define_symbolic_field(event_name, field_name):
24 # nothing to do, really
25 pass
26
27def define_symbolic_value(event_name, field_name, value, field_str):
28 symbolic_fields[event_name][field_name]['values'][value] = field_str
29
30def flag_str(event_name, field_name, value):
31 string = ""
32
33 if flag_fields[event_name][field_name]:
34 print_delim = 0
35 keys = flag_fields[event_name][field_name]['values'].keys()
36 keys.sort()
37 for idx in keys:
38 if not value and not idx:
39 string += flag_fields[event_name][field_name]['values'][idx]
40 break
41 if idx and (value & idx) == idx:
42 if print_delim and flag_fields[event_name][field_name]['delim']:
43 string += " " + flag_fields[event_name][field_name]['delim'] + " "
44 string += flag_fields[event_name][field_name]['values'][idx]
45 print_delim = 1
46 value &= ~idx
47
48 return string
49
50def symbol_str(event_name, field_name, value):
51 string = ""
52
53 if symbolic_fields[event_name][field_name]:
54 keys = symbolic_fields[event_name][field_name]['values'].keys()
55 keys.sort()
56 for idx in keys:
57 if not value and not idx:
58 string = symbolic_fields[event_name][field_name]['values'][idx]
59 break
60 if (value == idx):
61 string = symbolic_fields[event_name][field_name]['values'][idx]
62 break
63
64 return string
65
66trace_flags = { 0x00: "NONE", \
67 0x01: "IRQS_OFF", \
68 0x02: "IRQS_NOSUPPORT", \
69 0x04: "NEED_RESCHED", \
70 0x08: "HARDIRQ", \
71 0x10: "SOFTIRQ" }
72
73def trace_flag_str(value):
74 string = ""
75 print_delim = 0
76
77 keys = trace_flags.keys()
78
79 for idx in keys:
80 if not value and not idx:
81 string += "NONE"
82 break
83
84 if idx and (value & idx) == idx:
85 if print_delim:
86 string += " | ";
87 string += trace_flags[idx]
88 print_delim = 1
89 value &= ~idx
90
91 return string
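
Generated scripts call define_flag_field()/define_flag_value() once per event field and then use flag_str() to decode raw values. A small usage sketch, assuming PERF_EXEC_PATH is set as in the generated scripts; the two GFP values are illustrative:

import os, sys
sys.path.append(os.environ['PERF_EXEC_PATH'] +
        '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
from Core import *

define_flag_field("kmem__kmalloc", "gfp_flags", "|")
define_flag_value("kmem__kmalloc", "gfp_flags", 0x10, "GFP_WAIT")
define_flag_value("kmem__kmalloc", "gfp_flags", 0x40, "GFP_IO")

print(flag_str("kmem__kmalloc", "gfp_flags", 0x50))   # GFP_WAIT | GFP_IO
print(trace_flag_str(0x05))    # IRQS_OFF and NEED_RESCHED (order follows dict keys)
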
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
new file mode 100644
index 000000000000..83e91435ed09
--- /dev/null
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
@@ -0,0 +1,25 @@
1# Util.py - Python extension for perf trace, miscellaneous utility code
2#
3# Copyright (C) 2010 by Tom Zanussi <tzanussi@gmail.com>
4#
5# This software may be distributed under the terms of the GNU General
6# Public License ("GPL") version 2 as published by the Free Software
7# Foundation.
8
9NSECS_PER_SEC = 1000000000
10
11def avg(total, n):
12 return total / n
13
14def nsecs(secs, nsecs):
15 return secs * NSECS_PER_SEC + nsecs
16
17def nsecs_secs(nsecs):
18 return nsecs / NSECS_PER_SEC
19
20def nsecs_nsecs(nsecs):
21 return nsecs % NSECS_PER_SEC
22
23def nsecs_str(nsecs):
24 str = "%5u.%09u" % (nsecs_secs(nsecs), nsecs_nsecs(nsecs)),
25 return str
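
These helpers split a nanosecond timestamp the same way the fixed Perl Util.pm does; note that nsecs_str() above keeps a trailing comma, so it actually returns a one-element tuple. A minimal standalone equivalent for reference:

NSECS_PER_SEC = 1000000000

def nsecs(secs, ns):
    return secs * NSECS_PER_SEC + ns

def nsecs_str(ns):                   # same format string, without the stray tuple
    return "%5u.%09u" % (ns // NSECS_PER_SEC, ns % NSECS_PER_SEC)

if __name__ == "__main__":
    print(nsecs_str(nsecs(12, 345)))     # "   12.000000345"
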
diff --git a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record
new file mode 100644
index 000000000000..f8885d389e6f
--- /dev/null
+++ b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record
@@ -0,0 +1,2 @@
1#!/bin/bash
2perf record -c 1 -f -a -M -R -e raw_syscalls:sys_exit
diff --git a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
new file mode 100644
index 000000000000..1e0c0a860c87
--- /dev/null
+++ b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
@@ -0,0 +1,4 @@
1#!/bin/bash
2# description: system-wide failed syscalls, by pid
3# args: [comm]
4perf trace -s ~/libexec/perf-core/scripts/python/failed-syscalls-by-pid.py $1
diff --git a/tools/perf/scripts/python/bin/syscall-counts-by-pid-record b/tools/perf/scripts/python/bin/syscall-counts-by-pid-record
new file mode 100644
index 000000000000..45a8c50359da
--- /dev/null
+++ b/tools/perf/scripts/python/bin/syscall-counts-by-pid-record
@@ -0,0 +1,2 @@
1#!/bin/bash
2perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter
diff --git a/tools/perf/scripts/python/bin/syscall-counts-by-pid-report b/tools/perf/scripts/python/bin/syscall-counts-by-pid-report
new file mode 100644
index 000000000000..f8044d192271
--- /dev/null
+++ b/tools/perf/scripts/python/bin/syscall-counts-by-pid-report
@@ -0,0 +1,4 @@
1#!/bin/bash
2# description: system-wide syscall counts, by pid
3# args: [comm]
4perf trace -s ~/libexec/perf-core/scripts/python/syscall-counts-by-pid.py $1
diff --git a/tools/perf/scripts/python/bin/syscall-counts-record b/tools/perf/scripts/python/bin/syscall-counts-record
new file mode 100644
index 000000000000..45a8c50359da
--- /dev/null
+++ b/tools/perf/scripts/python/bin/syscall-counts-record
@@ -0,0 +1,2 @@
1#!/bin/bash
2perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter
diff --git a/tools/perf/scripts/python/bin/syscall-counts-report b/tools/perf/scripts/python/bin/syscall-counts-report
new file mode 100644
index 000000000000..a366aa61612f
--- /dev/null
+++ b/tools/perf/scripts/python/bin/syscall-counts-report
@@ -0,0 +1,4 @@
1#!/bin/bash
2# description: system-wide syscall counts
3# args: [comm]
4perf trace -s ~/libexec/perf-core/scripts/python/syscall-counts.py $1
diff --git a/tools/perf/scripts/python/check-perf-trace.py b/tools/perf/scripts/python/check-perf-trace.py
new file mode 100644
index 000000000000..964d934395ff
--- /dev/null
+++ b/tools/perf/scripts/python/check-perf-trace.py
@@ -0,0 +1,83 @@
1# perf trace event handlers, generated by perf trace -g python
2# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
3# Licensed under the terms of the GNU GPL License version 2
4#
5# This script tests basic functionality such as flag and symbol
6# strings, common_xxx() calls back into perf, begin, end, unhandled
7# events, etc. Basically, if this script runs successfully and
8# displays expected results, Python scripting support should be ok.
9
10import os
11import sys
12
13sys.path.append(os.environ['PERF_EXEC_PATH'] + \
14 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
15
16from Core import *
17from perf_trace_context import *
18
19unhandled = autodict()
20
21def trace_begin():
22 print "trace_begin"
23 pass
24
25def trace_end():
26 print_unhandled()
27
28def irq__softirq_entry(event_name, context, common_cpu,
29 common_secs, common_nsecs, common_pid, common_comm,
30 vec):
31 print_header(event_name, common_cpu, common_secs, common_nsecs,
32 common_pid, common_comm)
33
34 print_uncommon(context)
35
36 print "vec=%s\n" % \
37 (symbol_str("irq__softirq_entry", "vec", vec)),
38
39def kmem__kmalloc(event_name, context, common_cpu,
40 common_secs, common_nsecs, common_pid, common_comm,
41 call_site, ptr, bytes_req, bytes_alloc,
42 gfp_flags):
43 print_header(event_name, common_cpu, common_secs, common_nsecs,
44 common_pid, common_comm)
45
46 print_uncommon(context)
47
48 print "call_site=%u, ptr=%u, bytes_req=%u, " \
49 "bytes_alloc=%u, gfp_flags=%s\n" % \
50 (call_site, ptr, bytes_req, bytes_alloc,
51
52 flag_str("kmem__kmalloc", "gfp_flags", gfp_flags)),
53
54def trace_unhandled(event_name, context, common_cpu, common_secs, common_nsecs,
55 common_pid, common_comm):
56 try:
57 unhandled[event_name] += 1
58 except TypeError:
59 unhandled[event_name] = 1
60
61def print_header(event_name, cpu, secs, nsecs, pid, comm):
62 print "%-20s %5u %05u.%09u %8u %-20s " % \
63 (event_name, cpu, secs, nsecs, pid, comm),
64
65# print trace fields not included in handler args
66def print_uncommon(context):
67 print "common_preempt_count=%d, common_flags=%s, common_lock_depth=%d, " \
68 % (common_pc(context), trace_flag_str(common_flags(context)), \
69 common_lock_depth(context))
70
71def print_unhandled():
72 keys = unhandled.keys()
73 if not keys:
74 return
75
76 print "\nunhandled events:\n\n",
77
78 print "%-40s %10s\n" % ("event", "count"),
79 print "%-40s %10s\n" % ("----------------------------------------", \
80 "-----------"),
81
82 for event_name in keys:
83 print "%-40s %10d\n" % (event_name, unhandled[event_name])
diff --git a/tools/perf/scripts/python/failed-syscalls-by-pid.py b/tools/perf/scripts/python/failed-syscalls-by-pid.py
new file mode 100644
index 000000000000..0ca02278fe69
--- /dev/null
+++ b/tools/perf/scripts/python/failed-syscalls-by-pid.py
@@ -0,0 +1,68 @@
1# failed system call counts, by pid
2# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
3# Licensed under the terms of the GNU GPL License version 2
4#
5# Displays system-wide failed system call totals, broken down by pid.
6# If a [comm] arg is specified, only syscalls called by [comm] are displayed.
7
8import os
9import sys
10
11sys.path.append(os.environ['PERF_EXEC_PATH'] + \
12 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
13
14from perf_trace_context import *
15from Core import *
16
 17usage = "perf trace -s failed-syscalls-by-pid.py [comm]\n";
18
19for_comm = None
20
21if len(sys.argv) > 2:
22 sys.exit(usage)
23
24if len(sys.argv) > 1:
25 for_comm = sys.argv[1]
26
27syscalls = autodict()
28
29def trace_begin():
30 pass
31
32def trace_end():
33 print_error_totals()
34
35def raw_syscalls__sys_exit(event_name, context, common_cpu,
36 common_secs, common_nsecs, common_pid, common_comm,
37 id, ret):
38 if for_comm is not None:
39 if common_comm != for_comm:
40 return
41
42 if ret < 0:
43 try:
44 syscalls[common_comm][common_pid][id][ret] += 1
45 except TypeError:
46 syscalls[common_comm][common_pid][id][ret] = 1
47
48def print_error_totals():
49 if for_comm is not None:
50 print "\nsyscall errors for %s:\n\n" % (for_comm),
51 else:
52 print "\nsyscall errors:\n\n",
53
54 print "%-30s %10s\n" % ("comm [pid]", "count"),
55 print "%-30s %10s\n" % ("------------------------------", \
56 "----------"),
57
58 comm_keys = syscalls.keys()
59 for comm in comm_keys:
60 pid_keys = syscalls[comm].keys()
61 for pid in pid_keys:
62 print "\n%s [%d]\n" % (comm, pid),
63 id_keys = syscalls[comm][pid].keys()
64 for id in id_keys:
65 print " syscall: %-16d\n" % (id),
66 ret_keys = syscalls[comm][pid][id].keys()
67 for ret, val in sorted(syscalls[comm][pid][id].iteritems(), key = lambda(k, v): (v, k), reverse = True):
68 print " err = %-20d %10d\n" % (ret, val),
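The try/except TypeError counting idiom above relies on Core.py's autodict, which creates intermediate dict levels on first access; a rough standalone equivalent using collections.defaultdict:

from collections import defaultdict

def autodict():
    # nested dict that creates missing intermediate levels on demand
    return defaultdict(autodict)

counts = autodict()
try:
    counts["bash"][1234][42] += 1   # leaf is not an int yet -> TypeError
except TypeError:
    counts["bash"][1234][42] = 1    # first hit for this comm/pid/syscall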
diff --git a/tools/perf/scripts/python/syscall-counts-by-pid.py b/tools/perf/scripts/python/syscall-counts-by-pid.py
new file mode 100644
index 000000000000..af722d6a4b3f
--- /dev/null
+++ b/tools/perf/scripts/python/syscall-counts-by-pid.py
@@ -0,0 +1,64 @@
1# system call counts, by pid
2# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
3# Licensed under the terms of the GNU GPL License version 2
4#
 5# Displays system-wide system call totals, broken down by pid.
6# If a [comm] arg is specified, only syscalls called by [comm] are displayed.
7
8import os
9import sys
10
11sys.path.append(os.environ['PERF_EXEC_PATH'] + \
12 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
13
14from perf_trace_context import *
15from Core import *
16
17usage = "perf trace -s syscall-counts-by-pid.py [comm]\n";
18
19for_comm = None
20
21if len(sys.argv) > 2:
22 sys.exit(usage)
23
24if len(sys.argv) > 1:
25 for_comm = sys.argv[1]
26
27syscalls = autodict()
28
29def trace_begin():
30 pass
31
32def trace_end():
33 print_syscall_totals()
34
35def raw_syscalls__sys_enter(event_name, context, common_cpu,
36 common_secs, common_nsecs, common_pid, common_comm,
37 id, args):
38 if for_comm is not None:
39 if common_comm != for_comm:
40 return
41 try:
42 syscalls[common_comm][common_pid][id] += 1
43 except TypeError:
44 syscalls[common_comm][common_pid][id] = 1
45
46def print_syscall_totals():
47 if for_comm is not None:
48 print "\nsyscall events for %s:\n\n" % (for_comm),
49 else:
50 print "\nsyscall events by comm/pid:\n\n",
51
52 print "%-40s %10s\n" % ("comm [pid]/syscalls", "count"),
53 print "%-40s %10s\n" % ("----------------------------------------", \
54 "----------"),
55
56 comm_keys = syscalls.keys()
57 for comm in comm_keys:
58 pid_keys = syscalls[comm].keys()
59 for pid in pid_keys:
60 print "\n%s [%d]\n" % (comm, pid),
61 id_keys = syscalls[comm][pid].keys()
62 for id, val in sorted(syscalls[comm][pid].iteritems(), \
63 key = lambda(k, v): (v, k), reverse = True):
64 print " %-38d %10d\n" % (id, val),
diff --git a/tools/perf/scripts/python/syscall-counts.py b/tools/perf/scripts/python/syscall-counts.py
new file mode 100644
index 000000000000..f977e85ff049
--- /dev/null
+++ b/tools/perf/scripts/python/syscall-counts.py
@@ -0,0 +1,58 @@
1# system call counts
2# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
3# Licensed under the terms of the GNU GPL License version 2
4#
5# Displays system-wide system call totals, broken down by syscall.
6# If a [comm] arg is specified, only syscalls called by [comm] are displayed.
7
8import os
9import sys
10
11sys.path.append(os.environ['PERF_EXEC_PATH'] + \
12 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
13
14from perf_trace_context import *
15from Core import *
16
17usage = "perf trace -s syscall-counts.py [comm]\n";
18
19for_comm = None
20
21if len(sys.argv) > 2:
22 sys.exit(usage)
23
24if len(sys.argv) > 1:
25 for_comm = sys.argv[1]
26
27syscalls = autodict()
28
29def trace_begin():
30 pass
31
32def trace_end():
33 print_syscall_totals()
34
35def raw_syscalls__sys_enter(event_name, context, common_cpu,
36 common_secs, common_nsecs, common_pid, common_comm,
37 id, args):
38 if for_comm is not None:
39 if common_comm != for_comm:
40 return
41 try:
42 syscalls[id] += 1
43 except TypeError:
44 syscalls[id] = 1
45
46def print_syscall_totals():
47 if for_comm is not None:
48 print "\nsyscall events for %s:\n\n" % (for_comm),
49 else:
50 print "\nsyscall events:\n\n",
51
52 print "%-40s %10s\n" % ("event", "count"),
53 print "%-40s %10s\n" % ("----------------------------------------", \
54 "-----------"),
55
56 for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \
57 reverse = True):
58 print "%-40d %10d\n" % (id, val),
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
new file mode 100644
index 000000000000..04904b35ba81
--- /dev/null
+++ b/tools/perf/util/build-id.c
@@ -0,0 +1,39 @@
1/*
2 * build-id.c
3 *
4 * build-id support
5 *
6 * Copyright (C) 2009, 2010 Red Hat Inc.
7 * Copyright (C) 2009, 2010 Arnaldo Carvalho de Melo <acme@redhat.com>
8 */
9#include "build-id.h"
10#include "event.h"
11#include "symbol.h"
12#include <linux/kernel.h>
13
14static int build_id__mark_dso_hit(event_t *event, struct perf_session *session)
15{
16 struct addr_location al;
17 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
18 struct thread *thread = perf_session__findnew(session, event->ip.pid);
19
20 if (thread == NULL) {
21 pr_err("problem processing %d event, skipping it.\n",
22 event->header.type);
23 return -1;
24 }
25
26 thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION,
27 event->ip.ip, &al);
28
29 if (al.map != NULL)
30 al.map->dso->hit = 1;
31
32 return 0;
33}
34
35struct perf_event_ops build_id__mark_dso_hit_ops = {
36 .sample = build_id__mark_dso_hit,
37 .mmap = event__process_mmap,
38 .fork = event__process_task,
39};
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
new file mode 100644
index 000000000000..1d981d63cf9a
--- /dev/null
+++ b/tools/perf/util/build-id.h
@@ -0,0 +1,8 @@
1#ifndef PERF_BUILD_ID_H_
2#define PERF_BUILD_ID_H_ 1
3
4#include "session.h"
5
6extern struct perf_event_ops build_id__mark_dso_hit_ops;
7
8#endif
diff --git a/tools/perf/util/data_map.c b/tools/perf/util/data_map.c
deleted file mode 100644
index b557b836de3d..000000000000
--- a/tools/perf/util/data_map.c
+++ /dev/null
@@ -1,252 +0,0 @@
1#include "symbol.h"
2#include "util.h"
3#include "debug.h"
4#include "thread.h"
5#include "session.h"
6
7static int process_event_stub(event_t *event __used,
8 struct perf_session *session __used)
9{
10 dump_printf(": unhandled!\n");
11 return 0;
12}
13
14static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
15{
16 if (!handler->process_sample_event)
17 handler->process_sample_event = process_event_stub;
18 if (!handler->process_mmap_event)
19 handler->process_mmap_event = process_event_stub;
20 if (!handler->process_comm_event)
21 handler->process_comm_event = process_event_stub;
22 if (!handler->process_fork_event)
23 handler->process_fork_event = process_event_stub;
24 if (!handler->process_exit_event)
25 handler->process_exit_event = process_event_stub;
26 if (!handler->process_lost_event)
27 handler->process_lost_event = process_event_stub;
28 if (!handler->process_read_event)
29 handler->process_read_event = process_event_stub;
30 if (!handler->process_throttle_event)
31 handler->process_throttle_event = process_event_stub;
32 if (!handler->process_unthrottle_event)
33 handler->process_unthrottle_event = process_event_stub;
34}
35
36static const char *event__name[] = {
37 [0] = "TOTAL",
38 [PERF_RECORD_MMAP] = "MMAP",
39 [PERF_RECORD_LOST] = "LOST",
40 [PERF_RECORD_COMM] = "COMM",
41 [PERF_RECORD_EXIT] = "EXIT",
42 [PERF_RECORD_THROTTLE] = "THROTTLE",
43 [PERF_RECORD_UNTHROTTLE] = "UNTHROTTLE",
44 [PERF_RECORD_FORK] = "FORK",
45 [PERF_RECORD_READ] = "READ",
46 [PERF_RECORD_SAMPLE] = "SAMPLE",
47};
48
49unsigned long event__total[PERF_RECORD_MAX];
50
51void event__print_totals(void)
52{
53 int i;
54 for (i = 0; i < PERF_RECORD_MAX; ++i)
55 pr_info("%10s events: %10ld\n",
56 event__name[i], event__total[i]);
57}
58
59static int process_event(event_t *event, struct perf_session *session,
60 struct perf_event_ops *ops,
61 unsigned long offset, unsigned long head)
62{
63 trace_event(event);
64
65 if (event->header.type < PERF_RECORD_MAX) {
66 dump_printf("%p [%p]: PERF_RECORD_%s",
67 (void *)(offset + head),
68 (void *)(long)(event->header.size),
69 event__name[event->header.type]);
70 ++event__total[0];
71 ++event__total[event->header.type];
72 }
73
74 switch (event->header.type) {
75 case PERF_RECORD_SAMPLE:
76 return ops->process_sample_event(event, session);
77 case PERF_RECORD_MMAP:
78 return ops->process_mmap_event(event, session);
79 case PERF_RECORD_COMM:
80 return ops->process_comm_event(event, session);
81 case PERF_RECORD_FORK:
82 return ops->process_fork_event(event, session);
83 case PERF_RECORD_EXIT:
84 return ops->process_exit_event(event, session);
85 case PERF_RECORD_LOST:
86 return ops->process_lost_event(event, session);
87 case PERF_RECORD_READ:
88 return ops->process_read_event(event, session);
89 case PERF_RECORD_THROTTLE:
90 return ops->process_throttle_event(event, session);
91 case PERF_RECORD_UNTHROTTLE:
92 return ops->process_unthrottle_event(event, session);
93 default:
94 ops->total_unknown++;
95 return -1;
96 }
97}
98
99int perf_header__read_build_ids(int input, u64 offset, u64 size)
100{
101 struct build_id_event bev;
102 char filename[PATH_MAX];
103 u64 limit = offset + size;
104 int err = -1;
105
106 while (offset < limit) {
107 struct dso *dso;
108 ssize_t len;
109
110 if (read(input, &bev, sizeof(bev)) != sizeof(bev))
111 goto out;
112
113 len = bev.header.size - sizeof(bev);
114 if (read(input, filename, len) != len)
115 goto out;
116
117 dso = dsos__findnew(filename);
118 if (dso != NULL)
119 dso__set_build_id(dso, &bev.build_id);
120
121 offset += bev.header.size;
122 }
123 err = 0;
124out:
125 return err;
126}
127
128static struct thread *perf_session__register_idle_thread(struct perf_session *self)
129{
130 struct thread *thread = perf_session__findnew(self, 0);
131
132 if (!thread || thread__set_comm(thread, "swapper")) {
133 pr_err("problem inserting idle task.\n");
134 thread = NULL;
135 }
136
137 return thread;
138}
139
140int perf_session__process_events(struct perf_session *self,
141 struct perf_event_ops *ops)
142{
143 int err;
144 unsigned long head, shift;
145 unsigned long offset = 0;
146 size_t page_size;
147 event_t *event;
148 uint32_t size;
149 char *buf;
150
151 if (perf_session__register_idle_thread(self) == NULL)
152 return -ENOMEM;
153
154 perf_event_ops__fill_defaults(ops);
155
156 page_size = getpagesize();
157
158 head = self->header.data_offset;
159 self->sample_type = perf_header__sample_type(&self->header);
160
161 err = -EINVAL;
162 if (ops->sample_type_check && ops->sample_type_check(self) < 0)
163 goto out_err;
164
165 if (!ops->full_paths) {
166 char bf[PATH_MAX];
167
168 if (getcwd(bf, sizeof(bf)) == NULL) {
169 err = -errno;
170out_getcwd_err:
171 pr_err("failed to get the current directory\n");
172 goto out_err;
173 }
174 self->cwd = strdup(bf);
175 if (self->cwd == NULL) {
176 err = -ENOMEM;
177 goto out_getcwd_err;
178 }
179 self->cwdlen = strlen(self->cwd);
180 }
181
182 shift = page_size * (head / page_size);
183 offset += shift;
184 head -= shift;
185
186remap:
187 buf = mmap(NULL, page_size * self->mmap_window, PROT_READ,
188 MAP_SHARED, self->fd, offset);
189 if (buf == MAP_FAILED) {
190 pr_err("failed to mmap file\n");
191 err = -errno;
192 goto out_err;
193 }
194
195more:
196 event = (event_t *)(buf + head);
197
198 size = event->header.size;
199 if (!size)
200 size = 8;
201
202 if (head + event->header.size >= page_size * self->mmap_window) {
203 int munmap_ret;
204
205 shift = page_size * (head / page_size);
206
207 munmap_ret = munmap(buf, page_size * self->mmap_window);
208 assert(munmap_ret == 0);
209
210 offset += shift;
211 head -= shift;
212 goto remap;
213 }
214
215 size = event->header.size;
216
217 dump_printf("\n%p [%p]: event: %d\n",
218 (void *)(offset + head),
219 (void *)(long)event->header.size,
220 event->header.type);
221
222 if (!size || process_event(event, self, ops, offset, head) < 0) {
223
224 dump_printf("%p [%p]: skipping unknown header type: %d\n",
225 (void *)(offset + head),
226 (void *)(long)(event->header.size),
227 event->header.type);
228
229 /*
230 * assume we lost track of the stream, check alignment, and
231 * increment a single u64 in the hope to catch on again 'soon'.
232 */
233
234 if (unlikely(head & 7))
235 head &= ~7ULL;
236
237 size = 8;
238 }
239
240 head += size;
241
242 if (offset + head >= self->header.data_offset + self->header.data_size)
243 goto done;
244
245 if (offset + head < self->size)
246 goto more;
247
248done:
249 err = 0;
250out_err:
251 return err;
252}
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 28d520d5a1fb..0905600c3851 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -9,6 +9,7 @@
9#include "color.h" 9#include "color.h"
10#include "event.h" 10#include "event.h"
11#include "debug.h" 11#include "debug.h"
12#include "util.h"
12 13
13int verbose = 0; 14int verbose = 0;
14int dump_trace = 0; 15int dump_trace = 0;
diff --git a/tools/perf/util/debugfs.c b/tools/perf/util/debugfs.c
index 06b73ee02c49..a88fefc0cc0a 100644
--- a/tools/perf/util/debugfs.c
+++ b/tools/perf/util/debugfs.c
@@ -106,16 +106,14 @@ int debugfs_valid_entry(const char *path)
106 return 0; 106 return 0;
107} 107}
108 108
109/* mount the debugfs somewhere */ 109/* mount the debugfs somewhere if it's not mounted */
110 110
111int debugfs_mount(const char *mountpoint) 111char *debugfs_mount(const char *mountpoint)
112{ 112{
113 char mountcmd[128];
114
115 /* see if it's already mounted */ 113 /* see if it's already mounted */
116 if (debugfs_find_mountpoint()) { 114 if (debugfs_find_mountpoint()) {
117 debugfs_premounted = 1; 115 debugfs_premounted = 1;
118 return 0; 116 return debugfs_mountpoint;
119 } 117 }
120 118
121 /* if not mounted and no argument */ 119 /* if not mounted and no argument */
@@ -127,13 +125,14 @@ int debugfs_mount(const char *mountpoint)
127 mountpoint = "/sys/kernel/debug"; 125 mountpoint = "/sys/kernel/debug";
128 } 126 }
129 127
128 if (mount(NULL, mountpoint, "debugfs", 0, NULL) < 0)
129 return NULL;
130
130 /* save the mountpoint */ 131 /* save the mountpoint */
131 strncpy(debugfs_mountpoint, mountpoint, sizeof(debugfs_mountpoint)); 132 strncpy(debugfs_mountpoint, mountpoint, sizeof(debugfs_mountpoint));
133 debugfs_found = 1;
132 134
133 /* mount it */ 135 return debugfs_mountpoint;
134 snprintf(mountcmd, sizeof(mountcmd),
135 "/bin/mount -t debugfs debugfs %s", mountpoint);
136 return system(mountcmd);
137} 136}
138 137
139/* umount the debugfs */ 138/* umount the debugfs */
diff --git a/tools/perf/util/debugfs.h b/tools/perf/util/debugfs.h
index 3cd14f9ae784..83a02879745f 100644
--- a/tools/perf/util/debugfs.h
+++ b/tools/perf/util/debugfs.h
@@ -15,7 +15,7 @@
15extern const char *debugfs_find_mountpoint(void); 15extern const char *debugfs_find_mountpoint(void);
16extern int debugfs_valid_mountpoint(const char *debugfs); 16extern int debugfs_valid_mountpoint(const char *debugfs);
17extern int debugfs_valid_entry(const char *path); 17extern int debugfs_valid_entry(const char *path);
18extern int debugfs_mount(const char *mountpoint); 18extern char *debugfs_mount(const char *mountpoint);
19extern int debugfs_umount(void); 19extern int debugfs_umount(void);
20extern int debugfs_write(const char *entry, const char *value); 20extern int debugfs_write(const char *entry, const char *value);
21extern int debugfs_read(const char *entry, char *buffer, size_t size); 21extern int debugfs_read(const char *entry, char *buffer, size_t size);
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 8a9e6baa3099..705ec63548b4 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -8,8 +8,7 @@
8#include "thread.h" 8#include "thread.h"
9 9
10static pid_t event__synthesize_comm(pid_t pid, int full, 10static pid_t event__synthesize_comm(pid_t pid, int full,
11 int (*process)(event_t *event, 11 event__handler_t process,
12 struct perf_session *session),
13 struct perf_session *session) 12 struct perf_session *session)
14{ 13{
15 event_t ev; 14 event_t ev;
@@ -91,8 +90,7 @@ out_failure:
91} 90}
92 91
93static int event__synthesize_mmap_events(pid_t pid, pid_t tgid, 92static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
94 int (*process)(event_t *event, 93 event__handler_t process,
95 struct perf_session *session),
96 struct perf_session *session) 94 struct perf_session *session)
97{ 95{
98 char filename[PATH_MAX]; 96 char filename[PATH_MAX];
@@ -112,7 +110,10 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
112 while (1) { 110 while (1) {
113 char bf[BUFSIZ], *pbf = bf; 111 char bf[BUFSIZ], *pbf = bf;
114 event_t ev = { 112 event_t ev = {
115 .header = { .type = PERF_RECORD_MMAP }, 113 .header = {
114 .type = PERF_RECORD_MMAP,
115 .misc = 0, /* Just like the kernel, see kernel/perf_event.c __perf_event_mmap */
116 },
116 }; 117 };
117 int n; 118 int n;
118 size_t size; 119 size_t size;
@@ -156,9 +157,38 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
156 return 0; 157 return 0;
157} 158}
158 159
159int event__synthesize_thread(pid_t pid, 160int event__synthesize_modules(event__handler_t process,
160 int (*process)(event_t *event, 161 struct perf_session *session)
161 struct perf_session *session), 162{
163 struct rb_node *nd;
164
165 for (nd = rb_first(&session->kmaps.maps[MAP__FUNCTION]);
166 nd; nd = rb_next(nd)) {
167 event_t ev;
168 size_t size;
169 struct map *pos = rb_entry(nd, struct map, rb_node);
170
171 if (pos->dso->kernel)
172 continue;
173
174 size = ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
175 memset(&ev, 0, sizeof(ev));
176 ev.mmap.header.misc = 1; /* kernel uses 0 for user space maps, see kernel/perf_event.c __perf_event_mmap */
177 ev.mmap.header.type = PERF_RECORD_MMAP;
178 ev.mmap.header.size = (sizeof(ev.mmap) -
179 (sizeof(ev.mmap.filename) - size));
180 ev.mmap.start = pos->start;
181 ev.mmap.len = pos->end - pos->start;
182
183 memcpy(ev.mmap.filename, pos->dso->long_name,
184 pos->dso->long_name_len + 1);
185 process(&ev, session);
186 }
187
188 return 0;
189}
190
191int event__synthesize_thread(pid_t pid, event__handler_t process,
162 struct perf_session *session) 192 struct perf_session *session)
163{ 193{
164 pid_t tgid = event__synthesize_comm(pid, 1, process, session); 194 pid_t tgid = event__synthesize_comm(pid, 1, process, session);
@@ -167,8 +197,7 @@ int event__synthesize_thread(pid_t pid,
167 return event__synthesize_mmap_events(pid, tgid, process, session); 197 return event__synthesize_mmap_events(pid, tgid, process, session);
168} 198}
169 199
170void event__synthesize_threads(int (*process)(event_t *event, 200void event__synthesize_threads(event__handler_t process,
171 struct perf_session *session),
172 struct perf_session *session) 201 struct perf_session *session)
173{ 202{
174 DIR *proc; 203 DIR *proc;
@@ -189,6 +218,59 @@ void event__synthesize_threads(int (*process)(event_t *event,
189 closedir(proc); 218 closedir(proc);
190} 219}
191 220
221struct process_symbol_args {
222 const char *name;
223 u64 start;
224};
225
226static int find_symbol_cb(void *arg, const char *name, char type, u64 start)
227{
228 struct process_symbol_args *args = arg;
229
230 /*
231 * Must be a function or at least an alias, as in PARISC64, where "_text" is
232 * an 'A' to the same address as "_stext".
233 */
234 if (!(symbol_type__is_a(type, MAP__FUNCTION) ||
235 type == 'A') || strcmp(name, args->name))
236 return 0;
237
238 args->start = start;
239 return 1;
240}
241
242int event__synthesize_kernel_mmap(event__handler_t process,
243 struct perf_session *session,
244 const char *symbol_name)
245{
246 size_t size;
247 event_t ev = {
248 .header = {
249 .type = PERF_RECORD_MMAP,
250 .misc = 1, /* kernel uses 0 for user space maps, see kernel/perf_event.c __perf_event_mmap */
251 },
252 };
253 /*
254 * We should get this from /sys/kernel/sections/.text, but till that is
255 * available use this, and after it is use this as a fallback for older
256 * kernels.
257 */
258 struct process_symbol_args args = { .name = symbol_name, };
259
260 if (kallsyms__parse("/proc/kallsyms", &args, find_symbol_cb) <= 0)
261 return -ENOENT;
262
263 size = snprintf(ev.mmap.filename, sizeof(ev.mmap.filename),
264 "[kernel.kallsyms.%s]", symbol_name) + 1;
265 size = ALIGN(size, sizeof(u64));
266 ev.mmap.header.size = (sizeof(ev.mmap) - (sizeof(ev.mmap.filename) - size));
267 ev.mmap.pgoff = args.start;
268 ev.mmap.start = session->vmlinux_maps[MAP__FUNCTION]->start;
 269	ev.mmap.len = session->vmlinux_maps[MAP__FUNCTION]->end - ev.mmap.start;
270
271 return process(&ev, session);
272}
273
192static void thread__comm_adjust(struct thread *self) 274static void thread__comm_adjust(struct thread *self)
193{ 275{
194 char *comm = self->comm; 276 char *comm = self->comm;
@@ -240,22 +322,88 @@ int event__process_lost(event_t *self, struct perf_session *session)
240 322
241int event__process_mmap(event_t *self, struct perf_session *session) 323int event__process_mmap(event_t *self, struct perf_session *session)
242{ 324{
243 struct thread *thread = perf_session__findnew(session, self->mmap.pid); 325 struct thread *thread;
244 struct map *map = map__new(&self->mmap, MAP__FUNCTION, 326 struct map *map;
245 session->cwd, session->cwdlen); 327
328 dump_printf(" %d/%d: [%#Lx(%#Lx) @ %#Lx]: %s\n",
329 self->mmap.pid, self->mmap.tid, self->mmap.start,
330 self->mmap.len, self->mmap.pgoff, self->mmap.filename);
331
332 if (self->mmap.pid == 0) {
333 static const char kmmap_prefix[] = "[kernel.kallsyms.";
334
335 if (self->mmap.filename[0] == '/') {
336 char short_module_name[1024];
337 char *name = strrchr(self->mmap.filename, '/'), *dot;
338
339 if (name == NULL)
340 goto out_problem;
341
342 ++name; /* skip / */
343 dot = strrchr(name, '.');
344 if (dot == NULL)
345 goto out_problem;
346
347 snprintf(short_module_name, sizeof(short_module_name),
348 "[%.*s]", (int)(dot - name), name);
349 strxfrchar(short_module_name, '-', '_');
350
351 map = perf_session__new_module_map(session,
352 self->mmap.start,
353 self->mmap.filename);
354 if (map == NULL)
355 goto out_problem;
356
357 name = strdup(short_module_name);
358 if (name == NULL)
359 goto out_problem;
360
361 map->dso->short_name = name;
362 map->end = map->start + self->mmap.len;
363 } else if (memcmp(self->mmap.filename, kmmap_prefix,
364 sizeof(kmmap_prefix) - 1) == 0) {
365 const char *symbol_name = (self->mmap.filename +
366 sizeof(kmmap_prefix) - 1);
367 /*
368 * Should be there already, from the build-id table in
369 * the header.
370 */
371 struct dso *kernel = __dsos__findnew(&dsos__kernel,
372 "[kernel.kallsyms]");
373 if (kernel == NULL)
374 goto out_problem;
375
376 kernel->kernel = 1;
377 if (__perf_session__create_kernel_maps(session, kernel) < 0)
378 goto out_problem;
379
380 session->vmlinux_maps[MAP__FUNCTION]->start = self->mmap.start;
381 session->vmlinux_maps[MAP__FUNCTION]->end = self->mmap.start + self->mmap.len;
382 /*
383 * Be a bit paranoid here, some perf.data file came with
384 * a zero sized synthesized MMAP event for the kernel.
385 */
386 if (session->vmlinux_maps[MAP__FUNCTION]->end == 0)
387 session->vmlinux_maps[MAP__FUNCTION]->end = ~0UL;
388
389 perf_session__set_kallsyms_ref_reloc_sym(session, symbol_name,
390 self->mmap.pgoff);
391 }
392 return 0;
393 }
246 394
247 dump_printf(" %d/%d: [%p(%p) @ %p]: %s\n", 395 thread = perf_session__findnew(session, self->mmap.pid);
248 self->mmap.pid, self->mmap.tid, 396 map = map__new(&self->mmap, MAP__FUNCTION,
249 (void *)(long)self->mmap.start, 397 session->cwd, session->cwdlen);
250 (void *)(long)self->mmap.len,
251 (void *)(long)self->mmap.pgoff,
252 self->mmap.filename);
253 398
254 if (thread == NULL || map == NULL) 399 if (thread == NULL || map == NULL)
255 dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n"); 400 goto out_problem;
256 else
257 thread__insert_map(thread, map);
258 401
402 thread__insert_map(thread, map);
403 return 0;
404
405out_problem:
406 dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
259 return 0; 407 return 0;
260} 408}
261 409
@@ -284,11 +432,10 @@ int event__process_task(event_t *self, struct perf_session *session)
284 return 0; 432 return 0;
285} 433}
286 434
287void thread__find_addr_location(struct thread *self, 435void thread__find_addr_map(struct thread *self,
288 struct perf_session *session, u8 cpumode, 436 struct perf_session *session, u8 cpumode,
289 enum map_type type, u64 addr, 437 enum map_type type, u64 addr,
290 struct addr_location *al, 438 struct addr_location *al)
291 symbol_filter_t filter)
292{ 439{
293 struct map_groups *mg = &self->mg; 440 struct map_groups *mg = &self->mg;
294 441
@@ -303,7 +450,6 @@ void thread__find_addr_location(struct thread *self,
303 else { 450 else {
304 al->level = 'H'; 451 al->level = 'H';
305 al->map = NULL; 452 al->map = NULL;
306 al->sym = NULL;
307 return; 453 return;
308 } 454 }
309try_again: 455try_again:
@@ -322,11 +468,21 @@ try_again:
322 mg = &session->kmaps; 468 mg = &session->kmaps;
323 goto try_again; 469 goto try_again;
324 } 470 }
325 al->sym = NULL; 471 } else
326 } else {
327 al->addr = al->map->map_ip(al->map, al->addr); 472 al->addr = al->map->map_ip(al->map, al->addr);
328 al->sym = map__find_symbol(al->map, session, al->addr, filter); 473}
329 } 474
475void thread__find_addr_location(struct thread *self,
476 struct perf_session *session, u8 cpumode,
477 enum map_type type, u64 addr,
478 struct addr_location *al,
479 symbol_filter_t filter)
480{
481 thread__find_addr_map(self, session, cpumode, type, addr, al);
482 if (al->map != NULL)
483 al->sym = map__find_symbol(al->map, al->addr, filter);
484 else
485 al->sym = NULL;
330} 486}
331 487
332static void dso__calc_col_width(struct dso *self) 488static void dso__calc_col_width(struct dso *self)
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 690a96d0467c..50a7132887f5 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -1,10 +1,10 @@
1#ifndef __PERF_RECORD_H 1#ifndef __PERF_RECORD_H
2#define __PERF_RECORD_H 2#define __PERF_RECORD_H
3 3
4#include <limits.h>
5
4#include "../perf.h" 6#include "../perf.h"
5#include "util.h" 7#include "map.h"
6#include <linux/list.h>
7#include <linux/rbtree.h>
8 8
9/* 9/*
10 * PERF_SAMPLE_IP | PERF_SAMPLE_TID | * 10 * PERF_SAMPLE_IP | PERF_SAMPLE_TID | *
@@ -101,74 +101,19 @@ struct events_stats {
101 101
102void event__print_totals(void); 102void event__print_totals(void);
103 103
104enum map_type {
105 MAP__FUNCTION = 0,
106 MAP__VARIABLE,
107};
108
109#define MAP__NR_TYPES (MAP__VARIABLE + 1)
110
111struct map {
112 union {
113 struct rb_node rb_node;
114 struct list_head node;
115 };
116 u64 start;
117 u64 end;
118 enum map_type type;
119 u64 pgoff;
120 u64 (*map_ip)(struct map *, u64);
121 u64 (*unmap_ip)(struct map *, u64);
122 struct dso *dso;
123};
124
125static inline u64 map__map_ip(struct map *map, u64 ip)
126{
127 return ip - map->start + map->pgoff;
128}
129
130static inline u64 map__unmap_ip(struct map *map, u64 ip)
131{
132 return ip + map->start - map->pgoff;
133}
134
135static inline u64 identity__map_ip(struct map *map __used, u64 ip)
136{
137 return ip;
138}
139
140struct symbol;
141
142typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym);
143
144void map__init(struct map *self, enum map_type type,
145 u64 start, u64 end, u64 pgoff, struct dso *dso);
146struct map *map__new(struct mmap_event *event, enum map_type,
147 char *cwd, int cwdlen);
148void map__delete(struct map *self);
149struct map *map__clone(struct map *self);
150int map__overlap(struct map *l, struct map *r);
151size_t map__fprintf(struct map *self, FILE *fp);
152
153struct perf_session; 104struct perf_session;
154 105
155int map__load(struct map *self, struct perf_session *session, 106typedef int (*event__handler_t)(event_t *event, struct perf_session *session);
156 symbol_filter_t filter); 107
157struct symbol *map__find_symbol(struct map *self, struct perf_session *session, 108int event__synthesize_thread(pid_t pid, event__handler_t process,
158 u64 addr, symbol_filter_t filter);
159struct symbol *map__find_symbol_by_name(struct map *self, const char *name,
160 struct perf_session *session,
161 symbol_filter_t filter);
162void map__fixup_start(struct map *self);
163void map__fixup_end(struct map *self);
164
165int event__synthesize_thread(pid_t pid,
166 int (*process)(event_t *event,
167 struct perf_session *session),
168 struct perf_session *session); 109 struct perf_session *session);
169void event__synthesize_threads(int (*process)(event_t *event, 110void event__synthesize_threads(event__handler_t process,
170 struct perf_session *session),
171 struct perf_session *session); 111 struct perf_session *session);
112int event__synthesize_kernel_mmap(event__handler_t process,
113 struct perf_session *session,
114 const char *symbol_name);
115int event__synthesize_modules(event__handler_t process,
116 struct perf_session *session);
172 117
173int event__process_comm(event_t *self, struct perf_session *session); 118int event__process_comm(event_t *self, struct perf_session *session);
174int event__process_lost(event_t *self, struct perf_session *session); 119int event__process_lost(event_t *self, struct perf_session *session);
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 8a0bca55106f..6c9aa16ee51f 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1,8 +1,12 @@
1#define _FILE_OFFSET_BITS 64
2
1#include <sys/types.h> 3#include <sys/types.h>
4#include <byteswap.h>
2#include <unistd.h> 5#include <unistd.h>
3#include <stdio.h> 6#include <stdio.h>
4#include <stdlib.h> 7#include <stdlib.h>
5#include <linux/list.h> 8#include <linux/list.h>
9#include <linux/kernel.h>
6 10
7#include "util.h" 11#include "util.h"
8#include "header.h" 12#include "header.h"
@@ -105,24 +109,28 @@ struct perf_trace_event_type {
105static int event_count; 109static int event_count;
106static struct perf_trace_event_type *events; 110static struct perf_trace_event_type *events;
107 111
108void perf_header__push_event(u64 id, const char *name) 112int perf_header__push_event(u64 id, const char *name)
109{ 113{
110 if (strlen(name) > MAX_EVENT_NAME) 114 if (strlen(name) > MAX_EVENT_NAME)
111 pr_warning("Event %s will be truncated\n", name); 115 pr_warning("Event %s will be truncated\n", name);
112 116
113 if (!events) { 117 if (!events) {
114 events = malloc(sizeof(struct perf_trace_event_type)); 118 events = malloc(sizeof(struct perf_trace_event_type));
115 if (!events) 119 if (events == NULL)
116 die("nomem"); 120 return -ENOMEM;
117 } else { 121 } else {
118 events = realloc(events, (event_count + 1) * sizeof(struct perf_trace_event_type)); 122 struct perf_trace_event_type *nevents;
119 if (!events) 123
120 die("nomem"); 124 nevents = realloc(events, (event_count + 1) * sizeof(*events));
125 if (nevents == NULL)
126 return -ENOMEM;
127 events = nevents;
121 } 128 }
122 memset(&events[event_count], 0, sizeof(struct perf_trace_event_type)); 129 memset(&events[event_count], 0, sizeof(struct perf_trace_event_type));
123 events[event_count].event_id = id; 130 events[event_count].event_id = id;
124 strncpy(events[event_count].name, name, MAX_EVENT_NAME - 1); 131 strncpy(events[event_count].name, name, MAX_EVENT_NAME - 1);
125 event_count++; 132 event_count++;
133 return 0;
126} 134}
127 135
128char *perf_header__find_event(u64 id) 136char *perf_header__find_event(u64 id)
@@ -169,31 +177,48 @@ static int do_write(int fd, const void *buf, size_t size)
169 return 0; 177 return 0;
170} 178}
171 179
172static int __dsos__write_buildid_table(struct list_head *head, int fd) 180#define NAME_ALIGN 64
181
182static int write_padded(int fd, const void *bf, size_t count,
183 size_t count_aligned)
173{ 184{
174#define NAME_ALIGN 64
175 struct dso *pos;
176 static const char zero_buf[NAME_ALIGN]; 185 static const char zero_buf[NAME_ALIGN];
186 int err = do_write(fd, bf, count);
187
188 if (!err)
189 err = do_write(fd, zero_buf, count_aligned - count);
190
191 return err;
192}
177 193
178 list_for_each_entry(pos, head, node) { 194#define dsos__for_each_with_build_id(pos, head) \
195 list_for_each_entry(pos, head, node) \
196 if (!pos->has_build_id) \
197 continue; \
198 else
199
200static int __dsos__write_buildid_table(struct list_head *head, u16 misc, int fd)
201{
202 struct dso *pos;
203
204 dsos__for_each_with_build_id(pos, head) {
179 int err; 205 int err;
180 struct build_id_event b; 206 struct build_id_event b;
181 size_t len; 207 size_t len;
182 208
183 if (!pos->has_build_id) 209 if (!pos->hit)
184 continue; 210 continue;
185 len = pos->long_name_len + 1; 211 len = pos->long_name_len + 1;
186 len = ALIGN(len, NAME_ALIGN); 212 len = ALIGN(len, NAME_ALIGN);
187 memset(&b, 0, sizeof(b)); 213 memset(&b, 0, sizeof(b));
188 memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id)); 214 memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id));
215 b.header.misc = misc;
189 b.header.size = sizeof(b) + len; 216 b.header.size = sizeof(b) + len;
190 err = do_write(fd, &b, sizeof(b)); 217 err = do_write(fd, &b, sizeof(b));
191 if (err < 0) 218 if (err < 0)
192 return err; 219 return err;
193 err = do_write(fd, pos->long_name, pos->long_name_len + 1); 220 err = write_padded(fd, pos->long_name,
194 if (err < 0) 221 pos->long_name_len + 1, len);
195 return err;
196 err = do_write(fd, zero_buf, len - pos->long_name_len - 1);
197 if (err < 0) 222 if (err < 0)
198 return err; 223 return err;
199 } 224 }
@@ -203,12 +228,143 @@ static int __dsos__write_buildid_table(struct list_head *head, int fd)
203 228
204static int dsos__write_buildid_table(int fd) 229static int dsos__write_buildid_table(int fd)
205{ 230{
206 int err = __dsos__write_buildid_table(&dsos__kernel, fd); 231 int err = __dsos__write_buildid_table(&dsos__kernel,
232 PERF_RECORD_MISC_KERNEL, fd);
207 if (err == 0) 233 if (err == 0)
208 err = __dsos__write_buildid_table(&dsos__user, fd); 234 err = __dsos__write_buildid_table(&dsos__user,
235 PERF_RECORD_MISC_USER, fd);
209 return err; 236 return err;
210} 237}
211 238
239int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
240 const char *name, bool is_kallsyms)
241{
242 const size_t size = PATH_MAX;
243 char *filename = malloc(size),
244 *linkname = malloc(size), *targetname;
245 int len, err = -1;
246
247 if (filename == NULL || linkname == NULL)
248 goto out_free;
249
250 len = snprintf(filename, size, "%s%s%s",
251 debugdir, is_kallsyms ? "/" : "", name);
252 if (mkdir_p(filename, 0755))
253 goto out_free;
254
 255	snprintf(filename + len, size - len, "/%s", sbuild_id);
256
257 if (access(filename, F_OK)) {
258 if (is_kallsyms) {
259 if (copyfile("/proc/kallsyms", filename))
260 goto out_free;
261 } else if (link(name, filename) && copyfile(name, filename))
262 goto out_free;
263 }
264
265 len = snprintf(linkname, size, "%s/.build-id/%.2s",
266 debugdir, sbuild_id);
267
268 if (access(linkname, X_OK) && mkdir_p(linkname, 0755))
269 goto out_free;
270
271 snprintf(linkname + len, size - len, "/%s", sbuild_id + 2);
272 targetname = filename + strlen(debugdir) - 5;
273 memcpy(targetname, "../..", 5);
274
275 if (symlink(targetname, linkname) == 0)
276 err = 0;
277out_free:
278 free(filename);
279 free(linkname);
280 return err;
281}
282
283static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size,
284 const char *name, const char *debugdir,
285 bool is_kallsyms)
286{
287 char sbuild_id[BUILD_ID_SIZE * 2 + 1];
288
289 build_id__sprintf(build_id, build_id_size, sbuild_id);
290
291 return build_id_cache__add_s(sbuild_id, debugdir, name, is_kallsyms);
292}
293
294int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir)
295{
296 const size_t size = PATH_MAX;
297 char *filename = malloc(size),
298 *linkname = malloc(size);
299 int err = -1;
300
301 if (filename == NULL || linkname == NULL)
302 goto out_free;
303
304 snprintf(linkname, size, "%s/.build-id/%.2s/%s",
305 debugdir, sbuild_id, sbuild_id + 2);
306
307 if (access(linkname, F_OK))
308 goto out_free;
309
310 if (readlink(linkname, filename, size) < 0)
311 goto out_free;
312
313 if (unlink(linkname))
314 goto out_free;
315
316 /*
317 * Since the link is relative, we must make it absolute:
318 */
319 snprintf(linkname, size, "%s/.build-id/%.2s/%s",
320 debugdir, sbuild_id, filename);
321
322 if (unlink(linkname))
323 goto out_free;
324
325 err = 0;
326out_free:
327 free(filename);
328 free(linkname);
329 return err;
330}
331
332static int dso__cache_build_id(struct dso *self, const char *debugdir)
333{
334 bool is_kallsyms = self->kernel && self->long_name[0] != '/';
335
336 return build_id_cache__add_b(self->build_id, sizeof(self->build_id),
337 self->long_name, debugdir, is_kallsyms);
338}
339
340static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir)
341{
342 struct dso *pos;
343 int err = 0;
344
345 dsos__for_each_with_build_id(pos, head)
346 if (dso__cache_build_id(pos, debugdir))
347 err = -1;
348
349 return err;
350}
351
352static int dsos__cache_build_ids(void)
353{
354 int err_kernel, err_user;
355 char debugdir[PATH_MAX];
356
357 snprintf(debugdir, sizeof(debugdir), "%s/%s", getenv("HOME"),
358 DEBUG_CACHE_DIR);
359
360 if (mkdir(debugdir, 0755) != 0 && errno != EEXIST)
361 return -1;
362
363 err_kernel = __dsos__cache_build_ids(&dsos__kernel, debugdir);
364 err_user = __dsos__cache_build_ids(&dsos__user, debugdir);
365 return err_kernel || err_user ? -1 : 0;
366}
367
212static int perf_header__adds_write(struct perf_header *self, int fd) 368static int perf_header__adds_write(struct perf_header *self, int fd)
213{ 369{
214 int nr_sections; 370 int nr_sections;
@@ -217,7 +373,7 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
217 u64 sec_start; 373 u64 sec_start;
218 int idx = 0, err; 374 int idx = 0, err;
219 375
220 if (dsos__read_build_ids()) 376 if (dsos__read_build_ids(true))
221 perf_header__set_feat(self, HEADER_BUILD_ID); 377 perf_header__set_feat(self, HEADER_BUILD_ID);
222 378
223 nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); 379 nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS);
@@ -257,7 +413,9 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
257 pr_debug("failed to write buildid table\n"); 413 pr_debug("failed to write buildid table\n");
258 goto out_free; 414 goto out_free;
259 } 415 }
260 buildid_sec->size = lseek(fd, 0, SEEK_CUR) - buildid_sec->offset; 416 buildid_sec->size = lseek(fd, 0, SEEK_CUR) -
417 buildid_sec->offset;
418 dsos__cache_build_ids();
261 } 419 }
262 420
263 lseek(fd, sec_start, SEEK_SET); 421 lseek(fd, sec_start, SEEK_SET);
@@ -360,30 +518,43 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit)
360 return 0; 518 return 0;
361} 519}
362 520
363static void do_read(int fd, void *buf, size_t size) 521static int do_read(int fd, void *buf, size_t size)
364{ 522{
365 while (size) { 523 while (size) {
366 int ret = read(fd, buf, size); 524 int ret = read(fd, buf, size);
367 525
368 if (ret < 0) 526 if (ret <= 0)
369 die("failed to read"); 527 return -1;
370 if (ret == 0)
371 die("failed to read: missing data");
372 528
373 size -= ret; 529 size -= ret;
374 buf += ret; 530 buf += ret;
375 } 531 }
532
533 return 0;
534}
535
536static int perf_header__getbuffer64(struct perf_header *self,
537 int fd, void *buf, size_t size)
538{
539 if (do_read(fd, buf, size))
540 return -1;
541
542 if (self->needs_swap)
543 mem_bswap_64(buf, size);
544
545 return 0;
376} 546}
377 547
378int perf_header__process_sections(struct perf_header *self, int fd, 548int perf_header__process_sections(struct perf_header *self, int fd,
379 int (*process)(struct perf_file_section *self, 549 int (*process)(struct perf_file_section *self,
550 struct perf_header *ph,
380 int feat, int fd)) 551 int feat, int fd))
381{ 552{
382 struct perf_file_section *feat_sec; 553 struct perf_file_section *feat_sec;
383 int nr_sections; 554 int nr_sections;
384 int sec_size; 555 int sec_size;
385 int idx = 0; 556 int idx = 0;
386 int err = 0, feat = 1; 557 int err = -1, feat = 1;
387 558
388 nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); 559 nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS);
389 if (!nr_sections) 560 if (!nr_sections)
@@ -397,33 +568,45 @@ int perf_header__process_sections(struct perf_header *self, int fd,
397 568
398 lseek(fd, self->data_offset + self->data_size, SEEK_SET); 569 lseek(fd, self->data_offset + self->data_size, SEEK_SET);
399 570
400 do_read(fd, feat_sec, sec_size); 571 if (perf_header__getbuffer64(self, fd, feat_sec, sec_size))
572 goto out_free;
401 573
574 err = 0;
402 while (idx < nr_sections && feat < HEADER_LAST_FEATURE) { 575 while (idx < nr_sections && feat < HEADER_LAST_FEATURE) {
403 if (perf_header__has_feat(self, feat)) { 576 if (perf_header__has_feat(self, feat)) {
404 struct perf_file_section *sec = &feat_sec[idx++]; 577 struct perf_file_section *sec = &feat_sec[idx++];
405 578
406 err = process(sec, feat, fd); 579 err = process(sec, self, feat, fd);
407 if (err < 0) 580 if (err < 0)
408 break; 581 break;
409 } 582 }
410 ++feat; 583 ++feat;
411 } 584 }
412 585out_free:
413 free(feat_sec); 586 free(feat_sec);
414 return err; 587 return err;
415}; 588}
416 589
417int perf_file_header__read(struct perf_file_header *self, 590int perf_file_header__read(struct perf_file_header *self,
418 struct perf_header *ph, int fd) 591 struct perf_header *ph, int fd)
419{ 592{
420 lseek(fd, 0, SEEK_SET); 593 lseek(fd, 0, SEEK_SET);
421 do_read(fd, self, sizeof(*self));
422 594
423 if (self->magic != PERF_MAGIC || 595 if (do_read(fd, self, sizeof(*self)) ||
424 self->attr_size != sizeof(struct perf_file_attr)) 596 memcmp(&self->magic, __perf_magic, sizeof(self->magic)))
425 return -1; 597 return -1;
426 598
599 if (self->attr_size != sizeof(struct perf_file_attr)) {
600 u64 attr_size = bswap_64(self->attr_size);
601
602 if (attr_size != sizeof(struct perf_file_attr))
603 return -1;
604
605 mem_bswap_64(self, offsetof(struct perf_file_header,
606 adds_features));
607 ph->needs_swap = true;
608 }
609
427 if (self->size != sizeof(*self)) { 610 if (self->size != sizeof(*self)) {
428 /* Support the previous format */ 611 /* Support the previous format */
429 if (self->size == offsetof(typeof(*self), adds_features)) 612 if (self->size == offsetof(typeof(*self), adds_features))
@@ -433,19 +616,31 @@ int perf_file_header__read(struct perf_file_header *self,
433 } 616 }
434 617
435 memcpy(&ph->adds_features, &self->adds_features, 618 memcpy(&ph->adds_features, &self->adds_features,
436 sizeof(self->adds_features)); 619 sizeof(ph->adds_features));
620 /*
 621	 * FIXME: hack that assumes that if we need to swap, the perf.data file
 622	 * may be coming from an arch with a different word-size, ergo different
 623	 * DEFINE_BITMAP format, investigate more later, but for now it's mostly
624 * safe to assume that we have a build-id section. Trace files probably
625 * have several other issues in this realm anyway...
626 */
627 if (ph->needs_swap) {
628 memset(&ph->adds_features, 0, sizeof(ph->adds_features));
629 perf_header__set_feat(ph, HEADER_BUILD_ID);
630 }
437 631
438 ph->event_offset = self->event_types.offset; 632 ph->event_offset = self->event_types.offset;
439 ph->event_size = self->event_types.size; 633 ph->event_size = self->event_types.size;
440 ph->data_offset = self->data.offset; 634 ph->data_offset = self->data.offset;
441 ph->data_size = self->data.size; 635 ph->data_size = self->data.size;
442 return 0; 636 return 0;
443} 637}
444 638
445static int perf_file_section__process(struct perf_file_section *self, 639static int perf_file_section__process(struct perf_file_section *self,
640 struct perf_header *ph,
446 int feat, int fd) 641 int feat, int fd)
447{ 642{
448 if (lseek(fd, self->offset, SEEK_SET) < 0) { 643 if (lseek(fd, self->offset, SEEK_SET) == (off_t)-1) {
449 pr_debug("Failed to lseek to %Ld offset for feature %d, " 644 pr_debug("Failed to lseek to %Ld offset for feature %d, "
450 "continuing...\n", self->offset, feat); 645 "continuing...\n", self->offset, feat);
451 return 0; 646 return 0;
@@ -457,7 +652,7 @@ static int perf_file_section__process(struct perf_file_section *self,
457 break; 652 break;
458 653
459 case HEADER_BUILD_ID: 654 case HEADER_BUILD_ID:
460 if (perf_header__read_build_ids(fd, self->offset, self->size)) 655 if (perf_header__read_build_ids(ph, fd, self->offset, self->size))
461 pr_debug("Failed to read buildids, continuing...\n"); 656 pr_debug("Failed to read buildids, continuing...\n");
462 break; 657 break;
463 default: 658 default:
@@ -469,7 +664,7 @@ static int perf_file_section__process(struct perf_file_section *self,
469 664
470int perf_header__read(struct perf_header *self, int fd) 665int perf_header__read(struct perf_header *self, int fd)
471{ 666{
472 struct perf_file_header f_header; 667 struct perf_file_header f_header;
473 struct perf_file_attr f_attr; 668 struct perf_file_attr f_attr;
474 u64 f_id; 669 u64 f_id;
475 int nr_attrs, nr_ids, i, j; 670 int nr_attrs, nr_ids, i, j;
@@ -486,7 +681,9 @@ int perf_header__read(struct perf_header *self, int fd)
486 struct perf_header_attr *attr; 681 struct perf_header_attr *attr;
487 off_t tmp; 682 off_t tmp;
488 683
489 do_read(fd, &f_attr, sizeof(f_attr)); 684 if (perf_header__getbuffer64(self, fd, &f_attr, sizeof(f_attr)))
685 goto out_errno;
686
490 tmp = lseek(fd, 0, SEEK_CUR); 687 tmp = lseek(fd, 0, SEEK_CUR);
491 688
492 attr = perf_header_attr__new(&f_attr.attr); 689 attr = perf_header_attr__new(&f_attr.attr);
@@ -497,7 +694,8 @@ int perf_header__read(struct perf_header *self, int fd)
497 lseek(fd, f_attr.ids.offset, SEEK_SET); 694 lseek(fd, f_attr.ids.offset, SEEK_SET);
498 695
499 for (j = 0; j < nr_ids; j++) { 696 for (j = 0; j < nr_ids; j++) {
500 do_read(fd, &f_id, sizeof(f_id)); 697 if (perf_header__getbuffer64(self, fd, &f_id, sizeof(f_id)))
698 goto out_errno;
501 699
502 if (perf_header_attr__add_id(attr, f_id) < 0) { 700 if (perf_header_attr__add_id(attr, f_id) < 0) {
503 perf_header_attr__delete(attr); 701 perf_header_attr__delete(attr);
@@ -517,7 +715,9 @@ int perf_header__read(struct perf_header *self, int fd)
517 events = malloc(f_header.event_types.size); 715 events = malloc(f_header.event_types.size);
518 if (events == NULL) 716 if (events == NULL)
519 return -ENOMEM; 717 return -ENOMEM;
520 do_read(fd, events, f_header.event_types.size); 718 if (perf_header__getbuffer64(self, fd, events,
719 f_header.event_types.size))
720 goto out_errno;
521 event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type); 721 event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type);
522 } 722 }
523 723
@@ -527,6 +727,8 @@ int perf_header__read(struct perf_header *self, int fd)
527 727
528 self->frozen = 1; 728 self->frozen = 1;
529 return 0; 729 return 0;
730out_errno:
731 return -errno;
530} 732}
531 733
532u64 perf_header__sample_type(struct perf_header *header) 734u64 perf_header__sample_type(struct perf_header *header)
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index d118d05d3abe..82a6af72d4cc 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -5,6 +5,7 @@
5#include <sys/types.h> 5#include <sys/types.h>
6#include <stdbool.h> 6#include <stdbool.h>
7#include "types.h" 7#include "types.h"
8#include "event.h"
8 9
9#include <linux/bitmap.h> 10#include <linux/bitmap.h>
10 11
@@ -52,6 +53,7 @@ struct perf_header {
52 u64 data_size; 53 u64 data_size;
53 u64 event_offset; 54 u64 event_offset;
54 u64 event_size; 55 u64 event_size;
56 bool needs_swap;
55 DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); 57 DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
56}; 58};
57 59
@@ -64,7 +66,7 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit);
64int perf_header__add_attr(struct perf_header *self, 66int perf_header__add_attr(struct perf_header *self,
65 struct perf_header_attr *attr); 67 struct perf_header_attr *attr);
66 68
67void perf_header__push_event(u64 id, const char *name); 69int perf_header__push_event(u64 id, const char *name);
68char *perf_header__find_event(u64 id); 70char *perf_header__find_event(u64 id);
69 71
70struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr); 72struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr);
@@ -80,6 +82,11 @@ bool perf_header__has_feat(const struct perf_header *self, int feat);
80 82
81int perf_header__process_sections(struct perf_header *self, int fd, 83int perf_header__process_sections(struct perf_header *self, int fd,
82 int (*process)(struct perf_file_section *self, 84 int (*process)(struct perf_file_section *self,
85 struct perf_header *ph,
83 int feat, int fd)); 86 int feat, int fd));
84 87
88int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
89 const char *name, bool is_kallsyms);
90int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir);
91
85#endif /* __PERF_HEADER_H */ 92#endif /* __PERF_HEADER_H */
diff --git a/tools/perf/util/include/linux/hash.h b/tools/perf/util/include/linux/hash.h
new file mode 100644
index 000000000000..201f57397997
--- /dev/null
+++ b/tools/perf/util/include/linux/hash.h
@@ -0,0 +1,5 @@
1#include "../../../../include/linux/hash.h"
2
3#ifndef PERF_HASH_H
4#define PERF_HASH_H
5#endif
diff --git a/tools/perf/util/include/linux/kernel.h b/tools/perf/util/include/linux/kernel.h
index 21c0274c02fa..f2611655ab51 100644
--- a/tools/perf/util/include/linux/kernel.h
+++ b/tools/perf/util/include/linux/kernel.h
@@ -101,5 +101,6 @@ simple_strtoul(const char *nptr, char **endptr, int base)
101 eprintf(n, pr_fmt(fmt), ##__VA_ARGS__) 101 eprintf(n, pr_fmt(fmt), ##__VA_ARGS__)
102#define pr_debug2(fmt, ...) pr_debugN(2, pr_fmt(fmt), ##__VA_ARGS__) 102#define pr_debug2(fmt, ...) pr_debugN(2, pr_fmt(fmt), ##__VA_ARGS__)
103#define pr_debug3(fmt, ...) pr_debugN(3, pr_fmt(fmt), ##__VA_ARGS__) 103#define pr_debug3(fmt, ...) pr_debugN(3, pr_fmt(fmt), ##__VA_ARGS__)
104#define pr_debug4(fmt, ...) pr_debugN(4, pr_fmt(fmt), ##__VA_ARGS__)
104 105
105#endif 106#endif
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index c4d55a0da2ea..e509cd59c67d 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -5,6 +5,11 @@
5#include <stdio.h> 5#include <stdio.h>
6#include "debug.h" 6#include "debug.h"
7 7
8const char *map_type__name[MAP__NR_TYPES] = {
9 [MAP__FUNCTION] = "Functions",
10 [MAP__VARIABLE] = "Variables",
11};
12
8static inline int is_anon_memory(const char *filename) 13static inline int is_anon_memory(const char *filename)
9{ 14{
10 return strcmp(filename, "//anon") == 0; 15 return strcmp(filename, "//anon") == 0;
@@ -68,8 +73,13 @@ struct map *map__new(struct mmap_event *event, enum map_type type,
68 map__init(self, type, event->start, event->start + event->len, 73 map__init(self, type, event->start, event->start + event->len,
69 event->pgoff, dso); 74 event->pgoff, dso);
70 75
71 if (self->dso == vdso || anon) 76 if (anon) {
77set_identity:
72 self->map_ip = self->unmap_ip = identity__map_ip; 78 self->map_ip = self->unmap_ip = identity__map_ip;
79 } else if (strcmp(filename, "[vdso]") == 0) {
80 dso__set_loaded(dso, self->type);
81 goto set_identity;
82 }
73 } 83 }
74 return self; 84 return self;
75out_delete: 85out_delete:
@@ -104,8 +114,7 @@ void map__fixup_end(struct map *self)
104 114
105#define DSO__DELETED "(deleted)" 115#define DSO__DELETED "(deleted)"
106 116
107int map__load(struct map *self, struct perf_session *session, 117int map__load(struct map *self, symbol_filter_t filter)
108 symbol_filter_t filter)
109{ 118{
110 const char *name = self->dso->long_name; 119 const char *name = self->dso->long_name;
111 int nr; 120 int nr;
@@ -113,7 +122,7 @@ int map__load(struct map *self, struct perf_session *session,
113 if (dso__loaded(self->dso, self->type)) 122 if (dso__loaded(self->dso, self->type))
114 return 0; 123 return 0;
115 124
116 nr = dso__load(self->dso, self, session, filter); 125 nr = dso__load(self->dso, self, filter);
117 if (nr < 0) { 126 if (nr < 0) {
118 if (self->dso->has_build_id) { 127 if (self->dso->has_build_id) {
119 char sbuild_id[BUILD_ID_SIZE * 2 + 1]; 128 char sbuild_id[BUILD_ID_SIZE * 2 + 1];
@@ -144,24 +153,29 @@ int map__load(struct map *self, struct perf_session *session,
144 153
145 return -1; 154 return -1;
146 } 155 }
156 /*
157 * Only applies to the kernel, as its symtabs aren't relative like the
158 * module ones.
159 */
160 if (self->dso->kernel)
161 map__reloc_vmlinux(self);
147 162
148 return 0; 163 return 0;
149} 164}
150 165
151struct symbol *map__find_symbol(struct map *self, struct perf_session *session, 166struct symbol *map__find_symbol(struct map *self, u64 addr,
152 u64 addr, symbol_filter_t filter) 167 symbol_filter_t filter)
153{ 168{
154 if (map__load(self, session, filter) < 0) 169 if (map__load(self, filter) < 0)
155 return NULL; 170 return NULL;
156 171
157 return dso__find_symbol(self->dso, self->type, addr); 172 return dso__find_symbol(self->dso, self->type, addr);
158} 173}
159 174
160struct symbol *map__find_symbol_by_name(struct map *self, const char *name, 175struct symbol *map__find_symbol_by_name(struct map *self, const char *name,
161 struct perf_session *session,
162 symbol_filter_t filter) 176 symbol_filter_t filter)
163{ 177{
164 if (map__load(self, session, filter) < 0) 178 if (map__load(self, filter) < 0)
165 return NULL; 179 return NULL;
166 180
167 if (!dso__sorted_by_name(self->dso, self->type)) 181 if (!dso__sorted_by_name(self->dso, self->type))
@@ -201,3 +215,23 @@ size_t map__fprintf(struct map *self, FILE *fp)
201 return fprintf(fp, " %Lx-%Lx %Lx %s\n", 215 return fprintf(fp, " %Lx-%Lx %Lx %s\n",
202 self->start, self->end, self->pgoff, self->dso->name); 216 self->start, self->end, self->pgoff, self->dso->name);
203} 217}
218
219/*
220 * objdump wants/reports absolute IPs for ET_EXEC, and RIPs for ET_DYN.
221 * map->dso->adjust_symbols==1 for ET_EXEC-like cases.
222 */
223u64 map__rip_2objdump(struct map *map, u64 rip)
224{
225 u64 addr = map->dso->adjust_symbols ?
226 map->unmap_ip(map, rip) : /* RIP -> IP */
227 rip;
228 return addr;
229}
230
231u64 map__objdump_2ip(struct map *map, u64 addr)
232{
233 u64 ip = map->dso->adjust_symbols ?
234 addr :
235 map->unmap_ip(map, addr); /* RIP -> IP */
236 return ip;
237}
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
new file mode 100644
index 000000000000..b756368076c6
--- /dev/null
+++ b/tools/perf/util/map.h
@@ -0,0 +1,94 @@
1#ifndef __PERF_MAP_H
2#define __PERF_MAP_H
3
4#include <linux/compiler.h>
5#include <linux/list.h>
6#include <linux/rbtree.h>
7#include <linux/types.h>
8
9enum map_type {
10 MAP__FUNCTION = 0,
11 MAP__VARIABLE,
12};
13
14#define MAP__NR_TYPES (MAP__VARIABLE + 1)
15
16extern const char *map_type__name[MAP__NR_TYPES];
17
18struct dso;
19struct ref_reloc_sym;
20struct map_groups;
21
22struct map {
23 union {
24 struct rb_node rb_node;
25 struct list_head node;
26 };
27 u64 start;
28 u64 end;
29 enum map_type type;
30 u64 pgoff;
31
32 /* ip -> dso rip */
33 u64 (*map_ip)(struct map *, u64);
34 /* dso rip -> ip */
35 u64 (*unmap_ip)(struct map *, u64);
36
37 struct dso *dso;
38};
39
40struct kmap {
41 struct ref_reloc_sym *ref_reloc_sym;
42 struct map_groups *kmaps;
43};
44
45static inline struct kmap *map__kmap(struct map *self)
46{
47 return (struct kmap *)(self + 1);
48}
49
50static inline u64 map__map_ip(struct map *map, u64 ip)
51{
52 return ip - map->start + map->pgoff;
53}
54
55static inline u64 map__unmap_ip(struct map *map, u64 ip)
56{
57 return ip + map->start - map->pgoff;
58}
59
60static inline u64 identity__map_ip(struct map *map __used, u64 ip)
61{
62 return ip;
63}
64
65
66/* rip/ip <-> addr suitable for passing to `objdump --start-address=` */
67u64 map__rip_2objdump(struct map *map, u64 rip);
68u64 map__objdump_2ip(struct map *map, u64 addr);
69
70struct symbol;
71struct mmap_event;
72
73typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym);
74
75void map__init(struct map *self, enum map_type type,
76 u64 start, u64 end, u64 pgoff, struct dso *dso);
77struct map *map__new(struct mmap_event *event, enum map_type,
78 char *cwd, int cwdlen);
79void map__delete(struct map *self);
80struct map *map__clone(struct map *self);
81int map__overlap(struct map *l, struct map *r);
82size_t map__fprintf(struct map *self, FILE *fp);
83
84int map__load(struct map *self, symbol_filter_t filter);
85struct symbol *map__find_symbol(struct map *self,
86 u64 addr, symbol_filter_t filter);
87struct symbol *map__find_symbol_by_name(struct map *self, const char *name,
88 symbol_filter_t filter);
89void map__fixup_start(struct map *self);
90void map__fixup_end(struct map *self);
91
92void map__reloc_vmlinux(struct map *self);
93
94#endif /* __PERF_MAP_H */
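
The new map.h gathers the address-translation helpers in one place. Below is a minimal standalone sketch, not part of the patch, of the map__map_ip()/map__unmap_ip() arithmetic; struct toy_map and the addresses are made up, standing in for the real struct map:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct toy_map { uint64_t start, pgoff; };

/* ip -> dso-relative rip, mirroring map__map_ip() */
static uint64_t toy_map_ip(const struct toy_map *m, uint64_t ip)
{
	return ip - m->start + m->pgoff;
}

/* dso-relative rip -> ip, mirroring map__unmap_ip() */
static uint64_t toy_unmap_ip(const struct toy_map *m, uint64_t rip)
{
	return rip + m->start - m->pgoff;
}

int main(void)
{
	struct toy_map m = { .start = 0x400000, .pgoff = 0x1000 };	/* hypothetical mmap */
	uint64_t ip = 0x401234;						/* hypothetical sampled ip */

	/* the two helpers are inverses, so translation round-trips */
	assert(toy_unmap_ip(&m, toy_map_ip(&m, ip)) == ip);
	printf("ip %#llx -> rip %#llx\n", (unsigned long long)ip,
	       (unsigned long long)toy_map_ip(&m, ip));
	return 0;
}

map__rip_2objdump()/map__objdump_2ip() in map.c then choose between this translation and the identity, depending on whether the DSO's symbols were loaded with adjusted (ET_EXEC-style) addresses.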
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index e5bc0fb016b2..05d0c5c2030c 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -450,7 +450,8 @@ parse_single_tracepoint_event(char *sys_name,
450/* sys + ':' + event + ':' + flags*/ 450/* sys + ':' + event + ':' + flags*/
451#define MAX_EVOPT_LEN (MAX_EVENT_LENGTH * 2 + 2 + 128) 451#define MAX_EVOPT_LEN (MAX_EVENT_LENGTH * 2 + 2 + 128)
452static enum event_result 452static enum event_result
453parse_subsystem_tracepoint_event(char *sys_name, char *flags) 453parse_multiple_tracepoint_event(char *sys_name, const char *evt_exp,
454 char *flags)
454{ 455{
455 char evt_path[MAXPATHLEN]; 456 char evt_path[MAXPATHLEN];
456 struct dirent *evt_ent; 457 struct dirent *evt_ent;
@@ -474,6 +475,9 @@ parse_subsystem_tracepoint_event(char *sys_name, char *flags)
474 || !strcmp(evt_ent->d_name, "filter")) 475 || !strcmp(evt_ent->d_name, "filter"))
475 continue; 476 continue;
476 477
478 if (!strglobmatch(evt_ent->d_name, evt_exp))
479 continue;
480
477 len = snprintf(event_opt, MAX_EVOPT_LEN, "%s:%s%s%s", sys_name, 481 len = snprintf(event_opt, MAX_EVOPT_LEN, "%s:%s%s%s", sys_name,
478 evt_ent->d_name, flags ? ":" : "", 482 evt_ent->d_name, flags ? ":" : "",
479 flags ?: ""); 483 flags ?: "");
@@ -522,9 +526,10 @@ static enum event_result parse_tracepoint_event(const char **strp,
522 if (evt_length >= MAX_EVENT_LENGTH) 526 if (evt_length >= MAX_EVENT_LENGTH)
523 return EVT_FAILED; 527 return EVT_FAILED;
524 528
525 if (!strcmp(evt_name, "*")) { 529 if (strpbrk(evt_name, "*?")) {
526 *strp = evt_name + evt_length; 530 *strp = evt_name + evt_length;
527 return parse_subsystem_tracepoint_event(sys_name, flags); 531 return parse_multiple_tracepoint_event(sys_name, evt_name,
532 flags);
528 } else 533 } else
529 return parse_single_tracepoint_event(sys_name, evt_name, 534 return parse_single_tracepoint_event(sys_name, evt_name,
530 evt_length, flags, 535 evt_length, flags,
@@ -753,11 +758,11 @@ modifier:
753 return ret; 758 return ret;
754} 759}
755 760
756static void store_event_type(const char *orgname) 761static int store_event_type(const char *orgname)
757{ 762{
758 char filename[PATH_MAX], *c; 763 char filename[PATH_MAX], *c;
759 FILE *file; 764 FILE *file;
760 int id; 765 int id, n;
761 766
762 sprintf(filename, "%s/", debugfs_path); 767 sprintf(filename, "%s/", debugfs_path);
763 strncat(filename, orgname, strlen(orgname)); 768 strncat(filename, orgname, strlen(orgname));
@@ -769,11 +774,14 @@ static void store_event_type(const char *orgname)
769 774
770 file = fopen(filename, "r"); 775 file = fopen(filename, "r");
771 if (!file) 776 if (!file)
772 return; 777 return 0;
773 if (fscanf(file, "%i", &id) < 1) 778 n = fscanf(file, "%i", &id);
774 die("cannot store event ID");
775 fclose(file); 779 fclose(file);
776 perf_header__push_event(id, orgname); 780 if (n < 1) {
781 pr_err("cannot store event ID\n");
782 return -EINVAL;
783 }
784 return perf_header__push_event(id, orgname);
777} 785}
778 786
779int parse_events(const struct option *opt __used, const char *str, int unset __used) 787int parse_events(const struct option *opt __used, const char *str, int unset __used)
@@ -782,7 +790,8 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
782 enum event_result ret; 790 enum event_result ret;
783 791
784 if (strchr(str, ':')) 792 if (strchr(str, ':'))
785 store_event_type(str); 793 if (store_event_type(str) < 0)
794 return -1;
786 795
787 for (;;) { 796 for (;;) {
788 if (nr_counters == MAX_COUNTERS) 797 if (nr_counters == MAX_COUNTERS)
@@ -835,11 +844,12 @@ int parse_filter(const struct option *opt __used, const char *str,
835} 844}
836 845
837static const char * const event_type_descriptors[] = { 846static const char * const event_type_descriptors[] = {
838 "",
839 "Hardware event", 847 "Hardware event",
840 "Software event", 848 "Software event",
841 "Tracepoint event", 849 "Tracepoint event",
842 "Hardware cache event", 850 "Hardware cache event",
851 "Raw hardware event descriptor",
852 "Hardware breakpoint",
843}; 853};
844 854
845/* 855/*
@@ -872,7 +882,7 @@ static void print_tracepoint_events(void)
872 snprintf(evt_path, MAXPATHLEN, "%s:%s", 882 snprintf(evt_path, MAXPATHLEN, "%s:%s",
873 sys_dirent.d_name, evt_dirent.d_name); 883 sys_dirent.d_name, evt_dirent.d_name);
874 printf(" %-42s [%s]\n", evt_path, 884 printf(" %-42s [%s]\n", evt_path,
875 event_type_descriptors[PERF_TYPE_TRACEPOINT+1]); 885 event_type_descriptors[PERF_TYPE_TRACEPOINT]);
876 } 886 }
877 closedir(evt_dir); 887 closedir(evt_dir);
878 } 888 }
@@ -892,9 +902,7 @@ void print_events(void)
892 printf("List of pre-defined events (to be used in -e):\n"); 902 printf("List of pre-defined events (to be used in -e):\n");
893 903
894 for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) { 904 for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) {
895 type = syms->type + 1; 905 type = syms->type;
896 if (type >= ARRAY_SIZE(event_type_descriptors))
897 type = 0;
898 906
899 if (type != prev_type) 907 if (type != prev_type)
900 printf("\n"); 908 printf("\n");
@@ -919,17 +927,19 @@ void print_events(void)
919 for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { 927 for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
920 printf(" %-42s [%s]\n", 928 printf(" %-42s [%s]\n",
921 event_cache_name(type, op, i), 929 event_cache_name(type, op, i),
922 event_type_descriptors[4]); 930 event_type_descriptors[PERF_TYPE_HW_CACHE]);
923 } 931 }
924 } 932 }
925 } 933 }
926 934
927 printf("\n"); 935 printf("\n");
928 printf(" %-42s [raw hardware event descriptor]\n", 936 printf(" %-42s [%s]\n",
929 "rNNN"); 937 "rNNN", event_type_descriptors[PERF_TYPE_RAW]);
930 printf("\n"); 938 printf("\n");
931 939
932 printf(" %-42s [hardware breakpoint]\n", "mem:<addr>[:access]"); 940 printf(" %-42s [%s]\n",
941 "mem:<addr>[:access]",
942 event_type_descriptors[PERF_TYPE_BREAKPOINT]);
933 printf("\n"); 943 printf("\n");
934 944
935 print_tracepoint_events(); 945 print_tracepoint_events();
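
With the switch from the literal "*" check to strpbrk()/strglobmatch(), an event spec such as sched:sched_* or irq:* now selects every tracepoint in the subsystem whose name matches the glob. A small sketch of that matching, using libc fnmatch() as a stand-in for perf's internal strglobmatch() helper; the event names below are illustrative only:

#include <fnmatch.h>
#include <stdio.h>

int main(void)
{
	/* directory entries as parse_multiple_tracepoint_event() might see them */
	const char *events[] = {
		"sched_switch", "sched_wakeup", "sched_migrate_task", "enable",
	};
	const char *pattern = "sched_*";	/* from a hypothetical "-e sched:sched_*" */
	unsigned int i;

	for (i = 0; i < sizeof(events) / sizeof(events[0]); i++)
		if (fnmatch(pattern, events[i], 0) == 0)
			printf("selected: sched:%s\n", events[i]);
	return 0;
}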
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index fde17b090a47..8f0568849691 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -37,6 +37,8 @@
37#include "string.h" 37#include "string.h"
38#include "strlist.h" 38#include "strlist.h"
39#include "debug.h" 39#include "debug.h"
40#include "cache.h"
41#include "color.h"
40#include "parse-events.h" /* For debugfs_path */ 42#include "parse-events.h" /* For debugfs_path */
41#include "probe-event.h" 43#include "probe-event.h"
42 44
@@ -62,6 +64,42 @@ static int e_snprintf(char *str, size_t size, const char *format, ...)
62 return ret; 64 return ret;
63} 65}
64 66
67void parse_line_range_desc(const char *arg, struct line_range *lr)
68{
69 const char *ptr;
70 char *tmp;
71 /*
72 * <Syntax>
73 * SRC:SLN[+NUM|-ELN]
74 * FUNC[:SLN[+NUM|-ELN]]
75 */
76 ptr = strchr(arg, ':');
77 if (ptr) {
78 lr->start = (unsigned int)strtoul(ptr + 1, &tmp, 0);
79 if (*tmp == '+')
80 lr->end = lr->start + (unsigned int)strtoul(tmp + 1,
81 &tmp, 0);
82 else if (*tmp == '-')
83 lr->end = (unsigned int)strtoul(tmp + 1, &tmp, 0);
84 else
85 lr->end = 0;
86 pr_debug("Line range is %u to %u\n", lr->start, lr->end);
87 if (lr->end && lr->start > lr->end)
88 semantic_error("Start line must be smaller"
89 " than end line.");
90 if (*tmp != '\0')
 91 semantic_error("Trailing with invalid character '%d'.",
92 *tmp);
93 tmp = strndup(arg, (ptr - arg));
94 } else
95 tmp = strdup(arg);
96
97 if (strchr(tmp, '.'))
98 lr->file = tmp;
99 else
100 lr->function = tmp;
101}
102
65/* Check the name is good for event/group */ 103/* Check the name is good for event/group */
66static bool check_event_name(const char *name) 104static bool check_event_name(const char *name)
67{ 105{
@@ -370,7 +408,7 @@ static int open_kprobe_events(int flags, int mode)
370 if (ret < 0) { 408 if (ret < 0) {
371 if (errno == ENOENT) 409 if (errno == ENOENT)
372 die("kprobe_events file does not exist -" 410 die("kprobe_events file does not exist -"
373 " please rebuild with CONFIG_KPROBE_TRACER."); 411 " please rebuild with CONFIG_KPROBE_EVENT.");
374 else 412 else
375 die("Could not open kprobe_events file: %s", 413 die("Could not open kprobe_events file: %s",
376 strerror(errno)); 414 strerror(errno));
@@ -457,6 +495,8 @@ void show_perf_probe_events(void)
457 struct strlist *rawlist; 495 struct strlist *rawlist;
458 struct str_node *ent; 496 struct str_node *ent;
459 497
498 setup_pager();
499
460 memset(&pp, 0, sizeof(pp)); 500 memset(&pp, 0, sizeof(pp));
461 fd = open_kprobe_events(O_RDONLY, 0); 501 fd = open_kprobe_events(O_RDONLY, 0);
462 rawlist = get_trace_kprobe_event_rawlist(fd); 502 rawlist = get_trace_kprobe_event_rawlist(fd);
@@ -678,3 +718,66 @@ void del_trace_kprobe_events(struct strlist *dellist)
678 close(fd); 718 close(fd);
679} 719}
680 720
721#define LINEBUF_SIZE 256
722
723static void show_one_line(FILE *fp, unsigned int l, bool skip, bool show_num)
724{
725 char buf[LINEBUF_SIZE];
726 const char *color = PERF_COLOR_BLUE;
727
728 if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
729 goto error;
730 if (!skip) {
731 if (show_num)
732 fprintf(stdout, "%7u %s", l, buf);
733 else
734 color_fprintf(stdout, color, " %s", buf);
735 }
736
737 while (strlen(buf) == LINEBUF_SIZE - 1 &&
738 buf[LINEBUF_SIZE - 2] != '\n') {
739 if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
740 goto error;
741 if (!skip) {
742 if (show_num)
743 fprintf(stdout, "%s", buf);
744 else
745 color_fprintf(stdout, color, "%s", buf);
746 }
747 }
748 return;
749error:
750 if (feof(fp))
751 die("Source file is shorter than expected.");
752 else
753 die("File read error: %s", strerror(errno));
754}
755
756void show_line_range(struct line_range *lr)
757{
758 unsigned int l = 1;
759 struct line_node *ln;
760 FILE *fp;
761
762 setup_pager();
763
764 if (lr->function)
765 fprintf(stdout, "<%s:%d>\n", lr->function,
766 lr->start - lr->offset);
767 else
768 fprintf(stdout, "<%s:%d>\n", lr->file, lr->start);
769
770 fp = fopen(lr->path, "r");
771 if (fp == NULL)
772 die("Failed to open %s: %s", lr->path, strerror(errno));
773 /* Skip to starting line number */
774 while (l < lr->start)
775 show_one_line(fp, l++, true, false);
776
777 list_for_each_entry(ln, &lr->line_list, list) {
778 while (ln->line > l)
779 show_one_line(fp, (l++) - lr->offset, false, false);
780 show_one_line(fp, (l++) - lr->offset, false, true);
781 }
782 fclose(fp);
783}
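
parse_line_range_desc() accepts "SRC:SLN[+NUM|-ELN]" and "FUNC[:SLN[+NUM|-ELN]]". A hedged sketch of just the suffix arithmetic, with made-up inputs, showing how "+NUM" (a count) differs from "-ELN" (an end line):

#include <stdio.h>
#include <stdlib.h>

/* Parse the "SLN[+NUM|-ELN]" suffix the way parse_line_range_desc() does. */
static void parse_suffix(const char *spec, unsigned int *start, unsigned int *end)
{
	char *tmp;

	*start = (unsigned int)strtoul(spec, &tmp, 0);
	if (*tmp == '+')
		*end = *start + (unsigned int)strtoul(tmp + 1, &tmp, 0);
	else if (*tmp == '-')
		*end = (unsigned int)strtoul(tmp + 1, &tmp, 0);
	else
		*end = 0;	/* open ended */
}

int main(void)
{
	unsigned int s, e;

	parse_suffix("10+5", &s, &e);			/* e.g. a hypothetical FUNC:10+5  */
	printf("10+5  -> lines %u..%u\n", s, e);	/* 10..15 */
	parse_suffix("10-20", &s, &e);			/* e.g. a hypothetical FUNC:10-20 */
	printf("10-20 -> lines %u..%u\n", s, e);	/* 10..20 */
	return 0;
}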
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 7f1d499118c0..711287d4baea 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -5,6 +5,7 @@
5#include "probe-finder.h" 5#include "probe-finder.h"
6#include "strlist.h" 6#include "strlist.h"
7 7
8extern void parse_line_range_desc(const char *arg, struct line_range *lr);
8extern void parse_perf_probe_event(const char *str, struct probe_point *pp, 9extern void parse_perf_probe_event(const char *str, struct probe_point *pp,
9 bool *need_dwarf); 10 bool *need_dwarf);
10extern int synthesize_perf_probe_point(struct probe_point *pp); 11extern int synthesize_perf_probe_point(struct probe_point *pp);
@@ -15,6 +16,7 @@ extern void add_trace_kprobe_events(struct probe_point *probes, int nr_probes,
15 bool force_add); 16 bool force_add);
16extern void del_trace_kprobe_events(struct strlist *dellist); 17extern void del_trace_kprobe_events(struct strlist *dellist);
17extern void show_perf_probe_events(void); 18extern void show_perf_probe_events(void);
19extern void show_line_range(struct line_range *lr);
18 20
19/* Maximum index number of event-name postfix */ 21/* Maximum index number of event-name postfix */
20#define MAX_EVENT_INDEX 1024 22#define MAX_EVENT_INDEX 1024
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 4b852c0d16a5..1b2124d12f68 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -140,6 +140,31 @@ static Dwarf_Unsigned cu_find_fileno(Dwarf_Die cu_die, const char *fname)
140 return found; 140 return found;
141} 141}
142 142
143static int cu_get_filename(Dwarf_Die cu_die, Dwarf_Unsigned fno, char **buf)
144{
145 Dwarf_Signed cnt, i;
146 char **srcs;
147 int ret = 0;
148
149 if (!buf || !fno)
150 return -EINVAL;
151
152 ret = dwarf_srcfiles(cu_die, &srcs, &cnt, &__dw_error);
153 if (ret == DW_DLV_OK) {
154 if ((Dwarf_Unsigned)cnt > fno - 1) {
155 *buf = strdup(srcs[fno - 1]);
156 ret = 0;
157 pr_debug("found filename: %s\n", *buf);
158 } else
159 ret = -ENOENT;
160 for (i = 0; i < cnt; i++)
161 dwarf_dealloc(__dw_debug, srcs[i], DW_DLA_STRING);
162 dwarf_dealloc(__dw_debug, srcs, DW_DLA_LIST);
163 } else
164 ret = -EINVAL;
165 return ret;
166}
167
143/* Compare diename and tname */ 168/* Compare diename and tname */
144static int die_compare_name(Dwarf_Die dw_die, const char *tname) 169static int die_compare_name(Dwarf_Die dw_die, const char *tname)
145{ 170{
@@ -402,11 +427,11 @@ static void show_location(Dwarf_Loc *loc, struct probe_finder *pf)
402 } else if (op == DW_OP_regx) { 427 } else if (op == DW_OP_regx) {
403 regn = loc->lr_number; 428 regn = loc->lr_number;
404 } else 429 } else
405 die("Dwarf_OP %d is not supported.\n", op); 430 die("Dwarf_OP %d is not supported.", op);
406 431
407 regs = get_arch_regstr(regn); 432 regs = get_arch_regstr(regn);
408 if (!regs) 433 if (!regs)
409 die("%lld exceeds max register number.\n", regn); 434 die("%lld exceeds max register number.", regn);
410 435
411 if (deref) 436 if (deref)
412 ret = snprintf(pf->buf, pf->len, 437 ret = snprintf(pf->buf, pf->len,
@@ -438,7 +463,7 @@ static void show_variable(Dwarf_Die vr_die, struct probe_finder *pf)
438 return ; 463 return ;
439error: 464error:
440 die("Failed to find the location of %s at this address.\n" 465 die("Failed to find the location of %s at this address.\n"
441 " Perhaps, it has been optimized out.\n", pf->var); 466 " Perhaps, it has been optimized out.", pf->var);
442} 467}
443 468
444static int variable_callback(struct die_link *dlink, void *data) 469static int variable_callback(struct die_link *dlink, void *data)
@@ -476,7 +501,7 @@ static void find_variable(Dwarf_Die sp_die, struct probe_finder *pf)
476 /* Search child die for local variables and parameters. */ 501 /* Search child die for local variables and parameters. */
477 ret = search_die_from_children(sp_die, variable_callback, pf); 502 ret = search_die_from_children(sp_die, variable_callback, pf);
478 if (!ret) 503 if (!ret)
479 die("Failed to find '%s' in this function.\n", pf->var); 504 die("Failed to find '%s' in this function.", pf->var);
480} 505}
481 506
482/* Get a frame base on the address */ 507/* Get a frame base on the address */
@@ -567,7 +592,7 @@ static int probeaddr_callback(struct die_link *dlink, void *data)
567} 592}
568 593
569/* Find probe point from its line number */ 594/* Find probe point from its line number */
570static void find_by_line(struct probe_finder *pf) 595static void find_probe_point_by_line(struct probe_finder *pf)
571{ 596{
572 Dwarf_Signed cnt, i, clm; 597 Dwarf_Signed cnt, i, clm;
573 Dwarf_Line *lines; 598 Dwarf_Line *lines;
@@ -602,7 +627,7 @@ static void find_by_line(struct probe_finder *pf)
602 ret = search_die_from_children(pf->cu_die, 627 ret = search_die_from_children(pf->cu_die,
603 probeaddr_callback, pf); 628 probeaddr_callback, pf);
604 if (ret == 0) 629 if (ret == 0)
605 die("Probe point is not found in subprograms.\n"); 630 die("Probe point is not found in subprograms.");
606 /* Continuing, because target line might be inlined. */ 631 /* Continuing, because target line might be inlined. */
607 } 632 }
608 dwarf_srclines_dealloc(__dw_debug, lines, cnt); 633 dwarf_srclines_dealloc(__dw_debug, lines, cnt);
@@ -626,7 +651,7 @@ static int probefunc_callback(struct die_link *dlink, void *data)
626 pf->fno = die_get_decl_file(dlink->die); 651 pf->fno = die_get_decl_file(dlink->die);
627 pf->lno = die_get_decl_line(dlink->die) 652 pf->lno = die_get_decl_line(dlink->die)
628 + pp->line; 653 + pp->line;
629 find_by_line(pf); 654 find_probe_point_by_line(pf);
630 return 1; 655 return 1;
631 } 656 }
632 if (die_inlined_subprogram(dlink->die)) { 657 if (die_inlined_subprogram(dlink->die)) {
@@ -661,7 +686,7 @@ static int probefunc_callback(struct die_link *dlink, void *data)
661 !die_inlined_subprogram(lk->die)) 686 !die_inlined_subprogram(lk->die))
662 goto found; 687 goto found;
663 } 688 }
664 die("Failed to find real subprogram.\n"); 689 die("Failed to find real subprogram.");
665found: 690found:
666 /* Get offset from subprogram */ 691 /* Get offset from subprogram */
667 ret = die_within_subprogram(lk->die, pf->addr, &offs); 692 ret = die_within_subprogram(lk->die, pf->addr, &offs);
@@ -673,7 +698,7 @@ found:
673 return 0; 698 return 0;
674} 699}
675 700
676static void find_by_func(struct probe_finder *pf) 701static void find_probe_point_by_func(struct probe_finder *pf)
677{ 702{
678 search_die_from_children(pf->cu_die, probefunc_callback, pf); 703 search_die_from_children(pf->cu_die, probefunc_callback, pf);
679} 704}
@@ -714,10 +739,10 @@ int find_probepoint(int fd, struct probe_point *pp)
714 if (ret == DW_DLV_NO_ENTRY) 739 if (ret == DW_DLV_NO_ENTRY)
715 pf.cu_base = 0; 740 pf.cu_base = 0;
716 if (pp->function) 741 if (pp->function)
717 find_by_func(&pf); 742 find_probe_point_by_func(&pf);
718 else { 743 else {
719 pf.lno = pp->line; 744 pf.lno = pp->line;
720 find_by_line(&pf); 745 find_probe_point_by_line(&pf);
721 } 746 }
722 } 747 }
723 dwarf_dealloc(__dw_debug, pf.cu_die, DW_DLA_DIE); 748 dwarf_dealloc(__dw_debug, pf.cu_die, DW_DLA_DIE);
@@ -728,3 +753,159 @@ int find_probepoint(int fd, struct probe_point *pp)
728 return pp->found; 753 return pp->found;
729} 754}
730 755
756
757static void line_range_add_line(struct line_range *lr, unsigned int line)
758{
759 struct line_node *ln;
760 struct list_head *p;
761
762 /* Reverse search, because new line will be the last one */
763 list_for_each_entry_reverse(ln, &lr->line_list, list) {
764 if (ln->line < line) {
765 p = &ln->list;
766 goto found;
767 } else if (ln->line == line) /* Already exist */
768 return ;
769 }
770 /* List is empty, or the smallest entry */
771 p = &lr->line_list;
772found:
773 pr_debug("Debug: add a line %u\n", line);
774 ln = zalloc(sizeof(struct line_node));
775 DIE_IF(ln == NULL);
776 ln->line = line;
777 INIT_LIST_HEAD(&ln->list);
778 list_add(&ln->list, p);
779}
780
781/* Find line range from its line number */
782static void find_line_range_by_line(struct line_finder *lf)
783{
784 Dwarf_Signed cnt, i;
785 Dwarf_Line *lines;
786 Dwarf_Unsigned lineno = 0;
787 Dwarf_Unsigned fno;
788 Dwarf_Addr addr;
789 int ret;
790
791 ret = dwarf_srclines(lf->cu_die, &lines, &cnt, &__dw_error);
792 DIE_IF(ret != DW_DLV_OK);
793
794 for (i = 0; i < cnt; i++) {
795 ret = dwarf_line_srcfileno(lines[i], &fno, &__dw_error);
796 DIE_IF(ret != DW_DLV_OK);
797 if (fno != lf->fno)
798 continue;
799
800 ret = dwarf_lineno(lines[i], &lineno, &__dw_error);
801 DIE_IF(ret != DW_DLV_OK);
802 if (lf->lno_s > lineno || lf->lno_e < lineno)
803 continue;
804
805 /* Filter line in the function address range */
806 if (lf->addr_s && lf->addr_e) {
807 ret = dwarf_lineaddr(lines[i], &addr, &__dw_error);
808 DIE_IF(ret != DW_DLV_OK);
809 if (lf->addr_s > addr || lf->addr_e <= addr)
810 continue;
811 }
812 line_range_add_line(lf->lr, (unsigned int)lineno);
813 }
814 dwarf_srclines_dealloc(__dw_debug, lines, cnt);
815 if (!list_empty(&lf->lr->line_list))
816 lf->found = 1;
817}
818
819/* Search function from function name */
820static int linefunc_callback(struct die_link *dlink, void *data)
821{
822 struct line_finder *lf = (struct line_finder *)data;
823 struct line_range *lr = lf->lr;
824 Dwarf_Half tag;
825 int ret;
826
827 ret = dwarf_tag(dlink->die, &tag, &__dw_error);
828 DIE_IF(ret == DW_DLV_ERROR);
829 if (tag == DW_TAG_subprogram &&
830 die_compare_name(dlink->die, lr->function) == 0) {
831 /* Get the address range of this function */
832 ret = dwarf_highpc(dlink->die, &lf->addr_e, &__dw_error);
833 if (ret == DW_DLV_OK)
834 ret = dwarf_lowpc(dlink->die, &lf->addr_s, &__dw_error);
835 DIE_IF(ret == DW_DLV_ERROR);
836 if (ret == DW_DLV_NO_ENTRY) {
837 lf->addr_s = 0;
838 lf->addr_e = 0;
839 }
840
841 lf->fno = die_get_decl_file(dlink->die);
 842 lr->offset = die_get_decl_line(dlink->die);
843 lf->lno_s = lr->offset + lr->start;
844 if (!lr->end)
845 lf->lno_e = (Dwarf_Unsigned)-1;
846 else
847 lf->lno_e = lr->offset + lr->end;
848 lr->start = lf->lno_s;
849 lr->end = lf->lno_e;
850 find_line_range_by_line(lf);
 851 /* If we find a target function, this should be the end. */
852 lf->found = 1;
853 return 1;
854 }
855 return 0;
856}
857
858static void find_line_range_by_func(struct line_finder *lf)
859{
860 search_die_from_children(lf->cu_die, linefunc_callback, lf);
861}
862
863int find_line_range(int fd, struct line_range *lr)
864{
865 Dwarf_Half addr_size = 0;
866 Dwarf_Unsigned next_cuh = 0;
867 int ret;
868 struct line_finder lf = {.lr = lr};
869
870 ret = dwarf_init(fd, DW_DLC_READ, 0, 0, &__dw_debug, &__dw_error);
871 if (ret != DW_DLV_OK)
872 return -ENOENT;
873
874 while (!lf.found) {
875 /* Search CU (Compilation Unit) */
876 ret = dwarf_next_cu_header(__dw_debug, NULL, NULL, NULL,
877 &addr_size, &next_cuh, &__dw_error);
878 DIE_IF(ret == DW_DLV_ERROR);
879 if (ret == DW_DLV_NO_ENTRY)
880 break;
881
882 /* Get the DIE(Debugging Information Entry) of this CU */
883 ret = dwarf_siblingof(__dw_debug, 0, &lf.cu_die, &__dw_error);
884 DIE_IF(ret != DW_DLV_OK);
885
886 /* Check if target file is included. */
887 if (lr->file)
888 lf.fno = cu_find_fileno(lf.cu_die, lr->file);
889
890 if (!lr->file || lf.fno) {
891 if (lr->function)
892 find_line_range_by_func(&lf);
893 else {
894 lf.lno_s = lr->start;
895 if (!lr->end)
896 lf.lno_e = (Dwarf_Unsigned)-1;
897 else
898 lf.lno_e = lr->end;
899 find_line_range_by_line(&lf);
900 }
901 /* Get the real file path */
902 if (lf.found)
903 cu_get_filename(lf.cu_die, lf.fno, &lr->path);
904 }
905 dwarf_dealloc(__dw_debug, lf.cu_die, DW_DLA_DIE);
906 }
907 ret = dwarf_finish(__dw_debug, &__dw_error);
908 DIE_IF(ret != DW_DLV_OK);
909 return lf.found;
910}
911
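
When the range is given relative to a function, linefunc_callback() shifts it by the function's declaration line (stored in lr->offset) before find_line_range_by_line() filters the DWARF line table. A toy sketch with hypothetical numbers, not part of the patch:

#include <stdio.h>

int main(void)
{
	unsigned int decl_line = 1200;		/* hypothetical: FUNC declared here */
	unsigned int start = 10, end = 25;	/* hypothetical request: FUNC:10-25 */
	unsigned int lno_s = decl_line + start;	/* lf->lno_s */
	unsigned int lno_e = decl_line + end;	/* lf->lno_e */
	unsigned int table[] = { 1195, 1205, 1212, 1230 };	/* made-up line entries */
	unsigned int i;

	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (table[i] >= lno_s && table[i] <= lno_e)
			printf("line %u falls in the requested range\n", table[i]);
	return 0;
}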
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index a4086aaddb73..972b386116f1 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -1,6 +1,8 @@
1#ifndef _PROBE_FINDER_H 1#ifndef _PROBE_FINDER_H
2#define _PROBE_FINDER_H 2#define _PROBE_FINDER_H
3 3
4#include "util.h"
5
4#define MAX_PATH_LEN 256 6#define MAX_PATH_LEN 256
5#define MAX_PROBE_BUFFER 1024 7#define MAX_PROBE_BUFFER 1024
6#define MAX_PROBES 128 8#define MAX_PROBES 128
@@ -32,8 +34,26 @@ struct probe_point {
32 char *probes[MAX_PROBES]; /* Output buffers (will be allocated)*/ 34 char *probes[MAX_PROBES]; /* Output buffers (will be allocated)*/
33}; 35};
34 36
37/* Line number container */
38struct line_node {
39 struct list_head list;
40 unsigned int line;
41};
42
43/* Line range */
44struct line_range {
45 char *file; /* File name */
46 char *function; /* Function name */
47 unsigned int start; /* Start line number */
48 unsigned int end; /* End line number */
49 unsigned int offset; /* Start line offset */
50 char *path; /* Real path name */
51 struct list_head line_list; /* Visible lines */
52};
53
35#ifndef NO_LIBDWARF 54#ifndef NO_LIBDWARF
36extern int find_probepoint(int fd, struct probe_point *pp); 55extern int find_probepoint(int fd, struct probe_point *pp);
56extern int find_line_range(int fd, struct line_range *lr);
37 57
38/* Workaround for undefined _MIPS_SZLONG bug in libdwarf.h: */ 58/* Workaround for undefined _MIPS_SZLONG bug in libdwarf.h: */
39#ifndef _MIPS_SZLONG 59#ifndef _MIPS_SZLONG
@@ -60,6 +80,19 @@ struct probe_finder {
60 char *buf; /* Current output buffer */ 80 char *buf; /* Current output buffer */
61 int len; /* Length of output buffer */ 81 int len; /* Length of output buffer */
62}; 82};
83
84struct line_finder {
85 struct line_range *lr; /* Target line range */
86
87 Dwarf_Unsigned fno; /* File number */
88 Dwarf_Unsigned lno_s; /* Start line number */
89 Dwarf_Unsigned lno_e; /* End line number */
90 Dwarf_Addr addr_s; /* Start address */
91 Dwarf_Addr addr_e; /* End address */
92 Dwarf_Die cu_die; /* Current CU */
93 int found;
94};
95
63#endif /* NO_LIBDWARF */ 96#endif /* NO_LIBDWARF */
64 97
65#endif /*_PROBE_FINDER_H */ 98#endif /*_PROBE_FINDER_H */
diff --git a/tools/perf/util/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 6d6d76b8a21e..5376378e0cfc 100644
--- a/tools/perf/util/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -25,10 +25,16 @@
25#include <ctype.h> 25#include <ctype.h>
26#include <errno.h> 26#include <errno.h>
27 27
28#include "../perf.h" 28#include "../../perf.h"
29#include "util.h" 29#include "../util.h"
30#include "trace-event.h" 30#include "../trace-event.h"
31#include "trace-event-perl.h" 31
32#include <EXTERN.h>
33#include <perl.h>
34
35void boot_Perf__Trace__Context(pTHX_ CV *cv);
36void boot_DynaLoader(pTHX_ CV *cv);
37typedef PerlInterpreter * INTERP;
32 38
33void xs_init(pTHX); 39void xs_init(pTHX);
34 40
@@ -49,7 +55,7 @@ INTERP my_perl;
49 55
50struct event *events[FTRACE_MAX_EVENT]; 56struct event *events[FTRACE_MAX_EVENT];
51 57
52static struct scripting_context *scripting_context; 58extern struct scripting_context *scripting_context;
53 59
54static char *cur_field_name; 60static char *cur_field_name;
55static int zero_flag_atom; 61static int zero_flag_atom;
@@ -239,33 +245,6 @@ static inline struct event *find_cache_event(int type)
239 return event; 245 return event;
240} 246}
241 247
242int common_pc(struct scripting_context *context)
243{
244 int pc;
245
246 pc = parse_common_pc(context->event_data);
247
248 return pc;
249}
250
251int common_flags(struct scripting_context *context)
252{
253 int flags;
254
255 flags = parse_common_flags(context->event_data);
256
257 return flags;
258}
259
260int common_lock_depth(struct scripting_context *context)
261{
262 int lock_depth;
263
264 lock_depth = parse_common_lock_depth(context->event_data);
265
266 return lock_depth;
267}
268
269static void perl_process_event(int cpu, void *data, 248static void perl_process_event(int cpu, void *data,
270 int size __unused, 249 int size __unused,
271 unsigned long long nsecs, char *comm) 250 unsigned long long nsecs, char *comm)
@@ -587,75 +566,3 @@ struct scripting_ops perl_scripting_ops = {
587 .process_event = perl_process_event, 566 .process_event = perl_process_event,
588 .generate_script = perl_generate_script, 567 .generate_script = perl_generate_script,
589}; 568};
590
591static void print_unsupported_msg(void)
592{
593 fprintf(stderr, "Perl scripting not supported."
594 " Install libperl and rebuild perf to enable it.\n"
595 "For example:\n # apt-get install libperl-dev (ubuntu)"
596 "\n # yum install perl-ExtUtils-Embed (Fedora)"
597 "\n etc.\n");
598}
599
600static int perl_start_script_unsupported(const char *script __unused,
601 int argc __unused,
602 const char **argv __unused)
603{
604 print_unsupported_msg();
605
606 return -1;
607}
608
609static int perl_stop_script_unsupported(void)
610{
611 return 0;
612}
613
614static void perl_process_event_unsupported(int cpu __unused,
615 void *data __unused,
616 int size __unused,
617 unsigned long long nsecs __unused,
618 char *comm __unused)
619{
620}
621
622static int perl_generate_script_unsupported(const char *outfile __unused)
623{
624 print_unsupported_msg();
625
626 return -1;
627}
628
629struct scripting_ops perl_scripting_unsupported_ops = {
630 .name = "Perl",
631 .start_script = perl_start_script_unsupported,
632 .stop_script = perl_stop_script_unsupported,
633 .process_event = perl_process_event_unsupported,
634 .generate_script = perl_generate_script_unsupported,
635};
636
637static void register_perl_scripting(struct scripting_ops *scripting_ops)
638{
639 int err;
640 err = script_spec_register("Perl", scripting_ops);
641 if (err)
642 die("error registering Perl script extension");
643
644 err = script_spec_register("pl", scripting_ops);
645 if (err)
646 die("error registering pl script extension");
647
648 scripting_context = malloc(sizeof(struct scripting_context));
649}
650
651#ifdef NO_LIBPERL
652void setup_perl_scripting(void)
653{
654 register_perl_scripting(&perl_scripting_unsupported_ops);
655}
656#else
657void setup_perl_scripting(void)
658{
659 register_perl_scripting(&perl_scripting_ops);
660}
661#endif
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
new file mode 100644
index 000000000000..33a414bbba3e
--- /dev/null
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -0,0 +1,573 @@
1/*
2 * trace-event-python. Feed trace events to an embedded Python interpreter.
3 *
4 * Copyright (C) 2010 Tom Zanussi <tzanussi@gmail.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
21
22#include <Python.h>
23
24#include <stdio.h>
25#include <stdlib.h>
26#include <string.h>
27#include <ctype.h>
28#include <errno.h>
29
30#include "../../perf.h"
31#include "../util.h"
32#include "../trace-event.h"
33
34PyMODINIT_FUNC initperf_trace_context(void);
35
36#define FTRACE_MAX_EVENT \
37 ((1 << (sizeof(unsigned short) * 8)) - 1)
38
39struct event *events[FTRACE_MAX_EVENT];
40
41#define MAX_FIELDS 64
42#define N_COMMON_FIELDS 7
43
44extern struct scripting_context *scripting_context;
45
46static char *cur_field_name;
47static int zero_flag_atom;
48
49static PyObject *main_module, *main_dict;
50
51static void handler_call_die(const char *handler_name)
52{
53 PyErr_Print();
54 Py_FatalError("problem in Python trace event handler");
55}
56
57static void define_value(enum print_arg_type field_type,
58 const char *ev_name,
59 const char *field_name,
60 const char *field_value,
61 const char *field_str)
62{
63 const char *handler_name = "define_flag_value";
64 PyObject *handler, *t, *retval;
65 unsigned long long value;
66 unsigned n = 0;
67
68 if (field_type == PRINT_SYMBOL)
69 handler_name = "define_symbolic_value";
70
71 t = PyTuple_New(4);
72 if (!t)
73 Py_FatalError("couldn't create Python tuple");
74
75 value = eval_flag(field_value);
76
77 PyTuple_SetItem(t, n++, PyString_FromString(ev_name));
78 PyTuple_SetItem(t, n++, PyString_FromString(field_name));
79 PyTuple_SetItem(t, n++, PyInt_FromLong(value));
80 PyTuple_SetItem(t, n++, PyString_FromString(field_str));
81
82 handler = PyDict_GetItemString(main_dict, handler_name);
83 if (handler && PyCallable_Check(handler)) {
84 retval = PyObject_CallObject(handler, t);
85 if (retval == NULL)
86 handler_call_die(handler_name);
87 }
88
89 Py_DECREF(t);
90}
91
92static void define_values(enum print_arg_type field_type,
93 struct print_flag_sym *field,
94 const char *ev_name,
95 const char *field_name)
96{
97 define_value(field_type, ev_name, field_name, field->value,
98 field->str);
99
100 if (field->next)
101 define_values(field_type, field->next, ev_name, field_name);
102}
103
104static void define_field(enum print_arg_type field_type,
105 const char *ev_name,
106 const char *field_name,
107 const char *delim)
108{
109 const char *handler_name = "define_flag_field";
110 PyObject *handler, *t, *retval;
111 unsigned n = 0;
112
113 if (field_type == PRINT_SYMBOL)
114 handler_name = "define_symbolic_field";
115
116 if (field_type == PRINT_FLAGS)
117 t = PyTuple_New(3);
118 else
119 t = PyTuple_New(2);
120 if (!t)
121 Py_FatalError("couldn't create Python tuple");
122
123 PyTuple_SetItem(t, n++, PyString_FromString(ev_name));
124 PyTuple_SetItem(t, n++, PyString_FromString(field_name));
125 if (field_type == PRINT_FLAGS)
126 PyTuple_SetItem(t, n++, PyString_FromString(delim));
127
128 handler = PyDict_GetItemString(main_dict, handler_name);
129 if (handler && PyCallable_Check(handler)) {
130 retval = PyObject_CallObject(handler, t);
131 if (retval == NULL)
132 handler_call_die(handler_name);
133 }
134
135 Py_DECREF(t);
136}
137
138static void define_event_symbols(struct event *event,
139 const char *ev_name,
140 struct print_arg *args)
141{
142 switch (args->type) {
143 case PRINT_NULL:
144 break;
145 case PRINT_ATOM:
146 define_value(PRINT_FLAGS, ev_name, cur_field_name, "0",
147 args->atom.atom);
148 zero_flag_atom = 0;
149 break;
150 case PRINT_FIELD:
151 if (cur_field_name)
152 free(cur_field_name);
153 cur_field_name = strdup(args->field.name);
154 break;
155 case PRINT_FLAGS:
156 define_event_symbols(event, ev_name, args->flags.field);
157 define_field(PRINT_FLAGS, ev_name, cur_field_name,
158 args->flags.delim);
159 define_values(PRINT_FLAGS, args->flags.flags, ev_name,
160 cur_field_name);
161 break;
162 case PRINT_SYMBOL:
163 define_event_symbols(event, ev_name, args->symbol.field);
164 define_field(PRINT_SYMBOL, ev_name, cur_field_name, NULL);
165 define_values(PRINT_SYMBOL, args->symbol.symbols, ev_name,
166 cur_field_name);
167 break;
168 case PRINT_STRING:
169 break;
170 case PRINT_TYPE:
171 define_event_symbols(event, ev_name, args->typecast.item);
172 break;
173 case PRINT_OP:
174 if (strcmp(args->op.op, ":") == 0)
175 zero_flag_atom = 1;
176 define_event_symbols(event, ev_name, args->op.left);
177 define_event_symbols(event, ev_name, args->op.right);
178 break;
179 default:
180 /* we should warn... */
181 return;
182 }
183
184 if (args->next)
185 define_event_symbols(event, ev_name, args->next);
186}
187
188static inline struct event *find_cache_event(int type)
189{
190 static char ev_name[256];
191 struct event *event;
192
193 if (events[type])
194 return events[type];
195
196 events[type] = event = trace_find_event(type);
197 if (!event)
198 return NULL;
199
200 sprintf(ev_name, "%s__%s", event->system, event->name);
201
202 define_event_symbols(event, ev_name, event->print_fmt.args);
203
204 return event;
205}
206
207static void python_process_event(int cpu, void *data,
208 int size __unused,
209 unsigned long long nsecs, char *comm)
210{
211 PyObject *handler, *retval, *context, *t;
212 static char handler_name[256];
213 struct format_field *field;
214 unsigned long long val;
215 unsigned long s, ns;
216 struct event *event;
217 unsigned n = 0;
218 int type;
219 int pid;
220
221 t = PyTuple_New(MAX_FIELDS);
222 if (!t)
223 Py_FatalError("couldn't create Python tuple");
224
225 type = trace_parse_common_type(data);
226
227 event = find_cache_event(type);
228 if (!event)
229 die("ug! no event found for type %d", type);
230
231 pid = trace_parse_common_pid(data);
232
233 sprintf(handler_name, "%s__%s", event->system, event->name);
234
235 s = nsecs / NSECS_PER_SEC;
236 ns = nsecs - s * NSECS_PER_SEC;
237
238 scripting_context->event_data = data;
239
240 context = PyCObject_FromVoidPtr(scripting_context, NULL);
241
242 PyTuple_SetItem(t, n++, PyString_FromString(handler_name));
243 PyTuple_SetItem(t, n++,
244 PyCObject_FromVoidPtr(scripting_context, NULL));
245 PyTuple_SetItem(t, n++, PyInt_FromLong(cpu));
246 PyTuple_SetItem(t, n++, PyInt_FromLong(s));
247 PyTuple_SetItem(t, n++, PyInt_FromLong(ns));
248 PyTuple_SetItem(t, n++, PyInt_FromLong(pid));
249 PyTuple_SetItem(t, n++, PyString_FromString(comm));
250
251 for (field = event->format.fields; field; field = field->next) {
252 if (field->flags & FIELD_IS_STRING) {
253 int offset;
254 if (field->flags & FIELD_IS_DYNAMIC) {
255 offset = *(int *)(data + field->offset);
256 offset &= 0xffff;
257 } else
258 offset = field->offset;
259 PyTuple_SetItem(t, n++,
260 PyString_FromString((char *)data + offset));
261 } else { /* FIELD_IS_NUMERIC */
262 val = read_size(data + field->offset, field->size);
263 if (field->flags & FIELD_IS_SIGNED) {
264 PyTuple_SetItem(t, n++, PyInt_FromLong(val));
265 } else {
266 PyTuple_SetItem(t, n++, PyInt_FromLong(val));
267 }
268 }
269 }
270
271 if (_PyTuple_Resize(&t, n) == -1)
272 Py_FatalError("error resizing Python tuple");
273
274 handler = PyDict_GetItemString(main_dict, handler_name);
275 if (handler && PyCallable_Check(handler)) {
276 retval = PyObject_CallObject(handler, t);
277 if (retval == NULL)
278 handler_call_die(handler_name);
279 } else {
280 handler = PyDict_GetItemString(main_dict, "trace_unhandled");
281 if (handler && PyCallable_Check(handler)) {
282 if (_PyTuple_Resize(&t, N_COMMON_FIELDS) == -1)
283 Py_FatalError("error resizing Python tuple");
284
285 retval = PyObject_CallObject(handler, t);
286 if (retval == NULL)
287 handler_call_die("trace_unhandled");
288 }
289 }
290
291 Py_DECREF(t);
292}
293
294static int run_start_sub(void)
295{
296 PyObject *handler, *retval;
297 int err = 0;
298
299 main_module = PyImport_AddModule("__main__");
300 if (main_module == NULL)
301 return -1;
302 Py_INCREF(main_module);
303
304 main_dict = PyModule_GetDict(main_module);
305 if (main_dict == NULL) {
306 err = -1;
307 goto error;
308 }
309 Py_INCREF(main_dict);
310
311 handler = PyDict_GetItemString(main_dict, "trace_begin");
312 if (handler == NULL || !PyCallable_Check(handler))
313 goto out;
314
315 retval = PyObject_CallObject(handler, NULL);
316 if (retval == NULL)
317 handler_call_die("trace_begin");
318
319 Py_DECREF(retval);
320 return err;
321error:
322 Py_XDECREF(main_dict);
323 Py_XDECREF(main_module);
324out:
325 return err;
326}
327
328/*
329 * Start trace script
330 */
331static int python_start_script(const char *script, int argc, const char **argv)
332{
333 const char **command_line;
334 char buf[PATH_MAX];
335 int i, err = 0;
336 FILE *fp;
337
338 command_line = malloc((argc + 1) * sizeof(const char *));
339 command_line[0] = script;
340 for (i = 1; i < argc + 1; i++)
341 command_line[i] = argv[i - 1];
342
343 Py_Initialize();
344
345 initperf_trace_context();
346
347 PySys_SetArgv(argc + 1, (char **)command_line);
348
349 fp = fopen(script, "r");
350 if (!fp) {
351 sprintf(buf, "Can't open python script \"%s\"", script);
352 perror(buf);
353 err = -1;
354 goto error;
355 }
356
357 err = PyRun_SimpleFile(fp, script);
358 if (err) {
359 fprintf(stderr, "Error running python script %s\n", script);
360 goto error;
361 }
362
363 err = run_start_sub();
364 if (err) {
365 fprintf(stderr, "Error starting python script %s\n", script);
366 goto error;
367 }
368
369 free(command_line);
370 fprintf(stderr, "perf trace started with Python script %s\n\n",
371 script);
372
373 return err;
374error:
375 Py_Finalize();
376 free(command_line);
377
378 return err;
379}
380
381/*
382 * Stop trace script
383 */
384static int python_stop_script(void)
385{
386 PyObject *handler, *retval;
387 int err = 0;
388
389 handler = PyDict_GetItemString(main_dict, "trace_end");
390 if (handler == NULL || !PyCallable_Check(handler))
391 goto out;
392
393 retval = PyObject_CallObject(handler, NULL);
394 if (retval == NULL)
395 handler_call_die("trace_end");
396 else
397 Py_DECREF(retval);
398out:
399 Py_XDECREF(main_dict);
400 Py_XDECREF(main_module);
401 Py_Finalize();
402
403 fprintf(stderr, "\nperf trace Python script stopped\n");
404
405 return err;
406}
407
408static int python_generate_script(const char *outfile)
409{
410 struct event *event = NULL;
411 struct format_field *f;
412 char fname[PATH_MAX];
413 int not_first, count;
414 FILE *ofp;
415
416 sprintf(fname, "%s.py", outfile);
417 ofp = fopen(fname, "w");
418 if (ofp == NULL) {
419 fprintf(stderr, "couldn't open %s\n", fname);
420 return -1;
421 }
422 fprintf(ofp, "# perf trace event handlers, "
423 "generated by perf trace -g python\n");
424
425 fprintf(ofp, "# Licensed under the terms of the GNU GPL"
426 " License version 2\n\n");
427
428 fprintf(ofp, "# The common_* event handler fields are the most useful "
429 "fields common to\n");
430
431 fprintf(ofp, "# all events. They don't necessarily correspond to "
432 "the 'common_*' fields\n");
433
434 fprintf(ofp, "# in the format files. Those fields not available as "
435 "handler params can\n");
436
437 fprintf(ofp, "# be retrieved using Python functions of the form "
438 "common_*(context).\n");
439
440 fprintf(ofp, "# See the perf-trace-python Documentation for the list "
441 "of available functions.\n\n");
442
443 fprintf(ofp, "import os\n");
444 fprintf(ofp, "import sys\n\n");
445
446 fprintf(ofp, "sys.path.append(os.environ['PERF_EXEC_PATH'] + \\\n");
447 fprintf(ofp, "\t'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')\n");
448 fprintf(ofp, "\nfrom perf_trace_context import *\n");
449 fprintf(ofp, "from Core import *\n\n\n");
450
451 fprintf(ofp, "def trace_begin():\n");
452 fprintf(ofp, "\tprint \"in trace_begin\"\n\n");
453
454 fprintf(ofp, "def trace_end():\n");
455 fprintf(ofp, "\tprint \"in trace_end\"\n\n");
456
457 while ((event = trace_find_next_event(event))) {
458 fprintf(ofp, "def %s__%s(", event->system, event->name);
459 fprintf(ofp, "event_name, ");
460 fprintf(ofp, "context, ");
461 fprintf(ofp, "common_cpu,\n");
462 fprintf(ofp, "\tcommon_secs, ");
463 fprintf(ofp, "common_nsecs, ");
464 fprintf(ofp, "common_pid, ");
465 fprintf(ofp, "common_comm,\n\t");
466
467 not_first = 0;
468 count = 0;
469
470 for (f = event->format.fields; f; f = f->next) {
471 if (not_first++)
472 fprintf(ofp, ", ");
473 if (++count % 5 == 0)
474 fprintf(ofp, "\n\t");
475
476 fprintf(ofp, "%s", f->name);
477 }
478 fprintf(ofp, "):\n");
479
480 fprintf(ofp, "\t\tprint_header(event_name, common_cpu, "
481 "common_secs, common_nsecs,\n\t\t\t"
482 "common_pid, common_comm)\n\n");
483
484 fprintf(ofp, "\t\tprint \"");
485
486 not_first = 0;
487 count = 0;
488
489 for (f = event->format.fields; f; f = f->next) {
490 if (not_first++)
491 fprintf(ofp, ", ");
492 if (count && count % 3 == 0) {
493 fprintf(ofp, "\" \\\n\t\t\"");
494 }
495 count++;
496
497 fprintf(ofp, "%s=", f->name);
498 if (f->flags & FIELD_IS_STRING ||
499 f->flags & FIELD_IS_FLAG ||
500 f->flags & FIELD_IS_SYMBOLIC)
501 fprintf(ofp, "%%s");
502 else if (f->flags & FIELD_IS_SIGNED)
503 fprintf(ofp, "%%d");
504 else
505 fprintf(ofp, "%%u");
506 }
507
508 fprintf(ofp, "\\n\" %% \\\n\t\t(");
509
510 not_first = 0;
511 count = 0;
512
513 for (f = event->format.fields; f; f = f->next) {
514 if (not_first++)
515 fprintf(ofp, ", ");
516
517 if (++count % 5 == 0)
518 fprintf(ofp, "\n\t\t");
519
520 if (f->flags & FIELD_IS_FLAG) {
521 if ((count - 1) % 5 != 0) {
522 fprintf(ofp, "\n\t\t");
523 count = 4;
524 }
525 fprintf(ofp, "flag_str(\"");
526 fprintf(ofp, "%s__%s\", ", event->system,
527 event->name);
528 fprintf(ofp, "\"%s\", %s)", f->name,
529 f->name);
530 } else if (f->flags & FIELD_IS_SYMBOLIC) {
531 if ((count - 1) % 5 != 0) {
532 fprintf(ofp, "\n\t\t");
533 count = 4;
534 }
535 fprintf(ofp, "symbol_str(\"");
536 fprintf(ofp, "%s__%s\", ", event->system,
537 event->name);
538 fprintf(ofp, "\"%s\", %s)", f->name,
539 f->name);
540 } else
541 fprintf(ofp, "%s", f->name);
542 }
543
544 fprintf(ofp, "),\n\n");
545 }
546
547 fprintf(ofp, "def trace_unhandled(event_name, context, "
548 "common_cpu, common_secs, common_nsecs,\n\t\t"
549 "common_pid, common_comm):\n");
550
551 fprintf(ofp, "\t\tprint_header(event_name, common_cpu, "
552 "common_secs, common_nsecs,\n\t\tcommon_pid, "
553 "common_comm)\n\n");
554
555 fprintf(ofp, "def print_header("
556 "event_name, cpu, secs, nsecs, pid, comm):\n"
557 "\tprint \"%%-20s %%5u %%05u.%%09u %%8u %%-20s \" %% \\\n\t"
558 "(event_name, cpu, secs, nsecs, pid, comm),\n");
559
560 fclose(ofp);
561
562 fprintf(stderr, "generated Python script: %s\n", fname);
563
564 return 0;
565}
566
567struct scripting_ops python_scripting_ops = {
568 .name = "Python",
569 .start_script = python_start_script,
570 .stop_script = python_stop_script,
571 .process_event = python_process_event,
572 .generate_script = python_generate_script,
573};
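
python_process_event() masks dynamic string offsets with 0xffff because, per the ftrace "__data_loc" convention (stated here as an assumption, not something the patch spells out), a dynamic field is a 32-bit word carrying the in-record offset in the low 16 bits and the string length in the high 16 bits. A self-contained sketch with a made-up record layout:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char record[64] = { 0 };			/* made-up raw event record */
	const char *comm = "swapper";
	unsigned short str_off = 8;			/* where the string lives */
	unsigned short str_len = (unsigned short)(strlen(comm) + 1);
	unsigned int data_loc = ((unsigned int)str_len << 16) | str_off;
	unsigned int loc;

	memcpy(record + 4, &data_loc, sizeof(data_loc));	/* field itself at offset 4 */
	memcpy(record + str_off, comm, str_len);

	/* what the handler does: mask off the length, index into the record */
	memcpy(&loc, record + 4, sizeof(loc));
	printf("string \"%s\", length %u\n", record + (loc & 0xffff), loc >> 16);
	return 0;
}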
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index ce3a6c8abe76..0de7258e70a5 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1,5 +1,8 @@
1#define _FILE_OFFSET_BITS 64
2
1#include <linux/kernel.h> 3#include <linux/kernel.h>
2 4
5#include <byteswap.h>
3#include <unistd.h> 6#include <unistd.h>
4#include <sys/types.h> 7#include <sys/types.h>
5 8
@@ -49,6 +52,11 @@ out_close:
49 return -1; 52 return -1;
50} 53}
51 54
55static inline int perf_session__create_kernel_maps(struct perf_session *self)
56{
57 return map_groups__create_kernel_maps(&self->kmaps, self->vmlinux_maps);
58}
59
52struct perf_session *perf_session__new(const char *filename, int mode, bool force) 60struct perf_session *perf_session__new(const char *filename, int mode, bool force)
53{ 61{
54 size_t len = filename ? strlen(filename) + 1 : 0; 62 size_t len = filename ? strlen(filename) + 1 : 0;
@@ -66,13 +74,22 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
66 self->mmap_window = 32; 74 self->mmap_window = 32;
67 self->cwd = NULL; 75 self->cwd = NULL;
68 self->cwdlen = 0; 76 self->cwdlen = 0;
77 self->unknown_events = 0;
69 map_groups__init(&self->kmaps); 78 map_groups__init(&self->kmaps);
70 79
71 if (perf_session__create_kernel_maps(self) < 0) 80 if (mode == O_RDONLY) {
72 goto out_delete; 81 if (perf_session__open(self, force) < 0)
82 goto out_delete;
83 } else if (mode == O_WRONLY) {
84 /*
85 * In O_RDONLY mode this will be performed when reading the
86 * kernel MMAP event, in event__process_mmap().
87 */
88 if (perf_session__create_kernel_maps(self) < 0)
89 goto out_delete;
90 }
73 91
74 if (mode == O_RDONLY && perf_session__open(self, force) < 0) 92 self->sample_type = perf_header__sample_type(&self->header);
75 goto out_delete;
76out: 93out:
77 return self; 94 return self;
78out_free: 95out_free:
@@ -148,3 +165,409 @@ struct symbol **perf_session__resolve_callchain(struct perf_session *self,
148 165
149 return syms; 166 return syms;
150} 167}
168
169static int process_event_stub(event_t *event __used,
170 struct perf_session *session __used)
171{
172 dump_printf(": unhandled!\n");
173 return 0;
174}
175
176static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
177{
178 if (handler->sample == NULL)
179 handler->sample = process_event_stub;
180 if (handler->mmap == NULL)
181 handler->mmap = process_event_stub;
182 if (handler->comm == NULL)
183 handler->comm = process_event_stub;
184 if (handler->fork == NULL)
185 handler->fork = process_event_stub;
186 if (handler->exit == NULL)
187 handler->exit = process_event_stub;
188 if (handler->lost == NULL)
189 handler->lost = process_event_stub;
190 if (handler->read == NULL)
191 handler->read = process_event_stub;
192 if (handler->throttle == NULL)
193 handler->throttle = process_event_stub;
194 if (handler->unthrottle == NULL)
195 handler->unthrottle = process_event_stub;
196}
197
198static const char *event__name[] = {
199 [0] = "TOTAL",
200 [PERF_RECORD_MMAP] = "MMAP",
201 [PERF_RECORD_LOST] = "LOST",
202 [PERF_RECORD_COMM] = "COMM",
203 [PERF_RECORD_EXIT] = "EXIT",
204 [PERF_RECORD_THROTTLE] = "THROTTLE",
205 [PERF_RECORD_UNTHROTTLE] = "UNTHROTTLE",
206 [PERF_RECORD_FORK] = "FORK",
207 [PERF_RECORD_READ] = "READ",
208 [PERF_RECORD_SAMPLE] = "SAMPLE",
209};
210
211unsigned long event__total[PERF_RECORD_MAX];
212
213void event__print_totals(void)
214{
215 int i;
216 for (i = 0; i < PERF_RECORD_MAX; ++i)
217 pr_info("%10s events: %10ld\n",
218 event__name[i], event__total[i]);
219}
220
221void mem_bswap_64(void *src, int byte_size)
222{
223 u64 *m = src;
224
225 while (byte_size > 0) {
226 *m = bswap_64(*m);
227 byte_size -= sizeof(u64);
228 ++m;
229 }
230}
231
232static void event__all64_swap(event_t *self)
233{
234 struct perf_event_header *hdr = &self->header;
235 mem_bswap_64(hdr + 1, self->header.size - sizeof(*hdr));
236}
237
238static void event__comm_swap(event_t *self)
239{
240 self->comm.pid = bswap_32(self->comm.pid);
241 self->comm.tid = bswap_32(self->comm.tid);
242}
243
244static void event__mmap_swap(event_t *self)
245{
246 self->mmap.pid = bswap_32(self->mmap.pid);
247 self->mmap.tid = bswap_32(self->mmap.tid);
248 self->mmap.start = bswap_64(self->mmap.start);
249 self->mmap.len = bswap_64(self->mmap.len);
250 self->mmap.pgoff = bswap_64(self->mmap.pgoff);
251}
252
253static void event__task_swap(event_t *self)
254{
255 self->fork.pid = bswap_32(self->fork.pid);
256 self->fork.tid = bswap_32(self->fork.tid);
257 self->fork.ppid = bswap_32(self->fork.ppid);
258 self->fork.ptid = bswap_32(self->fork.ptid);
259 self->fork.time = bswap_64(self->fork.time);
260}
261
262static void event__read_swap(event_t *self)
263{
264 self->read.pid = bswap_32(self->read.pid);
265 self->read.tid = bswap_32(self->read.tid);
266 self->read.value = bswap_64(self->read.value);
267 self->read.time_enabled = bswap_64(self->read.time_enabled);
268 self->read.time_running = bswap_64(self->read.time_running);
269 self->read.id = bswap_64(self->read.id);
270}
271
272typedef void (*event__swap_op)(event_t *self);
273
274static event__swap_op event__swap_ops[] = {
275 [PERF_RECORD_MMAP] = event__mmap_swap,
276 [PERF_RECORD_COMM] = event__comm_swap,
277 [PERF_RECORD_FORK] = event__task_swap,
278 [PERF_RECORD_EXIT] = event__task_swap,
279 [PERF_RECORD_LOST] = event__all64_swap,
280 [PERF_RECORD_READ] = event__read_swap,
281 [PERF_RECORD_SAMPLE] = event__all64_swap,
282 [PERF_RECORD_MAX] = NULL,
283};
284
285static int perf_session__process_event(struct perf_session *self,
286 event_t *event,
287 struct perf_event_ops *ops,
288 u64 offset, u64 head)
289{
290 trace_event(event);
291
292 if (event->header.type < PERF_RECORD_MAX) {
293 dump_printf("%#Lx [%#x]: PERF_RECORD_%s",
294 offset + head, event->header.size,
295 event__name[event->header.type]);
296 ++event__total[0];
297 ++event__total[event->header.type];
298 }
299
300 if (self->header.needs_swap && event__swap_ops[event->header.type])
301 event__swap_ops[event->header.type](event);
302
303 switch (event->header.type) {
304 case PERF_RECORD_SAMPLE:
305 return ops->sample(event, self);
306 case PERF_RECORD_MMAP:
307 return ops->mmap(event, self);
308 case PERF_RECORD_COMM:
309 return ops->comm(event, self);
310 case PERF_RECORD_FORK:
311 return ops->fork(event, self);
312 case PERF_RECORD_EXIT:
313 return ops->exit(event, self);
314 case PERF_RECORD_LOST:
315 return ops->lost(event, self);
316 case PERF_RECORD_READ:
317 return ops->read(event, self);
318 case PERF_RECORD_THROTTLE:
319 return ops->throttle(event, self);
320 case PERF_RECORD_UNTHROTTLE:
321 return ops->unthrottle(event, self);
322 default:
323 self->unknown_events++;
324 return -1;
325 }
326}
327
328void perf_event_header__bswap(struct perf_event_header *self)
329{
330 self->type = bswap_32(self->type);
331 self->misc = bswap_16(self->misc);
332 self->size = bswap_16(self->size);
333}
334
335int perf_header__read_build_ids(struct perf_header *self,
336 int input, u64 offset, u64 size)
337{
338 struct build_id_event bev;
339 char filename[PATH_MAX];
340 u64 limit = offset + size;
341 int err = -1;
342
343 while (offset < limit) {
344 struct dso *dso;
345 ssize_t len;
346 struct list_head *head = &dsos__user;
347
348 if (read(input, &bev, sizeof(bev)) != sizeof(bev))
349 goto out;
350
351 if (self->needs_swap)
352 perf_event_header__bswap(&bev.header);
353
354 len = bev.header.size - sizeof(bev);
355 if (read(input, filename, len) != len)
356 goto out;
357
358 if (bev.header.misc & PERF_RECORD_MISC_KERNEL)
359 head = &dsos__kernel;
360
361 dso = __dsos__findnew(head, filename);
362 if (dso != NULL) {
363 dso__set_build_id(dso, &bev.build_id);
364 if (head == &dsos__kernel && filename[0] == '[')
365 dso->kernel = 1;
366 }
367
368 offset += bev.header.size;
369 }
370 err = 0;
371out:
372 return err;
373}
374
375static struct thread *perf_session__register_idle_thread(struct perf_session *self)
376{
377 struct thread *thread = perf_session__findnew(self, 0);
378
379 if (thread == NULL || thread__set_comm(thread, "swapper")) {
380 pr_err("problem inserting idle task.\n");
381 thread = NULL;
382 }
383
384 return thread;
385}
386
387int __perf_session__process_events(struct perf_session *self,
388 u64 data_offset, u64 data_size,
389 u64 file_size, struct perf_event_ops *ops)
390{
391 int err, mmap_prot, mmap_flags;
392 u64 head, shift;
393 u64 offset = 0;
394 size_t page_size;
395 event_t *event;
396 uint32_t size;
397 char *buf;
398
399 perf_event_ops__fill_defaults(ops);
400
401 page_size = sysconf(_SC_PAGESIZE);
402
403 head = data_offset;
404 shift = page_size * (head / page_size);
405 offset += shift;
406 head -= shift;
407
408 mmap_prot = PROT_READ;
409 mmap_flags = MAP_SHARED;
410
411 if (self->header.needs_swap) {
412 mmap_prot |= PROT_WRITE;
413 mmap_flags = MAP_PRIVATE;
414 }
415remap:
416 buf = mmap(NULL, page_size * self->mmap_window, mmap_prot,
417 mmap_flags, self->fd, offset);
418 if (buf == MAP_FAILED) {
419 pr_err("failed to mmap file\n");
420 err = -errno;
421 goto out_err;
422 }
423
424more:
425 event = (event_t *)(buf + head);
426
427 if (self->header.needs_swap)
428 perf_event_header__bswap(&event->header);
429 size = event->header.size;
430 if (size == 0)
431 size = 8;
432
433 if (head + event->header.size >= page_size * self->mmap_window) {
434 int munmap_ret;
435
436 shift = page_size * (head / page_size);
437
438 munmap_ret = munmap(buf, page_size * self->mmap_window);
439 assert(munmap_ret == 0);
440
441 offset += shift;
442 head -= shift;
443 goto remap;
444 }
445
446 size = event->header.size;
447
448 dump_printf("\n%#Lx [%#x]: event: %d\n",
449 offset + head, event->header.size, event->header.type);
450
451 if (size == 0 ||
452 perf_session__process_event(self, event, ops, offset, head) < 0) {
453 dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
454 offset + head, event->header.size,
455 event->header.type);
456 /*
457 * assume we lost track of the stream, check alignment, and
458 * increment a single u64 in the hope of catching on again 'soon'.
459 */
460 if (unlikely(head & 7))
461 head &= ~7ULL;
462
463 size = 8;
464 }
465
466 head += size;
467
468 if (offset + head >= data_offset + data_size)
469 goto done;
470
471 if (offset + head < file_size)
472 goto more;
473done:
474 err = 0;
475out_err:
476 return err;
477}
478
479int perf_session__process_events(struct perf_session *self,
480 struct perf_event_ops *ops)
481{
482 int err;
483
484 if (perf_session__register_idle_thread(self) == NULL)
485 return -ENOMEM;
486
487 if (!symbol_conf.full_paths) {
488 char bf[PATH_MAX];
489
490 if (getcwd(bf, sizeof(bf)) == NULL) {
491 err = -errno;
492out_getcwd_err:
493 pr_err("failed to get the current directory\n");
494 goto out_err;
495 }
496 self->cwd = strdup(bf);
497 if (self->cwd == NULL) {
498 err = -ENOMEM;
499 goto out_getcwd_err;
500 }
501 self->cwdlen = strlen(self->cwd);
502 }
503
504 err = __perf_session__process_events(self, self->header.data_offset,
505 self->header.data_size,
506 self->size, ops);
507out_err:
508 return err;
509}
510
511bool perf_session__has_traces(struct perf_session *self, const char *msg)
512{
513 if (!(self->sample_type & PERF_SAMPLE_RAW)) {
514 pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
515 return false;
516 }
517
518 return true;
519}
520
521int perf_session__set_kallsyms_ref_reloc_sym(struct perf_session *self,
522 const char *symbol_name,
523 u64 addr)
524{
525 char *bracket;
526 enum map_type i;
527
528 self->ref_reloc_sym.name = strdup(symbol_name);
529 if (self->ref_reloc_sym.name == NULL)
530 return -ENOMEM;
531
532 bracket = strchr(self->ref_reloc_sym.name, ']');
533 if (bracket)
534 *bracket = '\0';
535
536 self->ref_reloc_sym.addr = addr;
537
538 for (i = 0; i < MAP__NR_TYPES; ++i) {
539 struct kmap *kmap = map__kmap(self->vmlinux_maps[i]);
540 kmap->ref_reloc_sym = &self->ref_reloc_sym;
541 }
542
543 return 0;
544}
545
546static u64 map__reloc_map_ip(struct map *map, u64 ip)
547{
548 return ip + (s64)map->pgoff;
549}
550
551static u64 map__reloc_unmap_ip(struct map *map, u64 ip)
552{
553 return ip - (s64)map->pgoff;
554}
555
556void map__reloc_vmlinux(struct map *self)
557{
558 struct kmap *kmap = map__kmap(self);
559 s64 reloc;
560
561 if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->unrelocated_addr)
562 return;
563
564 reloc = (kmap->ref_reloc_sym->unrelocated_addr -
565 kmap->ref_reloc_sym->addr);
566
567 if (!reloc)
568 return;
569
570 self->map_ip = map__reloc_map_ip;
571 self->unmap_ip = map__reloc_unmap_ip;
572 self->pgoff = reloc;
573}
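
The swap-op table above is what lets a perf.data file recorded on a machine of the opposite endianness be consumed: when header.needs_swap is set, the per-record-type handler byte-swaps every multi-byte field before the ops callbacks see the event. A minimal standalone sketch of the same pattern, illustrative only and not part of this patch (bswap_32/bswap_64 as provided by glibc's <byteswap.h>):

#include <stdint.h>
#include <byteswap.h>

enum rec_type { REC_FORK, REC_READ, REC_MAX };

struct fork_rec {
	uint32_t pid, tid;
	uint64_t time;
};

typedef void (*swap_fn)(void *rec);

static void fork_swap(void *rec)
{
	struct fork_rec *f = rec;

	f->pid  = bswap_32(f->pid);
	f->tid  = bswap_32(f->tid);
	f->time = bswap_64(f->time);
}

/* slots left NULL need no per-field fixup */
static swap_fn swap_ops[REC_MAX] = {
	[REC_FORK] = fork_swap,
};

static void maybe_swap(void *rec, enum rec_type type, int needs_swap)
{
	if (needs_swap && type < REC_MAX && swap_ops[type])
		swap_ops[type](rec);
}
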
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 32eaa1bada06..31950fcd8a4d 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -3,13 +3,13 @@
3 3
4#include "event.h" 4#include "event.h"
5#include "header.h" 5#include "header.h"
6#include "symbol.h"
6#include "thread.h" 7#include "thread.h"
7#include <linux/rbtree.h> 8#include <linux/rbtree.h>
8#include "../../../include/linux/perf_event.h" 9#include "../../../include/linux/perf_event.h"
9 10
10struct ip_callchain; 11struct ip_callchain;
11struct thread; 12struct thread;
12struct symbol;
13 13
14struct perf_session { 14struct perf_session {
15 struct perf_header header; 15 struct perf_header header;
@@ -18,10 +18,13 @@ struct perf_session {
18 struct map_groups kmaps; 18 struct map_groups kmaps;
19 struct rb_root threads; 19 struct rb_root threads;
20 struct thread *last_match; 20 struct thread *last_match;
21 struct map *vmlinux_maps[MAP__NR_TYPES];
21 struct events_stats events_stats; 22 struct events_stats events_stats;
22 unsigned long event_total[PERF_RECORD_MAX]; 23 unsigned long event_total[PERF_RECORD_MAX];
24 unsigned long unknown_events;
23 struct rb_root hists; 25 struct rb_root hists;
24 u64 sample_type; 26 u64 sample_type;
27 struct ref_reloc_sym ref_reloc_sym;
25 int fd; 28 int fd;
26 int cwdlen; 29 int cwdlen;
27 char *cwd; 30 char *cwd;
@@ -31,23 +34,25 @@ struct perf_session {
31typedef int (*event_op)(event_t *self, struct perf_session *session); 34typedef int (*event_op)(event_t *self, struct perf_session *session);
32 35
33struct perf_event_ops { 36struct perf_event_ops {
34 event_op process_sample_event; 37 event_op sample,
35 event_op process_mmap_event; 38 mmap,
36 event_op process_comm_event; 39 comm,
37 event_op process_fork_event; 40 fork,
38 event_op process_exit_event; 41 exit,
39 event_op process_lost_event; 42 lost,
40 event_op process_read_event; 43 read,
41 event_op process_throttle_event; 44 throttle,
42 event_op process_unthrottle_event; 45 unthrottle;
43 int (*sample_type_check)(struct perf_session *session);
44 unsigned long total_unknown;
45 bool full_paths;
46}; 46};
47 47
48struct perf_session *perf_session__new(const char *filename, int mode, bool force); 48struct perf_session *perf_session__new(const char *filename, int mode, bool force);
49void perf_session__delete(struct perf_session *self); 49void perf_session__delete(struct perf_session *self);
50 50
51void perf_event_header__bswap(struct perf_event_header *self);
52
53int __perf_session__process_events(struct perf_session *self,
54 u64 data_offset, u64 data_size, u64 size,
55 struct perf_event_ops *ops);
51int perf_session__process_events(struct perf_session *self, 56int perf_session__process_events(struct perf_session *self,
52 struct perf_event_ops *event_ops); 57 struct perf_event_ops *event_ops);
53 58
@@ -56,6 +61,28 @@ struct symbol **perf_session__resolve_callchain(struct perf_session *self,
56 struct ip_callchain *chain, 61 struct ip_callchain *chain,
57 struct symbol **parent); 62 struct symbol **parent);
58 63
59int perf_header__read_build_ids(int input, u64 offset, u64 file_size); 64bool perf_session__has_traces(struct perf_session *self, const char *msg);
65
66int perf_header__read_build_ids(struct perf_header *self, int input,
67 u64 offset, u64 file_size);
68
69int perf_session__set_kallsyms_ref_reloc_sym(struct perf_session *self,
70 const char *symbol_name,
71 u64 addr);
72
73void mem_bswap_64(void *src, int byte_size);
74
75static inline int __perf_session__create_kernel_maps(struct perf_session *self,
76 struct dso *kernel)
77{
78 return __map_groups__create_kernel_maps(&self->kmaps,
79 self->vmlinux_maps, kernel);
80}
60 81
82static inline struct map *
83 perf_session__new_module_map(struct perf_session *self,
84 u64 start, const char *filename)
85{
86 return map_groups__new_module(&self->kmaps, start, filename);
87}
61#endif /* __PERF_SESSION_H */ 88#endif /* __PERF_SESSION_H */
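
With perf_event_ops trimmed to the short member names above, a builtin command sets only the callbacks it cares about; perf_event_ops__fill_defaults(), called from session.c before processing, presumably stubs the rest. A hedged usage sketch, not taken from the patch: process_sample() and run() are hypothetical, the mode argument is assumed to be an open(2) flag, and the usual builtin includes (util/session.h, fcntl.h) are assumed:

static int process_sample(event_t *event, struct perf_session *session)
{
	/* consume one PERF_RECORD_SAMPLE here */
	(void)event;
	(void)session;
	return 0;
}

static struct perf_event_ops ops = {
	.sample = process_sample,
	/* unset members are filled in with defaults before processing */
};

static int run(const char *input_name)
{
	struct perf_session *session = perf_session__new(input_name, O_RDONLY, false);
	int err;

	if (session == NULL)
		return -1;

	err = perf_session__process_events(session, &ops);
	perf_session__delete(session);
	return err;
}
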
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index 5352d7dccc61..c397d4f6f748 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -227,16 +227,73 @@ fail:
227 return NULL; 227 return NULL;
228} 228}
229 229
230/* Glob expression pattern matching */ 230/* Character class matching */
231static bool __match_charclass(const char *pat, char c, const char **npat)
232{
233 bool complement = false, ret = true;
234
235 if (*pat == '!') {
236 complement = true;
237 pat++;
238 }
239 if (*pat++ == c) /* First character is special */
240 goto end;
241
242 while (*pat && *pat != ']') { /* Matching */
243 if (*pat == '-' && *(pat + 1) != ']') { /* Range */
244 if (*(pat - 1) <= c && c <= *(pat + 1))
245 goto end;
246 if (*(pat - 1) > *(pat + 1))
247 goto error;
248 pat += 2;
249 } else if (*pat++ == c)
250 goto end;
251 }
252 if (!*pat)
253 goto error;
254 ret = false;
255
256end:
257 while (*pat && *pat != ']') /* Searching closing */
258 pat++;
259 if (!*pat)
260 goto error;
261 *npat = pat + 1;
262 return complement ? !ret : ret;
263
264error:
265 return false;
266}
267
268/**
269 * strglobmatch - glob expression pattern matching
270 * @str: the target string to match
271 * @pat: the pattern string to match
272 *
273 * This returns true if @str matches @pat. @pat can include wildcards
274 * ('*','?') and character classes ([CHARS]; complementation and ranges are
275 * also supported). The escape character ('\') makes the following special
276 * character match as a normal character.
277 *
278 * Note: if @pat syntax is broken, this always returns false.
279 */
231bool strglobmatch(const char *str, const char *pat) 280bool strglobmatch(const char *str, const char *pat)
232{ 281{
233 while (*str && *pat && *pat != '*') { 282 while (*str && *pat && *pat != '*') {
234 if (*pat == '?') { 283 if (*pat == '?') { /* Matches any single character */
235 str++; 284 str++;
236 pat++; 285 pat++;
237 } else 286 continue;
238 if (*str++ != *pat++) 287 } else if (*pat == '[') /* Character classes/Ranges */
288 if (__match_charclass(pat + 1, *str, &pat)) {
289 str++;
290 continue;
291 } else
239 return false; 292 return false;
293 else if (*pat == '\\') /* Escaped char match as normal char */
294 pat++;
295 if (*str++ != *pat++)
296 return false;
240 } 297 }
241 /* Check wild card */ 298 /* Check wild card */
242 if (*pat == '*') { 299 if (*pat == '*') {
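
The matcher now handles character classes, ranges, complementation and escapes in addition to '*' and '?'. A few illustrative calls, written by the editor against the strglobmatch() prototype in tools/perf/util/string.h and traced against the logic above (the '*' tail handling lives in the unchanged part of the function; link with string.c to actually run this):

#include <assert.h>
#include <stdbool.h>

bool strglobmatch(const char *str, const char *pat);	/* from util/string.h */

int main(void)
{
	assert(strglobmatch("sys_open", "sys_*"));		/* '*' wildcard */
	assert(strglobmatch("sys_open", "sys_?pen"));		/* '?' single character */
	assert(strglobmatch("schedule", "s[a-d]hedule"));	/* range inside a class */
	assert(!strglobmatch("schedule", "s[!a-d]hedule"));	/* complemented class */
	assert(strglobmatch("a*b", "a\\*b"));			/* escaped '*' is literal */
	return 0;
}
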
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index ab92763edb03..323c0aea0a91 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1,6 +1,5 @@
1#include "util.h" 1#include "util.h"
2#include "../perf.h" 2#include "../perf.h"
3#include "session.h"
4#include "sort.h" 3#include "sort.h"
5#include "string.h" 4#include "string.h"
6#include "symbol.h" 5#include "symbol.h"
@@ -22,6 +21,7 @@
22enum dso_origin { 21enum dso_origin {
23 DSO__ORIG_KERNEL = 0, 22 DSO__ORIG_KERNEL = 0,
24 DSO__ORIG_JAVA_JIT, 23 DSO__ORIG_JAVA_JIT,
24 DSO__ORIG_BUILD_ID_CACHE,
25 DSO__ORIG_FEDORA, 25 DSO__ORIG_FEDORA,
26 DSO__ORIG_UBUNTU, 26 DSO__ORIG_UBUNTU,
27 DSO__ORIG_BUILDID, 27 DSO__ORIG_BUILDID,
@@ -33,7 +33,7 @@ enum dso_origin {
33static void dsos__add(struct list_head *head, struct dso *dso); 33static void dsos__add(struct list_head *head, struct dso *dso);
34static struct map *map__new2(u64 start, struct dso *dso, enum map_type type); 34static struct map *map__new2(u64 start, struct dso *dso, enum map_type type);
35static int dso__load_kernel_sym(struct dso *self, struct map *map, 35static int dso__load_kernel_sym(struct dso *self, struct map *map,
36 struct perf_session *session, symbol_filter_t filter); 36 symbol_filter_t filter);
37static int vmlinux_path__nr_entries; 37static int vmlinux_path__nr_entries;
38static char **vmlinux_path; 38static char **vmlinux_path;
39 39
@@ -53,17 +53,12 @@ bool dso__sorted_by_name(const struct dso *self, enum map_type type)
53 return self->sorted_by_name & (1 << type); 53 return self->sorted_by_name & (1 << type);
54} 54}
55 55
56static void dso__set_loaded(struct dso *self, enum map_type type)
57{
58 self->loaded |= (1 << type);
59}
60
61static void dso__set_sorted_by_name(struct dso *self, enum map_type type) 56static void dso__set_sorted_by_name(struct dso *self, enum map_type type)
62{ 57{
63 self->sorted_by_name |= (1 << type); 58 self->sorted_by_name |= (1 << type);
64} 59}
65 60
66static bool symbol_type__is_a(char symbol_type, enum map_type map_type) 61bool symbol_type__is_a(char symbol_type, enum map_type map_type)
67{ 62{
68 switch (map_type) { 63 switch (map_type) {
69 case MAP__FUNCTION: 64 case MAP__FUNCTION:
@@ -142,14 +137,14 @@ static struct symbol *symbol__new(u64 start, u64 len, const char *name)
142 self->start = start; 137 self->start = start;
143 self->end = len ? start + len - 1 : start; 138 self->end = len ? start + len - 1 : start;
144 139
145 pr_debug3("%s: %s %#Lx-%#Lx\n", __func__, name, start, self->end); 140 pr_debug4("%s: %s %#Lx-%#Lx\n", __func__, name, start, self->end);
146 141
147 memcpy(self->name, name, namelen); 142 memcpy(self->name, name, namelen);
148 143
149 return self; 144 return self;
150} 145}
151 146
152static void symbol__delete(struct symbol *self) 147void symbol__delete(struct symbol *self)
153{ 148{
154 free(((void *)self) - symbol_conf.priv_size); 149 free(((void *)self) - symbol_conf.priv_size);
155} 150}
@@ -160,7 +155,7 @@ static size_t symbol__fprintf(struct symbol *self, FILE *fp)
160 self->start, self->end, self->name); 155 self->start, self->end, self->name);
161} 156}
162 157
163static void dso__set_long_name(struct dso *self, char *name) 158void dso__set_long_name(struct dso *self, char *name)
164{ 159{
165 if (name == NULL) 160 if (name == NULL)
166 return; 161 return;
@@ -175,7 +170,7 @@ static void dso__set_basename(struct dso *self)
175 170
176struct dso *dso__new(const char *name) 171struct dso *dso__new(const char *name)
177{ 172{
178 struct dso *self = malloc(sizeof(*self) + strlen(name) + 1); 173 struct dso *self = zalloc(sizeof(*self) + strlen(name) + 1);
179 174
180 if (self != NULL) { 175 if (self != NULL) {
181 int i; 176 int i;
@@ -344,10 +339,10 @@ void dso__sort_by_name(struct dso *self, enum map_type type)
344 &self->symbols[type]); 339 &self->symbols[type]);
345} 340}
346 341
347int build_id__sprintf(u8 *self, int len, char *bf) 342int build_id__sprintf(const u8 *self, int len, char *bf)
348{ 343{
349 char *bid = bf; 344 char *bid = bf;
350 u8 *raw = self; 345 const u8 *raw = self;
351 int i; 346 int i;
352 347
353 for (i = 0; i < len; ++i) { 348 for (i = 0; i < len; ++i) {
@@ -372,6 +367,10 @@ size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp)
372 struct rb_node *nd; 367 struct rb_node *nd;
373 size_t ret = fprintf(fp, "dso: %s (", self->short_name); 368 size_t ret = fprintf(fp, "dso: %s (", self->short_name);
374 369
370 if (self->short_name != self->long_name)
371 ret += fprintf(fp, "%s, ", self->long_name);
372 ret += fprintf(fp, "%s, %sloaded, ", map_type__name[type],
373 self->loaded ? "" : "NOT ");
375 ret += dso__fprintf_buildid(self, fp); 374 ret += dso__fprintf_buildid(self, fp);
376 ret += fprintf(fp, ")\n"); 375 ret += fprintf(fp, ")\n");
377 for (nd = rb_first(&self->symbols[type]); nd; nd = rb_next(nd)) { 376 for (nd = rb_first(&self->symbols[type]); nd; nd = rb_next(nd)) {
@@ -382,24 +381,20 @@ size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp)
382 return ret; 381 return ret;
383} 382}
384 383
385/* 384int kallsyms__parse(const char *filename, void *arg,
386 * Loads the function entries in /proc/kallsyms into kernel_map->dso, 385 int (*process_symbol)(void *arg, const char *name,
387 * so that we can in the next step set the symbol ->end address and then 386 char type, u64 start))
388 * call kernel_maps__split_kallsyms.
389 */
390static int dso__load_all_kallsyms(struct dso *self, struct map *map)
391{ 387{
392 char *line = NULL; 388 char *line = NULL;
393 size_t n; 389 size_t n;
394 struct rb_root *root = &self->symbols[map->type]; 390 int err = 0;
395 FILE *file = fopen("/proc/kallsyms", "r"); 391 FILE *file = fopen(filename, "r");
396 392
397 if (file == NULL) 393 if (file == NULL)
398 goto out_failure; 394 goto out_failure;
399 395
400 while (!feof(file)) { 396 while (!feof(file)) {
401 u64 start; 397 u64 start;
402 struct symbol *sym;
403 int line_len, len; 398 int line_len, len;
404 char symbol_type; 399 char symbol_type;
405 char *symbol_name; 400 char *symbol_name;
@@ -420,43 +415,72 @@ static int dso__load_all_kallsyms(struct dso *self, struct map *map)
420 continue; 415 continue;
421 416
422 symbol_type = toupper(line[len]); 417 symbol_type = toupper(line[len]);
423 if (!symbol_type__is_a(symbol_type, map->type))
424 continue;
425
426 symbol_name = line + len + 2; 418 symbol_name = line + len + 2;
427 /*
428 * Will fix up the end later, when we have all symbols sorted.
429 */
430 sym = symbol__new(start, 0, symbol_name);
431 419
432 if (sym == NULL) 420 err = process_symbol(arg, symbol_name, symbol_type, start);
433 goto out_delete_line; 421 if (err)
434 /* 422 break;
435 * We will pass the symbols to the filter later, in
436 * map__split_kallsyms, when we have split the maps per module
437 */
438 symbols__insert(root, sym);
439 } 423 }
440 424
441 free(line); 425 free(line);
442 fclose(file); 426 fclose(file);
427 return err;
443 428
444 return 0;
445
446out_delete_line:
447 free(line);
448out_failure: 429out_failure:
449 return -1; 430 return -1;
450} 431}
451 432
433struct process_kallsyms_args {
434 struct map *map;
435 struct dso *dso;
436};
437
438static int map__process_kallsym_symbol(void *arg, const char *name,
439 char type, u64 start)
440{
441 struct symbol *sym;
442 struct process_kallsyms_args *a = arg;
443 struct rb_root *root = &a->dso->symbols[a->map->type];
444
445 if (!symbol_type__is_a(type, a->map->type))
446 return 0;
447
448 /*
449 * Will fix up the end later, when we have all symbols sorted.
450 */
451 sym = symbol__new(start, 0, name);
452
453 if (sym == NULL)
454 return -ENOMEM;
455 /*
456 * We will pass the symbols to the filter later, in
457 * dso__split_kallsyms, when we have split the maps per module
458 */
459 symbols__insert(root, sym);
460 return 0;
461}
462
463/*
464 * Loads the function entries in /proc/kallsyms into kernel_map->dso,
465 * so that we can in the next step set the symbol ->end address and then
466 * call kernel_maps__split_kallsyms.
467 */
468static int dso__load_all_kallsyms(struct dso *self, const char *filename,
469 struct map *map)
470{
471 struct process_kallsyms_args args = { .map = map, .dso = self, };
472 return kallsyms__parse(filename, &args, map__process_kallsym_symbol);
473}
474
452/* 475/*
453 * Split the symbols into maps, making sure there are no overlaps, i.e. the 476 * Split the symbols into maps, making sure there are no overlaps, i.e. the
454 * kernel range is broken in several maps, named [kernel].N, as we don't have 477 * kernel range is broken in several maps, named [kernel].N, as we don't have
455 * the original ELF section names vmlinux have. 478 * the original ELF section names vmlinux have.
456 */ 479 */
457static int dso__split_kallsyms(struct dso *self, struct map *map, 480static int dso__split_kallsyms(struct dso *self, struct map *map,
458 struct perf_session *session, symbol_filter_t filter) 481 symbol_filter_t filter)
459{ 482{
483 struct map_groups *kmaps = map__kmap(map)->kmaps;
460 struct map *curr_map = map; 484 struct map *curr_map = map;
461 struct symbol *pos; 485 struct symbol *pos;
462 int count = 0; 486 int count = 0;
@@ -477,13 +501,17 @@ static int dso__split_kallsyms(struct dso *self, struct map *map,
477 501
478 *module++ = '\0'; 502 *module++ = '\0';
479 503
480 if (strcmp(self->name, module)) { 504 if (strcmp(curr_map->dso->short_name, module)) {
481 curr_map = map_groups__find_by_name(&session->kmaps, map->type, module); 505 curr_map = map_groups__find_by_name(kmaps, map->type, module);
482 if (curr_map == NULL) { 506 if (curr_map == NULL) {
483 pr_debug("/proc/{kallsyms,modules} " 507 pr_debug("/proc/{kallsyms,modules} "
484 "inconsistency!\n"); 508 "inconsistency while looking "
509 "for \"%s\" module!\n", module);
485 return -1; 510 return -1;
486 } 511 }
512
513 if (curr_map->dso->loaded)
514 goto discard_symbol;
487 } 515 }
488 /* 516 /*
489 * So that we look just like we get from .ko files, 517 * So that we look just like we get from .ko files,
@@ -503,13 +531,13 @@ static int dso__split_kallsyms(struct dso *self, struct map *map,
503 return -1; 531 return -1;
504 532
505 curr_map = map__new2(pos->start, dso, map->type); 533 curr_map = map__new2(pos->start, dso, map->type);
506 if (map == NULL) { 534 if (curr_map == NULL) {
507 dso__delete(dso); 535 dso__delete(dso);
508 return -1; 536 return -1;
509 } 537 }
510 538
511 curr_map->map_ip = curr_map->unmap_ip = identity__map_ip; 539 curr_map->map_ip = curr_map->unmap_ip = identity__map_ip;
512 map_groups__insert(&session->kmaps, curr_map); 540 map_groups__insert(kmaps, curr_map);
513 ++kernel_range; 541 ++kernel_range;
514 } 542 }
515 543
@@ -528,17 +556,16 @@ discard_symbol: rb_erase(&pos->rb_node, root);
528 return count; 556 return count;
529} 557}
530 558
531 559int dso__load_kallsyms(struct dso *self, const char *filename,
532static int dso__load_kallsyms(struct dso *self, struct map *map, 560 struct map *map, symbol_filter_t filter)
533 struct perf_session *session, symbol_filter_t filter)
534{ 561{
535 if (dso__load_all_kallsyms(self, map) < 0) 562 if (dso__load_all_kallsyms(self, filename, map) < 0)
536 return -1; 563 return -1;
537 564
538 symbols__fixup_end(&self->symbols[map->type]); 565 symbols__fixup_end(&self->symbols[map->type]);
539 self->origin = DSO__ORIG_KERNEL; 566 self->origin = DSO__ORIG_KERNEL;
540 567
541 return dso__split_kallsyms(self, map, session, filter); 568 return dso__split_kallsyms(self, map, filter);
542} 569}
543 570
544static int dso__load_perf_map(struct dso *self, struct map *map, 571static int dso__load_perf_map(struct dso *self, struct map *map,
@@ -864,10 +891,10 @@ static bool elf_sec__is_a(GElf_Shdr *self, Elf_Data *secstrs, enum map_type type
864 } 891 }
865} 892}
866 893
867static int dso__load_sym(struct dso *self, struct map *map, 894static int dso__load_sym(struct dso *self, struct map *map, const char *name,
868 struct perf_session *session, const char *name, int fd, 895 int fd, symbol_filter_t filter, int kmodule)
869 symbol_filter_t filter, int kernel, int kmodule)
870{ 896{
897 struct kmap *kmap = self->kernel ? map__kmap(map) : NULL;
871 struct map *curr_map = map; 898 struct map *curr_map = map;
872 struct dso *curr_dso = self; 899 struct dso *curr_dso = self;
873 size_t dso_name_len = strlen(self->short_name); 900 size_t dso_name_len = strlen(self->short_name);
@@ -924,7 +951,7 @@ static int dso__load_sym(struct dso *self, struct map *map,
924 nr_syms = shdr.sh_size / shdr.sh_entsize; 951 nr_syms = shdr.sh_size / shdr.sh_entsize;
925 952
926 memset(&sym, 0, sizeof(sym)); 953 memset(&sym, 0, sizeof(sym));
927 if (!kernel) { 954 if (!self->kernel) {
928 self->adjust_symbols = (ehdr.e_type == ET_EXEC || 955 self->adjust_symbols = (ehdr.e_type == ET_EXEC ||
929 elf_section_by_name(elf, &ehdr, &shdr, 956 elf_section_by_name(elf, &ehdr, &shdr,
930 ".gnu.prelink_undo", 957 ".gnu.prelink_undo",
@@ -933,11 +960,15 @@ static int dso__load_sym(struct dso *self, struct map *map,
933 960
934 elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) { 961 elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) {
935 struct symbol *f; 962 struct symbol *f;
936 const char *elf_name; 963 const char *elf_name = elf_sym__name(&sym, symstrs);
937 char *demangled = NULL; 964 char *demangled = NULL;
938 int is_label = elf_sym__is_label(&sym); 965 int is_label = elf_sym__is_label(&sym);
939 const char *section_name; 966 const char *section_name;
940 967
968 if (kmap && kmap->ref_reloc_sym && kmap->ref_reloc_sym->name &&
969 strcmp(elf_name, kmap->ref_reloc_sym->name) == 0)
970 kmap->ref_reloc_sym->unrelocated_addr = sym.st_value;
971
941 if (!is_label && !elf_sym__is_a(&sym, map->type)) 972 if (!is_label && !elf_sym__is_a(&sym, map->type))
942 continue; 973 continue;
943 974
@@ -950,10 +981,9 @@ static int dso__load_sym(struct dso *self, struct map *map,
950 if (is_label && !elf_sec__is_a(&shdr, secstrs, map->type)) 981 if (is_label && !elf_sec__is_a(&shdr, secstrs, map->type))
951 continue; 982 continue;
952 983
953 elf_name = elf_sym__name(&sym, symstrs);
954 section_name = elf_sec__name(&shdr, secstrs); 984 section_name = elf_sec__name(&shdr, secstrs);
955 985
956 if (kernel || kmodule) { 986 if (self->kernel || kmodule) {
957 char dso_name[PATH_MAX]; 987 char dso_name[PATH_MAX];
958 988
959 if (strcmp(section_name, 989 if (strcmp(section_name,
@@ -969,7 +999,7 @@ static int dso__load_sym(struct dso *self, struct map *map,
969 snprintf(dso_name, sizeof(dso_name), 999 snprintf(dso_name, sizeof(dso_name),
970 "%s%s", self->short_name, section_name); 1000 "%s%s", self->short_name, section_name);
971 1001
972 curr_map = map_groups__find_by_name(&session->kmaps, map->type, dso_name); 1002 curr_map = map_groups__find_by_name(kmap->kmaps, map->type, dso_name);
973 if (curr_map == NULL) { 1003 if (curr_map == NULL) {
974 u64 start = sym.st_value; 1004 u64 start = sym.st_value;
975 1005
@@ -980,7 +1010,7 @@ static int dso__load_sym(struct dso *self, struct map *map,
980 if (curr_dso == NULL) 1010 if (curr_dso == NULL)
981 goto out_elf_end; 1011 goto out_elf_end;
982 curr_map = map__new2(start, curr_dso, 1012 curr_map = map__new2(start, curr_dso,
983 MAP__FUNCTION); 1013 map->type);
984 if (curr_map == NULL) { 1014 if (curr_map == NULL) {
985 dso__delete(curr_dso); 1015 dso__delete(curr_dso);
986 goto out_elf_end; 1016 goto out_elf_end;
@@ -988,8 +1018,9 @@ static int dso__load_sym(struct dso *self, struct map *map,
988 curr_map->map_ip = identity__map_ip; 1018 curr_map->map_ip = identity__map_ip;
989 curr_map->unmap_ip = identity__map_ip; 1019 curr_map->unmap_ip = identity__map_ip;
990 curr_dso->origin = DSO__ORIG_KERNEL; 1020 curr_dso->origin = DSO__ORIG_KERNEL;
991 map_groups__insert(&session->kmaps, curr_map); 1021 map_groups__insert(kmap->kmaps, curr_map);
992 dsos__add(&dsos__kernel, curr_dso); 1022 dsos__add(&dsos__kernel, curr_dso);
1023 dso__set_loaded(curr_dso, map->type);
993 } else 1024 } else
994 curr_dso = curr_map->dso; 1025 curr_dso = curr_map->dso;
995 1026
@@ -997,9 +1028,10 @@ static int dso__load_sym(struct dso *self, struct map *map,
997 } 1028 }
998 1029
999 if (curr_dso->adjust_symbols) { 1030 if (curr_dso->adjust_symbols) {
1000 pr_debug2("adjusting symbol: st_value: %Lx sh_addr: " 1031 pr_debug4("%s: adjusting symbol: st_value: %#Lx "
1001 "%Lx sh_offset: %Lx\n", (u64)sym.st_value, 1032 "sh_addr: %#Lx sh_offset: %#Lx\n", __func__,
1002 (u64)shdr.sh_addr, (u64)shdr.sh_offset); 1033 (u64)sym.st_value, (u64)shdr.sh_addr,
1034 (u64)shdr.sh_offset);
1003 sym.st_value -= shdr.sh_addr - shdr.sh_offset; 1035 sym.st_value -= shdr.sh_addr - shdr.sh_offset;
1004 } 1036 }
1005 /* 1037 /*
@@ -1027,8 +1059,16 @@ new_symbol:
1027 /* 1059 /*
1028 * For misannotated, zeroed, ASM function sizes. 1060 * For misannotated, zeroed, ASM function sizes.
1029 */ 1061 */
1030 if (nr > 0) 1062 if (nr > 0) {
1031 symbols__fixup_end(&self->symbols[map->type]); 1063 symbols__fixup_end(&self->symbols[map->type]);
1064 if (kmap) {
1065 /*
1066 * We need to fix this up here too because we create new
1067 * maps here, for things like vsyscall sections.
1068 */
1069 __map_groups__fixup_end(kmap->kmaps, map->type);
1070 }
1071 }
1032 err = nr; 1072 err = nr;
1033out_elf_end: 1073out_elf_end:
1034 elf_end(elf); 1074 elf_end(elf);
@@ -1041,25 +1081,28 @@ static bool dso__build_id_equal(const struct dso *self, u8 *build_id)
1041 return memcmp(self->build_id, build_id, sizeof(self->build_id)) == 0; 1081 return memcmp(self->build_id, build_id, sizeof(self->build_id)) == 0;
1042} 1082}
1043 1083
1044static bool __dsos__read_build_ids(struct list_head *head) 1084static bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
1045{ 1085{
1046 bool have_build_id = false; 1086 bool have_build_id = false;
1047 struct dso *pos; 1087 struct dso *pos;
1048 1088
1049 list_for_each_entry(pos, head, node) 1089 list_for_each_entry(pos, head, node) {
1090 if (with_hits && !pos->hit)
1091 continue;
1050 if (filename__read_build_id(pos->long_name, pos->build_id, 1092 if (filename__read_build_id(pos->long_name, pos->build_id,
1051 sizeof(pos->build_id)) > 0) { 1093 sizeof(pos->build_id)) > 0) {
1052 have_build_id = true; 1094 have_build_id = true;
1053 pos->has_build_id = true; 1095 pos->has_build_id = true;
1054 } 1096 }
1097 }
1055 1098
1056 return have_build_id; 1099 return have_build_id;
1057} 1100}
1058 1101
1059bool dsos__read_build_ids(void) 1102bool dsos__read_build_ids(bool with_hits)
1060{ 1103{
1061 bool kbuildids = __dsos__read_build_ids(&dsos__kernel), 1104 bool kbuildids = __dsos__read_build_ids(&dsos__kernel, with_hits),
1062 ubuildids = __dsos__read_build_ids(&dsos__user); 1105 ubuildids = __dsos__read_build_ids(&dsos__user, with_hits);
1063 return kbuildids || ubuildids; 1106 return kbuildids || ubuildids;
1064} 1107}
1065 1108
@@ -1191,6 +1234,7 @@ char dso__symtab_origin(const struct dso *self)
1191 static const char origin[] = { 1234 static const char origin[] = {
1192 [DSO__ORIG_KERNEL] = 'k', 1235 [DSO__ORIG_KERNEL] = 'k',
1193 [DSO__ORIG_JAVA_JIT] = 'j', 1236 [DSO__ORIG_JAVA_JIT] = 'j',
1237 [DSO__ORIG_BUILD_ID_CACHE] = 'B',
1194 [DSO__ORIG_FEDORA] = 'f', 1238 [DSO__ORIG_FEDORA] = 'f',
1195 [DSO__ORIG_UBUNTU] = 'u', 1239 [DSO__ORIG_UBUNTU] = 'u',
1196 [DSO__ORIG_BUILDID] = 'b', 1240 [DSO__ORIG_BUILDID] = 'b',
@@ -1203,19 +1247,19 @@ char dso__symtab_origin(const struct dso *self)
1203 return origin[self->origin]; 1247 return origin[self->origin];
1204} 1248}
1205 1249
1206int dso__load(struct dso *self, struct map *map, struct perf_session *session, 1250int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
1207 symbol_filter_t filter)
1208{ 1251{
1209 int size = PATH_MAX; 1252 int size = PATH_MAX;
1210 char *name; 1253 char *name;
1211 u8 build_id[BUILD_ID_SIZE]; 1254 u8 build_id[BUILD_ID_SIZE];
1255 char build_id_hex[BUILD_ID_SIZE * 2 + 1];
1212 int ret = -1; 1256 int ret = -1;
1213 int fd; 1257 int fd;
1214 1258
1215 dso__set_loaded(self, map->type); 1259 dso__set_loaded(self, map->type);
1216 1260
1217 if (self->kernel) 1261 if (self->kernel)
1218 return dso__load_kernel_sym(self, map, session, filter); 1262 return dso__load_kernel_sym(self, map, filter);
1219 1263
1220 name = malloc(size); 1264 name = malloc(size);
1221 if (!name) 1265 if (!name)
@@ -1230,8 +1274,16 @@ int dso__load(struct dso *self, struct map *map, struct perf_session *session,
1230 return ret; 1274 return ret;
1231 } 1275 }
1232 1276
1233 self->origin = DSO__ORIG_FEDORA - 1; 1277 self->origin = DSO__ORIG_BUILD_ID_CACHE;
1234 1278
1279 if (self->has_build_id) {
1280 build_id__sprintf(self->build_id, sizeof(self->build_id),
1281 build_id_hex);
1282 snprintf(name, size, "%s/%s/.build-id/%.2s/%s",
1283 getenv("HOME"), DEBUG_CACHE_DIR,
1284 build_id_hex, build_id_hex + 2);
1285 goto open_file;
1286 }
1235more: 1287more:
1236 do { 1288 do {
1237 self->origin++; 1289 self->origin++;
@@ -1247,8 +1299,6 @@ more:
1247 case DSO__ORIG_BUILDID: 1299 case DSO__ORIG_BUILDID:
1248 if (filename__read_build_id(self->long_name, build_id, 1300 if (filename__read_build_id(self->long_name, build_id,
1249 sizeof(build_id))) { 1301 sizeof(build_id))) {
1250 char build_id_hex[BUILD_ID_SIZE * 2 + 1];
1251
1252 build_id__sprintf(build_id, sizeof(build_id), 1302 build_id__sprintf(build_id, sizeof(build_id),
1253 build_id_hex); 1303 build_id_hex);
1254 snprintf(name, size, 1304 snprintf(name, size,
@@ -1276,11 +1326,11 @@ compare_build_id:
1276 if (!dso__build_id_equal(self, build_id)) 1326 if (!dso__build_id_equal(self, build_id))
1277 goto more; 1327 goto more;
1278 } 1328 }
1279 1329open_file:
1280 fd = open(name, O_RDONLY); 1330 fd = open(name, O_RDONLY);
1281 } while (fd < 0); 1331 } while (fd < 0);
1282 1332
1283 ret = dso__load_sym(self, map, NULL, name, fd, filter, 0, 0); 1333 ret = dso__load_sym(self, map, name, fd, filter, 0);
1284 close(fd); 1334 close(fd);
1285 1335
1286 /* 1336 /*
@@ -1309,14 +1359,34 @@ struct map *map_groups__find_by_name(struct map_groups *self,
1309 for (nd = rb_first(&self->maps[type]); nd; nd = rb_next(nd)) { 1359 for (nd = rb_first(&self->maps[type]); nd; nd = rb_next(nd)) {
1310 struct map *map = rb_entry(nd, struct map, rb_node); 1360 struct map *map = rb_entry(nd, struct map, rb_node);
1311 1361
1312 if (map->dso && strcmp(map->dso->name, name) == 0) 1362 if (map->dso && strcmp(map->dso->short_name, name) == 0)
1313 return map; 1363 return map;
1314 } 1364 }
1315 1365
1316 return NULL; 1366 return NULL;
1317} 1367}
1318 1368
1319static int perf_session__set_modules_path_dir(struct perf_session *self, char *dirname) 1369static int dso__kernel_module_get_build_id(struct dso *self)
1370{
1371 char filename[PATH_MAX];
1372 /*
1373 * kernel module short names are of the form "[module]" and
1374 * we need just "module" here.
1375 */
1376 const char *name = self->short_name + 1;
1377
1378 snprintf(filename, sizeof(filename),
1379 "/sys/module/%.*s/notes/.note.gnu.build-id",
1380 (int)strlen(name) - 1, name);
1381
1382 if (sysfs__read_build_id(filename, self->build_id,
1383 sizeof(self->build_id)) == 0)
1384 self->has_build_id = true;
1385
1386 return 0;
1387}
1388
1389static int map_groups__set_modules_path_dir(struct map_groups *self, char *dirname)
1320{ 1390{
1321 struct dirent *dent; 1391 struct dirent *dent;
1322 DIR *dir = opendir(dirname); 1392 DIR *dir = opendir(dirname);
@@ -1336,7 +1406,7 @@ static int perf_session__set_modules_path_dir(struct perf_session *self, char *d
1336 1406
1337 snprintf(path, sizeof(path), "%s/%s", 1407 snprintf(path, sizeof(path), "%s/%s",
1338 dirname, dent->d_name); 1408 dirname, dent->d_name);
1339 if (perf_session__set_modules_path_dir(self, path) < 0) 1409 if (map_groups__set_modules_path_dir(self, path) < 0)
1340 goto failure; 1410 goto failure;
1341 } else { 1411 } else {
1342 char *dot = strrchr(dent->d_name, '.'), 1412 char *dot = strrchr(dent->d_name, '.'),
@@ -1350,7 +1420,7 @@ static int perf_session__set_modules_path_dir(struct perf_session *self, char *d
1350 (int)(dot - dent->d_name), dent->d_name); 1420 (int)(dot - dent->d_name), dent->d_name);
1351 1421
1352 strxfrchar(dso_name, '-', '_'); 1422 strxfrchar(dso_name, '-', '_');
1353 map = map_groups__find_by_name(&self->kmaps, MAP__FUNCTION, dso_name); 1423 map = map_groups__find_by_name(self, MAP__FUNCTION, dso_name);
1354 if (map == NULL) 1424 if (map == NULL)
1355 continue; 1425 continue;
1356 1426
@@ -1361,6 +1431,7 @@ static int perf_session__set_modules_path_dir(struct perf_session *self, char *d
1361 if (long_name == NULL) 1431 if (long_name == NULL)
1362 goto failure; 1432 goto failure;
1363 dso__set_long_name(map->dso, long_name); 1433 dso__set_long_name(map->dso, long_name);
1434 dso__kernel_module_get_build_id(map->dso);
1364 } 1435 }
1365 } 1436 }
1366 1437
@@ -1370,7 +1441,7 @@ failure:
1370 return -1; 1441 return -1;
1371} 1442}
1372 1443
1373static int perf_session__set_modules_path(struct perf_session *self) 1444static int map_groups__set_modules_path(struct map_groups *self)
1374{ 1445{
1375 struct utsname uts; 1446 struct utsname uts;
1376 char modules_path[PATH_MAX]; 1447 char modules_path[PATH_MAX];
@@ -1381,7 +1452,7 @@ static int perf_session__set_modules_path(struct perf_session *self)
1381 snprintf(modules_path, sizeof(modules_path), "/lib/modules/%s/kernel", 1452 snprintf(modules_path, sizeof(modules_path), "/lib/modules/%s/kernel",
1382 uts.release); 1453 uts.release);
1383 1454
1384 return perf_session__set_modules_path_dir(self, modules_path); 1455 return map_groups__set_modules_path_dir(self, modules_path);
1385} 1456}
1386 1457
1387/* 1458/*
@@ -1391,8 +1462,8 @@ static int perf_session__set_modules_path(struct perf_session *self)
1391 */ 1462 */
1392static struct map *map__new2(u64 start, struct dso *dso, enum map_type type) 1463static struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
1393{ 1464{
1394 struct map *self = malloc(sizeof(*self)); 1465 struct map *self = zalloc(sizeof(*self) +
1395 1466 (dso->kernel ? sizeof(struct kmap) : 0));
1396 if (self != NULL) { 1467 if (self != NULL) {
1397 /* 1468 /*
1398 * ->end will be filled after we load all the symbols 1469 * ->end will be filled after we load all the symbols
@@ -1403,7 +1474,25 @@ static struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
1403 return self; 1474 return self;
1404} 1475}
1405 1476
1406static int perf_session__create_module_maps(struct perf_session *self) 1477struct map *map_groups__new_module(struct map_groups *self, u64 start,
1478 const char *filename)
1479{
1480 struct map *map;
1481 struct dso *dso = __dsos__findnew(&dsos__kernel, filename);
1482
1483 if (dso == NULL)
1484 return NULL;
1485
1486 map = map__new2(start, dso, MAP__FUNCTION);
1487 if (map == NULL)
1488 return NULL;
1489
1490 dso->origin = DSO__ORIG_KMODULE;
1491 map_groups__insert(self, map);
1492 return map;
1493}
1494
1495static int map_groups__create_modules(struct map_groups *self)
1407{ 1496{
1408 char *line = NULL; 1497 char *line = NULL;
1409 size_t n; 1498 size_t n;
@@ -1416,7 +1505,6 @@ static int perf_session__create_module_maps(struct perf_session *self)
1416 while (!feof(file)) { 1505 while (!feof(file)) {
1417 char name[PATH_MAX]; 1506 char name[PATH_MAX];
1418 u64 start; 1507 u64 start;
1419 struct dso *dso;
1420 char *sep; 1508 char *sep;
1421 int line_len; 1509 int line_len;
1422 1510
@@ -1442,32 +1530,16 @@ static int perf_session__create_module_maps(struct perf_session *self)
1442 *sep = '\0'; 1530 *sep = '\0';
1443 1531
1444 snprintf(name, sizeof(name), "[%s]", line); 1532 snprintf(name, sizeof(name), "[%s]", line);
1445 dso = dso__new(name); 1533 map = map_groups__new_module(self, start, name);
1446 1534 if (map == NULL)
1447 if (dso == NULL)
1448 goto out_delete_line;
1449
1450 map = map__new2(start, dso, MAP__FUNCTION);
1451 if (map == NULL) {
1452 dso__delete(dso);
1453 goto out_delete_line; 1535 goto out_delete_line;
1454 } 1536 dso__kernel_module_get_build_id(map->dso);
1455
1456 snprintf(name, sizeof(name),
1457 "/sys/module/%s/notes/.note.gnu.build-id", line);
1458 if (sysfs__read_build_id(name, dso->build_id,
1459 sizeof(dso->build_id)) == 0)
1460 dso->has_build_id = true;
1461
1462 dso->origin = DSO__ORIG_KMODULE;
1463 map_groups__insert(&self->kmaps, map);
1464 dsos__add(&dsos__kernel, dso);
1465 } 1537 }
1466 1538
1467 free(line); 1539 free(line);
1468 fclose(file); 1540 fclose(file);
1469 1541
1470 return perf_session__set_modules_path(self); 1542 return map_groups__set_modules_path(self);
1471 1543
1472out_delete_line: 1544out_delete_line:
1473 free(line); 1545 free(line);
@@ -1476,7 +1548,6 @@ out_failure:
1476} 1548}
1477 1549
1478static int dso__load_vmlinux(struct dso *self, struct map *map, 1550static int dso__load_vmlinux(struct dso *self, struct map *map,
1479 struct perf_session *session,
1480 const char *vmlinux, symbol_filter_t filter) 1551 const char *vmlinux, symbol_filter_t filter)
1481{ 1552{
1482 int err = -1, fd; 1553 int err = -1, fd;
@@ -1510,51 +1581,124 @@ static int dso__load_vmlinux(struct dso *self, struct map *map,
1510 return -1; 1581 return -1;
1511 1582
1512 dso__set_loaded(self, map->type); 1583 dso__set_loaded(self, map->type);
1513 err = dso__load_sym(self, map, session, self->long_name, fd, filter, 1, 0); 1584 err = dso__load_sym(self, map, vmlinux, fd, filter, 0);
1514 close(fd); 1585 close(fd);
1515 1586
1587 if (err > 0)
1588 pr_debug("Using %s for symbols\n", vmlinux);
1589
1590 return err;
1591}
1592
1593int dso__load_vmlinux_path(struct dso *self, struct map *map,
1594 symbol_filter_t filter)
1595{
1596 int i, err = 0;
1597
1598 pr_debug("Looking at the vmlinux_path (%d entries long)\n",
1599 vmlinux_path__nr_entries);
1600
1601 for (i = 0; i < vmlinux_path__nr_entries; ++i) {
1602 err = dso__load_vmlinux(self, map, vmlinux_path[i], filter);
1603 if (err > 0) {
1604 dso__set_long_name(self, strdup(vmlinux_path[i]));
1605 break;
1606 }
1607 }
1608
1516 return err; 1609 return err;
1517} 1610}
1518 1611
1519static int dso__load_kernel_sym(struct dso *self, struct map *map, 1612static int dso__load_kernel_sym(struct dso *self, struct map *map,
1520 struct perf_session *session, symbol_filter_t filter) 1613 symbol_filter_t filter)
1521{ 1614{
1522 int err; 1615 int err;
1523 bool is_kallsyms; 1616 const char *kallsyms_filename = NULL;
1617 char *kallsyms_allocated_filename = NULL;
1618 /*
1619 * Step 1: if the user specified a vmlinux filename, use it and only
1620 * it, reporting errors to the user if it cannot be used.
1621 *
1622 * For instance, try to analyse an ARM perf.data file _without_ a
1623 * build-id, or if the user specifies the wrong path to the right
1624 * vmlinux file, obviously we can't fall back to another vmlinux (an
1625 * x86_64 one, on the machine where analysis is being performed, say),
1626 * or worse, /proc/kallsyms.
1627 *
1628 * If the specified file _has_ a build-id and there is a build-id
1629 * section in the perf.data file, we will still do the expected
1630 * validation in dso__load_vmlinux and will bail out if they don't
1631 * match.
1632 */
1633 if (symbol_conf.vmlinux_name != NULL) {
1634 err = dso__load_vmlinux(self, map,
1635 symbol_conf.vmlinux_name, filter);
1636 goto out_try_fixup;
1637 }
1524 1638
1525 if (vmlinux_path != NULL) { 1639 if (vmlinux_path != NULL) {
1526 int i; 1640 err = dso__load_vmlinux_path(self, map, filter);
1527 pr_debug("Looking at the vmlinux_path (%d entries long)\n", 1641 if (err > 0)
1528 vmlinux_path__nr_entries); 1642 goto out_fixup;
1529 for (i = 0; i < vmlinux_path__nr_entries; ++i) { 1643 }
1530 err = dso__load_vmlinux(self, map, session, 1644
1531 vmlinux_path[i], filter); 1645 /*
1532 if (err > 0) { 1646 * Say the kernel DSO was created when processing the build-id header table,
1533 pr_debug("Using %s for symbols\n", 1647 * we have a build-id, so check if it is the same as the running kernel,
1534 vmlinux_path[i]); 1648 * using it if it is.
1535 dso__set_long_name(self, 1649 */
1536 strdup(vmlinux_path[i])); 1650 if (self->has_build_id) {
1537 goto out_fixup; 1651 u8 kallsyms_build_id[BUILD_ID_SIZE];
1652 char sbuild_id[BUILD_ID_SIZE * 2 + 1];
1653
1654 if (sysfs__read_build_id("/sys/kernel/notes", kallsyms_build_id,
1655 sizeof(kallsyms_build_id)) == 0) {
1656 if (dso__build_id_equal(self, kallsyms_build_id)) {
1657 kallsyms_filename = "/proc/kallsyms";
1658 goto do_kallsyms;
1538 } 1659 }
1539 } 1660 }
1540 } 1661 /*
1662 * Now check whether we have it in the build-id cache at
1663 * $HOME/.debug/[kernel.kallsyms].
1664 */
1665 build_id__sprintf(self->build_id, sizeof(self->build_id),
1666 sbuild_id);
1541 1667
1542 is_kallsyms = self->long_name[0] == '['; 1668 if (asprintf(&kallsyms_allocated_filename,
1543 if (is_kallsyms) 1669 "%s/.debug/[kernel.kallsyms]/%s",
1544 goto do_kallsyms; 1670 getenv("HOME"), sbuild_id) == -1) {
1671 pr_err("Not enough memory for kallsyms file lookup\n");
1672 return -1;
1673 }
1545 1674
1546 err = dso__load_vmlinux(self, map, session, self->long_name, filter); 1675 kallsyms_filename = kallsyms_allocated_filename;
1547 if (err <= 0) { 1676
1548 pr_info("The file %s cannot be used, " 1677 if (access(kallsyms_filename, F_OK)) {
1549 "trying to use /proc/kallsyms...", self->long_name); 1678 pr_err("No kallsyms or vmlinux with build-id %s "
1550do_kallsyms: 1679 "was found\n", sbuild_id);
1551 err = dso__load_kallsyms(self, map, session, filter); 1680 free(kallsyms_allocated_filename);
1552 if (err > 0 && !is_kallsyms) 1681 return -1;
1553 dso__set_long_name(self, strdup("[kernel.kallsyms]")); 1682 }
1683 } else {
1684 /*
1685 * Last resort, if we don't have a build-id and couldn't find
1686 * any vmlinux file, try the running kernel kallsyms table.
1687 */
1688 kallsyms_filename = "/proc/kallsyms";
1554 } 1689 }
1555 1690
1691do_kallsyms:
1692 err = dso__load_kallsyms(self, kallsyms_filename, map, filter);
1693 if (err > 0)
1694 pr_debug("Using %s for symbols\n", kallsyms_filename);
1695 free(kallsyms_allocated_filename);
1696
1697out_try_fixup:
1556 if (err > 0) { 1698 if (err > 0) {
1557out_fixup: 1699out_fixup:
1700 if (kallsyms_filename != NULL)
1701 dso__set_long_name(self, strdup("[kernel.kallsyms]"));
1558 map__fixup_start(map); 1702 map__fixup_start(map);
1559 map__fixup_end(map); 1703 map__fixup_end(map);
1560 } 1704 }
@@ -1564,7 +1708,6 @@ out_fixup:
1564 1708
1565LIST_HEAD(dsos__user); 1709LIST_HEAD(dsos__user);
1566LIST_HEAD(dsos__kernel); 1710LIST_HEAD(dsos__kernel);
1567struct dso *vdso;
1568 1711
1569static void dsos__add(struct list_head *head, struct dso *dso) 1712static void dsos__add(struct list_head *head, struct dso *dso)
1570{ 1713{
@@ -1576,19 +1719,19 @@ static struct dso *dsos__find(struct list_head *head, const char *name)
1576 struct dso *pos; 1719 struct dso *pos;
1577 1720
1578 list_for_each_entry(pos, head, node) 1721 list_for_each_entry(pos, head, node)
1579 if (strcmp(pos->name, name) == 0) 1722 if (strcmp(pos->long_name, name) == 0)
1580 return pos; 1723 return pos;
1581 return NULL; 1724 return NULL;
1582} 1725}
1583 1726
1584struct dso *dsos__findnew(const char *name) 1727struct dso *__dsos__findnew(struct list_head *head, const char *name)
1585{ 1728{
1586 struct dso *dso = dsos__find(&dsos__user, name); 1729 struct dso *dso = dsos__find(head, name);
1587 1730
1588 if (!dso) { 1731 if (!dso) {
1589 dso = dso__new(name); 1732 dso = dso__new(name);
1590 if (dso != NULL) { 1733 if (dso != NULL) {
1591 dsos__add(&dsos__user, dso); 1734 dsos__add(head, dso);
1592 dso__set_basename(dso); 1735 dso__set_basename(dso);
1593 } 1736 }
1594 } 1737 }
@@ -1613,75 +1756,78 @@ void dsos__fprintf(FILE *fp)
1613 __dsos__fprintf(&dsos__user, fp); 1756 __dsos__fprintf(&dsos__user, fp);
1614} 1757}
1615 1758
1616static size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp) 1759static size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
1760 bool with_hits)
1617{ 1761{
1618 struct dso *pos; 1762 struct dso *pos;
1619 size_t ret = 0; 1763 size_t ret = 0;
1620 1764
1621 list_for_each_entry(pos, head, node) { 1765 list_for_each_entry(pos, head, node) {
1766 if (with_hits && !pos->hit)
1767 continue;
1622 ret += dso__fprintf_buildid(pos, fp); 1768 ret += dso__fprintf_buildid(pos, fp);
1623 ret += fprintf(fp, " %s\n", pos->long_name); 1769 ret += fprintf(fp, " %s\n", pos->long_name);
1624 } 1770 }
1625 return ret; 1771 return ret;
1626} 1772}
1627 1773
1628size_t dsos__fprintf_buildid(FILE *fp) 1774size_t dsos__fprintf_buildid(FILE *fp, bool with_hits)
1629{ 1775{
1630 return (__dsos__fprintf_buildid(&dsos__kernel, fp) + 1776 return (__dsos__fprintf_buildid(&dsos__kernel, fp, with_hits) +
1631 __dsos__fprintf_buildid(&dsos__user, fp)); 1777 __dsos__fprintf_buildid(&dsos__user, fp, with_hits));
1632} 1778}
1633 1779
1634static struct dso *dsos__create_kernel( const char *vmlinux) 1780struct dso *dso__new_kernel(const char *name)
1635{ 1781{
1636 struct dso *kernel = dso__new(vmlinux ?: "[kernel.kallsyms]"); 1782 struct dso *self = dso__new(name ?: "[kernel.kallsyms]");
1637 1783
1638 if (kernel == NULL) 1784 if (self != NULL) {
1639 return NULL; 1785 self->short_name = "[kernel]";
1786 self->kernel = 1;
1787 }
1640 1788
1641 kernel->short_name = "[kernel]"; 1789 return self;
1642 kernel->kernel = 1; 1790}
1643 1791
1644 vdso = dso__new("[vdso]"); 1792void dso__read_running_kernel_build_id(struct dso *self)
1645 if (vdso == NULL) 1793{
1646 goto out_delete_kernel_dso; 1794 if (sysfs__read_build_id("/sys/kernel/notes", self->build_id,
1647 dso__set_loaded(vdso, MAP__FUNCTION); 1795 sizeof(self->build_id)) == 0)
1796 self->has_build_id = true;
1797}
1648 1798
1649 if (sysfs__read_build_id("/sys/kernel/notes", kernel->build_id, 1799static struct dso *dsos__create_kernel(const char *vmlinux)
1650 sizeof(kernel->build_id)) == 0) 1800{
1651 kernel->has_build_id = true; 1801 struct dso *kernel = dso__new_kernel(vmlinux);
1652 1802
1653 dsos__add(&dsos__kernel, kernel); 1803 if (kernel != NULL) {
1654 dsos__add(&dsos__user, vdso); 1804 dso__read_running_kernel_build_id(kernel);
1805 dsos__add(&dsos__kernel, kernel);
1806 }
1655 1807
1656 return kernel; 1808 return kernel;
1657
1658out_delete_kernel_dso:
1659 dso__delete(kernel);
1660 return NULL;
1661} 1809}
1662 1810
1663static int map_groups__create_kernel_maps(struct map_groups *self, const char *vmlinux) 1811int __map_groups__create_kernel_maps(struct map_groups *self,
1812 struct map *vmlinux_maps[MAP__NR_TYPES],
1813 struct dso *kernel)
1664{ 1814{
1665 struct map *functions, *variables; 1815 enum map_type type;
1666 struct dso *kernel = dsos__create_kernel(vmlinux);
1667 1816
1668 if (kernel == NULL) 1817 for (type = 0; type < MAP__NR_TYPES; ++type) {
1669 return -1; 1818 struct kmap *kmap;
1670 1819
1671 functions = map__new2(0, kernel, MAP__FUNCTION); 1820 vmlinux_maps[type] = map__new2(0, kernel, type);
1672 if (functions == NULL) 1821 if (vmlinux_maps[type] == NULL)
1673 return -1; 1822 return -1;
1674 1823
1675 variables = map__new2(0, kernel, MAP__VARIABLE); 1824 vmlinux_maps[type]->map_ip =
1676 if (variables == NULL) { 1825 vmlinux_maps[type]->unmap_ip = identity__map_ip;
1677 map__delete(functions);
1678 return -1;
1679 }
1680 1826
1681 functions->map_ip = functions->unmap_ip = 1827 kmap = map__kmap(vmlinux_maps[type]);
1682 variables->map_ip = variables->unmap_ip = identity__map_ip; 1828 kmap->kmaps = self;
1683 map_groups__insert(self, functions); 1829 map_groups__insert(self, vmlinux_maps[type]);
1684 map_groups__insert(self, variables); 1830 }
1685 1831
1686 return 0; 1832 return 0;
1687} 1833}
@@ -1791,19 +1937,22 @@ out_free_comm_list:
1791 return -1; 1937 return -1;
1792} 1938}
1793 1939
1794int perf_session__create_kernel_maps(struct perf_session *self) 1940int map_groups__create_kernel_maps(struct map_groups *self,
1941 struct map *vmlinux_maps[MAP__NR_TYPES])
1795{ 1942{
1796 if (map_groups__create_kernel_maps(&self->kmaps, 1943 struct dso *kernel = dsos__create_kernel(symbol_conf.vmlinux_name);
1797 symbol_conf.vmlinux_name) < 0) 1944
1945 if (kernel == NULL)
1946 return -1;
1947
1948 if (__map_groups__create_kernel_maps(self, vmlinux_maps, kernel) < 0)
1798 return -1; 1949 return -1;
1799 1950
1800 if (symbol_conf.use_modules && 1951 if (symbol_conf.use_modules && map_groups__create_modules(self) < 0)
1801 perf_session__create_module_maps(self) < 0) 1952 pr_debug("Problems creating module maps, continuing anyway...\n");
1802 pr_debug("Failed to load list of modules for session %s, "
1803 "continuing...\n", self->filename);
1804 /* 1953 /*
1805 * Now that we have all the maps created, just set the ->end of them: 1954 * Now that we have all the maps created, just set the ->end of them:
1806 */ 1955 */
1807 map_groups__fixup_end(&self->kmaps); 1956 map_groups__fixup_end(self);
1808 return 0; 1957 return 0;
1809} 1958}
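
kallsyms__parse() above turns the old open-coded /proc/kallsyms loop into a reusable walker that hands each (name, type, address) triple to a callback. A hedged sketch of a caller: count_text() and print_nr_functions() are hypothetical, while kallsyms__parse(), symbol_type__is_a() and MAP__FUNCTION are the names introduced or used in this patch; perf's util headers are assumed for u64:

static int count_text(void *arg, const char *name, char type, u64 start)
{
	int *nr = arg;

	(void)name;
	(void)start;
	if (symbol_type__is_a(type, MAP__FUNCTION))
		(*nr)++;
	return 0;	/* returning non-zero stops the walk */
}

static int print_nr_functions(void)
{
	int nr = 0;

	if (kallsyms__parse("/proc/kallsyms", &nr, count_text) < 0)
		return -1;

	printf("%d function symbols\n", nr);
	return 0;
}
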
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 8aded2356f79..280dadd32a08 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -8,6 +8,8 @@
8#include <linux/rbtree.h> 8#include <linux/rbtree.h>
9#include "event.h" 9#include "event.h"
10 10
11#define DEBUG_CACHE_DIR ".debug"
12
11#ifdef HAVE_CPLUS_DEMANGLE 13#ifdef HAVE_CPLUS_DEMANGLE
12extern char *cplus_demangle(const char *, int); 14extern char *cplus_demangle(const char *, int);
13 15
@@ -49,6 +51,8 @@ struct symbol {
49 char name[0]; 51 char name[0];
50}; 52};
51 53
54void symbol__delete(struct symbol *self);
55
52struct strlist; 56struct strlist;
53 57
54struct symbol_conf { 58struct symbol_conf {
@@ -58,7 +62,8 @@ struct symbol_conf {
58 sort_by_name, 62 sort_by_name,
59 show_nr_samples, 63 show_nr_samples,
60 use_callchain, 64 use_callchain,
61 exclude_other; 65 exclude_other,
66 full_paths;
62 const char *vmlinux_name, 67 const char *vmlinux_name,
63 *field_sep; 68 *field_sep;
64 char *dso_list_str, 69 char *dso_list_str,
@@ -77,6 +82,12 @@ static inline void *symbol__priv(struct symbol *self)
77 return ((void *)self) - symbol_conf.priv_size; 82 return ((void *)self) - symbol_conf.priv_size;
78} 83}
79 84
85struct ref_reloc_sym {
86 const char *name;
87 u64 addr;
88 u64 unrelocated_addr;
89};
90
80struct addr_location { 91struct addr_location {
81 struct thread *thread; 92 struct thread *thread;
82 struct map *map; 93 struct map *map;
@@ -94,6 +105,7 @@ struct dso {
94 u8 slen_calculated:1; 105 u8 slen_calculated:1;
95 u8 has_build_id:1; 106 u8 has_build_id:1;
96 u8 kernel:1; 107 u8 kernel:1;
108 u8 hit:1;
97 unsigned char origin; 109 unsigned char origin;
98 u8 sorted_by_name; 110 u8 sorted_by_name;
99 u8 loaded; 111 u8 loaded;
@@ -105,37 +117,55 @@ struct dso {
105}; 117};
106 118
107struct dso *dso__new(const char *name); 119struct dso *dso__new(const char *name);
120struct dso *dso__new_kernel(const char *name);
108void dso__delete(struct dso *self); 121void dso__delete(struct dso *self);
109 122
110bool dso__loaded(const struct dso *self, enum map_type type); 123bool dso__loaded(const struct dso *self, enum map_type type);
111bool dso__sorted_by_name(const struct dso *self, enum map_type type); 124bool dso__sorted_by_name(const struct dso *self, enum map_type type);
112 125
126static inline void dso__set_loaded(struct dso *self, enum map_type type)
127{
128 self->loaded |= (1 << type);
129}
130
113void dso__sort_by_name(struct dso *self, enum map_type type); 131void dso__sort_by_name(struct dso *self, enum map_type type);
114 132
115struct perf_session; 133extern struct list_head dsos__user, dsos__kernel;
134
135struct dso *__dsos__findnew(struct list_head *head, const char *name);
136
137static inline struct dso *dsos__findnew(const char *name)
138{
139 return __dsos__findnew(&dsos__user, name);
140}
116 141
117struct dso *dsos__findnew(const char *name); 142int dso__load(struct dso *self, struct map *map, symbol_filter_t filter);
118int dso__load(struct dso *self, struct map *map, struct perf_session *session, 143int dso__load_vmlinux_path(struct dso *self, struct map *map,
119 symbol_filter_t filter); 144 symbol_filter_t filter);
145int dso__load_kallsyms(struct dso *self, const char *filename, struct map *map,
146 symbol_filter_t filter);
120void dsos__fprintf(FILE *fp); 147void dsos__fprintf(FILE *fp);
121size_t dsos__fprintf_buildid(FILE *fp); 148size_t dsos__fprintf_buildid(FILE *fp, bool with_hits);
122 149
123size_t dso__fprintf_buildid(struct dso *self, FILE *fp); 150size_t dso__fprintf_buildid(struct dso *self, FILE *fp);
124size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp); 151size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp);
125char dso__symtab_origin(const struct dso *self); 152char dso__symtab_origin(const struct dso *self);
153void dso__set_long_name(struct dso *self, char *name);
126void dso__set_build_id(struct dso *self, void *build_id); 154void dso__set_build_id(struct dso *self, void *build_id);
155void dso__read_running_kernel_build_id(struct dso *self);
127struct symbol *dso__find_symbol(struct dso *self, enum map_type type, u64 addr); 156struct symbol *dso__find_symbol(struct dso *self, enum map_type type, u64 addr);
128struct symbol *dso__find_symbol_by_name(struct dso *self, enum map_type type, 157struct symbol *dso__find_symbol_by_name(struct dso *self, enum map_type type,
129 const char *name); 158 const char *name);
130 159
131int filename__read_build_id(const char *filename, void *bf, size_t size); 160int filename__read_build_id(const char *filename, void *bf, size_t size);
132int sysfs__read_build_id(const char *filename, void *bf, size_t size); 161int sysfs__read_build_id(const char *filename, void *bf, size_t size);
133bool dsos__read_build_ids(void); 162bool dsos__read_build_ids(bool with_hits);
134int build_id__sprintf(u8 *self, int len, char *bf); 163int build_id__sprintf(const u8 *self, int len, char *bf);
164int kallsyms__parse(const char *filename, void *arg,
165 int (*process_symbol)(void *arg, const char *name,
166 char type, u64 start));
135 167
136int symbol__init(void); 168int symbol__init(void);
137int perf_session__create_kernel_maps(struct perf_session *self); 169bool symbol_type__is_a(char symbol_type, enum map_type map_type);
138 170
139extern struct list_head dsos__user, dsos__kernel;
140extern struct dso *vdso;
141#endif /* __PERF_SYMBOL */ 171#endif /* __PERF_SYMBOL */
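
The symbol.h hunk above replaces the session-owned dsos__findnew() with two global DSO lists plus a convenience wrapper. A minimal sketch of how a caller might use the split lists; the caller itself is hypothetical and assumes the perf util headers are on the include path:

#include <stdio.h>
#include "util/symbol.h"

/* Hypothetical caller: find-or-create DSOs on the new global lists. */
static void dso_lookup_sketch(void)
{
	/* The wrapper always searches (and grows) the user-space list... */
	struct dso *libc = dsos__findnew("/lib/libc-2.10.1.so");

	/* ...while kernel DSOs go through the underlying helper directly. */
	struct dso *kdso = __dsos__findnew(&dsos__kernel, "[kernel.kallsyms]");

	if (libc != NULL)
		dso__fprintf(libc, MAP__FUNCTION, stdout);
	if (kdso != NULL)
		dso__read_running_kernel_build_id(kdso);
}
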
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 4a08dcf50b68..21b92162282b 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -31,12 +31,41 @@ static struct thread *thread__new(pid_t pid)
31 return self; 31 return self;
32} 32}
33 33
34static void map_groups__flush(struct map_groups *self)
35{
36 int type;
37
38 for (type = 0; type < MAP__NR_TYPES; type++) {
39 struct rb_root *root = &self->maps[type];
40 struct rb_node *next = rb_first(root);
41
42 while (next) {
43 struct map *pos = rb_entry(next, struct map, rb_node);
44 next = rb_next(&pos->rb_node);
45 rb_erase(&pos->rb_node, root);
46 /*
47 * We may have references to this map, for
48 * instance in some hist_entry instances, so
49 * just move them to a separate list.
50 */
51 list_add_tail(&pos->node, &self->removed_maps[pos->type]);
52 }
53 }
54}
55
34int thread__set_comm(struct thread *self, const char *comm) 56int thread__set_comm(struct thread *self, const char *comm)
35{ 57{
58 int err;
59
36 if (self->comm) 60 if (self->comm)
37 free(self->comm); 61 free(self->comm);
38 self->comm = strdup(comm); 62 self->comm = strdup(comm);
39 return self->comm ? 0 : -ENOMEM; 63 err = self->comm == NULL ? -ENOMEM : 0;
64 if (!err) {
65 self->comm_set = true;
66 map_groups__flush(&self->mg);
67 }
68 return err;
40} 69}
41 70
42int thread__comm_len(struct thread *self) 71int thread__comm_len(struct thread *self)
@@ -50,11 +79,6 @@ int thread__comm_len(struct thread *self)
50 return self->comm_len; 79 return self->comm_len;
51} 80}
52 81
53static const char *map_type__name[MAP__NR_TYPES] = {
54 [MAP__FUNCTION] = "Functions",
55 [MAP__VARIABLE] = "Variables",
56};
57
58static size_t __map_groups__fprintf_maps(struct map_groups *self, 82static size_t __map_groups__fprintf_maps(struct map_groups *self,
59 enum map_type type, FILE *fp) 83 enum map_type type, FILE *fp)
60{ 84{
@@ -255,11 +279,14 @@ int thread__fork(struct thread *self, struct thread *parent)
255{ 279{
256 int i; 280 int i;
257 281
258 if (self->comm) 282 if (parent->comm_set) {
259 free(self->comm); 283 if (self->comm)
260 self->comm = strdup(parent->comm); 284 free(self->comm);
261 if (!self->comm) 285 self->comm = strdup(parent->comm);
262 return -ENOMEM; 286 if (!self->comm)
287 return -ENOMEM;
288 self->comm_set = true;
289 }
263 290
264 for (i = 0; i < MAP__NR_TYPES; ++i) 291 for (i = 0; i < MAP__NR_TYPES; ++i)
265 if (map_groups__clone(&self->mg, &parent->mg, i) < 0) 292 if (map_groups__clone(&self->mg, &parent->mg, i) < 0)
@@ -282,14 +309,13 @@ size_t perf_session__fprintf(struct perf_session *self, FILE *fp)
282} 309}
283 310
284struct symbol *map_groups__find_symbol(struct map_groups *self, 311struct symbol *map_groups__find_symbol(struct map_groups *self,
285 struct perf_session *session,
286 enum map_type type, u64 addr, 312 enum map_type type, u64 addr,
287 symbol_filter_t filter) 313 symbol_filter_t filter)
288{ 314{
289 struct map *map = map_groups__find(self, type, addr); 315 struct map *map = map_groups__find(self, type, addr);
290 316
291 if (map != NULL) 317 if (map != NULL)
292 return map__find_symbol(map, session, map->map_ip(map, addr), filter); 318 return map__find_symbol(map, map->map_ip(map, addr), filter);
293 319
294 return NULL; 320 return NULL;
295} 321}
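
The new map_groups__flush() matters mostly to PERF_RECORD_COMM handling: after an exec the thread's old maps are stale, but hist entries may still reference them, so they are parked on removed_maps rather than freed. A hedged sketch of such a caller; the handler and its arguments are illustrative, not the actual event-processing code:

#include <sys/types.h>
#include "util/thread.h"

/*
 * Illustrative COMM handler: thread__set_comm() both records the new
 * comm and flushes the now-stale maps into mg.removed_maps[].
 */
static int comm_event_sketch(struct perf_session *session,
			     pid_t pid, const char *comm)
{
	struct thread *thread = perf_session__findnew(session, pid);

	if (thread == NULL)
		return -1;

	return thread__set_comm(thread, comm);
}
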
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index c206f72c8881..0a28f39de545 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -15,6 +15,7 @@ struct thread {
15 struct map_groups mg; 15 struct map_groups mg;
16 pid_t pid; 16 pid_t pid;
17 char shortname[3]; 17 char shortname[3];
18 bool comm_set;
18 char *comm; 19 char *comm;
19 int comm_len; 20 int comm_len;
20}; 21};
@@ -48,23 +49,36 @@ static inline struct map *thread__find_map(struct thread *self,
48 return self ? map_groups__find(&self->mg, type, addr) : NULL; 49 return self ? map_groups__find(&self->mg, type, addr) : NULL;
49} 50}
50 51
52void thread__find_addr_map(struct thread *self,
53 struct perf_session *session, u8 cpumode,
54 enum map_type type, u64 addr,
55 struct addr_location *al);
56
51void thread__find_addr_location(struct thread *self, 57void thread__find_addr_location(struct thread *self,
52 struct perf_session *session, u8 cpumode, 58 struct perf_session *session, u8 cpumode,
53 enum map_type type, u64 addr, 59 enum map_type type, u64 addr,
54 struct addr_location *al, 60 struct addr_location *al,
55 symbol_filter_t filter); 61 symbol_filter_t filter);
56struct symbol *map_groups__find_symbol(struct map_groups *self, 62struct symbol *map_groups__find_symbol(struct map_groups *self,
57 struct perf_session *session,
58 enum map_type type, u64 addr, 63 enum map_type type, u64 addr,
59 symbol_filter_t filter); 64 symbol_filter_t filter);
60 65
61static inline struct symbol * 66static inline struct symbol *map_groups__find_function(struct map_groups *self,
62map_groups__find_function(struct map_groups *self, struct perf_session *session, 67 u64 addr,
63 u64 addr, symbol_filter_t filter) 68 symbol_filter_t filter)
64{ 69{
65 return map_groups__find_symbol(self, session, MAP__FUNCTION, addr, filter); 70 return map_groups__find_symbol(self, MAP__FUNCTION, addr, filter);
66} 71}
67 72
68struct map *map_groups__find_by_name(struct map_groups *self, 73struct map *map_groups__find_by_name(struct map_groups *self,
69 enum map_type type, const char *name); 74 enum map_type type, const char *name);
75
76int __map_groups__create_kernel_maps(struct map_groups *self,
77 struct map *vmlinux_maps[MAP__NR_TYPES],
78 struct dso *kernel);
79int map_groups__create_kernel_maps(struct map_groups *self,
80 struct map *vmlinux_maps[MAP__NR_TYPES]);
81
82struct map *map_groups__new_module(struct map_groups *self, u64 start,
83 const char *filename);
70#endif /* __PERF_THREAD_H */ 84#endif /* __PERF_THREAD_H */
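
With the perf_session parameter gone, a map_groups instance can resolve symbols on its own once its kernel maps are set up. A minimal sketch under that assumption (the helper name and address are illustrative):

#include "util/thread.h"

/* Sketch: create kernel maps for a standalone map_groups and do one lookup. */
static struct symbol *ksym_sketch(struct map_groups *kmaps, u64 addr)
{
	struct map *vmlinux_maps[MAP__NR_TYPES];

	if (map_groups__create_kernel_maps(kmaps, vmlinux_maps) < 0)
		return NULL;

	/* No session argument is needed any more. */
	return map_groups__find_function(kmaps, addr, NULL);
}
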
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index cace35595530..5ea8973ad331 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -20,6 +20,7 @@
20 */ 20 */
21#define _GNU_SOURCE 21#define _GNU_SOURCE
22#include <dirent.h> 22#include <dirent.h>
23#include <mntent.h>
23#include <stdio.h> 24#include <stdio.h>
24#include <stdlib.h> 25#include <stdlib.h>
25#include <string.h> 26#include <string.h>
@@ -37,6 +38,7 @@
37 38
38#include "../perf.h" 39#include "../perf.h"
39#include "trace-event.h" 40#include "trace-event.h"
41#include "debugfs.h"
40 42
41#define VERSION "0.5" 43#define VERSION "0.5"
42 44
@@ -101,32 +103,12 @@ void *malloc_or_die(unsigned int size)
101 103
102static const char *find_debugfs(void) 104static const char *find_debugfs(void)
103{ 105{
104 static char debugfs[MAX_PATH+1]; 106 const char *path = debugfs_mount(NULL);
105 static int debugfs_found;
106 char type[100];
107 FILE *fp;
108
109 if (debugfs_found)
110 return debugfs;
111
112 if ((fp = fopen("/proc/mounts","r")) == NULL)
113 die("Can't open /proc/mounts for read");
114
115 while (fscanf(fp, "%*s %"
116 STR(MAX_PATH)
117 "s %99s %*s %*d %*d\n",
118 debugfs, type) == 2) {
119 if (strcmp(type, "debugfs") == 0)
120 break;
121 }
122 fclose(fp);
123
124 if (strcmp(type, "debugfs") != 0)
125 die("debugfs not mounted, please mount");
126 107
127 debugfs_found = 1; 108 if (!path)
 109 		die("Your kernel does not support the debugfs filesystem");
128 110
129 return debugfs; 111 return path;
130} 112}
131 113
132/* 114/*
@@ -271,6 +253,8 @@ static void read_header_files(void)
271 write_or_die("header_page", 12); 253 write_or_die("header_page", 12);
272 write_or_die(&size, 8); 254 write_or_die(&size, 8);
273 check_size = copy_file_fd(fd); 255 check_size = copy_file_fd(fd);
256 close(fd);
257
274 if (size != check_size) 258 if (size != check_size)
275 die("wrong size for '%s' size=%lld read=%lld", 259 die("wrong size for '%s' size=%lld read=%lld",
276 path, size, check_size); 260 path, size, check_size);
@@ -289,6 +273,7 @@ static void read_header_files(void)
289 if (size != check_size) 273 if (size != check_size)
290 die("wrong size for '%s'", path); 274 die("wrong size for '%s'", path);
291 put_tracing_file(path); 275 put_tracing_file(path);
276 close(fd);
292} 277}
293 278
294static bool name_in_tp_list(char *sys, struct tracepoint_path *tps) 279static bool name_in_tp_list(char *sys, struct tracepoint_path *tps)
@@ -317,7 +302,8 @@ static void copy_event_system(const char *sys, struct tracepoint_path *tps)
317 die("can't read directory '%s'", sys); 302 die("can't read directory '%s'", sys);
318 303
319 while ((dent = readdir(dir))) { 304 while ((dent = readdir(dir))) {
320 if (strcmp(dent->d_name, ".") == 0 || 305 if (dent->d_type != DT_DIR ||
306 strcmp(dent->d_name, ".") == 0 ||
321 strcmp(dent->d_name, "..") == 0 || 307 strcmp(dent->d_name, "..") == 0 ||
322 !name_in_tp_list(dent->d_name, tps)) 308 !name_in_tp_list(dent->d_name, tps))
323 continue; 309 continue;
@@ -334,7 +320,8 @@ static void copy_event_system(const char *sys, struct tracepoint_path *tps)
334 320
335 rewinddir(dir); 321 rewinddir(dir);
336 while ((dent = readdir(dir))) { 322 while ((dent = readdir(dir))) {
337 if (strcmp(dent->d_name, ".") == 0 || 323 if (dent->d_type != DT_DIR ||
324 strcmp(dent->d_name, ".") == 0 ||
338 strcmp(dent->d_name, "..") == 0 || 325 strcmp(dent->d_name, "..") == 0 ||
339 !name_in_tp_list(dent->d_name, tps)) 326 !name_in_tp_list(dent->d_name, tps))
340 continue; 327 continue;
@@ -353,6 +340,7 @@ static void copy_event_system(const char *sys, struct tracepoint_path *tps)
353 340
354 free(format); 341 free(format);
355 } 342 }
343 closedir(dir);
356} 344}
357 345
358static void read_ftrace_files(struct tracepoint_path *tps) 346static void read_ftrace_files(struct tracepoint_path *tps)
@@ -394,26 +382,21 @@ static void read_event_files(struct tracepoint_path *tps)
394 die("can't read directory '%s'", path); 382 die("can't read directory '%s'", path);
395 383
396 while ((dent = readdir(dir))) { 384 while ((dent = readdir(dir))) {
397 if (strcmp(dent->d_name, ".") == 0 || 385 if (dent->d_type != DT_DIR ||
386 strcmp(dent->d_name, ".") == 0 ||
398 strcmp(dent->d_name, "..") == 0 || 387 strcmp(dent->d_name, "..") == 0 ||
399 strcmp(dent->d_name, "ftrace") == 0 || 388 strcmp(dent->d_name, "ftrace") == 0 ||
400 !system_in_tp_list(dent->d_name, tps)) 389 !system_in_tp_list(dent->d_name, tps))
401 continue; 390 continue;
402 sys = malloc_or_die(strlen(path) + strlen(dent->d_name) + 2); 391 count++;
403 sprintf(sys, "%s/%s", path, dent->d_name);
404 ret = stat(sys, &st);
405 free(sys);
406 if (ret < 0)
407 continue;
408 if (S_ISDIR(st.st_mode))
409 count++;
410 } 392 }
411 393
412 write_or_die(&count, 4); 394 write_or_die(&count, 4);
413 395
414 rewinddir(dir); 396 rewinddir(dir);
415 while ((dent = readdir(dir))) { 397 while ((dent = readdir(dir))) {
416 if (strcmp(dent->d_name, ".") == 0 || 398 if (dent->d_type != DT_DIR ||
399 strcmp(dent->d_name, ".") == 0 ||
417 strcmp(dent->d_name, "..") == 0 || 400 strcmp(dent->d_name, "..") == 0 ||
418 strcmp(dent->d_name, "ftrace") == 0 || 401 strcmp(dent->d_name, "ftrace") == 0 ||
419 !system_in_tp_list(dent->d_name, tps)) 402 !system_in_tp_list(dent->d_name, tps))
@@ -422,14 +405,13 @@ static void read_event_files(struct tracepoint_path *tps)
422 sprintf(sys, "%s/%s", path, dent->d_name); 405 sprintf(sys, "%s/%s", path, dent->d_name);
423 ret = stat(sys, &st); 406 ret = stat(sys, &st);
424 if (ret >= 0) { 407 if (ret >= 0) {
425 if (S_ISDIR(st.st_mode)) { 408 write_or_die(dent->d_name, strlen(dent->d_name) + 1);
426 write_or_die(dent->d_name, strlen(dent->d_name) + 1); 409 copy_event_system(sys, tps);
427 copy_event_system(sys, tps);
428 }
429 } 410 }
430 free(sys); 411 free(sys);
431 } 412 }
432 413
414 closedir(dir);
433 put_tracing_file(path); 415 put_tracing_file(path);
434} 416}
435 417
@@ -533,7 +515,7 @@ int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events)
533 write_or_die(buf, 1); 515 write_or_die(buf, 1);
534 516
535 /* save page_size */ 517 /* save page_size */
536 page_size = getpagesize(); 518 page_size = sysconf(_SC_PAGESIZE);
537 write_or_die(&page_size, 4); 519 write_or_die(&page_size, 4);
538 520
539 read_header_files(); 521 read_header_files();
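
find_debugfs() now defers to the shared debugfs_mount() helper, which locates (and if necessary mounts) debugfs instead of re-parsing /proc/mounts. A short sketch of building the tracing path on top of it; the helper below is hypothetical, only debugfs_mount() comes from the patch:

#include <stdio.h>
#include <limits.h>
#include "util/debugfs.h"

/* Hypothetical helper: derive the tracing directory from the debugfs mount. */
static const char *tracing_dir_sketch(void)
{
	static char path[PATH_MAX];
	const char *mnt = debugfs_mount(NULL);	/* NULL = use the default mount point */

	if (mnt == NULL)
		return NULL;	/* kernel without debugfs support */

	snprintf(path, sizeof(path), "%s/tracing", mnt);
	return path;
}
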
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index c5c32be040bf..9b3c20f42f98 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -1925,6 +1925,15 @@ void *raw_field_ptr(struct event *event, const char *name, void *data)
1925 if (!field) 1925 if (!field)
1926 return NULL; 1926 return NULL;
1927 1927
1928 if (field->flags & FIELD_IS_STRING) {
1929 int offset;
1930
1931 offset = *(int *)(data + field->offset);
1932 offset &= 0xffff;
1933
1934 return data + offset;
1935 }
1936
1928 return data + field->offset; 1937 return data + field->offset;
1929} 1938}
1930 1939
@@ -3277,3 +3286,18 @@ void parse_set_info(int nr_cpus, int long_sz)
3277 cpus = nr_cpus; 3286 cpus = nr_cpus;
3278 long_size = long_sz; 3287 long_size = long_sz;
3279} 3288}
3289
3290int common_pc(struct scripting_context *context)
3291{
3292 return parse_common_pc(context->event_data);
3293}
3294
3295int common_flags(struct scripting_context *context)
3296{
3297 return parse_common_flags(context->event_data);
3298}
3299
3300int common_lock_depth(struct scripting_context *context)
3301{
3302 return parse_common_lock_depth(context->event_data);
3303}
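
The new FIELD_IS_STRING branch handles ftrace __data_loc fields, where the 32-bit word stored at field->offset packs the payload offset in the low 16 bits and the payload length in the high 16 bits; raw_field_ptr() only consumes the offset. A self-contained sketch of the full decode (names are illustrative):

#include <stdint.h>

/* Decode an ftrace __data_loc word: offset in bits 0-15, length in bits 16-31. */
static const char *data_loc_sketch(const void *record, int field_offset, int *lenp)
{
	uint32_t loc = *(const uint32_t *)((const char *)record + field_offset);

	if (lenp != NULL)
		*lenp = loc >> 16;			/* payload length in bytes */
	return (const char *)record + (loc & 0xffff);	/* start of the string */
}
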
diff --git a/tools/perf/util/trace-event-perl.h b/tools/perf/util/trace-event-perl.h
deleted file mode 100644
index e88fb26137bb..000000000000
--- a/tools/perf/util/trace-event-perl.h
+++ /dev/null
@@ -1,55 +0,0 @@
1#ifndef __PERF_TRACE_EVENT_PERL_H
2#define __PERF_TRACE_EVENT_PERL_H
3#ifdef NO_LIBPERL
4typedef int INTERP;
5#define dSP
6#define ENTER
7#define SAVETMPS
8#define PUTBACK
9#define SPAGAIN
10#define FREETMPS
11#define LEAVE
12#define SP
13#define ERRSV
14#define G_SCALAR (0)
15#define G_DISCARD (0)
16#define G_NOARGS (0)
17#define PUSHMARK(a)
18#define SvTRUE(a) (0)
19#define XPUSHs(s)
20#define sv_2mortal(a)
21#define newSVpv(a,b)
22#define newSVuv(a)
23#define newSViv(a)
24#define get_cv(a,b) (0)
25#define call_pv(a,b) (0)
26#define perl_alloc() (0)
27#define perl_construct(a) (0)
28#define perl_parse(a,b,c,d,e) (0)
29#define perl_run(a) (0)
30#define perl_destruct(a) (0)
31#define perl_free(a) (0)
32#define pTHX void
33#define CV void
34#define dXSUB_SYS
35#define pTHX_
36static inline void newXS(const char *a, void *b, const char *c) {}
37static void boot_Perf__Trace__Context(pTHX_ CV *cv) {}
38static void boot_DynaLoader(pTHX_ CV *cv) {}
39#else
40#include <EXTERN.h>
41#include <perl.h>
42void boot_Perf__Trace__Context(pTHX_ CV *cv);
43void boot_DynaLoader(pTHX_ CV *cv);
44typedef PerlInterpreter * INTERP;
45#endif
46
47struct scripting_context {
48 void *event_data;
49};
50
51int common_pc(struct scripting_context *context);
52int common_flags(struct scripting_context *context);
53int common_lock_depth(struct scripting_context *context);
54
55#endif /* __PERF_TRACE_EVENT_PERL_H */
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index 1744422cafcb..7cd1193918c7 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -18,7 +18,7 @@
18 * 18 *
19 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 19 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
20 */ 20 */
21#define _LARGEFILE64_SOURCE 21#define _FILE_OFFSET_BITS 64
22 22
23#include <dirent.h> 23#include <dirent.h>
24#include <stdio.h> 24#include <stdio.h>
@@ -83,7 +83,7 @@ static char *read_string(void)
83 char *str = NULL; 83 char *str = NULL;
84 int size = 0; 84 int size = 0;
85 int i; 85 int i;
86 int r; 86 off_t r;
87 87
88 for (;;) { 88 for (;;) {
89 r = read(input_fd, buf, BUFSIZ); 89 r = read(input_fd, buf, BUFSIZ);
@@ -118,7 +118,7 @@ static char *read_string(void)
118 118
119 /* move the file descriptor to the end of the string */ 119 /* move the file descriptor to the end of the string */
120 r = lseek(input_fd, -(r - i), SEEK_CUR); 120 r = lseek(input_fd, -(r - i), SEEK_CUR);
121 if (r < 0) 121 if (r == (off_t)-1)
122 die("lseek"); 122 die("lseek");
123 123
124 if (str) { 124 if (str) {
@@ -282,8 +282,8 @@ static void update_cpu_data_index(int cpu)
282 282
283static void get_next_page(int cpu) 283static void get_next_page(int cpu)
284{ 284{
285 off64_t save_seek; 285 off_t save_seek;
286 off64_t ret; 286 off_t ret;
287 287
288 if (!cpu_data[cpu].page) 288 if (!cpu_data[cpu].page)
289 return; 289 return;
@@ -298,17 +298,17 @@ static void get_next_page(int cpu)
298 update_cpu_data_index(cpu); 298 update_cpu_data_index(cpu);
299 299
300 /* other parts of the code may expect the pointer to not move */ 300 /* other parts of the code may expect the pointer to not move */
301 save_seek = lseek64(input_fd, 0, SEEK_CUR); 301 save_seek = lseek(input_fd, 0, SEEK_CUR);
302 302
303 ret = lseek64(input_fd, cpu_data[cpu].offset, SEEK_SET); 303 ret = lseek(input_fd, cpu_data[cpu].offset, SEEK_SET);
304 if (ret < 0) 304 if (ret == (off_t)-1)
305 die("failed to lseek"); 305 die("failed to lseek");
306 ret = read(input_fd, cpu_data[cpu].page, page_size); 306 ret = read(input_fd, cpu_data[cpu].page, page_size);
307 if (ret < 0) 307 if (ret < 0)
308 die("failed to read page"); 308 die("failed to read page");
309 309
310 /* reset the file pointer back */ 310 /* reset the file pointer back */
311 lseek64(input_fd, save_seek, SEEK_SET); 311 lseek(input_fd, save_seek, SEEK_SET);
312 312
313 return; 313 return;
314 } 314 }
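
The file now requests a 64-bit off_t via _FILE_OFFSET_BITS instead of the explicit lseek64()/off64_t interfaces; for that to work the define must precede every libc header. A minimal sketch of the pattern:

/* Must appear before the first #include so glibc selects the 64-bit off_t ABI. */
#define _FILE_OFFSET_BITS 64

#include <sys/types.h>
#include <unistd.h>

/* Plain lseek() now handles >2GB offsets on 32-bit hosts as well. */
static int seek_sketch(int fd, off_t offset)
{
	return lseek(fd, offset, SEEK_SET) == (off_t)-1 ? -1 : 0;
}
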
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
new file mode 100644
index 000000000000..7ea983acfaea
--- /dev/null
+++ b/tools/perf/util/trace-event-scripting.c
@@ -0,0 +1,167 @@
1/*
2 * trace-event-scripting. Scripting engine common and initialization code.
3 *
4 * Copyright (C) 2009-2010 Tom Zanussi <tzanussi@gmail.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
21
22#include <stdio.h>
23#include <stdlib.h>
24#include <string.h>
25#include <ctype.h>
26#include <errno.h>
27
28#include "../perf.h"
29#include "util.h"
30#include "trace-event.h"
31
32struct scripting_context *scripting_context;
33
34static int stop_script_unsupported(void)
35{
36 return 0;
37}
38
39static void process_event_unsupported(int cpu __unused,
40 void *data __unused,
41 int size __unused,
42 unsigned long long nsecs __unused,
43 char *comm __unused)
44{
45}
46
47static void print_python_unsupported_msg(void)
48{
49 fprintf(stderr, "Python scripting not supported."
50 " Install libpython and rebuild perf to enable it.\n"
51 "For example:\n # apt-get install python-dev (ubuntu)"
52 "\n # yum install python-devel (Fedora)"
53 "\n etc.\n");
54}
55
56static int python_start_script_unsupported(const char *script __unused,
57 int argc __unused,
58 const char **argv __unused)
59{
60 print_python_unsupported_msg();
61
62 return -1;
63}
64
65static int python_generate_script_unsupported(const char *outfile __unused)
66{
67 print_python_unsupported_msg();
68
69 return -1;
70}
71
72struct scripting_ops python_scripting_unsupported_ops = {
73 .name = "Python",
74 .start_script = python_start_script_unsupported,
75 .stop_script = stop_script_unsupported,
76 .process_event = process_event_unsupported,
77 .generate_script = python_generate_script_unsupported,
78};
79
80static void register_python_scripting(struct scripting_ops *scripting_ops)
81{
82 int err;
83 err = script_spec_register("Python", scripting_ops);
84 if (err)
85 die("error registering Python script extension");
86
87 err = script_spec_register("py", scripting_ops);
88 if (err)
89 die("error registering py script extension");
90
91 scripting_context = malloc(sizeof(struct scripting_context));
92}
93
94#ifdef NO_LIBPYTHON
95void setup_python_scripting(void)
96{
97 register_python_scripting(&python_scripting_unsupported_ops);
98}
99#else
100struct scripting_ops python_scripting_ops;
101
102void setup_python_scripting(void)
103{
104 register_python_scripting(&python_scripting_ops);
105}
106#endif
107
108static void print_perl_unsupported_msg(void)
109{
110 fprintf(stderr, "Perl scripting not supported."
111 " Install libperl and rebuild perf to enable it.\n"
112 "For example:\n # apt-get install libperl-dev (ubuntu)"
113 "\n # yum install 'perl(ExtUtils::Embed)' (Fedora)"
114 "\n etc.\n");
115}
116
117static int perl_start_script_unsupported(const char *script __unused,
118 int argc __unused,
119 const char **argv __unused)
120{
121 print_perl_unsupported_msg();
122
123 return -1;
124}
125
126static int perl_generate_script_unsupported(const char *outfile __unused)
127{
128 print_perl_unsupported_msg();
129
130 return -1;
131}
132
133struct scripting_ops perl_scripting_unsupported_ops = {
134 .name = "Perl",
135 .start_script = perl_start_script_unsupported,
136 .stop_script = stop_script_unsupported,
137 .process_event = process_event_unsupported,
138 .generate_script = perl_generate_script_unsupported,
139};
140
141static void register_perl_scripting(struct scripting_ops *scripting_ops)
142{
143 int err;
144 err = script_spec_register("Perl", scripting_ops);
145 if (err)
146 die("error registering Perl script extension");
147
148 err = script_spec_register("pl", scripting_ops);
149 if (err)
150 die("error registering pl script extension");
151
152 scripting_context = malloc(sizeof(struct scripting_context));
153}
154
155#ifdef NO_LIBPERL
156void setup_perl_scripting(void)
157{
158 register_perl_scripting(&perl_scripting_unsupported_ops);
159}
160#else
161struct scripting_ops perl_scripting_ops;
162
163void setup_perl_scripting(void)
164{
165 register_perl_scripting(&perl_scripting_ops);
166}
167#endif
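
Each language registers its scripting_ops twice, once under the language name and once under the script file extension, so the trace command can pick the engine from either spelling. A hypothetical third backend, sketched only to show the registration pattern (nothing below exists in the patch):

#include "util/util.h"
#include "util/trace-event.h"

static int lua_start_stub(const char *script __unused, int argc __unused,
			  const char **argv __unused)
{
	return -1;	/* stub: a real backend would embed an interpreter here */
}

static int lua_stop_stub(void)
{
	return 0;
}

static void lua_event_stub(int cpu __unused, void *data __unused, int size __unused,
			   unsigned long long nsecs __unused, char *comm __unused)
{
}

static int lua_generate_stub(const char *outfile __unused)
{
	return -1;
}

static struct scripting_ops lua_scripting_ops = {
	.name		 = "Lua",
	.start_script	 = lua_start_stub,
	.stop_script	 = lua_stop_stub,
	.process_event	 = lua_event_stub,
	.generate_script = lua_generate_stub,
};

void setup_lua_scripting(void)
{
	/* One entry for the language name, one for the script extension. */
	script_spec_register("Lua", &lua_scripting_ops);
	script_spec_register("lua", &lua_scripting_ops);
}
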
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index 6ad405620c9b..c3269b937db4 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -279,7 +279,15 @@ struct scripting_ops {
279 279
280int script_spec_register(const char *spec, struct scripting_ops *ops); 280int script_spec_register(const char *spec, struct scripting_ops *ops);
281 281
282extern struct scripting_ops perl_scripting_ops;
283void setup_perl_scripting(void); 282void setup_perl_scripting(void);
283void setup_python_scripting(void);
284
285struct scripting_context {
286 void *event_data;
287};
288
289int common_pc(struct scripting_context *context);
290int common_flags(struct scripting_context *context);
291int common_lock_depth(struct scripting_context *context);
284 292
285#endif /* __PERF_TRACE_EVENTS_H */ 293#endif /* __PERF_TRACE_EVENTS_H */
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
new file mode 100644
index 000000000000..f9b890fde681
--- /dev/null
+++ b/tools/perf/util/util.c
@@ -0,0 +1,94 @@
1#include "util.h"
2#include <sys/mman.h>
3
4int mkdir_p(char *path, mode_t mode)
5{
6 struct stat st;
7 int err;
8 char *d = path;
9
10 if (*d != '/')
11 return -1;
12
13 if (stat(path, &st) == 0)
14 return 0;
15
16 while (*++d == '/');
17
18 while ((d = strchr(d, '/'))) {
19 *d = '\0';
20 err = stat(path, &st) && mkdir(path, mode);
21 *d++ = '/';
22 if (err)
23 return -1;
24 while (*d == '/')
25 ++d;
26 }
27 return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0;
28}
29
30static int slow_copyfile(const char *from, const char *to)
31{
 32	int err = -1;
33 char *line = NULL;
34 size_t n;
35 FILE *from_fp = fopen(from, "r"), *to_fp;
36
37 if (from_fp == NULL)
38 goto out;
39
40 to_fp = fopen(to, "w");
41 if (to_fp == NULL)
42 goto out_fclose_from;
43
44 while (getline(&line, &n, from_fp) > 0)
45 if (fputs(line, to_fp) == EOF)
46 goto out_fclose_to;
47 err = 0;
48out_fclose_to:
49 fclose(to_fp);
50 free(line);
51out_fclose_from:
52 fclose(from_fp);
53out:
54 return err;
55}
56
57int copyfile(const char *from, const char *to)
58{
59 int fromfd, tofd;
60 struct stat st;
61 void *addr;
62 int err = -1;
63
64 if (stat(from, &st))
65 goto out;
66
67 if (st.st_size == 0) /* /proc? do it slowly... */
68 return slow_copyfile(from, to);
69
70 fromfd = open(from, O_RDONLY);
71 if (fromfd < 0)
72 goto out;
73
74 tofd = creat(to, 0755);
75 if (tofd < 0)
76 goto out_close_from;
77
78 addr = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fromfd, 0);
79 if (addr == MAP_FAILED)
80 goto out_close_to;
81
82 if (write(tofd, addr, st.st_size) == st.st_size)
83 err = 0;
84
85 munmap(addr, st.st_size);
86out_close_to:
87 close(tofd);
88 if (err)
89 unlink(to);
90out_close_from:
91 close(fromfd);
92out:
93 return err;
94}
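
mkdir_p() walks the path component by component and writes into the buffer while doing so, so it must not be handed a string literal; copyfile() falls back to the line-by-line slow_copyfile() for zero-sized sources such as /proc files. A minimal usage sketch in the spirit of perf buildid-cache (paths are illustrative):

#include "util/util.h"

/* Sketch: mirror a file into a per-user cache directory. */
static int cache_file_sketch(void)
{
	char dir[] = "/home/user/.debug/.build-id/ab";	/* writable: mkdir_p edits it */

	if (mkdir_p(dir, 0755) < 0)
		return -1;

	/* st_size == 0 for /proc files, so this takes the slow_copyfile() path. */
	return copyfile("/proc/kallsyms",
			"/home/user/.debug/.build-id/ab/kallsyms");
}
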
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index c673d8825883..0f5b2a6f1080 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -403,4 +403,7 @@ void git_qsort(void *base, size_t nmemb, size_t size,
403#endif 403#endif
404#endif 404#endif
405 405
406int mkdir_p(char *path, mode_t mode);
407int copyfile(const char *from, const char *to);
408
406#endif 409#endif
diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c
index 1c15e39f99e3..cfa55d686e3b 100644
--- a/tools/perf/util/values.c
+++ b/tools/perf/util/values.c
@@ -169,6 +169,7 @@ static void perf_read_values__display_pretty(FILE *fp,
169 counterwidth[j], values->value[i][j]); 169 counterwidth[j], values->value[i][j]);
170 fprintf(fp, "\n"); 170 fprintf(fp, "\n");
171 } 171 }
172 free(counterwidth);
172} 173}
173 174
174static void perf_read_values__display_raw(FILE *fp, 175static void perf_read_values__display_raw(FILE *fp,