author    Linus Torvalds <torvalds@linux-foundation.org>  2009-12-05 18:30:21 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2009-12-05 18:30:21 -0500
commit    c3fa27d1367fac63ac8533d6f20ea851d0d70a10 (patch)
tree      e7731554085e22b6b63411b1ebb401079f3e0bbb
parent    96fa2b508d2d3fe040cf4ef2fffb955f0a537ea1 (diff)
parent    d103d01e4b19f185d3c85f77402b605534c32e89 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (470 commits)
  x86: Fix comments of register/stack access functions
  perf tools: Replace %m with %a in sscanf
  hw-breakpoints: Keep track of user disabled breakpoints
  tracing/syscalls: Make syscall events print callbacks static
  tracing: Add DEFINE_EVENT(), DEFINE_SINGLE_EVENT() support to docbook
  perf: Don't free perf_mmap_data until work has been done
  perf_event: Fix compile error
  perf tools: Fix _GNU_SOURCE macro related strndup() build error
  trace_syscalls: Remove unused syscall_name_to_nr()
  trace_syscalls: Simplify syscall profile
  trace_syscalls: Remove duplicate init_enter_##sname()
  trace_syscalls: Add syscall_nr field to struct syscall_metadata
  trace_syscalls: Remove enter_id exit_id
  trace_syscalls: Set event_enter_##sname->data to its metadata
  trace_syscalls: Remove unused event_syscall_enter and event_syscall_exit
  perf_event: Initialize data.period in perf_swevent_hrtimer()
  perf probe: Simplify event naming
  perf probe: Add --list option for listing current probe events
  perf probe: Add argv_split() from lib/argv_split.c
  perf probe: Move probe event utility functions to probe-event.c
  ...
-rw-r--r--  Documentation/DocBook/tracepoint.tmpl | 5
-rw-r--r--  Documentation/trace/kprobetrace.txt | 149
-rw-r--r--  arch/Kconfig | 7
-rw-r--r--  arch/powerpc/Kconfig.debug | 2
-rw-r--r--  arch/powerpc/configs/pseries_defconfig | 2
-rw-r--r--  arch/powerpc/include/asm/emulated_ops.h | 19
-rw-r--r--  arch/powerpc/include/asm/hvcall.h | 2
-rw-r--r--  arch/powerpc/include/asm/reg.h | 2
-rw-r--r--  arch/powerpc/include/asm/trace.h | 133
-rw-r--r--  arch/powerpc/kernel/align.c | 12
-rw-r--r--  arch/powerpc/kernel/entry_64.S | 4
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S | 3
-rw-r--r--  arch/powerpc/kernel/irq.c | 6
-rw-r--r--  arch/powerpc/kernel/perf_event.c | 2
-rw-r--r--  arch/powerpc/kernel/power5+-pmu.c | 4
-rw-r--r--  arch/powerpc/kernel/power5-pmu.c | 6
-rw-r--r--  arch/powerpc/kernel/power6-pmu.c | 2
-rw-r--r--  arch/powerpc/kernel/power7-pmu.c | 6
-rw-r--r--  arch/powerpc/kernel/ppc970-pmu.c | 4
-rw-r--r--  arch/powerpc/kernel/setup-common.c | 1
-rw-r--r--  arch/powerpc/kernel/time.c | 6
-rw-r--r--  arch/powerpc/kernel/traps.c | 18
-rw-r--r--  arch/powerpc/lib/copypage_64.S | 4
-rw-r--r--  arch/powerpc/platforms/pseries/hvCall.S | 132
-rw-r--r--  arch/powerpc/platforms/pseries/hvCall_inst.c | 38
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c | 33
-rw-r--r--  arch/x86/Kconfig | 1
-rw-r--r--  arch/x86/Kconfig.debug | 9
-rw-r--r--  arch/x86/Makefile | 3
-rw-r--r--  arch/x86/include/asm/Kbuild | 1
-rw-r--r--  arch/x86/include/asm/a.out-core.h | 10
-rw-r--r--  arch/x86/include/asm/debugreg.h | 33
-rw-r--r--  arch/x86/include/asm/hardirq.h | 6
-rw-r--r--  arch/x86/include/asm/hw_breakpoint.h | 73
-rw-r--r--  arch/x86/include/asm/inat.h | 220
-rw-r--r--  arch/x86/include/asm/inat_types.h | 29
-rw-r--r--  arch/x86/include/asm/insn.h | 184
-rw-r--r--  arch/x86/include/asm/mce.h | 14
-rw-r--r--  arch/x86/include/asm/perf_event.h | 13
-rw-r--r--  arch/x86/include/asm/processor.h | 14
-rw-r--r--  arch/x86/include/asm/ptrace.h | 62
-rw-r--r--  arch/x86/kernel/Makefile | 2
-rw-r--r--  arch/x86/kernel/cpu/Makefile | 1
-rw-r--r--  arch/x86/kernel/cpu/common.c | 4
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 103
-rw-r--r--  arch/x86/kernel/cpu/mcheck/therm_throt.c | 29
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 205
-rw-r--r--  arch/x86/kernel/entry_32.S | 24
-rw-r--r--  arch/x86/kernel/entry_64.S | 8
-rw-r--r--  arch/x86/kernel/hw_breakpoint.c | 555
-rw-r--r--  arch/x86/kernel/irq.c | 12
-rw-r--r--  arch/x86/kernel/kgdb.c | 6
-rw-r--r--  arch/x86/kernel/kprobes.c | 243
-rw-r--r--  arch/x86/kernel/machine_kexec_32.c | 2
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c | 2
-rw-r--r--  arch/x86/kernel/process.c | 21
-rw-r--r--  arch/x86/kernel/process_32.c | 6
-rw-r--r--  arch/x86/kernel/process_64.c | 7
-rw-r--r--  arch/x86/kernel/ptrace.c | 415
-rw-r--r--  arch/x86/kernel/setup.c | 3
-rw-r--r--  arch/x86/kernel/signal.c | 9
-rw-r--r--  arch/x86/kernel/traps.c | 73
-rw-r--r--  arch/x86/kvm/x86.c | 18
-rw-r--r--  arch/x86/lib/.gitignore | 1
-rw-r--r--  arch/x86/lib/Makefile | 13
-rw-r--r--  arch/x86/lib/inat.c | 90
-rw-r--r--  arch/x86/lib/insn.c | 516
-rw-r--r--  arch/x86/lib/x86-opcode-map.txt | 893
-rw-r--r--  arch/x86/mm/fault.c | 11
-rw-r--r--  arch/x86/mm/kmmio.c | 8
-rw-r--r--  arch/x86/power/cpu.c | 26
-rw-r--r--  arch/x86/tools/Makefile | 31
-rw-r--r--  arch/x86/tools/chkobjdump.awk | 23
-rw-r--r--  arch/x86/tools/distill.awk | 47
-rw-r--r--  arch/x86/tools/gen-insn-attr-x86.awk | 380
-rw-r--r--  arch/x86/tools/test_get_len.c | 173
-rw-r--r--  drivers/edac/edac_mce_amd.c | 21
-rw-r--r--  include/linux/ftrace_event.h | 38
-rw-r--r--  include/linux/hw_breakpoint.h | 131
-rw-r--r--  include/linux/kprobes.h | 2
-rw-r--r--  include/linux/perf_counter.h | 3
-rw-r--r--  include/linux/perf_event.h | 59
-rw-r--r--  include/linux/syscalls.h | 77
-rw-r--r--  include/linux/tracepoint.h | 6
-rw-r--r--  include/trace/define_trace.h | 11
-rw-r--r--  include/trace/events/bkl.h | 18
-rw-r--r--  include/trace/events/block.h | 202
-rw-r--r--  include/trace/events/ext4.h | 129
-rw-r--r--  include/trace/events/irq.h | 52
-rw-r--r--  include/trace/events/jbd2.h | 63
-rw-r--r--  include/trace/events/kmem.h | 130
-rw-r--r--  include/trace/events/lock.h (renamed from include/trace/events/lockdep.h) | 8
-rw-r--r--  include/trace/events/mce.h | 69
-rw-r--r--  include/trace/events/module.h | 22
-rw-r--r--  include/trace/events/power.h | 38
-rw-r--r--  include/trace/events/sched.h | 217
-rw-r--r--  include/trace/events/signal.h | 173
-rw-r--r--  include/trace/events/timer.h | 79
-rw-r--r--  include/trace/events/workqueue.h | 22
-rw-r--r--  include/trace/ftrace.h | 338
-rw-r--r--  include/trace/syscall.h | 31
-rw-r--r--  kernel/Makefile | 2
-rw-r--r--  kernel/exit.c | 5
-rw-r--r--  kernel/hw_breakpoint.c | 423
-rw-r--r--  kernel/kallsyms.c | 1
-rw-r--r--  kernel/kprobes.c | 68
-rw-r--r--  kernel/lockdep.c | 2
-rw-r--r--  kernel/notifier.c | 2
-rw-r--r--  kernel/perf_event.c | 627
-rw-r--r--  kernel/signal.c | 27
-rw-r--r--  kernel/trace/Kconfig | 38
-rw-r--r--  kernel/trace/Makefile | 2
-rw-r--r--  kernel/trace/ring_buffer.c | 15
-rw-r--r--  kernel/trace/trace.h | 38
-rw-r--r--  kernel/trace/trace_entries.h | 16
-rw-r--r--  kernel/trace/trace_event_profile.c | 43
-rw-r--r--  kernel/trace/trace_events.c | 168
-rw-r--r--  kernel/trace/trace_events_filter.c | 310
-rw-r--r--  kernel/trace/trace_export.c | 39
-rw-r--r--  kernel/trace/trace_kprobe.c | 1523
-rw-r--r--  kernel/trace/trace_ksym.c | 550
-rw-r--r--  kernel/trace/trace_selftest.c | 55
-rw-r--r--  kernel/trace/trace_syscalls.c | 195
-rw-r--r--  samples/Kconfig | 6
-rw-r--r--  samples/Makefile | 3
-rw-r--r--  samples/hw_breakpoint/Makefile | 1
-rw-r--r--  samples/hw_breakpoint/data_breakpoint.c | 87
-rwxr-xr-x  scripts/kernel-doc | 15
-rw-r--r--  tools/perf/.gitignore | 1
-rw-r--r--  tools/perf/Documentation/perf-bench.txt | 120
-rw-r--r--  tools/perf/Documentation/perf-buildid-list.txt | 34
-rw-r--r--  tools/perf/Documentation/perf-kmem.txt | 44
-rw-r--r--  tools/perf/Documentation/perf-probe.txt | 49
-rw-r--r--  tools/perf/Documentation/perf-record.txt | 16
-rw-r--r--  tools/perf/Documentation/perf-report.txt | 8
-rw-r--r--  tools/perf/Documentation/perf-timechart.txt | 5
-rw-r--r--  tools/perf/Documentation/perf-trace-perl.txt | 219
-rw-r--r--  tools/perf/Documentation/perf-trace.txt | 11
-rw-r--r--  tools/perf/Makefile | 151
-rw-r--r--  tools/perf/bench/bench.h | 17
-rw-r--r--  tools/perf/bench/mem-memcpy.c | 193
-rw-r--r--  tools/perf/bench/sched-messaging.c | 336
-rw-r--r--  tools/perf/bench/sched-pipe.c | 124
-rw-r--r--  tools/perf/builtin-annotate.c | 885
-rw-r--r--  tools/perf/builtin-bench.c | 196
-rw-r--r--  tools/perf/builtin-buildid-list.c | 116
-rw-r--r--  tools/perf/builtin-help.c | 16
-rw-r--r--  tools/perf/builtin-kmem.c | 807
-rw-r--r--  tools/perf/builtin-probe.c | 242
-rw-r--r--  tools/perf/builtin-record.c | 345
-rw-r--r--  tools/perf/builtin-report.c | 1147
-rw-r--r--  tools/perf/builtin-sched.c | 351
-rw-r--r--  tools/perf/builtin-stat.c | 34
-rw-r--r--  tools/perf/builtin-timechart.c | 267
-rw-r--r--  tools/perf/builtin-top.c | 503
-rw-r--r--  tools/perf/builtin-trace.c | 418
-rw-r--r--  tools/perf/builtin.h | 4
-rw-r--r--  tools/perf/command-list.txt | 4
-rw-r--r--  tools/perf/design.txt | 2
-rw-r--r--  tools/perf/perf.c | 86
-rw-r--r--  tools/perf/perf.h | 12
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/Context.c | 134
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/Context.xs | 41
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/Makefile.PL | 17
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/README | 59
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm | 55
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm | 192
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm | 88
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/typemap | 1
-rw-r--r--  tools/perf/scripts/perl/bin/check-perf-trace-record | 7
-rw-r--r--  tools/perf/scripts/perl/bin/check-perf-trace-report | 5
-rw-r--r--  tools/perf/scripts/perl/bin/rw-by-file-record | 2
-rw-r--r--  tools/perf/scripts/perl/bin/rw-by-file-report | 5
-rw-r--r--  tools/perf/scripts/perl/bin/rw-by-pid-record | 2
-rw-r--r--  tools/perf/scripts/perl/bin/rw-by-pid-report | 5
-rw-r--r--  tools/perf/scripts/perl/bin/wakeup-latency-record | 6
-rw-r--r--  tools/perf/scripts/perl/bin/wakeup-latency-report | 5
-rw-r--r--  tools/perf/scripts/perl/bin/workqueue-stats-record | 2
-rw-r--r--  tools/perf/scripts/perl/bin/workqueue-stats-report | 6
-rw-r--r--  tools/perf/scripts/perl/check-perf-trace.pl | 106
-rw-r--r--  tools/perf/scripts/perl/rw-by-file.pl | 105
-rw-r--r--  tools/perf/scripts/perl/rw-by-pid.pl | 170
-rw-r--r--  tools/perf/scripts/perl/wakeup-latency.pl | 103
-rw-r--r--  tools/perf/scripts/perl/workqueue-stats.pl | 129
-rw-r--r--  tools/perf/util/cache.h | 11
-rw-r--r--  tools/perf/util/callchain.c | 2
-rw-r--r--  tools/perf/util/callchain.h | 2
-rw-r--r--  tools/perf/util/color.h | 6
-rw-r--r--  tools/perf/util/ctype.c | 8
-rw-r--r--  tools/perf/util/data_map.c | 291
-rw-r--r--  tools/perf/util/data_map.h | 32
-rw-r--r--  tools/perf/util/debug.c | 4
-rw-r--r--  tools/perf/util/debug.h | 9
-rw-r--r--  tools/perf/util/debugfs.c | 241
-rw-r--r--  tools/perf/util/debugfs.h | 25
-rw-r--r--  tools/perf/util/event.c | 312
-rw-r--r--  tools/perf/util/event.h | 75
-rw-r--r--  tools/perf/util/exec_cmd.h | 6
-rw-r--r--  tools/perf/util/header.c | 403
-rw-r--r--  tools/perf/util/header.h | 76
-rw-r--r--  tools/perf/util/help.h | 6
-rw-r--r--  tools/perf/util/hist.c | 202
-rw-r--r--  tools/perf/util/hist.h | 50
-rw-r--r--  tools/perf/util/include/asm/asm-offsets.h | 1
-rw-r--r--  tools/perf/util/include/asm/bitops.h | 18
-rw-r--r--  tools/perf/util/include/asm/bug.h | 22
-rw-r--r--  tools/perf/util/include/asm/byteorder.h | 2
-rw-r--r--  tools/perf/util/include/asm/swab.h | 1
-rw-r--r--  tools/perf/util/include/asm/uaccess.h | 14
-rw-r--r--  tools/perf/util/include/linux/bitmap.h | 3
-rw-r--r--  tools/perf/util/include/linux/bitops.h | 29
-rw-r--r--  tools/perf/util/include/linux/compiler.h | 10
-rw-r--r--  tools/perf/util/include/linux/ctype.h | 1
-rw-r--r--  tools/perf/util/include/linux/kernel.h | 76
-rw-r--r--  tools/perf/util/include/linux/string.h | 1
-rw-r--r--  tools/perf/util/include/linux/types.h | 9
-rw-r--r--  tools/perf/util/levenshtein.h | 6
-rw-r--r--  tools/perf/util/map.c | 99
-rw-r--r--  tools/perf/util/module.c | 545
-rw-r--r--  tools/perf/util/module.h | 53
-rw-r--r--  tools/perf/util/parse-events.c | 155
-rw-r--r--  tools/perf/util/parse-events.h | 8
-rw-r--r--  tools/perf/util/parse-options.h | 6
-rw-r--r--  tools/perf/util/probe-event.c | 484
-rw-r--r--  tools/perf/util/probe-event.h | 18
-rw-r--r--  tools/perf/util/probe-finder.c | 732
-rw-r--r--  tools/perf/util/probe-finder.h | 57
-rw-r--r--  tools/perf/util/quote.h | 6
-rw-r--r--  tools/perf/util/run-command.h | 6
-rw-r--r--  tools/perf/util/sigchain.h | 6
-rw-r--r--  tools/perf/util/sort.c | 290
-rw-r--r--  tools/perf/util/sort.h | 99
-rw-r--r--  tools/perf/util/strbuf.h | 6
-rw-r--r--  tools/perf/util/string.c | 194
-rw-r--r--  tools/perf/util/string.h | 10
-rw-r--r--  tools/perf/util/strlist.h | 6
-rw-r--r--  tools/perf/util/svghelper.h | 6
-rw-r--r--  tools/perf/util/symbol.c | 1193
-rw-r--r--  tools/perf/util/symbol.h | 92
-rw-r--r--  tools/perf/util/thread.c | 250
-rw-r--r--  tools/perf/util/thread.h | 50
-rw-r--r--  tools/perf/util/trace-event-info.c | 26
-rw-r--r--  tools/perf/util/trace-event-parse.c | 573
-rw-r--r--  tools/perf/util/trace-event-perl.c | 598
-rw-r--r--  tools/perf/util/trace-event-perl.h | 51
-rw-r--r--  tools/perf/util/trace-event-read.c | 11
-rw-r--r--  tools/perf/util/trace-event.h | 64
-rw-r--r--  tools/perf/util/types.h | 6
-rw-r--r--  tools/perf/util/util.h | 31
-rw-r--r--  tools/perf/util/values.h | 6
-rw-r--r--  tools/perf/util/wrapper.c | 61
251 files changed, 21092 insertions, 6102 deletions
diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl
index b0756d0fd579..8bca1d5cec09 100644
--- a/Documentation/DocBook/tracepoint.tmpl
+++ b/Documentation/DocBook/tracepoint.tmpl
@@ -86,4 +86,9 @@
 !Iinclude/trace/events/irq.h
 </chapter>
 
+ <chapter id="signal">
+  <title>SIGNAL</title>
+!Iinclude/trace/events/signal.h
+ </chapter>
+
 </book>
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
new file mode 100644
index 000000000000..47aabeebbdf6
--- /dev/null
+++ b/Documentation/trace/kprobetrace.txt
@@ -0,0 +1,149 @@
+Kprobe-based Event Tracing
+==========================
+
+                 Documentation is written by Masami Hiramatsu
+
+
+Overview
+--------
+These events are similar to tracepoint-based events. Instead of tracepoints,
+they are based on kprobes (kprobe and kretprobe), so they can probe wherever
+kprobes can probe (that is, anywhere in a function body except for __kprobes
+functions). Unlike tracepoint-based events, these can be added and removed
+dynamically, on the fly.
+
+To enable this feature, build your kernel with CONFIG_KPROBE_TRACING=y.
+
+Similar to the events tracer, this doesn't need to be activated via
+current_tracer. Instead, add probe points via
+/sys/kernel/debug/tracing/kprobe_events, and enable them via
+/sys/kernel/debug/tracing/events/kprobes/<EVENT>/enabled.
+
+
+Synopsis of kprobe_events
+-------------------------
+  p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS]	: Set a probe
+  r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS]		: Set a return probe
+
+ GRP		: Group name. If omitted, use "kprobes" for it.
+ EVENT		: Event name. If omitted, the event name is generated
+		  based on SYMBOL+offs or MEMADDR.
+ SYMBOL[+offs]	: Symbol+offset where the probe is inserted.
+ MEMADDR	: Address where the probe is inserted.
+
+ FETCHARGS	: Arguments. Each probe can have up to 128 args.
+  %REG		: Fetch register REG
+  @ADDR		: Fetch memory at ADDR (ADDR should be in kernel)
+  @SYM[+|-offs]	: Fetch memory at SYM +|- offs (SYM should be a data symbol)
+  $stackN	: Fetch Nth entry of stack (N >= 0)
+  $stack	: Fetch stack address.
+  $argN		: Fetch function argument. (N >= 0)(*)
+  $retval	: Fetch return value.(**)
+  +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(***)
+  NAME=FETCHARG	: Set NAME as the argument name of FETCHARG.
+
+ (*) $argN may not be correct for asmlinkage'd functions or in the
+     middle of a function body.
+ (**) only for return probe.
+ (***) this is useful for fetching a field of data structures.
+
+
+Per-Probe Event Filtering
+-------------------------
+ The per-probe event filtering feature allows you to set a different filter
+on each probe and to control which arguments will be shown in the trace
+buffer. If an event name is specified right after 'p:' or 'r:' in
+kprobe_events, an event is added under tracing/events/kprobes/<EVENT>; in
+that directory you can see 'id', 'enabled', 'format' and 'filter'.
+
+enabled:
+  You can enable/disable the probe by writing 1 or 0 to it.
+
+format:
+  This shows the format of this probe event.
+
+filter:
+  You can write filtering rules for this event.
+
+id:
+  This shows the id of this probe event.
+
+
+Event Profiling
+---------------
+ You can check the total number of probe hits and probe miss-hits via
+/sys/kernel/debug/tracing/kprobe_profile.
+ The first column is the event name, the second is the number of probe
+hits, and the third is the number of probe miss-hits.
+
+
+Usage examples
+--------------
+To add a probe as a new event, write a new definition to kprobe_events
+as below:
+
+  echo p:myprobe do_sys_open dfd=$arg0 filename=$arg1 flags=$arg2 mode=$arg3 > /sys/kernel/debug/tracing/kprobe_events
+
+ This sets a kprobe at the top of the do_sys_open() function, recording
+the 1st to 4th arguments as the "myprobe" event. As this example shows,
+users can choose more familiar names for each argument.
+
+  echo r:myretprobe do_sys_open $retval >> /sys/kernel/debug/tracing/kprobe_events
+
+ This sets a kretprobe at the return point of the do_sys_open() function,
+recording the return value as the "myretprobe" event.
+ You can see the format of these events via
+/sys/kernel/debug/tracing/events/kprobes/<EVENT>/format.
+
+  cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format
+name: myprobe
+ID: 75
+format:
+	field:unsigned short common_type;	offset:0;	size:2;
+	field:unsigned char common_flags;	offset:2;	size:1;
+	field:unsigned char common_preempt_count;	offset:3;	size:1;
+	field:int common_pid;	offset:4;	size:4;
+	field:int common_tgid;	offset:8;	size:4;
+
+	field: unsigned long ip;	offset:16;	size:8;
+	field: int nargs;	offset:24;	size:4;
+	field: unsigned long dfd;	offset:32;	size:8;
+	field: unsigned long filename;	offset:40;	size:8;
+	field: unsigned long flags;	offset:48;	size:8;
+	field: unsigned long mode;	offset:56;	size:8;
+
+print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->ip, REC->dfd, REC->filename, REC->flags, REC->mode
+
+
+ You can see that the event has 4 arguments, as in the expressions you specified.
+
+  echo > /sys/kernel/debug/tracing/kprobe_events
+
+ This clears all probe points.
+
+ Right after definition, each event is disabled by default. To trace
+these events, you need to enable them:
+
+  echo 1 > /sys/kernel/debug/tracing/events/kprobes/myprobe/enable
+  echo 1 > /sys/kernel/debug/tracing/events/kprobes/myretprobe/enable
+
+ And you can see the traced information via /sys/kernel/debug/tracing/trace.
+
+  cat /sys/kernel/debug/tracing/trace
+# tracer: nop
+#
+#           TASK-PID    CPU#    TIMESTAMP  FUNCTION
+#              | |       |          |         |
+           <...>-1447  [001] 1038282.286875: myprobe: (do_sys_open+0x0/0xd6) dfd=3 filename=7fffd1ec4440 flags=8000 mode=0
+           <...>-1447  [001] 1038282.286878: myretprobe: (sys_openat+0xc/0xe <- do_sys_open) $retval=fffffffffffffffe
+           <...>-1447  [001] 1038282.286885: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=40413c flags=8000 mode=1b6
+           <...>-1447  [001] 1038282.286915: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $retval=3
+           <...>-1447  [001] 1038282.286969: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=4041c6 flags=98800 mode=10
+           <...>-1447  [001] 1038282.286976: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $retval=3
+
+
+ Each line shows when the kernel hits an event, and <- SYMBOL means the
+kernel returns from SYMBOL (e.g. "sys_open+0x1b/0x1d <- do_sys_open" means
+the kernel returns from do_sys_open to sys_open+0x1b).
+
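
The FETCHARGS primitives documented in the new file above compose: NAME=FETCHARG can wrap +|-offs(FETCHARG) to record a dereferenced value under a readable name. A minimal sketch using only the syntax the file documents — the symbol, argument number and zero offset are illustrative assumptions, not taken from the patch:

  # name the first 8 bytes pointed to by do_sys_open()'s 2nd argument
  echo 'p:mydref do_sys_open name0=+0($arg1)' >> /sys/kernel/debug/tracing/kprobe_events
  echo 1 > /sys/kernel/debug/tracing/events/kprobes/mydref/enable
  cat /sys/kernel/debug/tracing/trace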
diff --git a/arch/Kconfig b/arch/Kconfig
index 7f418bbc261a..eef3bbb97075 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -126,4 +126,11 @@ config HAVE_DMA_API_DEBUG
 config HAVE_DEFAULT_NO_SPIN_MUTEXES
 	bool
 
+config HAVE_HW_BREAKPOINT
+	bool
+	depends on HAVE_PERF_EVENTS
+	select ANON_INODES
+	select PERF_EVENTS
+
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 3b1005185390..bf3382f1904d 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -46,7 +46,7 @@ config DEBUG_STACK_USAGE
 
 config HCALL_STATS
 	bool "Hypervisor call instrumentation"
-	depends on PPC_PSERIES && DEBUG_FS
+	depends on PPC_PSERIES && DEBUG_FS && TRACEPOINTS
 	help
 	  Adds code to keep track of the number of hypervisor calls made and
 	  the amount of time spent in hypervisor calls.  Wall time spent in
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index f1889abb89b1..c568329723b8 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -1683,7 +1683,7 @@ CONFIG_HAVE_ARCH_KGDB=y
 CONFIG_DEBUG_STACKOVERFLOW=y
 # CONFIG_DEBUG_STACK_USAGE is not set
 # CONFIG_DEBUG_PAGEALLOC is not set
-CONFIG_HCALL_STATS=y
+# CONFIG_HCALL_STATS is not set
 # CONFIG_CODE_PATCHING_SELFTEST is not set
 # CONFIG_FTR_FIXUP_SELFTEST is not set
 # CONFIG_MSI_BITMAP_SELFTEST is not set
diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h
index 9154e8526732..f0fb4fc1f6e6 100644
--- a/arch/powerpc/include/asm/emulated_ops.h
+++ b/arch/powerpc/include/asm/emulated_ops.h
@@ -19,6 +19,7 @@
 #define _ASM_POWERPC_EMULATED_OPS_H
 
 #include <asm/atomic.h>
+#include <linux/perf_event.h>
 
 
 #ifdef CONFIG_PPC_EMULATED_STATS
@@ -57,7 +58,7 @@ extern u32 ppc_warn_emulated;
 
 extern void ppc_warn_emulated_print(const char *type);
 
-#define PPC_WARN_EMULATED(type)					\
+#define __PPC_WARN_EMULATED(type)				\
 	do {							\
 		atomic_inc(&ppc_emulated.type.val);		\
 		if (ppc_warn_emulated)				\
@@ -66,8 +67,22 @@ extern void ppc_warn_emulated_print(const char *type);
 
 #else /* !CONFIG_PPC_EMULATED_STATS */
 
-#define PPC_WARN_EMULATED(type)		do { } while (0)
+#define __PPC_WARN_EMULATED(type)	do { } while (0)
 
 #endif /* !CONFIG_PPC_EMULATED_STATS */
 
+#define PPC_WARN_EMULATED(type, regs)				\
+	do {							\
+		perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,	\
+			      1, 0, regs, 0);			\
+		__PPC_WARN_EMULATED(type);			\
+	} while (0)
+
+#define PPC_WARN_ALIGNMENT(type, regs)				\
+	do {							\
+		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS,	\
+			      1, 0, regs, regs->dar);		\
+		__PPC_WARN_EMULATED(type);			\
+	} while (0)
+
 #endif /* _ASM_POWERPC_EMULATED_OPS_H */
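
The PPC_WARN_EMULATED()/PPC_WARN_ALIGNMENT() rework above wires powerpc's emulation and alignment-fixup paths into the PERF_COUNT_SW_EMULATION_FAULTS and PERF_COUNT_SW_ALIGNMENT_FAULTS software events. A minimal userspace sketch — not part of this patch, error handling trimmed — for reading one of those counters via perf_event_open(2):

  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>
  #include <sys/syscall.h>
  #include <linux/perf_event.h>

  int main(void)
  {
  	struct perf_event_attr attr;
  	long long count;
  	int fd;

  	memset(&attr, 0, sizeof(attr));
  	attr.type = PERF_TYPE_SOFTWARE;
  	attr.size = sizeof(attr);
  	attr.config = PERF_COUNT_SW_EMULATION_FAULTS;

  	/* pid = 0, cpu = -1: this task, any CPU; no group fd, no flags */
  	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
  	if (fd < 0)
  		return 1;

  	/* ... run the emulation-heavy workload here ... */

  	read(fd, &count, sizeof(count));
  	printf("emulation faults: %lld\n", count);
  	close(fd);
  	return 0;
  }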
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 6251a4b10be7..c27caac47ad1 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -274,6 +274,8 @@ struct hcall_stats {
 	unsigned long	num_calls;	/* number of calls (on this CPU) */
 	unsigned long	tb_total;	/* total wall time (mftb) of calls. */
 	unsigned long	purr_total;	/* total cpu time (PURR) of calls. */
+	unsigned long	tb_start;
+	unsigned long	purr_start;
 };
 #define HCALL_STAT_ARRAY_SIZE	((MAX_HCALL_OPCODE >> 2) + 1)
 
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 6315edc205d8..bc8dd53f718a 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -489,6 +489,8 @@
 #define SPRN_MMCR1	798
 #define SPRN_MMCRA	0x312
 #define   MMCRA_SDSYNC	0x80000000UL /* SDAR synced with SIAR */
+#define   MMCRA_SDAR_DCACHE_MISS 0x40000000UL
+#define   MMCRA_SDAR_ERAT_MISS   0x20000000UL
 #define   MMCRA_SIHV	0x10000000UL /* state of MSR HV when SIAR set */
 #define   MMCRA_SIPR	0x08000000UL /* state of MSR PR when SIAR set */
 #define   MMCRA_SLOT	0x07000000UL /* SLOT bits (37-39) */
diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h
new file mode 100644
index 000000000000..cbe2297d68b6
--- /dev/null
+++ b/arch/powerpc/include/asm/trace.h
@@ -0,0 +1,133 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM powerpc
+
+#if !defined(_TRACE_POWERPC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_POWERPC_H
+
+#include <linux/tracepoint.h>
+
+struct pt_regs;
+
+TRACE_EVENT(irq_entry,
+
+	TP_PROTO(struct pt_regs *regs),
+
+	TP_ARGS(regs),
+
+	TP_STRUCT__entry(
+		__field(struct pt_regs *, regs)
+	),
+
+	TP_fast_assign(
+		__entry->regs = regs;
+	),
+
+	TP_printk("pt_regs=%p", __entry->regs)
+);
+
+TRACE_EVENT(irq_exit,
+
+	TP_PROTO(struct pt_regs *regs),
+
+	TP_ARGS(regs),
+
+	TP_STRUCT__entry(
+		__field(struct pt_regs *, regs)
+	),
+
+	TP_fast_assign(
+		__entry->regs = regs;
+	),
+
+	TP_printk("pt_regs=%p", __entry->regs)
+);
+
+TRACE_EVENT(timer_interrupt_entry,
+
+	TP_PROTO(struct pt_regs *regs),
+
+	TP_ARGS(regs),
+
+	TP_STRUCT__entry(
+		__field(struct pt_regs *, regs)
+	),
+
+	TP_fast_assign(
+		__entry->regs = regs;
+	),
+
+	TP_printk("pt_regs=%p", __entry->regs)
+);
+
+TRACE_EVENT(timer_interrupt_exit,
+
+	TP_PROTO(struct pt_regs *regs),
+
+	TP_ARGS(regs),
+
+	TP_STRUCT__entry(
+		__field(struct pt_regs *, regs)
+	),
+
+	TP_fast_assign(
+		__entry->regs = regs;
+	),
+
+	TP_printk("pt_regs=%p", __entry->regs)
+);
+
+#ifdef CONFIG_PPC_PSERIES
+extern void hcall_tracepoint_regfunc(void);
+extern void hcall_tracepoint_unregfunc(void);
+
+TRACE_EVENT_FN(hcall_entry,
+
+	TP_PROTO(unsigned long opcode, unsigned long *args),
+
+	TP_ARGS(opcode, args),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, opcode)
+	),
+
+	TP_fast_assign(
+		__entry->opcode = opcode;
+	),
+
+	TP_printk("opcode=%lu", __entry->opcode),
+
+	hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
+);
+
+TRACE_EVENT_FN(hcall_exit,
+
+	TP_PROTO(unsigned long opcode, unsigned long retval,
+		unsigned long *retbuf),
+
+	TP_ARGS(opcode, retval, retbuf),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, opcode)
+		__field(unsigned long, retval)
+	),
+
+	TP_fast_assign(
+		__entry->opcode = opcode;
+		__entry->retval = retval;
+	),
+
+	TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval),
+
+	hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
+);
+#endif
+
+#endif /* _TRACE_POWERPC_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH asm
+#define TRACE_INCLUDE_FILE trace
+
+#include <trace/define_trace.h>
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index a5b632e52fae..3839839f83c7 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -732,7 +732,7 @@ int fix_alignment(struct pt_regs *regs)
 
 #ifdef CONFIG_SPE
 	if ((instr >> 26) == 0x4) {
-		PPC_WARN_EMULATED(spe);
+		PPC_WARN_ALIGNMENT(spe, regs);
 		return emulate_spe(regs, reg, instr);
 	}
 #endif
@@ -786,7 +786,7 @@ int fix_alignment(struct pt_regs *regs)
 			flags |= SPLT;
 			nb = 8;
 		}
-		PPC_WARN_EMULATED(vsx);
+		PPC_WARN_ALIGNMENT(vsx, regs);
 		return emulate_vsx(addr, reg, areg, regs, flags, nb);
 	}
 #endif
@@ -794,7 +794,7 @@ int fix_alignment(struct pt_regs *regs)
 	 * the exception of DCBZ which is handled as a special case here
 	 */
 	if (instr == DCBZ) {
-		PPC_WARN_EMULATED(dcbz);
+		PPC_WARN_ALIGNMENT(dcbz, regs);
 		return emulate_dcbz(regs, addr);
 	}
 	if (unlikely(nb == 0))
@@ -804,7 +804,7 @@ int fix_alignment(struct pt_regs *regs)
 	 * function
 	 */
 	if (flags & M) {
-		PPC_WARN_EMULATED(multiple);
+		PPC_WARN_ALIGNMENT(multiple, regs);
 		return emulate_multiple(regs, addr, reg, nb,
 					flags, instr, swiz);
 	}
@@ -825,11 +825,11 @@ int fix_alignment(struct pt_regs *regs)
 
 	/* Special case for 16-byte FP loads and stores */
 	if (nb == 16) {
-		PPC_WARN_EMULATED(fp_pair);
+		PPC_WARN_ALIGNMENT(fp_pair, regs);
 		return emulate_fp_pair(addr, reg, flags);
 	}
 
-	PPC_WARN_EMULATED(unaligned);
+	PPC_WARN_ALIGNMENT(unaligned, regs);
 
 	/* If we are loading, get the data from user space, else
 	 * get it from register values
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 9763267e38b4..bdcb557d470a 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -551,7 +551,7 @@ restore:
 BEGIN_FW_FTR_SECTION
 	ld	r5,SOFTE(r1)
 FW_FTR_SECTION_ELSE
-	b	iseries_check_pending_irqs
+	b	.Liseries_check_pending_irqs
 ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
 2:
 	TRACE_AND_RESTORE_IRQ(r5);
@@ -623,7 +623,7 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
 
 #endif /* CONFIG_PPC_BOOK3E */
 
-iseries_check_pending_irqs:
+.Liseries_check_pending_irqs:
 #ifdef CONFIG_PPC_ISERIES
 	ld	r5,SOFTE(r1)
 	cmpdi	0,r5,0
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 1808876edcc9..c7eb4e0eb86c 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -185,12 +185,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
 	 * prolog code of the PerformanceMonitor one. A little
 	 * trickery is thus necessary
 	 */
+performance_monitor_pSeries_1:
 	. = 0xf00
 	b	performance_monitor_pSeries
 
+altivec_unavailable_pSeries_1:
 	. = 0xf20
 	b	altivec_unavailable_pSeries
 
+vsx_unavailable_pSeries_1:
 	. = 0xf40
 	b	vsx_unavailable_pSeries
 
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index e5d121177984..02a334662cc0 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -70,6 +70,8 @@
 #include <asm/firmware.h>
 #include <asm/lv1call.h>
 #endif
+#define CREATE_TRACE_POINTS
+#include <asm/trace.h>
 
 int __irq_offset_value;
 static int ppc_spurious_interrupts;
@@ -325,6 +327,8 @@ void do_IRQ(struct pt_regs *regs)
 	struct pt_regs *old_regs = set_irq_regs(regs);
 	unsigned int irq;
 
+	trace_irq_entry(regs);
+
 	irq_enter();
 
 	check_stack_overflow();
@@ -348,6 +352,8 @@ void do_IRQ(struct pt_regs *regs)
 		timer_interrupt(regs);
 	}
 #endif
+
+	trace_irq_exit(regs);
 }
 
 void __init init_IRQ(void)
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 87f1663584b0..1eb85fbf53a5 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -1165,7 +1165,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 	 */
 	if (record) {
 		struct perf_sample_data data = {
-			.addr	= 0,
+			.addr	= ~0ULL,
 			.period	= event->hw.last_period,
 		};
 
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
index 0f4c1c73a6ad..199de527d411 100644
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -73,10 +73,6 @@
 #define MMCR1_PMCSEL_MSK	0x7f
 
 /*
- * Bits in MMCRA
- */
-
-/*
  * Layout of constraint bits:
  * 6666555555555544444444443333333333222222222211111111110000000000
  * 3210987654321098765432109876543210987654321098765432109876543210
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
index c351b3a57fbb..98b6a729a9dd 100644
--- a/arch/powerpc/kernel/power5-pmu.c
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -73,10 +73,6 @@
 #define MMCR1_PMCSEL_MSK	0x7f
 
 /*
- * Bits in MMCRA
- */
-
-/*
  * Layout of constraint bits:
  * 6666555555555544444444443333333333222222222211111111110000000000
  * 3210987654321098765432109876543210987654321098765432109876543210
@@ -390,7 +386,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev,
 			       unsigned int hwc[], unsigned long mmcr[])
 {
 	unsigned long mmcr1 = 0;
-	unsigned long mmcra = 0;
+	unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
 	unsigned int pmc, unit, byte, psel;
 	unsigned int ttm, grp;
 	int i, isbus, bit, grsel;
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
index ca399ba5034c..84a607bda8fb 100644
--- a/arch/powerpc/kernel/power6-pmu.c
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -178,7 +178,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev,
 			  unsigned int hwc[], unsigned long mmcr[])
 {
 	unsigned long mmcr1 = 0;
-	unsigned long mmcra = 0;
+	unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
 	int i;
 	unsigned int pmc, ev, b, u, s, psel;
 	unsigned int ttmset = 0;
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
index 28a4daacdc02..852f7b7f6b40 100644
--- a/arch/powerpc/kernel/power7-pmu.c
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -51,10 +51,6 @@
 #define MMCR1_PMCSEL_MSK	0xff
 
 /*
- * Bits in MMCRA
- */
-
-/*
  * Layout of constraint bits:
  * 6666555555555544444444443333333333222222222211111111110000000000
  * 3210987654321098765432109876543210987654321098765432109876543210
@@ -230,7 +226,7 @@ static int power7_compute_mmcr(u64 event[], int n_ev,
 			       unsigned int hwc[], unsigned long mmcr[])
 {
 	unsigned long mmcr1 = 0;
-	unsigned long mmcra = 0;
+	unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
 	unsigned int pmc, unit, combine, l2sel, psel;
 	unsigned int pmc_inuse = 0;
 	int i;
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
index 479574413a93..8eff48e20dba 100644
--- a/arch/powerpc/kernel/ppc970-pmu.c
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -84,10 +84,6 @@ static short mmcr1_adder_bits[8] = {
 };
 
 /*
- * Bits in MMCRA
- */
-
-/*
  * Layout of constraint bits:
  * 6666555555555544444444443333333333222222222211111111110000000000
  * 3210987654321098765432109876543210987654321098765432109876543210
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 4271f7a655a3..845c72ab7357 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -660,6 +660,7 @@ late_initcall(check_cache_coherency);
 
 #ifdef CONFIG_DEBUG_FS
 struct dentry *powerpc_debugfs_root;
+EXPORT_SYMBOL(powerpc_debugfs_root);
 
 static int powerpc_debugfs_init(void)
 {
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index a136a11c490d..36707dec94d7 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -54,6 +54,7 @@
 #include <linux/irq.h>
 #include <linux/delay.h>
 #include <linux/perf_event.h>
+#include <asm/trace.h>
 
 #include <asm/io.h>
 #include <asm/processor.h>
@@ -571,6 +572,8 @@ void timer_interrupt(struct pt_regs * regs)
 	struct clock_event_device *evt = &decrementer->event;
 	u64 now;
 
+	trace_timer_interrupt_entry(regs);
+
 	/* Ensure a positive value is written to the decrementer, or else
 	 * some CPUs will continue to take decrementer exceptions */
 	set_dec(DECREMENTER_MAX);
@@ -590,6 +593,7 @@ void timer_interrupt(struct pt_regs * regs)
 		now = decrementer->next_tb - now;
 		if (now <= DECREMENTER_MAX)
 			set_dec((int)now);
+		trace_timer_interrupt_exit(regs);
 		return;
 	}
 	old_regs = set_irq_regs(regs);
@@ -620,6 +624,8 @@ void timer_interrupt(struct pt_regs * regs)
 
 	irq_exit();
 	set_irq_regs(old_regs);
+
+	trace_timer_interrupt_exit(regs);
 }
 
 void wakeup_decrementer(void)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 6f0ae1a9bfae..9d1f9354d6ca 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -759,7 +759,7 @@ static int emulate_instruction(struct pt_regs *regs)
 
 	/* Emulate the mfspr rD, PVR. */
 	if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) {
-		PPC_WARN_EMULATED(mfpvr);
+		PPC_WARN_EMULATED(mfpvr, regs);
 		rd = (instword >> 21) & 0x1f;
 		regs->gpr[rd] = mfspr(SPRN_PVR);
 		return 0;
@@ -767,7 +767,7 @@ static int emulate_instruction(struct pt_regs *regs)
 
 	/* Emulating the dcba insn is just a no-op.  */
 	if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) {
-		PPC_WARN_EMULATED(dcba);
+		PPC_WARN_EMULATED(dcba, regs);
 		return 0;
 	}
 
@@ -776,7 +776,7 @@ static int emulate_instruction(struct pt_regs *regs)
 		int shift = (instword >> 21) & 0x1c;
 		unsigned long msk = 0xf0000000UL >> shift;
 
-		PPC_WARN_EMULATED(mcrxr);
+		PPC_WARN_EMULATED(mcrxr, regs);
 		regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk);
 		regs->xer &= ~0xf0000000UL;
 		return 0;
@@ -784,19 +784,19 @@ static int emulate_instruction(struct pt_regs *regs)
 
 	/* Emulate load/store string insn. */
 	if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) {
-		PPC_WARN_EMULATED(string);
+		PPC_WARN_EMULATED(string, regs);
 		return emulate_string_inst(regs, instword);
 	}
 
 	/* Emulate the popcntb (Population Count Bytes) instruction. */
 	if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) {
-		PPC_WARN_EMULATED(popcntb);
+		PPC_WARN_EMULATED(popcntb, regs);
 		return emulate_popcntb_inst(regs, instword);
 	}
 
 	/* Emulate isel (Integer Select) instruction */
 	if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) {
-		PPC_WARN_EMULATED(isel);
+		PPC_WARN_EMULATED(isel, regs);
 		return emulate_isel(regs, instword);
 	}
 
@@ -995,7 +995,7 @@ void SoftwareEmulation(struct pt_regs *regs)
 #ifdef CONFIG_MATH_EMULATION
 	errcode = do_mathemu(regs);
 	if (errcode >= 0)
-		PPC_WARN_EMULATED(math);
+		PPC_WARN_EMULATED(math, regs);
 
 	switch (errcode) {
 	case 0:
@@ -1018,7 +1018,7 @@ void SoftwareEmulation(struct pt_regs *regs)
 #elif defined(CONFIG_8XX_MINIMAL_FPEMU)
 	errcode = Soft_emulate_8xx(regs);
 	if (errcode >= 0)
-		PPC_WARN_EMULATED(8xx);
+		PPC_WARN_EMULATED(8xx, regs);
 
 	switch (errcode) {
 	case 0:
@@ -1129,7 +1129,7 @@ void altivec_assist_exception(struct pt_regs *regs)
 
 	flush_altivec_to_thread(current);
 
-	PPC_WARN_EMULATED(altivec);
+	PPC_WARN_EMULATED(altivec, regs);
 	err = emulate_altivec(regs);
 	if (err == 0) {
 		regs->nip += 4;		/* skip emulated instruction */
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index 75f3267fdc30..e68beac0a171 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -26,11 +26,11 @@ BEGIN_FTR_SECTION
 	srd	r8,r5,r11
 
 	mtctr	r8
-setup:
+.Lsetup:
 	dcbt	r9,r4
 	dcbz	r9,r3
 	add	r9,r9,r12
-	bdnz	setup
+	bdnz	.Lsetup
 END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ)
 	addi	r3,r3,-8
 	srdi	r8,r5,7		/* page is copied in 128 byte strides */
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index c1427b3634ec..383a5d0e9818 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -14,68 +14,94 @@
 
 #define STK_PARM(i)     (48 + ((i)-3)*8)
 
-#ifdef CONFIG_HCALL_STATS
+#ifdef CONFIG_TRACEPOINTS
+
+	.section	".toc","aw"
+
+	.globl hcall_tracepoint_refcount
+hcall_tracepoint_refcount:
+	.llong	0
+
+	.section	".text"
+
 /*
  * precall must preserve all registers.  use unused STK_PARM()
- * areas to save snapshots and opcode.
+ * areas to save snapshots and opcode. We branch around this
+ * in early init (eg when populating the MMU hashtable) by using an
+ * unconditional cpu feature.
  */
-#define HCALL_INST_PRECALL					\
-	std	r3,STK_PARM(r3)(r1);	/* save opcode */	\
-	mftb	r0;			/* get timebase and */	\
-	std	r0,STK_PARM(r5)(r1);	/* save for later */	\
+#define HCALL_INST_PRECALL(FIRST_REG)				\
 BEGIN_FTR_SECTION;						\
-	mfspr	r0,SPRN_PURR;		/* get PURR and */	\
-	std	r0,STK_PARM(r6)(r1);	/* save for later */	\
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);
-
+	b	1f;						\
+END_FTR_SECTION(0, 1);						\
+	ld	r12,hcall_tracepoint_refcount@toc(r2);		\
+	cmpdi	r12,0;						\
+	beq+	1f;						\
+	mflr	r0;						\
+	std	r3,STK_PARM(r3)(r1);				\
+	std	r4,STK_PARM(r4)(r1);				\
+	std	r5,STK_PARM(r5)(r1);				\
+	std	r6,STK_PARM(r6)(r1);				\
+	std	r7,STK_PARM(r7)(r1);				\
+	std	r8,STK_PARM(r8)(r1);				\
+	std	r9,STK_PARM(r9)(r1);				\
+	std	r10,STK_PARM(r10)(r1);				\
+	std	r0,16(r1);					\
+	addi	r4,r1,STK_PARM(FIRST_REG);			\
+	stdu	r1,-STACK_FRAME_OVERHEAD(r1);			\
+	bl	.__trace_hcall_entry;				\
+	addi	r1,r1,STACK_FRAME_OVERHEAD;			\
+	ld	r0,16(r1);					\
+	ld	r3,STK_PARM(r3)(r1);				\
+	ld	r4,STK_PARM(r4)(r1);				\
+	ld	r5,STK_PARM(r5)(r1);				\
+	ld	r6,STK_PARM(r6)(r1);				\
+	ld	r7,STK_PARM(r7)(r1);				\
+	ld	r8,STK_PARM(r8)(r1);				\
+	ld	r9,STK_PARM(r9)(r1);				\
+	ld	r10,STK_PARM(r10)(r1);				\
+	mtlr	r0;						\
+1:
+
 /*
  * postcall is performed immediately before function return which
  * allows liberal use of volatile registers.  We branch around this
  * in early init (eg when populating the MMU hashtable) by using an
  * unconditional cpu feature.
  */
-#define HCALL_INST_POSTCALL					\
+#define __HCALL_INST_POSTCALL					\
 BEGIN_FTR_SECTION;						\
 	b	1f;						\
 END_FTR_SECTION(0, 1);						\
-	ld	r4,STK_PARM(r3)(r1);	/* validate opcode */	\
-	cmpldi	cr7,r4,MAX_HCALL_OPCODE;			\
-	bgt-	cr7,1f;						\
-								\
-	/* get time and PURR snapshots after hcall */		\
-	mftb	r7;			/* timebase after */	\
-BEGIN_FTR_SECTION;						\
-	mfspr	r8,SPRN_PURR;		/* PURR after */	\
-	ld	r6,STK_PARM(r6)(r1);	/* PURR before */	\
-	subf	r6,r6,r8;		/* delta */		\
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
-	ld	r5,STK_PARM(r5)(r1);	/* timebase before */	\
-	subf	r5,r5,r7;		/* time delta */	\
-								\
-	/* calculate address of stat structure r4 = opcode */	\
-	srdi	r4,r4,2;		/* index into array */	\
-	mulli	r4,r4,HCALL_STAT_SIZE;				\
-	LOAD_REG_ADDR(r7, per_cpu__hcall_stats);		\
-	add	r4,r4,r7;					\
-	ld	r7,PACA_DATA_OFFSET(r13); /* per cpu offset */	\
-	add	r4,r4,r7;					\
-								\
-	/* update stats	*/					\
-	ld	r7,HCALL_STAT_CALLS(r4); /* count */		\
-	addi	r7,r7,1;					\
-	std	r7,HCALL_STAT_CALLS(r4);			\
-	ld	r7,HCALL_STAT_TB(r4);	/* timebase */		\
-	add	r7,r7,r5;					\
-	std	r7,HCALL_STAT_TB(r4);				\
-BEGIN_FTR_SECTION;						\
-	ld	r7,HCALL_STAT_PURR(r4);	/* PURR */		\
-	add	r7,r7,r6;					\
-	std	r7,HCALL_STAT_PURR(r4);				\
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
+	ld	r12,hcall_tracepoint_refcount@toc(r2);		\
+	cmpdi	r12,0;						\
+	beq+	1f;						\
+	mflr	r0;						\
+	ld	r6,STK_PARM(r3)(r1);				\
+	std	r3,STK_PARM(r3)(r1);				\
+	mr	r4,r3;						\
+	mr	r3,r6;						\
+	std	r0,16(r1);					\
+	stdu	r1,-STACK_FRAME_OVERHEAD(r1);			\
+	bl	.__trace_hcall_exit;				\
+	addi	r1,r1,STACK_FRAME_OVERHEAD;			\
+	ld	r0,16(r1);					\
+	ld	r3,STK_PARM(r3)(r1);				\
+	mtlr	r0;						\
 1:
+
+#define HCALL_INST_POSTCALL_NORETS				\
+	li	r5,0;						\
+	__HCALL_INST_POSTCALL
+
+#define HCALL_INST_POSTCALL(BUFREG)				\
+	mr	r5,BUFREG;					\
+	__HCALL_INST_POSTCALL
+
 #else
-#define HCALL_INST_PRECALL
-#define HCALL_INST_POSTCALL
+#define HCALL_INST_PRECALL(FIRST_ARG)
+#define HCALL_INST_POSTCALL_NORETS
+#define HCALL_INST_POSTCALL(BUFREG)
 #endif
 
 	.text
@@ -86,11 +112,11 @@ _GLOBAL(plpar_hcall_norets)
 	mfcr	r0
 	stw	r0,8(r1)
 
-	HCALL_INST_PRECALL
+	HCALL_INST_PRECALL(r4)
 
 	HVSC				/* invoke the hypervisor */
 
-	HCALL_INST_POSTCALL
+	HCALL_INST_POSTCALL_NORETS
 
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
@@ -102,7 +128,7 @@ _GLOBAL(plpar_hcall)
 	mfcr	r0
 	stw	r0,8(r1)
 
-	HCALL_INST_PRECALL
+	HCALL_INST_PRECALL(r5)
 
 	std	r4,STK_PARM(r4)(r1)	/* Save ret buffer */
 
@@ -121,7 +147,7 @@ _GLOBAL(plpar_hcall)
 	std	r6, 16(r12)
 	std	r7, 24(r12)
 
-	HCALL_INST_POSTCALL
+	HCALL_INST_POSTCALL(r12)
 
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
@@ -168,7 +194,7 @@ _GLOBAL(plpar_hcall9)
 	mfcr	r0
 	stw	r0,8(r1)
 
-	HCALL_INST_PRECALL
+	HCALL_INST_PRECALL(r5)
 
 	std	r4,STK_PARM(r4)(r1)	/* Save ret buffer */
 
@@ -196,7 +222,7 @@ _GLOBAL(plpar_hcall9)
 	std	r11,56(r12)
 	std	r0, 64(r12)
 
-	HCALL_INST_POSTCALL
+	HCALL_INST_POSTCALL(r12)
 
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
index 3631a4f277eb..2f58c71b7259 100644
--- a/arch/powerpc/platforms/pseries/hvCall_inst.c
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -26,6 +26,7 @@
 #include <asm/hvcall.h>
 #include <asm/firmware.h>
 #include <asm/cputable.h>
+#include <asm/trace.h>
 
 DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
 
@@ -100,6 +101,35 @@ static const struct file_operations hcall_inst_seq_fops = {
 #define HCALL_ROOT_DIR		"hcall_inst"
 #define CPU_NAME_BUF_SIZE	32
 
+
+static void probe_hcall_entry(unsigned long opcode, unsigned long *args)
+{
+	struct hcall_stats *h;
+
+	if (opcode > MAX_HCALL_OPCODE)
+		return;
+
+	h = &get_cpu_var(hcall_stats)[opcode / 4];
+	h->tb_start = mftb();
+	h->purr_start = mfspr(SPRN_PURR);
+}
+
+static void probe_hcall_exit(unsigned long opcode, unsigned long retval,
+			     unsigned long *retbuf)
+{
+	struct hcall_stats *h;
+
+	if (opcode > MAX_HCALL_OPCODE)
+		return;
+
+	h = &__get_cpu_var(hcall_stats)[opcode / 4];
+	h->num_calls++;
+	h->tb_total = mftb() - h->tb_start;
+	h->purr_total = mfspr(SPRN_PURR) - h->purr_start;
+
+	put_cpu_var(hcall_stats);
+}
+
 static int __init hcall_inst_init(void)
 {
 	struct dentry *hcall_root;
@@ -110,6 +140,14 @@ static int __init hcall_inst_init(void)
 	if (!firmware_has_feature(FW_FEATURE_LPAR))
 		return 0;
 
+	if (register_trace_hcall_entry(probe_hcall_entry))
+		return -EINVAL;
+
+	if (register_trace_hcall_exit(probe_hcall_exit)) {
+		unregister_trace_hcall_entry(probe_hcall_entry);
+		return -EINVAL;
+	}
+
 	hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
 	if (!hcall_root)
 		return -ENOMEM;
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 903eb9eec687..0707653612ba 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -39,6 +39,7 @@
 #include <asm/cputable.h>
 #include <asm/udbg.h>
 #include <asm/smp.h>
+#include <asm/trace.h>
 
 #include "plpar_wrappers.h"
 #include "pseries.h"
@@ -661,3 +662,35 @@ void arch_free_page(struct page *page, int order)
 EXPORT_SYMBOL(arch_free_page);
 
 #endif
+
+#ifdef CONFIG_TRACEPOINTS
+/*
+ * We optimise our hcall path by placing hcall_tracepoint_refcount
+ * directly in the TOC so we can check if the hcall tracepoints are
+ * enabled via a single load.
+ */
+
+/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
+extern long hcall_tracepoint_refcount;
+
+void hcall_tracepoint_regfunc(void)
+{
+	hcall_tracepoint_refcount++;
+}
+
+void hcall_tracepoint_unregfunc(void)
+{
+	hcall_tracepoint_refcount--;
+}
+
+void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
+{
+	trace_hcall_entry(opcode, args);
+}
+
+void __trace_hcall_exit(long opcode, unsigned long retval,
+			unsigned long *retbuf)
+{
+	trace_hcall_exit(opcode, retval, retbuf);
+}
+#endif
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 72ace9515a07..178084b4377c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -49,6 +49,7 @@ config X86
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
+	select HAVE_HW_BREAKPOINT
 	select HAVE_ARCH_KMEMCHECK
 
 config OUTPUT_FORMAT
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index d105f29bb6bb..7d0b681a132b 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -186,6 +186,15 @@ config X86_DS_SELFTEST
 config HAVE_MMIOTRACE_SUPPORT
 	def_bool y
 
+config X86_DECODER_SELFTEST
+	bool "x86 instruction decoder selftest"
+	depends on DEBUG_KERNEL
+	---help---
+	 Perform x86 instruction decoder selftests at build time.
+	 This option is useful for checking the sanity of x86 instruction
+	 decoder code.
+	 If unsure, say "N".
+
 #
 # IO delay types:
 #
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index d2d24c9ee64d..78b32be55e9e 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -155,6 +155,9 @@ all: bzImage
 KBUILD_IMAGE := $(boot)/bzImage
 
 bzImage: vmlinux
+ifeq ($(CONFIG_X86_DECODER_SELFTEST),y)
+	$(Q)$(MAKE) $(build)=arch/x86/tools posttest
+endif
 	$(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
 	$(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot
 	$(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 4a8e80cdcfa5..9f828f87ca35 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -10,6 +10,7 @@ header-y += ptrace-abi.h
 header-y += sigcontext32.h
 header-y += ucontext.h
 header-y += processor-flags.h
+header-y += hw_breakpoint.h
 
 unifdef-y += e820.h
 unifdef-y += ist.h
diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h
index bb70e397aa84..7a15588e45d4 100644
--- a/arch/x86/include/asm/a.out-core.h
+++ b/arch/x86/include/asm/a.out-core.h
@@ -17,6 +17,7 @@
 
 #include <linux/user.h>
 #include <linux/elfcore.h>
+#include <asm/debugreg.h>
 
 /*
  * fill in the user structure for an a.out core dump
@@ -32,14 +33,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
 			>> PAGE_SHIFT;
 	dump->u_dsize -= dump->u_tsize;
 	dump->u_ssize = 0;
-	dump->u_debugreg[0] = current->thread.debugreg0;
-	dump->u_debugreg[1] = current->thread.debugreg1;
-	dump->u_debugreg[2] = current->thread.debugreg2;
-	dump->u_debugreg[3] = current->thread.debugreg3;
-	dump->u_debugreg[4] = 0;
-	dump->u_debugreg[5] = 0;
-	dump->u_debugreg[6] = current->thread.debugreg6;
-	dump->u_debugreg[7] = current->thread.debugreg7;
+	aout_dump_debugregs(dump);
 
 	if (dump->start_stack < TASK_SIZE)
 		dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack))
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 3ea6f37be9e2..8240f76b531e 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -18,6 +18,7 @@
 #define DR_TRAP1	(0x2)		/* db1 */
 #define DR_TRAP2	(0x4)		/* db2 */
 #define DR_TRAP3	(0x8)		/* db3 */
+#define DR_TRAP_BITS	(DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
 
 #define DR_STEP		(0x4000)	/* single-step */
 #define DR_SWITCH	(0x8000)	/* task switch */
@@ -49,6 +50,8 @@
 
 #define DR_LOCAL_ENABLE_SHIFT 0    /* Extra shift to the local enable bit */
 #define DR_GLOBAL_ENABLE_SHIFT 1   /* Extra shift to the global enable bit */
+#define DR_LOCAL_ENABLE (0x1)      /* Local enable for reg 0 */
+#define DR_GLOBAL_ENABLE (0x2)     /* Global enable for reg 0 */
 #define DR_ENABLE_SIZE 2           /* 2 enable bits per register */
 
 #define DR_LOCAL_ENABLE_MASK (0x55)  /* Set local bits for all 4 regs */
@@ -67,4 +70,34 @@
 #define DR_LOCAL_SLOWDOWN (0x100)   /* Local slow the pipeline */
 #define DR_GLOBAL_SLOWDOWN (0x200)  /* Global slow the pipeline */
 
+/*
+ * HW breakpoint additions
+ */
+#ifdef __KERNEL__
+
+DECLARE_PER_CPU(unsigned long, cpu_dr7);
+
+static inline void hw_breakpoint_disable(void)
+{
+	/* Zero the control register for HW Breakpoint */
+	set_debugreg(0UL, 7);
+
+	/* Zero-out the individual HW breakpoint address registers */
+	set_debugreg(0UL, 0);
+	set_debugreg(0UL, 1);
+	set_debugreg(0UL, 2);
+	set_debugreg(0UL, 3);
+}
+
+static inline int hw_breakpoint_active(void)
+{
+	return __get_cpu_var(cpu_dr7) & DR_GLOBAL_ENABLE_MASK;
+}
+
+extern void aout_dump_debugregs(struct user *dump);
+
+extern void hw_breakpoint_restore(void);
+
+#endif /* __KERNEL__ */
+
 #endif /* _ASM_X86_DEBUGREG_H */
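
The new DR_LOCAL_ENABLE/DR_GLOBAL_ENABLE constants describe breakpoint 0; the enable pair for breakpoint n sits n * DR_ENABLE_SIZE bits higher. An illustrative helper (not part of the patch) showing how the per-register bit is derived:

	/* Illustration: DR7 global-enable bit for hardware breakpoint 'n'.
	 * DR_GLOBAL_ENABLE (0x2) shifted by n * DR_ENABLE_SIZE lands on
	 * bit 2n+1, matching DR_GLOBAL_ENABLE_MASK over all four registers. */
	static inline unsigned long dr7_global_enable(int n)
	{
		return (unsigned long)DR_GLOBAL_ENABLE << (n * DR_ENABLE_SIZE);
	}
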
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 82e3e8f01043..108eb6fd1ae7 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -20,11 +20,11 @@ typedef struct {
 	unsigned int irq_call_count;
 	unsigned int irq_tlb_count;
 #endif
-#ifdef CONFIG_X86_MCE
+#ifdef CONFIG_X86_THERMAL_VECTOR
 	unsigned int irq_thermal_count;
-# ifdef CONFIG_X86_MCE_THRESHOLD
+#endif
+#ifdef CONFIG_X86_MCE_THRESHOLD
 	unsigned int irq_threshold_count;
-# endif
 #endif
 } ____cacheline_aligned irq_cpustat_t;
 
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
new file mode 100644
index 000000000000..0675a7c4c20e
--- /dev/null
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -0,0 +1,73 @@
+#ifndef _I386_HW_BREAKPOINT_H
+#define _I386_HW_BREAKPOINT_H
+
+#ifdef __KERNEL__
+#define __ARCH_HW_BREAKPOINT_H
+
+/*
+ * The name should probably be something dealt in
+ * a higher level. While dealing with the user
+ * (display/resolving)
+ */
+struct arch_hw_breakpoint {
+	char		*name; /* Contains name of the symbol to set bkpt */
+	unsigned long	address;
+	u8		len;
+	u8		type;
+};
+
+#include <linux/kdebug.h>
+#include <linux/percpu.h>
+#include <linux/list.h>
+
+/* Available HW breakpoint length encodings */
+#define X86_BREAKPOINT_LEN_1		0x40
+#define X86_BREAKPOINT_LEN_2		0x44
+#define X86_BREAKPOINT_LEN_4		0x4c
+#define X86_BREAKPOINT_LEN_EXECUTE	0x40
+
+#ifdef CONFIG_X86_64
+#define X86_BREAKPOINT_LEN_8		0x48
+#endif
+
+/* Available HW breakpoint type encodings */
+
+/* trigger on instruction execute */
+#define X86_BREAKPOINT_EXECUTE	0x80
+/* trigger on memory write */
+#define X86_BREAKPOINT_WRITE	0x81
+/* trigger on memory read or write */
+#define X86_BREAKPOINT_RW	0x83
+
+/* Total number of available HW breakpoint registers */
+#define HBP_NUM 4
+
+struct perf_event;
+struct pmu;
+
+extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
+extern int arch_validate_hwbkpt_settings(struct perf_event *bp,
+					 struct task_struct *tsk);
+extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
+					   unsigned long val, void *data);
+
+
+int arch_install_hw_breakpoint(struct perf_event *bp);
+void arch_uninstall_hw_breakpoint(struct perf_event *bp);
+void hw_breakpoint_pmu_read(struct perf_event *bp);
+void hw_breakpoint_pmu_unthrottle(struct perf_event *bp);
+
+extern void
+arch_fill_perf_breakpoint(struct perf_event *bp);
+
+unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type);
+int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type);
+
+extern int arch_bp_generic_fields(int x86_len, int x86_type,
+				  int *gen_len, int *gen_type);
+
+extern struct pmu perf_ops_bp;
+
+#endif /* __KERNEL__ */
+#endif /* _I386_HW_BREAKPOINT_H */
+
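
The X86_BREAKPOINT_LEN_* and X86_BREAKPOINT_* values above carry the DR7 len/type nibble in their low bits, which is what the declared encode_dr7()/decode_dr7() helpers pack into the control register. A rough sketch of the packing under the standard DR7 layout (4 control bits per breakpoint starting at bit 16); the real implementation lives in arch/x86/kernel/hw_breakpoint.c, so this is illustrative only:

	/* Sketch: pack the len/type nibble for breakpoint 'drnum'.
	 * The enable bits from <asm/debugreg.h> would be OR'ed in separately. */
	unsigned long encode_dr7_sketch(int drnum, unsigned int len, unsigned int type)
	{
		return ((len | type) & 0xf) << (16 + drnum * 4);
	}
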
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h
new file mode 100644
index 000000000000..205b063e3e32
--- /dev/null
+++ b/arch/x86/include/asm/inat.h
@@ -0,0 +1,220 @@
+#ifndef _ASM_X86_INAT_H
+#define _ASM_X86_INAT_H
+/*
+ * x86 instruction attributes
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+#include <asm/inat_types.h>
+
+/*
+ * Internal bits. Don't use bitmasks directly, because these bits are
+ * unstable. You should use checking functions.
+ */
+
+#define INAT_OPCODE_TABLE_SIZE 256
+#define INAT_GROUP_TABLE_SIZE 8
+
+/* Legacy last prefixes */
+#define INAT_PFX_OPNDSZ	1	/* 0x66 */ /* LPFX1 */
+#define INAT_PFX_REPE	2	/* 0xF3 */ /* LPFX2 */
+#define INAT_PFX_REPNE	3	/* 0xF2 */ /* LPFX3 */
+/* Other Legacy prefixes */
+#define INAT_PFX_LOCK	4	/* 0xF0 */
+#define INAT_PFX_CS	5	/* 0x2E */
+#define INAT_PFX_DS	6	/* 0x3E */
+#define INAT_PFX_ES	7	/* 0x26 */
+#define INAT_PFX_FS	8	/* 0x64 */
+#define INAT_PFX_GS	9	/* 0x65 */
+#define INAT_PFX_SS	10	/* 0x36 */
+#define INAT_PFX_ADDRSZ	11	/* 0x67 */
+/* x86-64 REX prefix */
+#define INAT_PFX_REX	12	/* 0x4X */
+/* AVX VEX prefixes */
+#define INAT_PFX_VEX2	13	/* 2-bytes VEX prefix */
+#define INAT_PFX_VEX3	14	/* 3-bytes VEX prefix */
+
+#define INAT_LSTPFX_MAX	3
+#define INAT_LGCPFX_MAX	11
+
+/* Immediate size */
+#define INAT_IMM_BYTE		1
+#define INAT_IMM_WORD		2
+#define INAT_IMM_DWORD		3
+#define INAT_IMM_QWORD		4
+#define INAT_IMM_PTR		5
+#define INAT_IMM_VWORD32	6
+#define INAT_IMM_VWORD		7
+
+/* Legacy prefix */
+#define INAT_PFX_OFFS	0
+#define INAT_PFX_BITS	4
+#define INAT_PFX_MAX	((1 << INAT_PFX_BITS) - 1)
+#define INAT_PFX_MASK	(INAT_PFX_MAX << INAT_PFX_OFFS)
+/* Escape opcodes */
+#define INAT_ESC_OFFS	(INAT_PFX_OFFS + INAT_PFX_BITS)
+#define INAT_ESC_BITS	2
+#define INAT_ESC_MAX	((1 << INAT_ESC_BITS) - 1)
+#define INAT_ESC_MASK	(INAT_ESC_MAX << INAT_ESC_OFFS)
+/* Group opcodes (1-16) */
+#define INAT_GRP_OFFS	(INAT_ESC_OFFS + INAT_ESC_BITS)
+#define INAT_GRP_BITS	5
+#define INAT_GRP_MAX	((1 << INAT_GRP_BITS) - 1)
+#define INAT_GRP_MASK	(INAT_GRP_MAX << INAT_GRP_OFFS)
+/* Immediates */
+#define INAT_IMM_OFFS	(INAT_GRP_OFFS + INAT_GRP_BITS)
+#define INAT_IMM_BITS	3
+#define INAT_IMM_MASK	(((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS)
+/* Flags */
+#define INAT_FLAG_OFFS	(INAT_IMM_OFFS + INAT_IMM_BITS)
+#define INAT_MODRM	(1 << (INAT_FLAG_OFFS))
+#define INAT_FORCE64	(1 << (INAT_FLAG_OFFS + 1))
+#define INAT_SCNDIMM	(1 << (INAT_FLAG_OFFS + 2))
+#define INAT_MOFFSET	(1 << (INAT_FLAG_OFFS + 3))
+#define INAT_VARIANT	(1 << (INAT_FLAG_OFFS + 4))
+#define INAT_VEXOK	(1 << (INAT_FLAG_OFFS + 5))
+#define INAT_VEXONLY	(1 << (INAT_FLAG_OFFS + 6))
+/* Attribute making macros for attribute tables */
+#define INAT_MAKE_PREFIX(pfx)	(pfx << INAT_PFX_OFFS)
+#define INAT_MAKE_ESCAPE(esc)	(esc << INAT_ESC_OFFS)
+#define INAT_MAKE_GROUP(grp)	((grp << INAT_GRP_OFFS) | INAT_MODRM)
+#define INAT_MAKE_IMM(imm)	(imm << INAT_IMM_OFFS)
+
+/* Attribute search APIs */
+extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
+extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode,
+					     insn_byte_t last_pfx,
+					     insn_attr_t esc_attr);
+extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
+					    insn_byte_t last_pfx,
+					    insn_attr_t esc_attr);
+extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode,
+					  insn_byte_t vex_m,
+					  insn_byte_t vex_pp);
+
+/* Attribute checking functions */
+static inline int inat_is_legacy_prefix(insn_attr_t attr)
+{
+	attr &= INAT_PFX_MASK;
+	return attr && attr <= INAT_LGCPFX_MAX;
+}
+
+static inline int inat_is_address_size_prefix(insn_attr_t attr)
+{
+	return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ;
+}
+
+static inline int inat_is_operand_size_prefix(insn_attr_t attr)
+{
+	return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ;
+}
+
+static inline int inat_is_rex_prefix(insn_attr_t attr)
+{
+	return (attr & INAT_PFX_MASK) == INAT_PFX_REX;
+}
+
+static inline int inat_last_prefix_id(insn_attr_t attr)
+{
+	if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX)
+		return 0;
+	else
+		return attr & INAT_PFX_MASK;
+}
+
+static inline int inat_is_vex_prefix(insn_attr_t attr)
+{
+	attr &= INAT_PFX_MASK;
+	return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3;
+}
+
+static inline int inat_is_vex3_prefix(insn_attr_t attr)
+{
+	return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3;
+}
+
+static inline int inat_is_escape(insn_attr_t attr)
+{
+	return attr & INAT_ESC_MASK;
+}
+
+static inline int inat_escape_id(insn_attr_t attr)
+{
+	return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS;
+}
+
+static inline int inat_is_group(insn_attr_t attr)
+{
+	return attr & INAT_GRP_MASK;
+}
+
+static inline int inat_group_id(insn_attr_t attr)
+{
+	return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS;
+}
+
+static inline int inat_group_common_attribute(insn_attr_t attr)
+{
+	return attr & ~INAT_GRP_MASK;
+}
+
+static inline int inat_has_immediate(insn_attr_t attr)
+{
+	return attr & INAT_IMM_MASK;
+}
+
+static inline int inat_immediate_size(insn_attr_t attr)
+{
+	return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS;
+}
+
+static inline int inat_has_modrm(insn_attr_t attr)
+{
+	return attr & INAT_MODRM;
+}
+
+static inline int inat_is_force64(insn_attr_t attr)
+{
+	return attr & INAT_FORCE64;
+}
+
+static inline int inat_has_second_immediate(insn_attr_t attr)
+{
+	return attr & INAT_SCNDIMM;
+}
+
+static inline int inat_has_moffset(insn_attr_t attr)
+{
+	return attr & INAT_MOFFSET;
+}
+
+static inline int inat_has_variant(insn_attr_t attr)
+{
+	return attr & INAT_VARIANT;
+}
+
+static inline int inat_accept_vex(insn_attr_t attr)
+{
+	return attr & INAT_VEXOK;
+}
+
+static inline int inat_must_vex(insn_attr_t attr)
+{
+	return attr & INAT_VEXONLY;
+}
+#endif
diff --git a/arch/x86/include/asm/inat_types.h b/arch/x86/include/asm/inat_types.h
new file mode 100644
index 000000000000..cb3c20ce39cf
--- /dev/null
+++ b/arch/x86/include/asm/inat_types.h
@@ -0,0 +1,29 @@
+#ifndef _ASM_X86_INAT_TYPES_H
+#define _ASM_X86_INAT_TYPES_H
+/*
+ * x86 instruction attributes
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+/* Instruction attributes */
+typedef unsigned int insn_attr_t;
+typedef unsigned char insn_byte_t;
+typedef signed int insn_value_t;
+
+#endif
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
new file mode 100644
index 000000000000..96c2e0ad04ca
--- /dev/null
+++ b/arch/x86/include/asm/insn.h
@@ -0,0 +1,184 @@
+#ifndef _ASM_X86_INSN_H
+#define _ASM_X86_INSN_H
+/*
+ * x86 instruction analysis
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ */
+
+/* insn_attr_t is defined in inat.h */
+#include <asm/inat.h>
+
+struct insn_field {
+	union {
+		insn_value_t value;
+		insn_byte_t bytes[4];
+	};
+	/* !0 if we've run insn_get_xxx() for this field */
+	unsigned char got;
+	unsigned char nbytes;
+};
+
+struct insn {
+	struct insn_field prefixes;	/*
+					 * Prefixes
+					 * prefixes.bytes[3]: last prefix
+					 */
+	struct insn_field rex_prefix;	/* REX prefix */
+	struct insn_field vex_prefix;	/* VEX prefix */
+	struct insn_field opcode;	/*
+					 * opcode.bytes[0]: opcode1
+					 * opcode.bytes[1]: opcode2
+					 * opcode.bytes[2]: opcode3
+					 */
+	struct insn_field modrm;
+	struct insn_field sib;
+	struct insn_field displacement;
+	union {
+		struct insn_field immediate;
+		struct insn_field moffset1;	/* for 64bit MOV */
+		struct insn_field immediate1;	/* for 64bit imm or off16/32 */
+	};
+	union {
+		struct insn_field moffset2;	/* for 64bit MOV */
+		struct insn_field immediate2;	/* for 64bit imm or seg16 */
+	};
+
+	insn_attr_t attr;
+	unsigned char opnd_bytes;
+	unsigned char addr_bytes;
+	unsigned char length;
+	unsigned char x86_64;
+
+	const insn_byte_t *kaddr;	/* kernel address of insn to analyze */
+	const insn_byte_t *next_byte;
+};
+
+#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
+#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
+#define X86_MODRM_RM(modrm) ((modrm) & 0x07)
+
+#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6)
+#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3)
+#define X86_SIB_BASE(sib) ((sib) & 0x07)
+
+#define X86_REX_W(rex) ((rex) & 8)
+#define X86_REX_R(rex) ((rex) & 4)
+#define X86_REX_X(rex) ((rex) & 2)
+#define X86_REX_B(rex) ((rex) & 1)
+
+/* VEX bit flags */
+#define X86_VEX_W(vex)	((vex) & 0x80)	/* VEX3 Byte2 */
+#define X86_VEX_R(vex)	((vex) & 0x80)	/* VEX2/3 Byte1 */
+#define X86_VEX_X(vex)	((vex) & 0x40)	/* VEX3 Byte1 */
+#define X86_VEX_B(vex)	((vex) & 0x20)	/* VEX3 Byte1 */
+#define X86_VEX_L(vex)	((vex) & 0x04)	/* VEX3 Byte2, VEX2 Byte1 */
+/* VEX bit fields */
+#define X86_VEX3_M(vex)	((vex) & 0x1f)		/* VEX3 Byte1 */
+#define X86_VEX2_M	1			/* VEX2.M always 1 */
+#define X86_VEX_V(vex)	(((vex) & 0x78) >> 3)	/* VEX3 Byte2, VEX2 Byte1 */
+#define X86_VEX_P(vex)	((vex) & 0x03)		/* VEX3 Byte2, VEX2 Byte1 */
+#define X86_VEX_M_MAX	0x1f			/* VEX3.M Maximum value */
+
+/* The last prefix is needed for two-byte and three-byte opcodes */
+static inline insn_byte_t insn_last_prefix(struct insn *insn)
+{
+	return insn->prefixes.bytes[3];
+}
+
+extern void insn_init(struct insn *insn, const void *kaddr, int x86_64);
+extern void insn_get_prefixes(struct insn *insn);
+extern void insn_get_opcode(struct insn *insn);
+extern void insn_get_modrm(struct insn *insn);
+extern void insn_get_sib(struct insn *insn);
+extern void insn_get_displacement(struct insn *insn);
+extern void insn_get_immediate(struct insn *insn);
+extern void insn_get_length(struct insn *insn);
+
+/* Attribute will be determined after getting ModRM (for opcode groups) */
+static inline void insn_get_attribute(struct insn *insn)
+{
+	insn_get_modrm(insn);
+}
+
+/* Instruction uses RIP-relative addressing */
+extern int insn_rip_relative(struct insn *insn);
+
+/* Init insn for kernel text */
+static inline void kernel_insn_init(struct insn *insn, const void *kaddr)
+{
+#ifdef CONFIG_X86_64
+	insn_init(insn, kaddr, 1);
+#else /* CONFIG_X86_32 */
+	insn_init(insn, kaddr, 0);
+#endif
+}
+
+static inline int insn_is_avx(struct insn *insn)
+{
+	if (!insn->prefixes.got)
+		insn_get_prefixes(insn);
+	return (insn->vex_prefix.value != 0);
+}
+
+static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
+{
+	if (insn->vex_prefix.nbytes == 2)	/* 2 bytes VEX */
+		return X86_VEX2_M;
+	else
+		return X86_VEX3_M(insn->vex_prefix.bytes[1]);
+}
+
+static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
+{
+	if (insn->vex_prefix.nbytes == 2)	/* 2 bytes VEX */
+		return X86_VEX_P(insn->vex_prefix.bytes[1]);
+	else
+		return X86_VEX_P(insn->vex_prefix.bytes[2]);
+}
+
+/* Offset of each field from kaddr */
+static inline int insn_offset_rex_prefix(struct insn *insn)
+{
+	return insn->prefixes.nbytes;
+}
+static inline int insn_offset_vex_prefix(struct insn *insn)
+{
+	return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes;
+}
+static inline int insn_offset_opcode(struct insn *insn)
+{
+	return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes;
+}
+static inline int insn_offset_modrm(struct insn *insn)
+{
+	return insn_offset_opcode(insn) + insn->opcode.nbytes;
+}
+static inline int insn_offset_sib(struct insn *insn)
+{
+	return insn_offset_modrm(insn) + insn->modrm.nbytes;
+}
+static inline int insn_offset_displacement(struct insn *insn)
+{
+	return insn_offset_sib(insn) + insn->sib.nbytes;
+}
+static inline int insn_offset_immediate(struct insn *insn)
+{
+	return insn_offset_displacement(insn) + insn->displacement.nbytes;
+}
+
+#endif /* _ASM_X86_INSN_H */
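
Usage note for the new decoder API: insn_init() (or kernel_insn_init()) points struct insn at a byte buffer, each insn_get_xxx() fills its field plus everything it depends on, and insn_get_length() drives the whole chain. A short sketch with a hard-coded buffer:

	/* Sketch: decode one x86-64 instruction.
	 * 48 89 e5 is 'mov %rsp,%rbp'. */
	struct insn insn;
	static const insn_byte_t buf[] = { 0x48, 0x89, 0xe5 };

	kernel_insn_init(&insn, buf);
	insn_get_length(&insn);		/* pulls in prefixes..immediate */
	/* insn.length == 3; X86_MODRM_REG(insn.modrm.value) == 4 (%rsp) */
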
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index f1363b72364f..858baa061cfc 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -108,6 +108,8 @@ struct mce_log {
 #define K8_MCE_THRESHOLD_BANK_5    (MCE_THRESHOLD_BASE + 5 * 9)
 #define K8_MCE_THRESHOLD_DRAM_ECC  (MCE_THRESHOLD_BANK_4 + 0)
 
+extern struct atomic_notifier_head x86_mce_decoder_chain;
+
 #ifdef __KERNEL__
 
 #include <linux/percpu.h>
@@ -118,9 +120,11 @@ extern int mce_disabled;
 extern int mce_p5_enabled;
 
 #ifdef CONFIG_X86_MCE
-void mcheck_init(struct cpuinfo_x86 *c);
+int mcheck_init(void);
+void mcheck_cpu_init(struct cpuinfo_x86 *c);
 #else
-static inline void mcheck_init(struct cpuinfo_x86 *c) {}
+static inline int mcheck_init(void) { return 0; }
+static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
 #endif
 
 #ifdef CONFIG_X86_ANCIENT_MCE
@@ -214,5 +218,11 @@ void intel_init_thermal(struct cpuinfo_x86 *c);
 
 void mce_log_therm_throt_event(__u64 status);
 
+#ifdef CONFIG_X86_THERMAL_VECTOR
+extern void mcheck_intel_therm_init(void);
+#else
+static inline void mcheck_intel_therm_init(void) { }
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_X86_MCE_H */
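
With the decode callback replaced by a notifier chain (see the mce.c hunks below), EDAC-style decoders hook in with the standard notifier API instead of overwriting a function pointer. A hedged sketch of such a client (all names here are hypothetical):

	/* Sketch: an MCE decoder registering on the new chain. */
	static int sketch_decode_mce(struct notifier_block *nb,
				     unsigned long val, void *data)
	{
		struct mce *m = data;

		pr_emerg("decoding MCE, status: 0x%llx\n",
			 (unsigned long long)m->status);
		return NOTIFY_STOP;	/* stop before the default message */
	}

	static struct notifier_block sketch_nb = {
		.notifier_call	= sketch_decode_mce,
	};

	/* at init time:
	 *	atomic_notifier_chain_register(&x86_mce_decoder_chain, &sketch_nb);
	 */
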
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index ad7ce3fd5065..8d9f8548a870 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -28,9 +28,20 @@
  */
 #define ARCH_PERFMON_EVENT_MASK				    0xffff
 
+/*
+ * filter mask to validate fixed counter events.
+ * the following filters disqualify for fixed counters:
+ *  - inv
+ *  - edge
+ *  - cnt-mask
+ *  The other filters are supported by fixed counters.
+ *  The any-thread option is supported starting with v3.
+ */
+#define ARCH_PERFMON_EVENT_FILTER_MASK			0xff840000
+
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		      0x3c
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK		(0x00 << 8)
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX			 0
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
 		(1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c9786480f0fe..6f8ec1c37e0a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -30,6 +30,7 @@ struct mm_struct;
 #include <linux/math64.h>
 #include <linux/init.h>
 
+#define HBP_NUM 4
 /*
  * Default implementation of macro that returns current
  * instruction pointer ("program counter").
@@ -422,6 +423,8 @@ extern unsigned int xstate_size;
 extern void free_thread_xstate(struct task_struct *);
 extern struct kmem_cache *task_xstate_cachep;
 
+struct perf_event;
+
 struct thread_struct {
 	/* Cached TLS descriptors: */
 	struct desc_struct	tls_array[GDT_ENTRY_TLS_ENTRIES];
@@ -443,13 +446,10 @@ struct thread_struct {
 	unsigned long		fs;
 #endif
 	unsigned long		gs;
-	/* Hardware debugging registers: */
-	unsigned long		debugreg0;
-	unsigned long		debugreg1;
-	unsigned long		debugreg2;
-	unsigned long		debugreg3;
-	unsigned long		debugreg6;
-	unsigned long		debugreg7;
+	/* Save middle states of ptrace breakpoints */
+	struct perf_event	*ptrace_bps[HBP_NUM];
+	/* Debug status used for traps, single steps, etc... */
+	unsigned long		debugreg6;
 	/* Fault info: */
 	unsigned long		cr2;
 	unsigned long		trap_no;
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 0f0d908349aa..3d11fd0f44c5 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -7,6 +7,7 @@
 
 #ifdef __KERNEL__
 #include <asm/segment.h>
+#include <asm/page_types.h>
 #endif
 
 #ifndef __ASSEMBLY__
@@ -216,6 +217,67 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs)
 	return regs->sp;
 }
 
+/* Query offset/name of register from its name/offset */
+extern int regs_query_register_offset(const char *name);
+extern const char *regs_query_register_name(unsigned int offset);
+#define MAX_REG_OFFSET (offsetof(struct pt_regs, ss))
+
+/**
+ * regs_get_register() - get register value from its offset
+ * @regs:	pt_regs from which register value is gotten.
+ * @offset:	offset number of the register.
+ *
+ * regs_get_register returns the value of a register. The @offset is the
+ * offset of the register in struct pt_regs address which specified by @regs.
+ * If @offset is bigger than MAX_REG_OFFSET, this returns 0.
+ */
+static inline unsigned long regs_get_register(struct pt_regs *regs,
+					      unsigned int offset)
+{
+	if (unlikely(offset > MAX_REG_OFFSET))
+		return 0;
+	return *(unsigned long *)((unsigned long)regs + offset);
+}
+
+/**
+ * regs_within_kernel_stack() - check the address in the stack
+ * @regs:	pt_regs which contains kernel stack pointer.
+ * @addr:	address which is checked.
+ *
+ * regs_within_kernel_stack() checks @addr is within the kernel stack page(s).
+ * If @addr is within the kernel stack, it returns true. If not, returns false.
+ */
+static inline int regs_within_kernel_stack(struct pt_regs *regs,
+					   unsigned long addr)
+{
+	return ((addr & ~(THREAD_SIZE - 1)) ==
+		(kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1)));
+}
+
+/**
+ * regs_get_kernel_stack_nth() - get Nth entry of the stack
+ * @regs:	pt_regs which contains kernel stack pointer.
+ * @n:		stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
+ * is specified by @regs. If the @n th entry is NOT in the kernel stack,
+ * this returns 0.
+ */
+static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
+						      unsigned int n)
+{
+	unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
+	addr += n;
+	if (regs_within_kernel_stack(regs, (unsigned long)addr))
+		return *addr;
+	else
+		return 0;
+}
+
+/* Get Nth argument at function call */
+extern unsigned long regs_get_argument_nth(struct pt_regs *regs,
+					   unsigned int n);
+
 /*
  * These are defined as per linux/ptrace.h, which see.
  */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index d8e5d0cdd678..4f2e66e29ecc 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -40,7 +40,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o
 obj-y			+= bootflag.o e820.o
 obj-y			+= pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
-obj-y			+= alternative.o i8253.o pci-nommu.o
+obj-y			+= alternative.o i8253.o pci-nommu.o hw_breakpoint.o
 obj-y			+= tsc.o io_delay.o rtc.o
 
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 68537e957a9b..1d2cb383410e 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -5,6 +5,7 @@
 # Don't trace early stages of a secondary CPU boot
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_common.o = -pg
+CFLAGS_REMOVE_perf_event.o = -pg
 endif
 
 # Make sure load_percpu_segment has no stackprotector
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cc25c2b4a567..9053be5d95cd 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -837,10 +837,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
837 boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; 837 boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
838 } 838 }
839 839
840#ifdef CONFIG_X86_MCE
841 /* Init Machine Check Exception if available. */ 840 /* Init Machine Check Exception if available. */
842 mcheck_init(c); 841 mcheck_cpu_init(c);
843#endif
844 842
845 select_idle_routine(c); 843 select_idle_routine(c);
846 844
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 721a77ca8115..0bcaa3875863 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -46,6 +46,9 @@
 
 #include "mce-internal.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/mce.h>
+
 int mce_disabled __read_mostly;
 
 #define MISC_MCELOG_MINOR	227
@@ -85,18 +88,26 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static int			cpu_missing;
 
-static void default_decode_mce(struct mce *m)
+/*
+ * CPU/chipset specific EDAC code can register a notifier call here to print
+ * MCE errors in a human-readable form.
+ */
+ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
+EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
+
+static int default_decode_mce(struct notifier_block *nb, unsigned long val,
+			      void *data)
 {
 	pr_emerg("No human readable MCE decoding support on this CPU type.\n");
 	pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");
+
+	return NOTIFY_STOP;
 }
 
-/*
- * CPU/chipset specific EDAC code can register a callback here to print
- * MCE errors in a human-readable form:
- */
-void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce;
-EXPORT_SYMBOL(x86_mce_decode_callback);
+static struct notifier_block mce_dec_nb = {
+	.notifier_call = default_decode_mce,
+	.priority      = -1,
+};
 
 /* MCA banks polled by the period polling timer for corrected events */
 DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
@@ -141,6 +152,9 @@ void mce_log(struct mce *mce)
 {
 	unsigned next, entry;
 
+	/* Emit the trace record: */
+	trace_mce_record(mce);
+
 	mce->finished = 0;
 	wmb();
 	for (;;) {
@@ -204,9 +218,9 @@ static void print_mce(struct mce *m)
 
 	/*
 	 * Print out human-readable details about the MCE error,
-	 * (if the CPU has an implementation for that):
+	 * (if the CPU has an implementation for that)
 	 */
-	x86_mce_decode_callback(m);
+	atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
 }
 
 static void print_mce_head(void)
@@ -1122,7 +1136,7 @@ static int check_interval = 5 * 60; /* 5 minutes */
 static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
 static DEFINE_PER_CPU(struct timer_list, mce_timer);
 
-static void mcheck_timer(unsigned long data)
+static void mce_start_timer(unsigned long data)
 {
 	struct timer_list *t = &per_cpu(mce_timer, data);
 	int *n;
@@ -1187,7 +1201,7 @@ int mce_notify_irq(void)
 }
 EXPORT_SYMBOL_GPL(mce_notify_irq);
 
-static int mce_banks_init(void)
+static int __cpuinit __mcheck_cpu_mce_banks_init(void)
 {
 	int i;
 
@@ -1206,7 +1220,7 @@ static int mce_banks_init(void)
 /*
  * Initialize Machine Checks for a CPU.
  */
-static int __cpuinit mce_cap_init(void)
+static int __cpuinit __mcheck_cpu_cap_init(void)
 {
 	unsigned b;
 	u64 cap;
@@ -1228,7 +1242,7 @@ static int __cpuinit mce_cap_init(void)
 	WARN_ON(banks != 0 && b != banks);
 	banks = b;
 	if (!mce_banks) {
-		int err = mce_banks_init();
+		int err = __mcheck_cpu_mce_banks_init();
 
 		if (err)
 			return err;
@@ -1244,7 +1258,7 @@ static int __cpuinit mce_cap_init(void)
 	return 0;
 }
 
-static void mce_init(void)
+static void __mcheck_cpu_init_generic(void)
 {
 	mce_banks_t all_banks;
 	u64 cap;
@@ -1273,7 +1287,7 @@ static void mce_init(void)
 }
 
 /* Add per CPU specific workarounds here */
-static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
+static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 {
 	if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
 		pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
@@ -1341,7 +1355,7 @@ static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
 	return 0;
 }
 
-static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
+static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
 {
 	if (c->x86 != 5)
 		return;
@@ -1355,7 +1369,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
 	}
 }
 
-static void mce_cpu_features(struct cpuinfo_x86 *c)
+static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 {
 	switch (c->x86_vendor) {
 	case X86_VENDOR_INTEL:
@@ -1369,7 +1383,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c)
 	}
 }
 
-static void mce_init_timer(void)
+static void __mcheck_cpu_init_timer(void)
 {
 	struct timer_list *t = &__get_cpu_var(mce_timer);
 	int *n = &__get_cpu_var(mce_next_interval);
@@ -1380,7 +1394,7 @@ static void mce_init_timer(void)
 	*n = check_interval * HZ;
 	if (!*n)
 		return;
-	setup_timer(t, mcheck_timer, smp_processor_id());
+	setup_timer(t, mce_start_timer, smp_processor_id());
 	t->expires = round_jiffies(jiffies + *n);
 	add_timer_on(t, smp_processor_id());
 }
@@ -1400,27 +1414,28 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) =
  * Called for each booted CPU to set up machine checks.
  * Must be called with preempt off:
  */
-void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
+void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
 {
 	if (mce_disabled)
 		return;
 
-	mce_ancient_init(c);
+	__mcheck_cpu_ancient_init(c);
 
 	if (!mce_available(c))
 		return;
 
-	if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) {
+	if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
 		mce_disabled = 1;
 		return;
 	}
 
 	machine_check_vector = do_machine_check;
 
-	mce_init();
-	mce_cpu_features(c);
-	mce_init_timer();
+	__mcheck_cpu_init_generic();
+	__mcheck_cpu_init_vendor(c);
+	__mcheck_cpu_init_timer();
 	INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
+
 }
 
 /*
@@ -1640,6 +1655,15 @@ static int __init mcheck_enable(char *str)
 }
 __setup("mce", mcheck_enable);
 
+int __init mcheck_init(void)
+{
+	atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb);
+
+	mcheck_intel_therm_init();
+
+	return 0;
+}
+
 /*
  * Sysfs support
  */
@@ -1648,7 +1672,7 @@ __setup("mce", mcheck_enable);
  * Disable machine checks on suspend and shutdown. We can't really handle
  * them later.
  */
-static int mce_disable(void)
+static int mce_disable_error_reporting(void)
 {
 	int i;
 
@@ -1663,12 +1687,12 @@ static int mce_disable(void)
 
 static int mce_suspend(struct sys_device *dev, pm_message_t state)
 {
-	return mce_disable();
+	return mce_disable_error_reporting();
 }
 
 static int mce_shutdown(struct sys_device *dev)
 {
-	return mce_disable();
+	return mce_disable_error_reporting();
 }
 
 /*
@@ -1678,8 +1702,8 @@ static int mce_shutdown(struct sys_device *dev)
  */
 static int mce_resume(struct sys_device *dev)
 {
-	mce_init();
-	mce_cpu_features(&current_cpu_data);
+	__mcheck_cpu_init_generic();
+	__mcheck_cpu_init_vendor(&current_cpu_data);
 
 	return 0;
 }
@@ -1689,8 +1713,8 @@ static void mce_cpu_restart(void *data)
 	del_timer_sync(&__get_cpu_var(mce_timer));
 	if (!mce_available(&current_cpu_data))
 		return;
-	mce_init();
-	mce_init_timer();
+	__mcheck_cpu_init_generic();
+	__mcheck_cpu_init_timer();
 }
 
 /* Reinit MCEs after user configuration changes */
@@ -1716,7 +1740,7 @@ static void mce_enable_ce(void *all)
 	cmci_reenable();
 	cmci_recheck();
 	if (all)
-		mce_init_timer();
+		__mcheck_cpu_init_timer();
 }
 
 static struct sysdev_class mce_sysclass = {
@@ -1929,13 +1953,14 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
 }
 
 /* Make sure there are no machine checks on offlined CPUs. */
-static void mce_disable_cpu(void *h)
+static void __cpuinit mce_disable_cpu(void *h)
 {
 	unsigned long action = *(unsigned long *)h;
 	int i;
 
 	if (!mce_available(&current_cpu_data))
 		return;
+
 	if (!(action & CPU_TASKS_FROZEN))
 		cmci_clear();
 	for (i = 0; i < banks; i++) {
@@ -1946,7 +1971,7 @@ static void mce_disable_cpu(void *h)
 	}
 }
 
-static void mce_reenable_cpu(void *h)
+static void __cpuinit mce_reenable_cpu(void *h)
 {
 	unsigned long action = *(unsigned long *)h;
 	int i;
@@ -2025,7 +2050,7 @@ static __init void mce_init_banks(void)
 	}
 }
 
-static __init int mce_init_device(void)
+static __init int mcheck_init_device(void)
 {
 	int err;
 	int i = 0;
@@ -2053,7 +2078,7 @@ static __init int mce_init_device(void)
 	return err;
 }
 
-device_initcall(mce_init_device);
+device_initcall(mcheck_init_device);
 
 /*
  * Old style boot options parsing. Only for compatibility.
@@ -2101,7 +2126,7 @@ static int fake_panic_set(void *data, u64 val)
 DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get,
 			fake_panic_set, "%llu\n");
 
-static int __init mce_debugfs_init(void)
+static int __init mcheck_debugfs_init(void)
 {
 	struct dentry *dmce, *ffake_panic;
 
@@ -2115,5 +2140,5 @@ static int __init mce_debugfs_init(void)
 
 	return 0;
 }
-late_initcall(mce_debugfs_init);
+late_initcall(mcheck_debugfs_init);
 #endif
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index b3a1dba75330..4fef985fc221 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -49,6 +49,8 @@ static DEFINE_PER_CPU(struct thermal_state, thermal_state);
 
 static atomic_t therm_throt_en	= ATOMIC_INIT(0);
 
+static u32 lvtthmr_init __read_mostly;
+
 #ifdef CONFIG_SYSFS
 #define define_therm_throt_sysdev_one_ro(_name)			\
 	static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
@@ -254,6 +256,18 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
 	ack_APIC_irq();
 }
 
+void __init mcheck_intel_therm_init(void)
+{
+	/*
+	 * This function is only called on boot CPU. Save the init thermal
+	 * LVT value on BSP and use that value to restore APs' thermal LVT
+	 * entry BIOS programmed later
+	 */
+	if (cpu_has(&boot_cpu_data, X86_FEATURE_ACPI) &&
+		cpu_has(&boot_cpu_data, X86_FEATURE_ACC))
+		lvtthmr_init = apic_read(APIC_LVTTHMR);
+}
+
 void intel_init_thermal(struct cpuinfo_x86 *c)
 {
 	unsigned int cpu = smp_processor_id();
@@ -270,7 +284,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 	 * since it might be delivered via SMI already:
 	 */
 	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-	h = apic_read(APIC_LVTTHMR);
+
+	/*
+	 * The initial value of thermal LVT entries on all APs always reads
+	 * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
+	 * sequence to them and LVT registers are reset to 0s except for
+	 * the mask bits which are set to 1s when APs receive INIT IPI.
+	 * Always restore the value that BIOS has programmed on AP based on
+	 * BSP's info we saved since BIOS is always setting the same value
+	 * for all threads/cores
+	 */
+	apic_write(APIC_LVTTHMR, lvtthmr_init);
+
+	h = lvtthmr_init;
+
 	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
 		printk(KERN_DEBUG
 		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index b5801c311846..c1bbed1021d9 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -77,6 +77,18 @@ struct cpu_hw_events {
 	struct debug_store	*ds;
 };
 
+struct event_constraint {
+	unsigned long	idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	int		code;
+};
+
+#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) }
+#define EVENT_CONSTRAINT_END  { .code = 0, .idxmsk[0] = 0 }
+
+#define for_each_event_constraint(e, c) \
+	for ((e) = (c); (e)->idxmsk[0]; (e)++)
+
+
 /*
  * struct x86_pmu - generic x86 pmu
  */
@@ -102,6 +114,8 @@ struct x86_pmu {
 	u64		intel_ctrl;
 	void		(*enable_bts)(u64 config);
 	void		(*disable_bts)(void);
+	int		(*get_event_idx)(struct cpu_hw_events *cpuc,
+					 struct hw_perf_event *hwc);
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -110,6 +124,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 	.enabled = 1,
 };
 
+static const struct event_constraint *event_constraints;
+
 /*
  * Not sure about some of these
  */
@@ -155,6 +171,16 @@ static u64 p6_pmu_raw_event(u64 hw_event)
 	return hw_event & P6_EVNTSEL_MASK;
 }
 
+static const struct event_constraint intel_p6_event_constraints[] =
+{
+	EVENT_CONSTRAINT(0xc1, 0x1),	/* FLOPS */
+	EVENT_CONSTRAINT(0x10, 0x1),	/* FP_COMP_OPS_EXE */
+	EVENT_CONSTRAINT(0x11, 0x1),	/* FP_ASSIST */
+	EVENT_CONSTRAINT(0x12, 0x2),	/* MUL */
+	EVENT_CONSTRAINT(0x13, 0x2),	/* DIV */
+	EVENT_CONSTRAINT(0x14, 0x1),	/* CYCLES_DIV_BUSY */
+	EVENT_CONSTRAINT_END
+};
 
 /*
  * Intel PerfMon v3. Used on Core2 and later.
@@ -170,6 +196,35 @@ static const u64 intel_perfmon_event_map[] =
170 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, 196 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
171}; 197};
172 198
199static const struct event_constraint intel_core_event_constraints[] =
200{
201 EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
202 EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
203 EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
204 EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
205 EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
206 EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
207 EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
208 EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
209 EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
210 EVENT_CONSTRAINT_END
211};
212
213static const struct event_constraint intel_nehalem_event_constraints[] =
214{
215 EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
216 EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
217 EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
218 EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
219 EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
220 EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */
221 EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
222 EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
223 EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */
224 EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */
225 EVENT_CONSTRAINT_END
226};
227
173static u64 intel_pmu_event_map(int hw_event) 228static u64 intel_pmu_event_map(int hw_event)
174{ 229{
175 return intel_perfmon_event_map[hw_event]; 230 return intel_perfmon_event_map[hw_event];
@@ -190,7 +245,7 @@ static u64 __read_mostly hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX];
 
-static const u64 nehalem_hw_cache_event_ids
+static __initconst u64 nehalem_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -281,7 +336,7 @@ static const u64 nehalem_hw_cache_event_ids
 	},
 };
 
-static const u64 core2_hw_cache_event_ids
+static __initconst u64 core2_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -372,7 +427,7 @@ static const u64 core2_hw_cache_event_ids
 	},
 };
 
-static const u64 atom_hw_cache_event_ids
+static __initconst u64 atom_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -469,7 +524,7 @@ static u64 intel_pmu_raw_event(u64 hw_event)
 #define CORE_EVNTSEL_UNIT_MASK		0x0000FF00ULL
 #define CORE_EVNTSEL_EDGE_MASK		0x00040000ULL
 #define CORE_EVNTSEL_INV_MASK		0x00800000ULL
-#define CORE_EVNTSEL_REG_MASK	0xFF000000ULL
+#define CORE_EVNTSEL_REG_MASK		0xFF000000ULL
 
 #define CORE_EVNTSEL_MASK		\
 	(CORE_EVNTSEL_EVENT_MASK |	\
@@ -481,7 +536,7 @@ static u64 intel_pmu_raw_event(u64 hw_event)
 	return hw_event & CORE_EVNTSEL_MASK;
 }
 
-static const u64 amd_hw_cache_event_ids
+static __initconst u64 amd_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -932,6 +987,8 @@ static int __hw_perf_event_init(struct perf_event *event)
 	 */
 	hwc->config = ARCH_PERFMON_EVENTSEL_INT;
 
+	hwc->idx = -1;
+
 	/*
 	 * Count user and OS events unless requested not to.
 	 */
@@ -1334,8 +1391,7 @@ static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx)
 	x86_pmu_enable_event(hwc, idx);
 }
 
-static int
-fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
+static int fixed_mode_idx(struct hw_perf_event *hwc)
 {
 	unsigned int hw_event;
 
@@ -1349,6 +1405,12 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
 	if (!x86_pmu.num_events_fixed)
 		return -1;
 
+	/*
+	 * fixed counters do not take all possible filters
+	 */
+	if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK)
+		return -1;
+
 	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
 		return X86_PMC_IDX_FIXED_INSTRUCTIONS;
 	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
@@ -1360,22 +1422,57 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
 }
 
 /*
- * Find a PMC slot for the freshly enabled / scheduled in event:
+ * generic counter allocator: get next free counter
  */
-static int x86_pmu_enable(struct perf_event *event)
+static int
+gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
+{
+	int idx;
+
+	idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events);
+	return idx == x86_pmu.num_events ? -1 : idx;
+}
+
+/*
+ * intel-specific counter allocator: check event constraints
+ */
+static int
+intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
+{
+	const struct event_constraint *event_constraint;
+	int i, code;
+
+	if (!event_constraints)
+		goto skip;
+
+	code = hwc->config & CORE_EVNTSEL_EVENT_MASK;
+
+	for_each_event_constraint(event_constraint, event_constraints) {
+		if (code == event_constraint->code) {
+			for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
+				if (!test_and_set_bit(i, cpuc->used_mask))
+					return i;
+			}
+			return -1;
+		}
+	}
+skip:
+	return gen_get_event_idx(cpuc, hwc);
+}
+
+static int
+x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct hw_perf_event *hwc = &event->hw;
 	int idx;
 
-	idx = fixed_mode_idx(event, hwc);
+	idx = fixed_mode_idx(hwc);
 	if (idx == X86_PMC_IDX_FIXED_BTS) {
 		/* BTS is already occupied. */
 		if (test_and_set_bit(idx, cpuc->used_mask))
 			return -EAGAIN;
 
 		hwc->config_base = 0;
 		hwc->event_base	 = 0;
 		hwc->idx	 = idx;
 	} else if (idx >= 0) {
 		/*
@@ -1396,20 +1493,35 @@ static int x86_pmu_enable(struct perf_event *event)
 	} else {
 		idx = hwc->idx;
 		/* Try to get the previous generic event again */
-		if (test_and_set_bit(idx, cpuc->used_mask)) {
+		if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
 try_generic:
-			idx = find_first_zero_bit(cpuc->used_mask,
-						  x86_pmu.num_events);
-			if (idx == x86_pmu.num_events)
+			idx = x86_pmu.get_event_idx(cpuc, hwc);
+			if (idx == -1)
 				return -EAGAIN;
 
 			set_bit(idx, cpuc->used_mask);
 			hwc->idx = idx;
 		}
 		hwc->config_base = x86_pmu.eventsel;
 		hwc->event_base  = x86_pmu.perfctr;
 	}
 
+	return idx;
+}
+
+/*
+ * Find a PMC slot for the freshly enabled / scheduled in event:
+ */
+static int x86_pmu_enable(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+
+	idx = x86_schedule_event(cpuc, hwc);
+	if (idx < 0)
+		return idx;
+
 	perf_events_lapic_init();
 
 	x86_pmu.disable(hwc, idx);
@@ -1852,7 +1964,7 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = {
1852 .priority = 1 1964 .priority = 1
1853}; 1965};
1854 1966
1855static struct x86_pmu p6_pmu = { 1967static __initconst struct x86_pmu p6_pmu = {
1856 .name = "p6", 1968 .name = "p6",
1857 .handle_irq = p6_pmu_handle_irq, 1969 .handle_irq = p6_pmu_handle_irq,
1858 .disable_all = p6_pmu_disable_all, 1970 .disable_all = p6_pmu_disable_all,
@@ -1877,9 +1989,10 @@ static struct x86_pmu p6_pmu = {
1877 */ 1989 */
1878 .event_bits = 32, 1990 .event_bits = 32,
1879 .event_mask = (1ULL << 32) - 1, 1991 .event_mask = (1ULL << 32) - 1,
1992 .get_event_idx = intel_get_event_idx,
1880}; 1993};
1881 1994
1882static struct x86_pmu intel_pmu = { 1995static __initconst struct x86_pmu intel_pmu = {
1883 .name = "Intel", 1996 .name = "Intel",
1884 .handle_irq = intel_pmu_handle_irq, 1997 .handle_irq = intel_pmu_handle_irq,
1885 .disable_all = intel_pmu_disable_all, 1998 .disable_all = intel_pmu_disable_all,
@@ -1900,9 +2013,10 @@ static struct x86_pmu intel_pmu = {
1900 .max_period = (1ULL << 31) - 1, 2013 .max_period = (1ULL << 31) - 1,
1901 .enable_bts = intel_pmu_enable_bts, 2014 .enable_bts = intel_pmu_enable_bts,
1902 .disable_bts = intel_pmu_disable_bts, 2015 .disable_bts = intel_pmu_disable_bts,
2016 .get_event_idx = intel_get_event_idx,
1903}; 2017};
1904 2018
1905static struct x86_pmu amd_pmu = { 2019static __initconst struct x86_pmu amd_pmu = {
1906 .name = "AMD", 2020 .name = "AMD",
1907 .handle_irq = amd_pmu_handle_irq, 2021 .handle_irq = amd_pmu_handle_irq,
1908 .disable_all = amd_pmu_disable_all, 2022 .disable_all = amd_pmu_disable_all,
@@ -1920,9 +2034,10 @@ static struct x86_pmu amd_pmu = {
1920 .apic = 1, 2034 .apic = 1,
1921 /* use highest bit to detect overflow */ 2035 /* use highest bit to detect overflow */
1922 .max_period = (1ULL << 47) - 1, 2036 .max_period = (1ULL << 47) - 1,
2037 .get_event_idx = gen_get_event_idx,
1923}; 2038};
1924 2039
1925static int p6_pmu_init(void) 2040static __init int p6_pmu_init(void)
1926{ 2041{
1927 switch (boot_cpu_data.x86_model) { 2042 switch (boot_cpu_data.x86_model) {
1928 case 1: 2043 case 1:
@@ -1932,10 +2047,12 @@ static int p6_pmu_init(void)
1932 case 7: 2047 case 7:
1933 case 8: 2048 case 8:
1934 case 11: /* Pentium III */ 2049 case 11: /* Pentium III */
2050 event_constraints = intel_p6_event_constraints;
1935 break; 2051 break;
1936 case 9: 2052 case 9:
1937 case 13: 2053 case 13:
1938 /* Pentium M */ 2054 /* Pentium M */
2055 event_constraints = intel_p6_event_constraints;
1939 break; 2056 break;
1940 default: 2057 default:
1941 pr_cont("unsupported p6 CPU model %d ", 2058 pr_cont("unsupported p6 CPU model %d ",
@@ -1954,7 +2071,7 @@ static int p6_pmu_init(void)
1954 return 0; 2071 return 0;
1955} 2072}
1956 2073
1957static int intel_pmu_init(void) 2074static __init int intel_pmu_init(void)
1958{ 2075{
1959 union cpuid10_edx edx; 2076 union cpuid10_edx edx;
1960 union cpuid10_eax eax; 2077 union cpuid10_eax eax;
@@ -2007,12 +2124,14 @@ static int intel_pmu_init(void)
2007 sizeof(hw_cache_event_ids)); 2124 sizeof(hw_cache_event_ids));
2008 2125
2009 pr_cont("Core2 events, "); 2126 pr_cont("Core2 events, ");
2127 event_constraints = intel_core_event_constraints;
2010 break; 2128 break;
2011 default: 2129 default:
2012 case 26: 2130 case 26:
2013 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, 2131 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
2014 sizeof(hw_cache_event_ids)); 2132 sizeof(hw_cache_event_ids));
2015 2133
2134 event_constraints = intel_nehalem_event_constraints;
2016 pr_cont("Nehalem/Corei7 events, "); 2135 pr_cont("Nehalem/Corei7 events, ");
2017 break; 2136 break;
2018 case 28: 2137 case 28:
@@ -2025,7 +2144,7 @@ static int intel_pmu_init(void)
2025 return 0; 2144 return 0;
2026} 2145}
2027 2146
2028static int amd_pmu_init(void) 2147static __init int amd_pmu_init(void)
2029{ 2148{
2030 /* Performance-monitoring supported from K7 and later: */ 2149 /* Performance-monitoring supported from K7 and later: */
2031 if (boot_cpu_data.x86 < 6) 2150 if (boot_cpu_data.x86 < 6)
@@ -2105,11 +2224,47 @@ static const struct pmu pmu = {
2105 .unthrottle = x86_pmu_unthrottle, 2224 .unthrottle = x86_pmu_unthrottle,
2106}; 2225};
2107 2226
2227static int
2228validate_event(struct cpu_hw_events *cpuc, struct perf_event *event)
2229{
2230 struct hw_perf_event fake_event = event->hw;
2231
2232 if (event->pmu && event->pmu != &pmu)
2233 return 0;
2234
2235 return x86_schedule_event(cpuc, &fake_event) >= 0;
2236}
2237
2238static int validate_group(struct perf_event *event)
2239{
2240 struct perf_event *sibling, *leader = event->group_leader;
2241 struct cpu_hw_events fake_pmu;
2242
2243 memset(&fake_pmu, 0, sizeof(fake_pmu));
2244
2245 if (!validate_event(&fake_pmu, leader))
2246 return -ENOSPC;
2247
2248 list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
2249 if (!validate_event(&fake_pmu, sibling))
2250 return -ENOSPC;
2251 }
2252
2253 if (!validate_event(&fake_pmu, event))
2254 return -ENOSPC;
2255
2256 return 0;
2257}
2258
2108const struct pmu *hw_perf_event_init(struct perf_event *event) 2259const struct pmu *hw_perf_event_init(struct perf_event *event)
2109{ 2260{
2110 int err; 2261 int err;
2111 2262
2112 err = __hw_perf_event_init(event); 2263 err = __hw_perf_event_init(event);
2264 if (!err) {
2265 if (event->group_leader != event)
2266 err = validate_group(event);
2267 }
2113 if (err) { 2268 if (err) {
2114 if (event->destroy) 2269 if (event->destroy)
2115 event->destroy(event); 2270 event->destroy(event);
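validate_event() and validate_group() above dry-run the scheduling algorithm against a zeroed, throw-away cpu_hw_events, so a group that can never fit on the PMU is rejected with -ENOSPC at creation time instead of silently failing to schedule later. The idea in miniature, reusing gen_get_idx() from the earlier allocator sketch (illustrative only, not the kernel code):

static int validate_group_sketch(unsigned int nr_members)
{
        unsigned int fake_used = 0;     /* throw-away state, like fake_pmu */
        unsigned int i;

        for (i = 0; i < nr_members; i++)
                if (gen_get_idx(&fake_used) < 0)
                        return -1;      /* the kernel returns -ENOSPC */
        return 0;
}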
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 7d52e9da5e0c..50b9c220e121 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -334,6 +334,10 @@ ENTRY(ret_from_fork)
334END(ret_from_fork) 334END(ret_from_fork)
335 335
336/* 336/*
337 * Interrupt exit functions should be protected against kprobes
338 */
339 .pushsection .kprobes.text, "ax"
340/*
337 * Return to user mode is not as complex as all this looks, 341 * Return to user mode is not as complex as all this looks,
338 * but we want the default path for a system call return to 342 * but we want the default path for a system call return to
339 * go as quickly as possible which is why some of this is 343 * go as quickly as possible which is why some of this is
@@ -383,6 +387,10 @@ need_resched:
383END(resume_kernel) 387END(resume_kernel)
384#endif 388#endif
385 CFI_ENDPROC 389 CFI_ENDPROC
390/*
391 * End of kprobes section
392 */
393 .popsection
386 394
387/* SYSENTER_RETURN points to after the "sysenter" instruction in 395/* SYSENTER_RETURN points to after the "sysenter" instruction in
388 the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ 396 the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
@@ -513,6 +521,10 @@ sysexit_audit:
513 PTGS_TO_GS_EX 521 PTGS_TO_GS_EX
514ENDPROC(ia32_sysenter_target) 522ENDPROC(ia32_sysenter_target)
515 523
524/*
525 * syscall stub including irq exit should be protected against kprobes
526 */
527 .pushsection .kprobes.text, "ax"
516 # system call handler stub 528 # system call handler stub
517ENTRY(system_call) 529ENTRY(system_call)
518 RING0_INT_FRAME # can't unwind into user space anyway 530 RING0_INT_FRAME # can't unwind into user space anyway
@@ -705,6 +717,10 @@ syscall_badsys:
705 jmp resume_userspace 717 jmp resume_userspace
706END(syscall_badsys) 718END(syscall_badsys)
707 CFI_ENDPROC 719 CFI_ENDPROC
720/*
721 * End of kprobes section
722 */
723 .popsection
708 724
709/* 725/*
710 * System calls that need a pt_regs pointer. 726 * System calls that need a pt_regs pointer.
@@ -814,6 +830,10 @@ common_interrupt:
814ENDPROC(common_interrupt) 830ENDPROC(common_interrupt)
815 CFI_ENDPROC 831 CFI_ENDPROC
816 832
833/*
834 * Irq entries should be protected against kprobes
835 */
836 .pushsection .kprobes.text, "ax"
817#define BUILD_INTERRUPT3(name, nr, fn) \ 837#define BUILD_INTERRUPT3(name, nr, fn) \
818ENTRY(name) \ 838ENTRY(name) \
819 RING0_INT_FRAME; \ 839 RING0_INT_FRAME; \
@@ -980,6 +1000,10 @@ ENTRY(spurious_interrupt_bug)
980 jmp error_code 1000 jmp error_code
981 CFI_ENDPROC 1001 CFI_ENDPROC
982END(spurious_interrupt_bug) 1002END(spurious_interrupt_bug)
1003/*
1004 * End of kprobes section
1005 */
1006 .popsection
983 1007
984ENTRY(kernel_thread_helper) 1008ENTRY(kernel_thread_helper)
985 pushl $0 # fake return address for unwinder 1009 pushl $0 # fake return address for unwinder
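The .pushsection/.popsection pairs added above move the interrupt-exit and syscall-stub paths into .kprobes.text; the kprobes core refuses to place probes in that section, so code that runs under the int3/debug handlers can never itself be probed recursively. On the C side the same marking is done with the __kprobes annotation, which is essentially a section attribute. A compilable user-space sketch of the mechanism (simplified stand-in macro; see <linux/kprobes.h> for the real one):

#include <stdio.h>

/* simplified stand-in for the kernel's __kprobes annotation */
#define __kprobes_sketch __attribute__((__section__(".kprobes.text")))

static int __kprobes_sketch critical_path(int x)
{
        /* lands in .kprobes.text; the kprobes core rejects probes there */
        return x + 1;
}

int main(void)
{
        printf("%d\n", critical_path(41));
        return 0;
}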
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index bd5bbddddf91..722df1b1152d 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -803,6 +803,10 @@ END(interrupt)
803 call \func 803 call \func
804 .endm 804 .endm
805 805
806/*
807 * Interrupt entry/exit should be protected against kprobes
808 */
809 .pushsection .kprobes.text, "ax"
806 /* 810 /*
807 * The interrupt stubs push (~vector+0x80) onto the stack and 811 * The interrupt stubs push (~vector+0x80) onto the stack and
808 * then jump to common_interrupt. 812 * then jump to common_interrupt.
@@ -941,6 +945,10 @@ ENTRY(retint_kernel)
941 945
942 CFI_ENDPROC 946 CFI_ENDPROC
943END(common_interrupt) 947END(common_interrupt)
948/*
949 * End of kprobes section
950 */
951 .popsection
944 952
945/* 953/*
946 * APIC interrupts. 954 * APIC interrupts.
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
new file mode 100644
index 000000000000..d42f65ac4927
--- /dev/null
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -0,0 +1,555 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) 2007 Alan Stern
17 * Copyright (C) 2009 IBM Corporation
18 * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com>
19 *
20 * Authors: Alan Stern <stern@rowland.harvard.edu>
21 * K.Prasad <prasad@linux.vnet.ibm.com>
22 * Frederic Weisbecker <fweisbec@gmail.com>
23 */
24
25/*
26 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
27 * using the CPU's debug registers.
28 */
29
30#include <linux/perf_event.h>
31#include <linux/hw_breakpoint.h>
32#include <linux/irqflags.h>
33#include <linux/notifier.h>
34#include <linux/kallsyms.h>
35#include <linux/kprobes.h>
36#include <linux/percpu.h>
37#include <linux/kdebug.h>
38#include <linux/kernel.h>
39#include <linux/module.h>
40#include <linux/sched.h>
41#include <linux/init.h>
42#include <linux/smp.h>
43
44#include <asm/hw_breakpoint.h>
45#include <asm/processor.h>
46#include <asm/debugreg.h>
47
48/* Per cpu debug control register value */
49DEFINE_PER_CPU(unsigned long, cpu_dr7);
50EXPORT_PER_CPU_SYMBOL(cpu_dr7);
51
52/* Per cpu debug address registers values */
53static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]);
54
55/*
56 * Stores the breakpoints currently in use on each breakpoint address
 57 * register for each cpu
58 */
59static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]);
60
61
62static inline unsigned long
63__encode_dr7(int drnum, unsigned int len, unsigned int type)
64{
65 unsigned long bp_info;
66
67 bp_info = (len | type) & 0xf;
68 bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
69 bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE));
70
71 return bp_info;
72}
73
74/*
75 * Encode the length, type, Exact, and Enable bits for a particular breakpoint
76 * as stored in debug register 7.
77 */
78unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
79{
80 return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN;
81}
82
83/*
84 * Decode the length and type bits for a particular breakpoint as
85 * stored in debug register 7. Return the "enabled" status.
86 */
87int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
88{
89 int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
90
91 *len = (bp_info & 0xc) | 0x40;
92 *type = (bp_info & 0x3) | 0x80;
93
94 return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
95}
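encode_dr7() above implements the documented DR7 layout: breakpoint n owns two enable bits at bit position 2*n and a 4-bit len/type nibble at position 16 + 4*n (decode_dr7() additionally ORs marker bits onto the raw fields so the results line up with the arch's X86_BREAKPOINT_* constants). A stand-alone user-space sketch of the encoding arithmetic, with constants mirroring <asm/debugreg.h>:

#include <stdio.h>

#define DR_CONTROL_SHIFT 16
#define DR_CONTROL_SIZE   4
#define DR_ENABLE_SIZE    2
#define DR_GLOBAL_ENABLE  2

static unsigned long sketch_encode_dr7(int n, unsigned int len,
                                       unsigned int type)
{
        unsigned long v = (len | type) & 0xf;

        v <<= DR_CONTROL_SHIFT + n * DR_CONTROL_SIZE;
        v |= (unsigned long)DR_GLOBAL_ENABLE << (n * DR_ENABLE_SIZE);
        return v;
}

int main(void)
{
        /* len = 0b11 (4 bytes), type = 0b01 (write) on breakpoint 2 */
        unsigned long dr7 = sketch_encode_dr7(2, 0xc, 0x1);

        printf("dr7 = %#lx\n", dr7);    /* expect 0xd000020 */
        return 0;
}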
96
97/*
98 * Install a perf counter breakpoint.
99 *
100 * We seek a free debug address register and use it for this
101 * breakpoint. Eventually we enable it in the debug control register.
102 *
103 * Atomic: we hold the counter->ctx->lock and we only handle variables
104 * and registers local to this cpu.
105 */
106int arch_install_hw_breakpoint(struct perf_event *bp)
107{
108 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
109 unsigned long *dr7;
110 int i;
111
112 for (i = 0; i < HBP_NUM; i++) {
113 struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
114
115 if (!*slot) {
116 *slot = bp;
117 break;
118 }
119 }
120
121 if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
122 return -EBUSY;
123
124 set_debugreg(info->address, i);
125 __get_cpu_var(cpu_debugreg[i]) = info->address;
126
127 dr7 = &__get_cpu_var(cpu_dr7);
128 *dr7 |= encode_dr7(i, info->len, info->type);
129
130 set_debugreg(*dr7, 7);
131
132 return 0;
133}
134
135/*
136 * Uninstall the breakpoint contained in the given counter.
137 *
138 * First we search the debug address register it uses and then we disable
139 * it.
140 *
141 * Atomic: we hold the counter->ctx->lock and we only handle variables
142 * and registers local to this cpu.
143 */
144void arch_uninstall_hw_breakpoint(struct perf_event *bp)
145{
146 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
147 unsigned long *dr7;
148 int i;
149
150 for (i = 0; i < HBP_NUM; i++) {
151 struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
152
153 if (*slot == bp) {
154 *slot = NULL;
155 break;
156 }
157 }
158
159 if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
160 return;
161
162 dr7 = &__get_cpu_var(cpu_dr7);
163 *dr7 &= ~__encode_dr7(i, info->len, info->type);
164
165 set_debugreg(*dr7, 7);
166}
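arch_install_hw_breakpoint() and arch_uninstall_hw_breakpoint() above are a plain slot-array protocol: claim the first NULL entry of bp_per_reg[] (its index is the debug register number), release by pointer match. A single-threaded user-space sketch; the kernel needs no locking here because it holds the counter context lock:

#include <stdio.h>

#define HBP_SLOTS 4

static void *slots[HBP_SLOTS];          /* stand-in for bp_per_reg[] */

static int slot_claim(void *bp)
{
        int i;

        for (i = 0; i < HBP_SLOTS; i++) {
                if (!slots[i]) {
                        slots[i] = bp;
                        return i;       /* debug register number */
                }
        }
        return -1;                      /* the kernel returns -EBUSY */
}

static void slot_release(void *bp)
{
        int i;

        for (i = 0; i < HBP_SLOTS; i++) {
                if (slots[i] == bp) {
                        slots[i] = NULL;
                        return;
                }
        }
}

int main(void)
{
        int dr = slot_claim(&slots);    /* any non-NULL cookie will do */

        printf("claimed dr%d\n", dr);   /* expect dr0 */
        slot_release(&slots);
        return 0;
}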
167
168static int get_hbp_len(u8 hbp_len)
169{
170 unsigned int len_in_bytes = 0;
171
172 switch (hbp_len) {
173 case X86_BREAKPOINT_LEN_1:
174 len_in_bytes = 1;
175 break;
176 case X86_BREAKPOINT_LEN_2:
177 len_in_bytes = 2;
178 break;
179 case X86_BREAKPOINT_LEN_4:
180 len_in_bytes = 4;
181 break;
182#ifdef CONFIG_X86_64
183 case X86_BREAKPOINT_LEN_8:
184 len_in_bytes = 8;
185 break;
186#endif
187 }
188 return len_in_bytes;
189}
190
191/*
192 * Check for virtual address in user space.
193 */
194int arch_check_va_in_userspace(unsigned long va, u8 hbp_len)
195{
196 unsigned int len;
197
198 len = get_hbp_len(hbp_len);
199
200 return (va <= TASK_SIZE - len);
201}
202
203/*
204 * Check for virtual address in kernel space.
205 */
206static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
207{
208 unsigned int len;
209
210 len = get_hbp_len(hbp_len);
211
212 return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
213}
214
215/*
216 * Store a breakpoint's encoded address, length, and type.
217 */
218static int arch_store_info(struct perf_event *bp)
219{
220 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
221 /*
222 * For kernel-addresses, either the address or symbol name can be
223 * specified.
224 */
225 if (info->name)
226 info->address = (unsigned long)
227 kallsyms_lookup_name(info->name);
228 if (info->address)
229 return 0;
230
231 return -EINVAL;
232}
233
234int arch_bp_generic_fields(int x86_len, int x86_type,
235 int *gen_len, int *gen_type)
236{
237 /* Len */
238 switch (x86_len) {
239 case X86_BREAKPOINT_LEN_1:
240 *gen_len = HW_BREAKPOINT_LEN_1;
241 break;
242 case X86_BREAKPOINT_LEN_2:
243 *gen_len = HW_BREAKPOINT_LEN_2;
244 break;
245 case X86_BREAKPOINT_LEN_4:
246 *gen_len = HW_BREAKPOINT_LEN_4;
247 break;
248#ifdef CONFIG_X86_64
249 case X86_BREAKPOINT_LEN_8:
250 *gen_len = HW_BREAKPOINT_LEN_8;
251 break;
252#endif
253 default:
254 return -EINVAL;
255 }
256
257 /* Type */
258 switch (x86_type) {
259 case X86_BREAKPOINT_EXECUTE:
260 *gen_type = HW_BREAKPOINT_X;
261 break;
262 case X86_BREAKPOINT_WRITE:
263 *gen_type = HW_BREAKPOINT_W;
264 break;
265 case X86_BREAKPOINT_RW:
266 *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
267 break;
268 default:
269 return -EINVAL;
270 }
271
272 return 0;
273}
274
275
276static int arch_build_bp_info(struct perf_event *bp)
277{
278 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
279
280 info->address = bp->attr.bp_addr;
281
282 /* Len */
283 switch (bp->attr.bp_len) {
284 case HW_BREAKPOINT_LEN_1:
285 info->len = X86_BREAKPOINT_LEN_1;
286 break;
287 case HW_BREAKPOINT_LEN_2:
288 info->len = X86_BREAKPOINT_LEN_2;
289 break;
290 case HW_BREAKPOINT_LEN_4:
291 info->len = X86_BREAKPOINT_LEN_4;
292 break;
293#ifdef CONFIG_X86_64
294 case HW_BREAKPOINT_LEN_8:
295 info->len = X86_BREAKPOINT_LEN_8;
296 break;
297#endif
298 default:
299 return -EINVAL;
300 }
301
302 /* Type */
303 switch (bp->attr.bp_type) {
304 case HW_BREAKPOINT_W:
305 info->type = X86_BREAKPOINT_WRITE;
306 break;
307 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
308 info->type = X86_BREAKPOINT_RW;
309 break;
310 case HW_BREAKPOINT_X:
311 info->type = X86_BREAKPOINT_EXECUTE;
312 break;
313 default:
314 return -EINVAL;
315 }
316
317 return 0;
318}
319/*
320 * Validate the arch-specific HW Breakpoint register settings
321 */
322int arch_validate_hwbkpt_settings(struct perf_event *bp,
323 struct task_struct *tsk)
324{
325 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
326 unsigned int align;
327 int ret;
328
329
330 ret = arch_build_bp_info(bp);
331 if (ret)
332 return ret;
333
334 ret = -EINVAL;
335
336 if (info->type == X86_BREAKPOINT_EXECUTE)
337 /*
338 * Ptrace-refactoring code
 339 * For now, we'll allow instruction breakpoints only for user-space
340 * addresses
341 */
342 if ((!arch_check_va_in_userspace(info->address, info->len)) &&
343 info->len != X86_BREAKPOINT_EXECUTE)
344 return ret;
345
346 switch (info->len) {
347 case X86_BREAKPOINT_LEN_1:
348 align = 0;
349 break;
350 case X86_BREAKPOINT_LEN_2:
351 align = 1;
352 break;
353 case X86_BREAKPOINT_LEN_4:
354 align = 3;
355 break;
356#ifdef CONFIG_X86_64
357 case X86_BREAKPOINT_LEN_8:
358 align = 7;
359 break;
360#endif
361 default:
362 return ret;
363 }
364
365 if (bp->callback)
366 ret = arch_store_info(bp);
367
368 if (ret < 0)
369 return ret;
370 /*
371 * Check that the low-order bits of the address are appropriate
372 * for the alignment implied by len.
373 */
374 if (info->address & align)
375 return -EINVAL;
376
377 /* Check that the virtual address is in the proper range */
378 if (tsk) {
379 if (!arch_check_va_in_userspace(info->address, info->len))
380 return -EFAULT;
381 } else {
382 if (!arch_check_va_in_kernelspace(info->address, info->len))
383 return -EFAULT;
384 }
385
386 return 0;
387}
388
389/*
390 * Dump the debug register contents to the user.
 391 * We can't dump our per cpu values because they
 392 * may contain cpu-wide breakpoints, something that
 393 * doesn't belong to the current task.
394 *
395 * TODO: include non-ptrace user breakpoints (perf)
396 */
397void aout_dump_debugregs(struct user *dump)
398{
399 int i;
400 int dr7 = 0;
401 struct perf_event *bp;
402 struct arch_hw_breakpoint *info;
403 struct thread_struct *thread = &current->thread;
404
405 for (i = 0; i < HBP_NUM; i++) {
406 bp = thread->ptrace_bps[i];
407
408 if (bp && !bp->attr.disabled) {
409 dump->u_debugreg[i] = bp->attr.bp_addr;
410 info = counter_arch_bp(bp);
411 dr7 |= encode_dr7(i, info->len, info->type);
412 } else {
413 dump->u_debugreg[i] = 0;
414 }
415 }
416
417 dump->u_debugreg[4] = 0;
418 dump->u_debugreg[5] = 0;
419 dump->u_debugreg[6] = current->thread.debugreg6;
420
421 dump->u_debugreg[7] = dr7;
422}
423EXPORT_SYMBOL_GPL(aout_dump_debugregs);
424
425/*
426 * Release the user breakpoints used by ptrace
427 */
428void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
429{
430 int i;
431 struct thread_struct *t = &tsk->thread;
432
433 for (i = 0; i < HBP_NUM; i++) {
434 unregister_hw_breakpoint(t->ptrace_bps[i]);
435 t->ptrace_bps[i] = NULL;
436 }
437}
438
439void hw_breakpoint_restore(void)
440{
441 set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0);
442 set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1);
443 set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2);
444 set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3);
445 set_debugreg(current->thread.debugreg6, 6);
446 set_debugreg(__get_cpu_var(cpu_dr7), 7);
447}
448EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
449
450/*
451 * Handle debug exception notifications.
452 *
453 * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below.
454 *
 455 * NOTIFY_DONE is returned if one of the following conditions is true.
456 * i) When the causative address is from user-space and the exception
457 * is a valid one, i.e. not triggered as a result of lazy debug register
458 * switching
459 * ii) When there are more bits than trap<n> set in DR6 register (such
460 * as BD, BS or BT) indicating that more than one debug condition is
461 * met and requires some more action in do_debug().
462 *
 463 * NOTIFY_STOP is returned for all other cases.
464 *
465 */
466static int __kprobes hw_breakpoint_handler(struct die_args *args)
467{
468 int i, cpu, rc = NOTIFY_STOP;
469 struct perf_event *bp;
470 unsigned long dr7, dr6;
471 unsigned long *dr6_p;
472
 473 /* The DR6 value is pointed to by args->err */
474 dr6_p = (unsigned long *)ERR_PTR(args->err);
475 dr6 = *dr6_p;
476
477 /* Do an early return if no trap bits are set in DR6 */
478 if ((dr6 & DR_TRAP_BITS) == 0)
479 return NOTIFY_DONE;
480
481 get_debugreg(dr7, 7);
482 /* Disable breakpoints during exception handling */
483 set_debugreg(0UL, 7);
484 /*
485 * Assert that local interrupts are disabled
486 * Reset the DRn bits in the virtualized register value.
487 * The ptrace trigger routine will add in whatever is needed.
488 */
489 current->thread.debugreg6 &= ~DR_TRAP_BITS;
490 cpu = get_cpu();
491
492 /* Handle all the breakpoints that were triggered */
493 for (i = 0; i < HBP_NUM; ++i) {
494 if (likely(!(dr6 & (DR_TRAP0 << i))))
495 continue;
496
497 /*
498 * The counter may be concurrently released but that can only
 499 * occur from a call_rcu() path. We can thus safely fetch
 500 * the breakpoint, use its callback and touch its counter
 501 * while we are inside an rcu_read_lock() section.
502 */
503 rcu_read_lock();
504
505 bp = per_cpu(bp_per_reg[i], cpu);
506 if (bp)
507 rc = NOTIFY_DONE;
508 /*
509 * Reset the 'i'th TRAP bit in dr6 to denote completion of
510 * exception handling
511 */
512 (*dr6_p) &= ~(DR_TRAP0 << i);
513 /*
514 * bp can be NULL due to lazy debug register switching
515 * or due to concurrent perf counter removing.
516 */
517 if (!bp) {
518 rcu_read_unlock();
519 break;
520 }
521
522 (bp->callback)(bp, args->regs);
523
524 rcu_read_unlock();
525 }
526 if (dr6 & (~DR_TRAP_BITS))
527 rc = NOTIFY_DONE;
528
529 set_debugreg(dr7, 7);
530 put_cpu();
531
532 return rc;
533}
534
535/*
536 * Handle debug exception notifications.
537 */
538int __kprobes hw_breakpoint_exceptions_notify(
539 struct notifier_block *unused, unsigned long val, void *data)
540{
541 if (val != DIE_DEBUG)
542 return NOTIFY_DONE;
543
544 return hw_breakpoint_handler(data);
545}
546
547void hw_breakpoint_pmu_read(struct perf_event *bp)
548{
549 /* TODO */
550}
551
552void hw_breakpoint_pmu_unthrottle(struct perf_event *bp)
553{
554 /* TODO */
555}
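End-to-end, the facility this file implements is driven from user space through perf_event_open() with the PERF_TYPE_BREAKPOINT event type added by this series. A sketch of a hardware write-watchpoint on a variable; it assumes headers from a kernel carrying these patches, a 64-bit box for the 8-byte length, and trims error handling for brevity:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>

static long watched;

int main(void)
{
        struct perf_event_attr attr;
        long long count;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.type = PERF_TYPE_BREAKPOINT;
        attr.size = sizeof(attr);
        attr.bp_type = HW_BREAKPOINT_W;         /* fire on writes */
        attr.bp_addr = (unsigned long)&watched;
        attr.bp_len = HW_BREAKPOINT_LEN_8;      /* 8-byte watch, 64-bit only */

        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }

        watched = 42;                           /* should trip the watchpoint */

        read(fd, &count, sizeof(count));
        printf("watchpoint fired %lld times\n", count);
        close(fd);
        return 0;
}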
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 04bbd5278568..19212cb01558 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -92,17 +92,17 @@ static int show_other_interrupts(struct seq_file *p, int prec)
92 seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); 92 seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
93 seq_printf(p, " TLB shootdowns\n"); 93 seq_printf(p, " TLB shootdowns\n");
94#endif 94#endif
95#ifdef CONFIG_X86_MCE 95#ifdef CONFIG_X86_THERMAL_VECTOR
96 seq_printf(p, "%*s: ", prec, "TRM"); 96 seq_printf(p, "%*s: ", prec, "TRM");
97 for_each_online_cpu(j) 97 for_each_online_cpu(j)
98 seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); 98 seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
99 seq_printf(p, " Thermal event interrupts\n"); 99 seq_printf(p, " Thermal event interrupts\n");
100# ifdef CONFIG_X86_MCE_THRESHOLD 100#endif
101#ifdef CONFIG_X86_MCE_THRESHOLD
101 seq_printf(p, "%*s: ", prec, "THR"); 102 seq_printf(p, "%*s: ", prec, "THR");
102 for_each_online_cpu(j) 103 for_each_online_cpu(j)
103 seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); 104 seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
104 seq_printf(p, " Threshold APIC interrupts\n"); 105 seq_printf(p, " Threshold APIC interrupts\n");
105# endif
106#endif 106#endif
107#ifdef CONFIG_X86_MCE 107#ifdef CONFIG_X86_MCE
108 seq_printf(p, "%*s: ", prec, "MCE"); 108 seq_printf(p, "%*s: ", prec, "MCE");
@@ -194,11 +194,11 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
194 sum += irq_stats(cpu)->irq_call_count; 194 sum += irq_stats(cpu)->irq_call_count;
195 sum += irq_stats(cpu)->irq_tlb_count; 195 sum += irq_stats(cpu)->irq_tlb_count;
196#endif 196#endif
197#ifdef CONFIG_X86_MCE 197#ifdef CONFIG_X86_THERMAL_VECTOR
198 sum += irq_stats(cpu)->irq_thermal_count; 198 sum += irq_stats(cpu)->irq_thermal_count;
199# ifdef CONFIG_X86_MCE_THRESHOLD 199#endif
200#ifdef CONFIG_X86_MCE_THRESHOLD
200 sum += irq_stats(cpu)->irq_threshold_count; 201 sum += irq_stats(cpu)->irq_threshold_count;
201# endif
202#endif 202#endif
203#ifdef CONFIG_X86_MCE 203#ifdef CONFIG_X86_MCE
204 sum += per_cpu(mce_exception_count, cpu); 204 sum += per_cpu(mce_exception_count, cpu);
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 8d82a77a3f3b..34e86b67550c 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -43,6 +43,7 @@
43#include <linux/smp.h> 43#include <linux/smp.h>
44#include <linux/nmi.h> 44#include <linux/nmi.h>
45 45
46#include <asm/debugreg.h>
46#include <asm/apicdef.h> 47#include <asm/apicdef.h>
47#include <asm/system.h> 48#include <asm/system.h>
48 49
@@ -434,6 +435,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args)
434 "resuming...\n"); 435 "resuming...\n");
435 kgdb_arch_handle_exception(args->trapnr, args->signr, 436 kgdb_arch_handle_exception(args->trapnr, args->signr,
436 args->err, "c", "", regs); 437 args->err, "c", "", regs);
438 /*
439 * Reset the BS bit in dr6 (pointed by args->err) to
440 * denote completion of processing
441 */
442 (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
437 443
438 return NOTIFY_STOP; 444 return NOTIFY_STOP;
439} 445}
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 7b5169d2b000..3fe86d706a14 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -48,12 +48,15 @@
48#include <linux/preempt.h> 48#include <linux/preempt.h>
49#include <linux/module.h> 49#include <linux/module.h>
50#include <linux/kdebug.h> 50#include <linux/kdebug.h>
51#include <linux/kallsyms.h>
51 52
52#include <asm/cacheflush.h> 53#include <asm/cacheflush.h>
53#include <asm/desc.h> 54#include <asm/desc.h>
54#include <asm/pgtable.h> 55#include <asm/pgtable.h>
55#include <asm/uaccess.h> 56#include <asm/uaccess.h>
56#include <asm/alternative.h> 57#include <asm/alternative.h>
58#include <asm/insn.h>
59#include <asm/debugreg.h>
57 60
58void jprobe_return_end(void); 61void jprobe_return_end(void);
59 62
@@ -106,50 +109,6 @@ static const u32 twobyte_is_boostable[256 / 32] = {
106 /* ----------------------------------------------- */ 109 /* ----------------------------------------------- */
107 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 110 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
108}; 111};
109static const u32 onebyte_has_modrm[256 / 32] = {
110 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
111 /* ----------------------------------------------- */
112 W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */
113 W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */
114 W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */
115 W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */
116 W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
117 W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
118 W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */
119 W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */
120 W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
121 W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */
122 W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */
123 W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */
124 W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */
125 W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
126 W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */
127 W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) /* f0 */
128 /* ----------------------------------------------- */
129 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
130};
131static const u32 twobyte_has_modrm[256 / 32] = {
132 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
133 /* ----------------------------------------------- */
134 W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */
135 W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */
136 W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */
137 W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */
138 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */
139 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */
140 W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */
141 W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */
142 W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */
143 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */
144 W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */
145 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */
146 W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */
147 W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */
148 W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */
149 W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* ff */
150 /* ----------------------------------------------- */
151 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
152};
153#undef W 112#undef W
154 113
155struct kretprobe_blackpoint kretprobe_blacklist[] = { 114struct kretprobe_blackpoint kretprobe_blacklist[] = {
@@ -244,6 +203,75 @@ retry:
244 } 203 }
245} 204}
246 205
206/* Recover the probed instruction at addr for further analysis. */
207static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
208{
209 struct kprobe *kp;
210 kp = get_kprobe((void *)addr);
211 if (!kp)
212 return -EINVAL;
213
214 /*
 209 215 * Basically, kp->ainsn.insn holds the original instruction.
 210 216 * However, a RIP-relative instruction cannot be single-stepped
 211 217 * at a different place, so fix_riprel() tweaks the displacement
 212 218 * of that instruction. In that case, we can't recover the
 213 219 * original instruction from kp->ainsn.insn.
 220 *
 221 * On the other hand, kp->opcode has a copy of the first byte of
 222 * the probed instruction, which is overwritten by int3. And
 223 * since the instruction at kp->addr is not modified by kprobes
 224 * except for the first byte, we can recover the original
 225 * instruction from it and kp->opcode.
226 */
227 memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
228 buf[0] = kp->opcode;
229 return 0;
230}
231
232/* Dummy buffers for kallsyms_lookup */
233static char __dummy_buf[KSYM_NAME_LEN];
234
235/* Check if paddr is at an instruction boundary */
236static int __kprobes can_probe(unsigned long paddr)
237{
238 int ret;
239 unsigned long addr, offset = 0;
240 struct insn insn;
241 kprobe_opcode_t buf[MAX_INSN_SIZE];
242
243 if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf))
244 return 0;
245
246 /* Decode instructions */
247 addr = paddr - offset;
248 while (addr < paddr) {
249 kernel_insn_init(&insn, (void *)addr);
250 insn_get_opcode(&insn);
251
252 /*
253 * Check if the instruction has been modified by another
254 * kprobe, in which case we replace the breakpoint by the
255 * original instruction in our buffer.
256 */
257 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
258 ret = recover_probed_instruction(buf, addr);
259 if (ret)
260 /*
 261 * Another debugging subsystem might have inserted
262 * this breakpoint. In that case, we can't
263 * recover it.
264 */
265 return 0;
266 kernel_insn_init(&insn, buf);
267 }
268 insn_get_length(&insn);
269 addr += insn.length;
270 }
271
272 return (addr == paddr);
273}
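can_probe() above is a linear-sweep disassembly check: start decoding at the symbol that contains the address, advance by each instruction's length, and accept the probe only if the sweep lands exactly on it (recovering the original bytes wherever another kprobe's int3 is in the way). The shape of the loop, with a hypothetical stand-in for the instruction-length decoder:

#include <stdio.h>

static int insn_len_stub(unsigned long addr)
{
        (void)addr;
        return 1;       /* pretend every instruction is one byte long */
}

static int can_probe_sketch(unsigned long func_start, unsigned long paddr)
{
        unsigned long addr = func_start;

        while (addr < paddr)
                addr += insn_len_stub(addr);

        return addr == paddr;   /* 0 if paddr falls inside an instruction */
}

int main(void)
{
        printf("%d\n", can_probe_sketch(0x1000, 0x1004)); /* 1: on boundary */
        return 0;
}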
274
247/* 275/*
248 * Returns non-zero if opcode modifies the interrupt flag. 276 * Returns non-zero if opcode modifies the interrupt flag.
249 */ 277 */
@@ -277,68 +305,30 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
277static void __kprobes fix_riprel(struct kprobe *p) 305static void __kprobes fix_riprel(struct kprobe *p)
278{ 306{
279#ifdef CONFIG_X86_64 307#ifdef CONFIG_X86_64
280 u8 *insn = p->ainsn.insn; 308 struct insn insn;
281 s64 disp; 309 kernel_insn_init(&insn, p->ainsn.insn);
282 int need_modrm;
283
284 /* Skip legacy instruction prefixes. */
285 while (1) {
286 switch (*insn) {
287 case 0x66:
288 case 0x67:
289 case 0x2e:
290 case 0x3e:
291 case 0x26:
292 case 0x64:
293 case 0x65:
294 case 0x36:
295 case 0xf0:
296 case 0xf3:
297 case 0xf2:
298 ++insn;
299 continue;
300 }
301 break;
302 }
303 310
304 /* Skip REX instruction prefix. */ 311 if (insn_rip_relative(&insn)) {
305 if (is_REX_prefix(insn)) 312 s64 newdisp;
306 ++insn; 313 u8 *disp;
307 314 insn_get_displacement(&insn);
308 if (*insn == 0x0f) { 315 /*
309 /* Two-byte opcode. */ 316 * The copied instruction uses the %rip-relative addressing
310 ++insn; 317 * mode. Adjust the displacement for the difference between
311 need_modrm = test_bit(*insn, 318 * the original location of this instruction and the location
312 (unsigned long *)twobyte_has_modrm); 319 * of the copy that will actually be run. The tricky bit here
313 } else 320 * is making sure that the sign extension happens correctly in
314 /* One-byte opcode. */ 321 * this calculation, since we need a signed 32-bit result to
315 need_modrm = test_bit(*insn, 322 * be sign-extended to 64 bits when it's added to the %rip
316 (unsigned long *)onebyte_has_modrm); 323 * value and yield the same 64-bit result that the sign-
317 324 * extension of the original signed 32-bit displacement would
318 if (need_modrm) { 325 * have given.
319 u8 modrm = *++insn; 326 */
320 if ((modrm & 0xc7) == 0x05) { 327 newdisp = (u8 *) p->addr + (s64) insn.displacement.value -
321 /* %rip+disp32 addressing mode */ 328 (u8 *) p->ainsn.insn;
322 /* Displacement follows ModRM byte. */ 329 BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */
323 ++insn; 330 disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn);
324 /* 331 *(s32 *) disp = (s32) newdisp;
325 * The copied instruction uses the %rip-relative
326 * addressing mode. Adjust the displacement for the
327 * difference between the original location of this
328 * instruction and the location of the copy that will
329 * actually be run. The tricky bit here is making sure
330 * that the sign extension happens correctly in this
331 * calculation, since we need a signed 32-bit result to
332 * be sign-extended to 64 bits when it's added to the
333 * %rip value and yield the same 64-bit result that the
334 * sign-extension of the original signed 32-bit
335 * displacement would have given.
336 */
337 disp = (u8 *) p->addr + *((s32 *) insn) -
338 (u8 *) p->ainsn.insn;
339 BUG_ON((s64) (s32) disp != disp); /* Sanity check. */
340 *(s32 *)insn = (s32) disp;
341 }
342 } 332 }
343#endif 333#endif
344} 334}
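The displacement rewrite above is pure pointer arithmetic: a %rip-relative operand must keep addressing the same target once the instruction runs from the out-of-line slot, so the new displacement is the original location plus the old displacement minus the copy's location, and it must still fit in a signed 32-bit field. A stand-alone sketch with the same overflow check:

#include <stdio.h>
#include <stdint.h>
#include <assert.h>

static int32_t retarget_disp(const uint8_t *orig_insn,
                             const uint8_t *copy_insn, int32_t orig_disp)
{
        /* target = orig_insn + orig_disp; keep it fixed for the copy */
        int64_t newdisp = (int64_t)(orig_insn - copy_insn) + orig_disp;

        assert((int64_t)(int32_t)newdisp == newdisp);   /* must fit in s32 */
        return (int32_t)newdisp;
}

int main(void)
{
        uint8_t text[16], slot[16];
        int32_t nd = retarget_disp(text, slot, 8);

        /* slot + nd addresses the same byte as text + 8 */
        printf("%s\n", ((uintptr_t)slot + nd == (uintptr_t)text + 8) ?
               "target preserved" : "bug");
        return 0;
}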
@@ -359,6 +349,8 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p)
359 349
360int __kprobes arch_prepare_kprobe(struct kprobe *p) 350int __kprobes arch_prepare_kprobe(struct kprobe *p)
361{ 351{
352 if (!can_probe((unsigned long)p->addr))
353 return -EILSEQ;
362 /* insn: must be on special executable page on x86. */ 354 /* insn: must be on special executable page on x86. */
363 p->ainsn.insn = get_insn_slot(); 355 p->ainsn.insn = get_insn_slot();
364 if (!p->ainsn.insn) 356 if (!p->ainsn.insn)
@@ -472,17 +464,6 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
472{ 464{
473 switch (kcb->kprobe_status) { 465 switch (kcb->kprobe_status) {
474 case KPROBE_HIT_SSDONE: 466 case KPROBE_HIT_SSDONE:
475#ifdef CONFIG_X86_64
476 /* TODO: Provide re-entrancy from post_kprobes_handler() and
477 * avoid exception stack corruption while single-stepping on
478 * the instruction of the new probe.
479 */
480 arch_disarm_kprobe(p);
481 regs->ip = (unsigned long)p->addr;
482 reset_current_kprobe();
483 preempt_enable_no_resched();
484 break;
485#endif
486 case KPROBE_HIT_ACTIVE: 467 case KPROBE_HIT_ACTIVE:
487 save_previous_kprobe(kcb); 468 save_previous_kprobe(kcb);
488 set_current_kprobe(p, regs, kcb); 469 set_current_kprobe(p, regs, kcb);
@@ -491,18 +472,16 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
491 kcb->kprobe_status = KPROBE_REENTER; 472 kcb->kprobe_status = KPROBE_REENTER;
492 break; 473 break;
493 case KPROBE_HIT_SS: 474 case KPROBE_HIT_SS:
494 if (p == kprobe_running()) { 475 /* A probe has been hit in the codepath leading up to, or just
495 regs->flags &= ~X86_EFLAGS_TF; 476 * after, single-stepping of a probed instruction. This entire
 496 regs->flags |= kcb->kprobe_saved_flags; 477 * codepath should strictly reside in the .kprobes.text section.
497 return 0; 478 * Raise a BUG or we'll continue in an endless reentering loop
498 } else { 479 * and eventually a stack overflow.
499 /* A probe has been hit in the codepath leading up 480 */
500 * to, or just after, single-stepping of a probed 481 printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n",
501 * instruction. This entire codepath should strictly 482 p->addr);
502 * reside in .kprobes.text section. Raise a warning 483 dump_kprobe(p);
503 * to highlight this peculiar case. 484 BUG();
504 */
505 }
506 default: 485 default:
507 /* impossible cases */ 486 /* impossible cases */
508 WARN_ON(1); 487 WARN_ON(1);
@@ -967,8 +946,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
967 ret = NOTIFY_STOP; 946 ret = NOTIFY_STOP;
968 break; 947 break;
969 case DIE_DEBUG: 948 case DIE_DEBUG:
970 if (post_kprobe_handler(args->regs)) 949 if (post_kprobe_handler(args->regs)) {
950 /*
951 * Reset the BS bit in dr6 (pointed by args->err) to
952 * denote completion of processing
953 */
954 (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
971 ret = NOTIFY_STOP; 955 ret = NOTIFY_STOP;
956 }
972 break; 957 break;
973 case DIE_GPF: 958 case DIE_GPF:
974 /* 959 /*
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index c1c429d00130..c843f8406da2 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -25,6 +25,7 @@
25#include <asm/desc.h> 25#include <asm/desc.h>
26#include <asm/system.h> 26#include <asm/system.h>
27#include <asm/cacheflush.h> 27#include <asm/cacheflush.h>
28#include <asm/debugreg.h>
28 29
29static void set_idt(void *newidt, __u16 limit) 30static void set_idt(void *newidt, __u16 limit)
30{ 31{
@@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image)
202 203
203 /* Interrupts aren't acceptable while we reboot */ 204 /* Interrupts aren't acceptable while we reboot */
204 local_irq_disable(); 205 local_irq_disable();
206 hw_breakpoint_disable();
205 207
206 if (image->preserve_context) { 208 if (image->preserve_context) {
207#ifdef CONFIG_X86_IO_APIC 209#ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 84c3bf209e98..4a8bb82248ae 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -18,6 +18,7 @@
18#include <asm/pgtable.h> 18#include <asm/pgtable.h>
19#include <asm/tlbflush.h> 19#include <asm/tlbflush.h>
20#include <asm/mmu_context.h> 20#include <asm/mmu_context.h>
21#include <asm/debugreg.h>
21 22
22static int init_one_level2_page(struct kimage *image, pgd_t *pgd, 23static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
23 unsigned long addr) 24 unsigned long addr)
@@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image)
282 283
283 /* Interrupts aren't acceptable while we reboot */ 284 /* Interrupts aren't acceptable while we reboot */
284 local_irq_disable(); 285 local_irq_disable();
286 hw_breakpoint_disable();
285 287
286 if (image->preserve_context) { 288 if (image->preserve_context) {
287#ifdef CONFIG_X86_IO_APIC 289#ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 5284cd2b5776..744508e7cfdd 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -10,6 +10,7 @@
10#include <linux/clockchips.h> 10#include <linux/clockchips.h>
11#include <linux/random.h> 11#include <linux/random.h>
12#include <trace/events/power.h> 12#include <trace/events/power.h>
13#include <linux/hw_breakpoint.h>
13#include <asm/system.h> 14#include <asm/system.h>
14#include <asm/apic.h> 15#include <asm/apic.h>
15#include <asm/syscalls.h> 16#include <asm/syscalls.h>
@@ -17,6 +18,7 @@
17#include <asm/uaccess.h> 18#include <asm/uaccess.h>
18#include <asm/i387.h> 19#include <asm/i387.h>
19#include <asm/ds.h> 20#include <asm/ds.h>
21#include <asm/debugreg.h>
20 22
21unsigned long idle_halt; 23unsigned long idle_halt;
22EXPORT_SYMBOL(idle_halt); 24EXPORT_SYMBOL(idle_halt);
@@ -103,14 +105,7 @@ void flush_thread(void)
103 } 105 }
104#endif 106#endif
105 107
106 clear_tsk_thread_flag(tsk, TIF_DEBUG); 108 flush_ptrace_hw_breakpoint(tsk);
107
108 tsk->thread.debugreg0 = 0;
109 tsk->thread.debugreg1 = 0;
110 tsk->thread.debugreg2 = 0;
111 tsk->thread.debugreg3 = 0;
112 tsk->thread.debugreg6 = 0;
113 tsk->thread.debugreg7 = 0;
114 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); 109 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
115 /* 110 /*
116 * Forget coprocessor state.. 111 * Forget coprocessor state..
@@ -192,16 +187,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
192 else if (next->debugctlmsr != prev->debugctlmsr) 187 else if (next->debugctlmsr != prev->debugctlmsr)
193 update_debugctlmsr(next->debugctlmsr); 188 update_debugctlmsr(next->debugctlmsr);
194 189
195 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
196 set_debugreg(next->debugreg0, 0);
197 set_debugreg(next->debugreg1, 1);
198 set_debugreg(next->debugreg2, 2);
199 set_debugreg(next->debugreg3, 3);
200 /* no 4 and 5 */
201 set_debugreg(next->debugreg6, 6);
202 set_debugreg(next->debugreg7, 7);
203 }
204
205 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ 190 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
206 test_tsk_thread_flag(next_p, TIF_NOTSC)) { 191 test_tsk_thread_flag(next_p, TIF_NOTSC)) {
207 /* prev and next are different */ 192 /* prev and next are different */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 4cf79567cdab..d5bd3132ee70 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -58,6 +58,7 @@
58#include <asm/idle.h> 58#include <asm/idle.h>
59#include <asm/syscalls.h> 59#include <asm/syscalls.h>
60#include <asm/ds.h> 60#include <asm/ds.h>
61#include <asm/debugreg.h>
61 62
62asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); 63asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
63 64
@@ -259,7 +260,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
259 260
260 task_user_gs(p) = get_user_gs(regs); 261 task_user_gs(p) = get_user_gs(regs);
261 262
263 p->thread.io_bitmap_ptr = NULL;
262 tsk = current; 264 tsk = current;
265 err = -ENOMEM;
266
267 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
268
263 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { 269 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
264 p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, 270 p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
265 IO_BITMAP_BYTES, GFP_KERNEL); 271 IO_BITMAP_BYTES, GFP_KERNEL);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index eb62cbcaa490..70cf15873f3d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -52,6 +52,7 @@
52#include <asm/idle.h> 52#include <asm/idle.h>
53#include <asm/syscalls.h> 53#include <asm/syscalls.h>
54#include <asm/ds.h> 54#include <asm/ds.h>
55#include <asm/debugreg.h>
55 56
56asmlinkage extern void ret_from_fork(void); 57asmlinkage extern void ret_from_fork(void);
57 58
@@ -297,12 +298,16 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
297 298
298 p->thread.fs = me->thread.fs; 299 p->thread.fs = me->thread.fs;
299 p->thread.gs = me->thread.gs; 300 p->thread.gs = me->thread.gs;
301 p->thread.io_bitmap_ptr = NULL;
300 302
301 savesegment(gs, p->thread.gsindex); 303 savesegment(gs, p->thread.gsindex);
302 savesegment(fs, p->thread.fsindex); 304 savesegment(fs, p->thread.fsindex);
303 savesegment(es, p->thread.es); 305 savesegment(es, p->thread.es);
304 savesegment(ds, p->thread.ds); 306 savesegment(ds, p->thread.ds);
305 307
308 err = -ENOMEM;
309 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
310
306 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { 311 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
307 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); 312 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
308 if (!p->thread.io_bitmap_ptr) { 313 if (!p->thread.io_bitmap_ptr) {
@@ -341,6 +346,7 @@ out:
341 kfree(p->thread.io_bitmap_ptr); 346 kfree(p->thread.io_bitmap_ptr);
342 p->thread.io_bitmap_max = 0; 347 p->thread.io_bitmap_max = 0;
343 } 348 }
349
344 return err; 350 return err;
345} 351}
346 352
@@ -495,6 +501,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
495 */ 501 */
496 if (preload_fpu) 502 if (preload_fpu)
497 __math_state_restore(); 503 __math_state_restore();
504
498 return prev_p; 505 return prev_p;
499} 506}
500 507
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 7b058a2dc66a..04d182a7cfdb 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -22,6 +22,8 @@
22#include <linux/seccomp.h> 22#include <linux/seccomp.h>
23#include <linux/signal.h> 23#include <linux/signal.h>
24#include <linux/workqueue.h> 24#include <linux/workqueue.h>
25#include <linux/perf_event.h>
26#include <linux/hw_breakpoint.h>
25 27
26#include <asm/uaccess.h> 28#include <asm/uaccess.h>
27#include <asm/pgtable.h> 29#include <asm/pgtable.h>
@@ -34,6 +36,7 @@
34#include <asm/prctl.h> 36#include <asm/prctl.h>
35#include <asm/proto.h> 37#include <asm/proto.h>
36#include <asm/ds.h> 38#include <asm/ds.h>
39#include <asm/hw_breakpoint.h>
37 40
38#include "tls.h" 41#include "tls.h"
39 42
@@ -49,6 +52,118 @@ enum x86_regset {
49 REGSET_IOPERM32, 52 REGSET_IOPERM32,
50}; 53};
51 54
55struct pt_regs_offset {
56 const char *name;
57 int offset;
58};
59
60#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
61#define REG_OFFSET_END {.name = NULL, .offset = 0}
62
63static const struct pt_regs_offset regoffset_table[] = {
64#ifdef CONFIG_X86_64
65 REG_OFFSET_NAME(r15),
66 REG_OFFSET_NAME(r14),
67 REG_OFFSET_NAME(r13),
68 REG_OFFSET_NAME(r12),
69 REG_OFFSET_NAME(r11),
70 REG_OFFSET_NAME(r10),
71 REG_OFFSET_NAME(r9),
72 REG_OFFSET_NAME(r8),
73#endif
74 REG_OFFSET_NAME(bx),
75 REG_OFFSET_NAME(cx),
76 REG_OFFSET_NAME(dx),
77 REG_OFFSET_NAME(si),
78 REG_OFFSET_NAME(di),
79 REG_OFFSET_NAME(bp),
80 REG_OFFSET_NAME(ax),
81#ifdef CONFIG_X86_32
82 REG_OFFSET_NAME(ds),
83 REG_OFFSET_NAME(es),
84 REG_OFFSET_NAME(fs),
85 REG_OFFSET_NAME(gs),
86#endif
87 REG_OFFSET_NAME(orig_ax),
88 REG_OFFSET_NAME(ip),
89 REG_OFFSET_NAME(cs),
90 REG_OFFSET_NAME(flags),
91 REG_OFFSET_NAME(sp),
92 REG_OFFSET_NAME(ss),
93 REG_OFFSET_END,
94};
95
96/**
97 * regs_query_register_offset() - query register offset from its name
98 * @name: the name of a register
99 *
100 * regs_query_register_offset() returns the offset of a register in struct
101 * pt_regs from its name. If the name is invalid, this returns -EINVAL;
102 */
103int regs_query_register_offset(const char *name)
104{
105 const struct pt_regs_offset *roff;
106 for (roff = regoffset_table; roff->name != NULL; roff++)
107 if (!strcmp(roff->name, name))
108 return roff->offset;
109 return -EINVAL;
110}
111
112/**
113 * regs_query_register_name() - query register name from its offset
114 * @offset: the offset of a register in struct pt_regs.
115 *
116 * regs_query_register_name() returns the name of a register from its
 117 * offset in struct pt_regs. If the @offset is invalid, this returns NULL.
118 */
119const char *regs_query_register_name(unsigned int offset)
120{
121 const struct pt_regs_offset *roff;
122 for (roff = regoffset_table; roff->name != NULL; roff++)
123 if (roff->offset == offset)
124 return roff->name;
125 return NULL;
126}
127
128static const int arg_offs_table[] = {
129#ifdef CONFIG_X86_32
130 [0] = offsetof(struct pt_regs, ax),
131 [1] = offsetof(struct pt_regs, dx),
132 [2] = offsetof(struct pt_regs, cx)
133#else /* CONFIG_X86_64 */
134 [0] = offsetof(struct pt_regs, di),
135 [1] = offsetof(struct pt_regs, si),
136 [2] = offsetof(struct pt_regs, dx),
137 [3] = offsetof(struct pt_regs, cx),
138 [4] = offsetof(struct pt_regs, r8),
139 [5] = offsetof(struct pt_regs, r9)
140#endif
141};
142
143/**
144 * regs_get_argument_nth() - get Nth argument at function call
145 * @regs: pt_regs which contains registers at function entry.
146 * @n: argument number.
147 *
 148 * regs_get_argument_nth() returns the @n'th argument of a function call.
 149 * Since the kernel stack is usually changed right after function entry,
 150 * you must use this at function entry. If the @n'th entry is NOT on the
 151 * kernel stack or in pt_regs, this returns 0.
152 */
153unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n)
154{
155 if (n < ARRAY_SIZE(arg_offs_table))
156 return *(unsigned long *)((char *)regs + arg_offs_table[n]);
157 else {
158 /*
159 * The typical case: arg n is on the stack.
160 * (Note: stack[0] = return address, so skip it)
161 */
162 n -= ARRAY_SIZE(arg_offs_table);
163 return regs_get_kernel_stack_nth(regs, 1 + n);
164 }
165}
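The regoffset_table pattern above, reduced to its essence: map register names to byte offsets with offsetof(), then index a saved register set generically. A stand-alone sketch with a toy regs struct (all names here are illustrative):

#include <stdio.h>
#include <stddef.h>
#include <string.h>

struct toy_regs { unsigned long ax, bx, ip; };

static const struct { const char *name; size_t offset; } toy_table[] = {
        { "ax", offsetof(struct toy_regs, ax) },
        { "bx", offsetof(struct toy_regs, bx) },
        { "ip", offsetof(struct toy_regs, ip) },
        { NULL, 0 },
};

static long toy_query_offset(const char *name)
{
        int i;

        for (i = 0; toy_table[i].name; i++)
                if (!strcmp(toy_table[i].name, name))
                        return (long)toy_table[i].offset;
        return -1;      /* the kernel returns -EINVAL */
}

int main(void)
{
        struct toy_regs regs = { .ax = 1, .bx = 2, .ip = 3 };
        long off = toy_query_offset("bx");

        printf("bx = %lu\n",
               *(unsigned long *)((char *)&regs + off));        /* prints 2 */
        return 0;
}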
166
52/* 167/*
53 * does not yet catch signals sent when the child dies. 168 * does not yet catch signals sent when the child dies.
54 * in exit.c or in signal.c. 169 * in exit.c or in signal.c.
@@ -137,11 +252,6 @@ static int set_segment_reg(struct task_struct *task,
137 return 0; 252 return 0;
138} 253}
139 254
140static unsigned long debugreg_addr_limit(struct task_struct *task)
141{
142 return TASK_SIZE - 3;
143}
144
145#else /* CONFIG_X86_64 */ 255#else /* CONFIG_X86_64 */
146 256
147#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) 257#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT)
@@ -266,15 +376,6 @@ static int set_segment_reg(struct task_struct *task,
266 return 0; 376 return 0;
267} 377}
268 378
269static unsigned long debugreg_addr_limit(struct task_struct *task)
270{
271#ifdef CONFIG_IA32_EMULATION
272 if (test_tsk_thread_flag(task, TIF_IA32))
273 return IA32_PAGE_OFFSET - 3;
274#endif
275 return TASK_SIZE_MAX - 7;
276}
277
278#endif /* CONFIG_X86_32 */ 379#endif /* CONFIG_X86_32 */
279 380
280static unsigned long get_flags(struct task_struct *task) 381static unsigned long get_flags(struct task_struct *task)
@@ -454,99 +555,239 @@ static int genregs_set(struct task_struct *target,
454 return ret; 555 return ret;
455} 556}
456 557
558static void ptrace_triggered(struct perf_event *bp, void *data)
559{
560 int i;
561 struct thread_struct *thread = &(current->thread);
562
563 /*
564 * Store in the virtual DR6 register the fact that the breakpoint
565 * was hit so the thread's debugger will see it.
566 */
567 for (i = 0; i < HBP_NUM; i++) {
568 if (thread->ptrace_bps[i] == bp)
569 break;
570 }
571
572 thread->debugreg6 |= (DR_TRAP0 << i);
573}
574
457/* 575/*
 458 * This function is trivial and will be inlined by the compiler. 576 * Walk through every ptrace breakpoint for this thread and
459 * Having it separates the implementation details of debug 577 * build the dr7 value on top of their attributes.
460 * registers from the interface details of ptrace. 578 *
461 */ 579 */
462static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) 580static unsigned long ptrace_get_dr7(struct perf_event *bp[])
463{ 581{
464 switch (n) { 582 int i;
465 case 0: return child->thread.debugreg0; 583 int dr7 = 0;
466 case 1: return child->thread.debugreg1; 584 struct arch_hw_breakpoint *info;
467 case 2: return child->thread.debugreg2; 585
468 case 3: return child->thread.debugreg3; 586 for (i = 0; i < HBP_NUM; i++) {
469 case 6: return child->thread.debugreg6; 587 if (bp[i] && !bp[i]->attr.disabled) {
470 case 7: return child->thread.debugreg7; 588 info = counter_arch_bp(bp[i]);
589 dr7 |= encode_dr7(i, info->len, info->type);
590 }
471 } 591 }
472 return 0; 592
593 return dr7;
473} 594}
474 595
475static int ptrace_set_debugreg(struct task_struct *child, 596static struct perf_event *
476 int n, unsigned long data) 597ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
598 struct task_struct *tsk, int disabled)
477{ 599{
478 int i; 600 int err;
601 int gen_len, gen_type;
602 DEFINE_BREAKPOINT_ATTR(attr);
479 603
480 if (unlikely(n == 4 || n == 5)) 604 /*
 481 if (unlikely(n == 4 || n == 5)) 605 * We should have at least an inactive breakpoint at this
 606 * slot. Otherwise the user is writing dr7 without having
 607 * written the address register first.
608 */
609 if (!bp)
610 return ERR_PTR(-EINVAL);
482 611
483 if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) 612 err = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
484 return -EIO; 613 if (err)
614 return ERR_PTR(err);
485 615
486 switch (n) { 616 attr = bp->attr;
487 case 0: child->thread.debugreg0 = data; break; 617 attr.bp_len = gen_len;
488 case 1: child->thread.debugreg1 = data; break; 618 attr.bp_type = gen_type;
489 case 2: child->thread.debugreg2 = data; break; 619 attr.disabled = disabled;
490 case 3: child->thread.debugreg3 = data; break;
491 620
492 case 6: 621 return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk);
493 if ((data & ~0xffffffffUL) != 0) 622}
494 return -EIO; 623
495 child->thread.debugreg6 = data; 624/*
496 break; 625 * Handle ptrace writes to debug register 7.
626 */
627static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
628{
629 struct thread_struct *thread = &(tsk->thread);
630 unsigned long old_dr7;
631 int i, orig_ret = 0, rc = 0;
632 int enabled, second_pass = 0;
633 unsigned len, type;
634 struct perf_event *bp;
635
636 data &= ~DR_CONTROL_RESERVED;
637 old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
638restore:
639 /*
640 * Loop through all the hardware breakpoints, making the
641 * appropriate changes to each.
642 */
643 for (i = 0; i < HBP_NUM; i++) {
644 enabled = decode_dr7(data, i, &len, &type);
645 bp = thread->ptrace_bps[i];
646
647 if (!enabled) {
648 if (bp) {
649 /*
650 * Don't unregister the breakpoints right-away,
651 * unless all register_user_hw_breakpoint()
652 * requests have succeeded. This prevents
653 * any window of opportunity for debug
654 * register grabbing by other users.
655 */
656 if (!second_pass)
657 continue;
658
659 thread->ptrace_bps[i] = NULL;
660 bp = ptrace_modify_breakpoint(bp, len, type,
661 tsk, 1);
662 if (IS_ERR(bp)) {
663 rc = PTR_ERR(bp);
664 thread->ptrace_bps[i] = NULL;
665 break;
666 }
667 thread->ptrace_bps[i] = bp;
668 }
669 continue;
670 }
671
672 bp = ptrace_modify_breakpoint(bp, len, type, tsk, 0);
673
674 /* Incorrect bp, or we have a bug in bp API */
675 if (IS_ERR(bp)) {
676 rc = PTR_ERR(bp);
677 thread->ptrace_bps[i] = NULL;
678 break;
679 }
680 thread->ptrace_bps[i] = bp;
681 }
682 /*
683 * Make a second pass to free the remaining unused breakpoints
684 * or to restore the original breakpoints if an error occurred.
685 */
686 if (!second_pass) {
687 second_pass = 1;
688 if (rc < 0) {
689 orig_ret = rc;
690 data = old_dr7;
691 }
692 goto restore;
693 }
694 return ((orig_ret < 0) ? orig_ret : rc);
695}
696
697/*
698 * Handle PTRACE_PEEKUSR calls for the debug register area.
699 */
700static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
701{
702 struct thread_struct *thread = &(tsk->thread);
703 unsigned long val = 0;
497 704
498 case 7: 705 if (n < HBP_NUM) {
706 struct perf_event *bp;
707 bp = thread->ptrace_bps[n];
708 if (!bp)
709 return 0;
710 val = bp->hw.info.address;
711 } else if (n == 6) {
712 val = thread->debugreg6;
713 } else if (n == 7) {
714 val = ptrace_get_dr7(thread->ptrace_bps);
715 }
716 return val;
717}
718
719static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
720 unsigned long addr)
721{
722 struct perf_event *bp;
723 struct thread_struct *t = &tsk->thread;
724 DEFINE_BREAKPOINT_ATTR(attr);
725
726 if (!t->ptrace_bps[nr]) {
499 /* 727 /*
500 * Sanity-check data. Take one half-byte at once with 728 * Use a stub len and type to register (reserve) an inactive but
501 * check = (val >> (16 + 4*i)) & 0xf. It contains the 729 * correct bp
502 * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
503 * 2 and 3 are LENi. Given a list of invalid values,
504 * we do mask |= 1 << invalid_value, so that
505 * (mask >> check) & 1 is a correct test for invalid
506 * values.
507 *
508 * R/Wi contains the type of the breakpoint /
509 * watchpoint, LENi contains the length of the watched
510 * data in the watchpoint case.
511 *
512 * The invalid values are:
513 * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit]
514 * - R/Wi == 0x10 (break on I/O reads or writes), so
515 * mask |= 0x4444.
516 * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
517 * 0x1110.
518 *
519 * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
520 *
521 * See the Intel Manual "System Programming Guide",
522 * 15.2.4
523 *
524 * Note that LENi == 0x10 is defined on x86_64 in long
525 * mode (i.e. even for 32-bit userspace software, but
526 * 64-bit kernel), so the x86_64 mask value is 0x5454.
527 * See the AMD manual no. 24593 (AMD64 System Programming)
528 */ 730 */
529#ifdef CONFIG_X86_32 731 attr.bp_addr = addr;
530#define DR7_MASK 0x5f54 732 attr.bp_len = HW_BREAKPOINT_LEN_1;
531#else 733 attr.bp_type = HW_BREAKPOINT_W;
532#define DR7_MASK 0x5554 734 attr.disabled = 1;
533#endif 735
534 data &= ~DR_CONTROL_RESERVED; 736 bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk);
535 for (i = 0; i < 4; i++) 737 } else {
536 if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1) 738 bp = t->ptrace_bps[nr];
537 return -EIO; 739 t->ptrace_bps[nr] = NULL;
538 child->thread.debugreg7 = data; 740
539 if (data) 741 attr = bp->attr;
540 set_tsk_thread_flag(child, TIF_DEBUG); 742 attr.bp_addr = addr;
541 else 743 bp = modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk);
542 clear_tsk_thread_flag(child, TIF_DEBUG);
543 break;
544 } 744 }
745 /*
746 * CHECKME: the previous code returned -EIO if the addr wasn't a
747 * valid task virtual addr. The new one will return -EINVAL in this
748 * case.
749 * -EINVAL may be what we want for in-kernel breakpoint users, but
 750 * -EIO looks better for ptrace, since we refuse a register write
 751 * from the user. And anyway, that was the previous behaviour.
752 */
753 if (IS_ERR(bp))
754 return PTR_ERR(bp);
755
756 t->ptrace_bps[nr] = bp;
545 757
546 return 0; 758 return 0;
547} 759}
548 760
549/* 761/*
762 * Handle PTRACE_POKEUSR calls for the debug register area.
763 */
764int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
765{
766 struct thread_struct *thread = &(tsk->thread);
767 int rc = 0;
768
769 /* There are no DR4 or DR5 registers */
770 if (n == 4 || n == 5)
771 return -EIO;
772
773 if (n == 6) {
774 thread->debugreg6 = val;
775 goto ret_path;
776 }
777 if (n < HBP_NUM) {
778 rc = ptrace_set_breakpoint_addr(tsk, n, val);
779 if (rc)
780 return rc;
781 }
782 /* All that's left is DR7 */
783 if (n == 7)
784 rc = ptrace_write_dr7(tsk, val);
785
786ret_path:
787 return rc;
788}
789
790/*
550 * These access the current or another (stopped) task's io permission 791 * These access the current or another (stopped) task's io permission
551 * bitmap for debugging or core dump. 792 * bitmap for debugging or core dump.
552 */ 793 */
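The user-space contract is unchanged: a debugger still pokes addresses into u_debugreg[0..3] and enables them through u_debugreg[7]. A hypothetical debugger-side snippet (the offsetof() idiom and the 0x10001 control value are illustrations, not taken from this patch):

#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/user.h>
#include <stddef.h>

/* Arm a 1-byte write watchpoint on 'addr' in a stopped child. */
static void arm_watchpoint(pid_t child, unsigned long addr)
{
	/* DR0 <- address; the kernel now backs this with a perf event */
	ptrace(PTRACE_POKEUSER, child,
	       offsetof(struct user, u_debugreg[0]), addr);
	/* DR7 <- local-enable slot 0, R/W0 = write, LEN0 = 1 byte */
	ptrace(PTRACE_POKEUSER, child,
	       offsetof(struct user, u_debugreg[7]), 0x1UL | (0x1UL << 16));
}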
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 2a34f9c5be21..c0ca8f921c91 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -109,6 +109,7 @@
109#ifdef CONFIG_X86_64 109#ifdef CONFIG_X86_64
110#include <asm/numa_64.h> 110#include <asm/numa_64.h>
111#endif 111#endif
112#include <asm/mce.h>
112 113
113/* 114/*
114 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. 115 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
@@ -1031,6 +1032,8 @@ void __init setup_arch(char **cmdline_p)
1031#endif 1032#endif
1032#endif 1033#endif
1033 x86_init.oem.banner(); 1034 x86_init.oem.banner();
1035
1036 mcheck_init();
1034} 1037}
1035 1038
1036#ifdef CONFIG_X86_32 1039#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 6a44a76055ad..fbf3b07c8567 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -799,15 +799,6 @@ static void do_signal(struct pt_regs *regs)
799 799
800 signr = get_signal_to_deliver(&info, &ka, regs, NULL); 800 signr = get_signal_to_deliver(&info, &ka, regs, NULL);
801 if (signr > 0) { 801 if (signr > 0) {
802 /*
803 * Re-enable any watchpoints before delivering the
804 * signal to user space. The processor register will
805 * have been cleared if the watchpoint triggered
806 * inside the kernel.
807 */
808 if (current->thread.debugreg7)
809 set_debugreg(current->thread.debugreg7, 7);
810
811 /* Whee! Actually deliver the signal. */ 802 /* Whee! Actually deliver the signal. */
812 if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { 803 if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
813 /* 804 /*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 7e37dcee0cc3..33399176512a 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -529,77 +529,56 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
529dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) 529dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
530{ 530{
531 struct task_struct *tsk = current; 531 struct task_struct *tsk = current;
532 unsigned long condition; 532 unsigned long dr6;
533 int si_code; 533 int si_code;
534 534
535 get_debugreg(condition, 6); 535 get_debugreg(dr6, 6);
536 536
537 /* Catch kmemcheck conditions first of all! */ 537 /* Catch kmemcheck conditions first of all! */
538 if (condition & DR_STEP && kmemcheck_trap(regs)) 538 if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
539 return; 539 return;
540 540
541 /* DR6 may or may not be cleared by the CPU */
542 set_debugreg(0, 6);
541 /* 543 /*
542 * The processor cleared BTF, so don't mark that we need it set. 544 * The processor cleared BTF, so don't mark that we need it set.
543 */ 545 */
544 clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); 546 clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
545 tsk->thread.debugctlmsr = 0; 547 tsk->thread.debugctlmsr = 0;
546 548
547 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, 549 /* Store the virtualized DR6 value */
548 SIGTRAP) == NOTIFY_STOP) 550 tsk->thread.debugreg6 = dr6;
551
552 if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
553 SIGTRAP) == NOTIFY_STOP)
549 return; 554 return;
550 555
551 /* It's safe to allow irq's after DR6 has been saved */ 556 /* It's safe to allow irq's after DR6 has been saved */
552 preempt_conditional_sti(regs); 557 preempt_conditional_sti(regs);
553 558
554 /* Mask out spurious debug traps due to lazy DR7 setting */ 559 if (regs->flags & X86_VM_MASK) {
555 if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { 560 handle_vm86_trap((struct kernel_vm86_regs *) regs,
556 if (!tsk->thread.debugreg7) 561 error_code, 1);
557 goto clear_dr7; 562 return;
558 } 563 }
559 564
560#ifdef CONFIG_X86_32
561 if (regs->flags & X86_VM_MASK)
562 goto debug_vm86;
563#endif
564
565 /* Save debug status register where ptrace can see it */
566 tsk->thread.debugreg6 = condition;
567
568 /* 565 /*
569 * Single-stepping through TF: make sure we ignore any events in 566 * Single-stepping through system calls: ignore any exceptions in
570 * kernel space (but re-enable TF when returning to user mode). 567 * kernel space, but re-enable TF when returning to user mode.
568 *
569 * We already checked v86 mode above, so we can check for kernel mode
570 * by just checking the CPL of CS.
571 */ 571 */
572 if (condition & DR_STEP) { 572 if ((dr6 & DR_STEP) && !user_mode(regs)) {
573 if (!user_mode(regs)) 573 tsk->thread.debugreg6 &= ~DR_STEP;
574 goto clear_TF_reenable; 574 set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
575 regs->flags &= ~X86_EFLAGS_TF;
575 } 576 }
576 577 si_code = get_si_code(tsk->thread.debugreg6);
577 si_code = get_si_code(condition); 578 if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS))
578 /* Ok, finally something we can handle */ 579 send_sigtrap(tsk, regs, error_code, si_code);
579 send_sigtrap(tsk, regs, error_code, si_code);
580
581 /*
582 * Disable additional traps. They'll be re-enabled when
583 * the signal is delivered.
584 */
585clear_dr7:
586 set_debugreg(0, 7);
587 preempt_conditional_cli(regs); 580 preempt_conditional_cli(regs);
588 return;
589 581
590#ifdef CONFIG_X86_32
591debug_vm86:
592 /* reenable preemption: handle_vm86_trap() might sleep */
593 dec_preempt_count();
594 handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
595 conditional_cli(regs);
596 return;
597#endif
598
599clear_TF_reenable:
600 set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
601 regs->flags &= ~X86_EFLAGS_TF;
602 preempt_conditional_cli(regs);
603 return; 582 return;
604} 583}
605 584
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ae07d261527c..4fc80174191c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -42,6 +42,7 @@
42#define CREATE_TRACE_POINTS 42#define CREATE_TRACE_POINTS
43#include "trace.h" 43#include "trace.h"
44 44
45#include <asm/debugreg.h>
45#include <asm/uaccess.h> 46#include <asm/uaccess.h>
46#include <asm/msr.h> 47#include <asm/msr.h>
47#include <asm/desc.h> 48#include <asm/desc.h>
@@ -3643,14 +3644,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3643 trace_kvm_entry(vcpu->vcpu_id); 3644 trace_kvm_entry(vcpu->vcpu_id);
3644 kvm_x86_ops->run(vcpu, kvm_run); 3645 kvm_x86_ops->run(vcpu, kvm_run);
3645 3646
3646 if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) { 3647 /*
3647 set_debugreg(current->thread.debugreg0, 0); 3648 * If the guest has used debug registers, at least dr7
3648 set_debugreg(current->thread.debugreg1, 1); 3649 * will be disabled while returning to the host.
3649 set_debugreg(current->thread.debugreg2, 2); 3650 * If we don't have active breakpoints in the host, we don't
3650 set_debugreg(current->thread.debugreg3, 3); 3651 * care about the messed up debug address registers. But if
3651 set_debugreg(current->thread.debugreg6, 6); 3652 * we have some of them active, restore the old state.
3652 set_debugreg(current->thread.debugreg7, 7); 3653 */
3653 } 3654 if (hw_breakpoint_active())
3655 hw_breakpoint_restore();
3654 3656
3655 set_bit(KVM_REQ_KICK, &vcpu->requests); 3657 set_bit(KVM_REQ_KICK, &vcpu->requests);
3656 local_irq_enable(); 3658 local_irq_enable();
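hw_breakpoint_active() and hw_breakpoint_restore() come from the new hw-breakpoint layer rather than this file. The idea, as a rough sketch only (the per-cpu variable names below are assumptions, not the real identifiers): keep a per-CPU mirror of the debug registers and reload it wholesale after the guest run:

static DEFINE_PER_CPU(unsigned long, sketch_dr7);
static DEFINE_PER_CPU(unsigned long, sketch_debugreg[HBP_NUM]);

static int sketch_hw_breakpoint_active(void)
{
	return !!__get_cpu_var(sketch_dr7);
}

static void sketch_hw_breakpoint_restore(void)
{
	set_debugreg(__get_cpu_var(sketch_debugreg)[0], 0);
	set_debugreg(__get_cpu_var(sketch_debugreg)[1], 1);
	set_debugreg(__get_cpu_var(sketch_debugreg)[2], 2);
	set_debugreg(__get_cpu_var(sketch_debugreg)[3], 3);
	set_debugreg(current->thread.debugreg6, 6);
	set_debugreg(__get_cpu_var(sketch_dr7), 7);
}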
diff --git a/arch/x86/lib/.gitignore b/arch/x86/lib/.gitignore
new file mode 100644
index 000000000000..8df89f0a3fe6
--- /dev/null
+++ b/arch/x86/lib/.gitignore
@@ -0,0 +1 @@
inat-tables.c
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 85f5db95c60f..a2d6472895fb 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -2,12 +2,25 @@
2# Makefile for x86 specific library files. 2# Makefile for x86 specific library files.
3# 3#
4 4
5inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
6inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt
7quiet_cmd_inat_tables = GEN $@
8 cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@
9
10$(obj)/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
11 $(call cmd,inat_tables)
12
13$(obj)/inat.o: $(obj)/inat-tables.c
14
15clean-files := inat-tables.c
16
5obj-$(CONFIG_SMP) := msr.o 17obj-$(CONFIG_SMP) := msr.o
6 18
7lib-y := delay.o 19lib-y := delay.o
8lib-y += thunk_$(BITS).o 20lib-y += thunk_$(BITS).o
9lib-y += usercopy_$(BITS).o getuser.o putuser.o 21lib-y += usercopy_$(BITS).o getuser.o putuser.o
10lib-y += memcpy_$(BITS).o 22lib-y += memcpy_$(BITS).o
23lib-y += insn.o inat.o
11 24
12obj-y += msr-reg.o msr-reg-export.o 25obj-y += msr-reg.o msr-reg-export.o
13 26
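inat-tables.c never exists in the tree: it is regenerated on every build by the awk script from the opcode map below, #included by inat.c, and therefore listed both in clean-files here and in the new .gitignore above.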
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c
new file mode 100644
index 000000000000..46fc4ee09fc4
--- /dev/null
+++ b/arch/x86/lib/inat.c
@@ -0,0 +1,90 @@
1/*
2 * x86 instruction attribute tables
3 *
4 * Written by Masami Hiramatsu <mhiramat@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 *
20 */
21#include <asm/insn.h>
22
23/* Attribute tables are generated from opcode map */
24#include "inat-tables.c"
25
26/* Attribute search APIs */
27insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode)
28{
29 return inat_primary_table[opcode];
30}
31
32insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx,
33 insn_attr_t esc_attr)
34{
35 const insn_attr_t *table;
36 insn_attr_t lpfx_attr;
37 int n, m = 0;
38
39 n = inat_escape_id(esc_attr);
40 if (last_pfx) {
41 lpfx_attr = inat_get_opcode_attribute(last_pfx);
42 m = inat_last_prefix_id(lpfx_attr);
43 }
44 table = inat_escape_tables[n][0];
45 if (!table)
46 return 0;
47 if (inat_has_variant(table[opcode]) && m) {
48 table = inat_escape_tables[n][m];
49 if (!table)
50 return 0;
51 }
52 return table[opcode];
53}
54
55insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx,
56 insn_attr_t grp_attr)
57{
58 const insn_attr_t *table;
59 insn_attr_t lpfx_attr;
60 int n, m = 0;
61
62 n = inat_group_id(grp_attr);
63 if (last_pfx) {
64 lpfx_attr = inat_get_opcode_attribute(last_pfx);
65 m = inat_last_prefix_id(lpfx_attr);
66 }
67 table = inat_group_tables[n][0];
68 if (!table)
69 return inat_group_common_attribute(grp_attr);
70 if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) {
71 table = inat_group_tables[n][m];
72 if (!table)
73 return inat_group_common_attribute(grp_attr);
74 }
75 return table[X86_MODRM_REG(modrm)] |
76 inat_group_common_attribute(grp_attr);
77}
78
79insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m,
80 insn_byte_t vex_p)
81{
82 const insn_attr_t *table;
83 if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX)
84 return 0;
85 table = inat_avx_tables[vex_m][vex_p];
86 if (!table)
87 return 0;
88 return table[opcode];
89}
90
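The three lookups are meant to be chained while decoding, exactly as insn.c below does. A hypothetical standalone use, asking whether a two-byte opcode takes a ModRM byte (0x0f 0x1f, the long NOP, does):

static int two_byte_has_modrm(insn_byte_t op)
{
	insn_attr_t attr = inat_get_opcode_attribute(0x0f);

	if (!inat_is_escape(attr))	/* 0x0f opens the 2-byte table */
		return 0;
	attr = inat_get_escape_attribute(op, 0, attr);
	return inat_has_modrm(attr);
}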
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
new file mode 100644
index 000000000000..9f33b984d0ef
--- /dev/null
+++ b/arch/x86/lib/insn.c
@@ -0,0 +1,516 @@
1/*
2 * x86 instruction analysis
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2002, 2004, 2009
19 */
20
21#include <linux/string.h>
22#include <asm/inat.h>
23#include <asm/insn.h>
24
25#define get_next(t, insn) \
26 ({t r; r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
27
28#define peek_next(t, insn) \
29 ({t r; r = *(t*)insn->next_byte; r; })
30
31#define peek_nbyte_next(t, insn, n) \
32 ({t r; r = *(t*)((insn)->next_byte + n); r; })
33
34/**
35 * insn_init() - initialize struct insn
36 * @insn: &struct insn to be initialized
37 * @kaddr: address (in kernel memory) of instruction (or copy thereof)
38 * @x86_64: !0 for 64-bit kernel or 64-bit app
39 */
40void insn_init(struct insn *insn, const void *kaddr, int x86_64)
41{
42 memset(insn, 0, sizeof(*insn));
43 insn->kaddr = kaddr;
44 insn->next_byte = kaddr;
45 insn->x86_64 = x86_64 ? 1 : 0;
46 insn->opnd_bytes = 4;
47 if (x86_64)
48 insn->addr_bytes = 8;
49 else
50 insn->addr_bytes = 4;
51}
52
53/**
54 * insn_get_prefixes - scan x86 instruction prefix bytes
55 * @insn: &struct insn containing instruction
56 *
57 * Populates the @insn->prefixes bitmap, and updates @insn->next_byte
58 * to point to the (first) opcode. No effect if @insn->prefixes.got
59 * is already set.
60 */
61void insn_get_prefixes(struct insn *insn)
62{
63 struct insn_field *prefixes = &insn->prefixes;
64 insn_attr_t attr;
65 insn_byte_t b, lb;
66 int i, nb;
67
68 if (prefixes->got)
69 return;
70
71 nb = 0;
72 lb = 0;
73 b = peek_next(insn_byte_t, insn);
74 attr = inat_get_opcode_attribute(b);
75 while (inat_is_legacy_prefix(attr)) {
76 /* Skip if same prefix */
77 for (i = 0; i < nb; i++)
78 if (prefixes->bytes[i] == b)
79 goto found;
80 if (nb == 4)
81 /* Invalid instruction */
82 break;
83 prefixes->bytes[nb++] = b;
84 if (inat_is_address_size_prefix(attr)) {
85 /* address size switches 2/4 or 4/8 */
86 if (insn->x86_64)
87 insn->addr_bytes ^= 12;
88 else
89 insn->addr_bytes ^= 6;
90 } else if (inat_is_operand_size_prefix(attr)) {
91 /* operand size switches 2/4 */
92 insn->opnd_bytes ^= 6;
93 }
94found:
95 prefixes->nbytes++;
96 insn->next_byte++;
97 lb = b;
98 b = peek_next(insn_byte_t, insn);
99 attr = inat_get_opcode_attribute(b);
100 }
101 /* Set the last prefix */
102 if (lb && lb != insn->prefixes.bytes[3]) {
103 if (unlikely(insn->prefixes.bytes[3])) {
104 /* Swap the last prefix */
105 b = insn->prefixes.bytes[3];
106 for (i = 0; i < nb; i++)
107 if (prefixes->bytes[i] == lb)
108 prefixes->bytes[i] = b;
109 }
110 insn->prefixes.bytes[3] = lb;
111 }
112
113 /* Decode REX prefix */
114 if (insn->x86_64) {
115 b = peek_next(insn_byte_t, insn);
116 attr = inat_get_opcode_attribute(b);
117 if (inat_is_rex_prefix(attr)) {
118 insn->rex_prefix.value = b;
119 insn->rex_prefix.nbytes = 1;
120 insn->next_byte++;
121 if (X86_REX_W(b))
122 /* REX.W overrides opnd_size */
123 insn->opnd_bytes = 8;
124 }
125 }
126 insn->rex_prefix.got = 1;
127
128 /* Decode VEX prefix */
129 b = peek_next(insn_byte_t, insn);
130 attr = inat_get_opcode_attribute(b);
131 if (inat_is_vex_prefix(attr)) {
132 insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1);
133 if (!insn->x86_64) {
134 /*
135 * In 32-bit mode, if the [7:6] bits (mod bits of
136 * ModRM) on the second byte are not 11b, it is
137 * LDS or LES.
138 */
139 if (X86_MODRM_MOD(b2) != 3)
140 goto vex_end;
141 }
142 insn->vex_prefix.bytes[0] = b;
143 insn->vex_prefix.bytes[1] = b2;
144 if (inat_is_vex3_prefix(attr)) {
145 b2 = peek_nbyte_next(insn_byte_t, insn, 2);
146 insn->vex_prefix.bytes[2] = b2;
147 insn->vex_prefix.nbytes = 3;
148 insn->next_byte += 3;
149 if (insn->x86_64 && X86_VEX_W(b2))
150 /* VEX.W overrides opnd_size */
151 insn->opnd_bytes = 8;
152 } else {
153 insn->vex_prefix.nbytes = 2;
154 insn->next_byte += 2;
155 }
156 }
157vex_end:
158 insn->vex_prefix.got = 1;
159
160 prefixes->got = 1;
161 return;
162}
163
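The addr_bytes ^= 12 and opnd_bytes ^= 6 lines above are two-way toggles: XOR-ing a width with the sum of the two legal widths maps each onto the other (8^12 = 4 and 4^12 = 8; 4^6 = 2 and 2^6 = 4), saving a branch. A toy check of the arithmetic:

#include <assert.h>

int main(void)
{
	assert((8 ^ 12) == 4 && (4 ^ 12) == 8);	/* 0x67 prefix, 64-bit code */
	assert((4 ^ 6) == 2 && (2 ^ 6) == 4);	/* 0x66 operand-size prefix */
	return 0;
}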
164/**
165 * insn_get_opcode - collect opcode(s)
166 * @insn: &struct insn containing instruction
167 *
168 * Populates @insn->opcode, updates @insn->next_byte to point past the
169 * opcode byte(s), and sets @insn->attr (except for groups).
170 * If necessary, first collects any preceding (prefix) bytes.
171 * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got
172 * is already 1.
173 */
174void insn_get_opcode(struct insn *insn)
175{
176 struct insn_field *opcode = &insn->opcode;
177 insn_byte_t op, pfx;
178 if (opcode->got)
179 return;
180 if (!insn->prefixes.got)
181 insn_get_prefixes(insn);
182
183 /* Get first opcode */
184 op = get_next(insn_byte_t, insn);
185 opcode->bytes[0] = op;
186 opcode->nbytes = 1;
187
188 /* Check if there is VEX prefix or not */
189 if (insn_is_avx(insn)) {
190 insn_byte_t m, p;
191 m = insn_vex_m_bits(insn);
192 p = insn_vex_p_bits(insn);
193 insn->attr = inat_get_avx_attribute(op, m, p);
194 if (!inat_accept_vex(insn->attr))
195 insn->attr = 0; /* This instruction is bad */
196 goto end; /* VEX has only 1 byte for opcode */
197 }
198
199 insn->attr = inat_get_opcode_attribute(op);
200 while (inat_is_escape(insn->attr)) {
201 /* Get escaped opcode */
202 op = get_next(insn_byte_t, insn);
203 opcode->bytes[opcode->nbytes++] = op;
204 pfx = insn_last_prefix(insn);
205 insn->attr = inat_get_escape_attribute(op, pfx, insn->attr);
206 }
207 if (inat_must_vex(insn->attr))
208 insn->attr = 0; /* This instruction is bad */
209end:
210 opcode->got = 1;
211}
212
213/**
214 * insn_get_modrm - collect ModRM byte, if any
215 * @insn: &struct insn containing instruction
216 *
217 * Populates @insn->modrm and updates @insn->next_byte to point past the
218 * ModRM byte, if any. If necessary, first collects the preceding bytes
219 * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1.
220 */
221void insn_get_modrm(struct insn *insn)
222{
223 struct insn_field *modrm = &insn->modrm;
224 insn_byte_t pfx, mod;
225 if (modrm->got)
226 return;
227 if (!insn->opcode.got)
228 insn_get_opcode(insn);
229
230 if (inat_has_modrm(insn->attr)) {
231 mod = get_next(insn_byte_t, insn);
232 modrm->value = mod;
233 modrm->nbytes = 1;
234 if (inat_is_group(insn->attr)) {
235 pfx = insn_last_prefix(insn);
236 insn->attr = inat_get_group_attribute(mod, pfx,
237 insn->attr);
238 }
239 }
240
241 if (insn->x86_64 && inat_is_force64(insn->attr))
242 insn->opnd_bytes = 8;
243 modrm->got = 1;
244}
245
246
247/**
248 * insn_rip_relative() - Does instruction use RIP-relative addressing mode?
249 * @insn: &struct insn containing instruction
250 *
251 * If necessary, first collects the instruction up to and including the
252 * ModRM byte. No effect if @insn->x86_64 is 0.
253 */
254int insn_rip_relative(struct insn *insn)
255{
256 struct insn_field *modrm = &insn->modrm;
257
258 if (!insn->x86_64)
259 return 0;
260 if (!modrm->got)
261 insn_get_modrm(insn);
262 /*
263 * For rip-relative instructions, the mod field (top 2 bits)
264 * is zero and the r/m field (bottom 3 bits) is 0x5.
265 */
266 return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
267}
268
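Concretely, ModRM 0x05 (mod=00, r/m=101) in 64-bit mode means a rip+disp32 operand, so the bytes 8b 05 00 10 00 00 are mov 0x1000(%rip),%eax. A hypothetical self-test against the decoder:

static int rip_rel_demo(void)
{
	static const unsigned char buf[] =
		{ 0x8b, 0x05, 0x00, 0x10, 0x00, 0x00 };
	struct insn insn;

	insn_init(&insn, buf, 1);	/* decode as 64-bit code */
	return insn_rip_relative(&insn);	/* non-zero for this buffer */
}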
269/**
270 * insn_get_sib() - Get the SIB byte of instruction
271 * @insn: &struct insn containing instruction
272 *
273 * If necessary, first collects the instruction up to and including the
274 * ModRM byte.
275 */
276void insn_get_sib(struct insn *insn)
277{
278 insn_byte_t modrm;
279
280 if (insn->sib.got)
281 return;
282 if (!insn->modrm.got)
283 insn_get_modrm(insn);
284 if (insn->modrm.nbytes) {
285 modrm = (insn_byte_t)insn->modrm.value;
286 if (insn->addr_bytes != 2 &&
287 X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
288 insn->sib.value = get_next(insn_byte_t, insn);
289 insn->sib.nbytes = 1;
290 }
291 }
292 insn->sib.got = 1;
293}
294
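For instance, ModRM 0x04 (mod=00, r/m=100) cannot name a plain base register, so a SIB byte must follow; in the hypothetical 3-byte sample below, the SIB 0x24 (scale=0, index=100b meaning none, base=rSP) makes it mov (%rsp),%eax:

/* Hypothetical sample: ModRM 0x04 forces the SIB byte 0x24 after it. */
static const unsigned char mov_from_rsp[] = { 0x8b, 0x04, 0x24 };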
295
296/**
297 * insn_get_displacement() - Get the displacement of instruction
298 * @insn: &struct insn containing instruction
299 *
300 * If necessary, first collects the instruction up to and including the
301 * SIB byte.
302 * The displacement value is sign-extended.
303 */
304void insn_get_displacement(struct insn *insn)
305{
306 insn_byte_t mod, rm, base;
307
308 if (insn->displacement.got)
309 return;
310 if (!insn->sib.got)
311 insn_get_sib(insn);
312 if (insn->modrm.nbytes) {
313 /*
314 * Interpreting the modrm byte:
315 * mod = 00 - no displacement fields (exceptions below)
316 * mod = 01 - 1-byte displacement field
317 * mod = 10 - displacement field is 4 bytes, or 2 bytes if
318 * address size = 2 (0x67 prefix in 32-bit mode)
319 * mod = 11 - no memory operand
320 *
321 * If address size = 2...
322 * mod = 00, r/m = 110 - displacement field is 2 bytes
323 *
324 * If address size != 2...
325 * mod != 11, r/m = 100 - SIB byte exists
326 * mod = 00, SIB base = 101 - displacement field is 4 bytes
327 * mod = 00, r/m = 101 - rip-relative addressing, displacement
328 * field is 4 bytes
329 */
330 mod = X86_MODRM_MOD(insn->modrm.value);
331 rm = X86_MODRM_RM(insn->modrm.value);
332 base = X86_SIB_BASE(insn->sib.value);
333 if (mod == 3)
334 goto out;
335 if (mod == 1) {
336 insn->displacement.value = get_next(char, insn);
337 insn->displacement.nbytes = 1;
338 } else if (insn->addr_bytes == 2) {
339 if ((mod == 0 && rm == 6) || mod == 2) {
340 insn->displacement.value =
341 get_next(short, insn);
342 insn->displacement.nbytes = 2;
343 }
344 } else {
345 if ((mod == 0 && rm == 5) || mod == 2 ||
346 (mod == 0 && base == 5)) {
347 insn->displacement.value = get_next(int, insn);
348 insn->displacement.nbytes = 4;
349 }
350 }
351 }
352out:
353 insn->displacement.got = 1;
354}
355
356/* Decode moffset16/32/64 */
357static void __get_moffset(struct insn *insn)
358{
359 switch (insn->addr_bytes) {
360 case 2:
361 insn->moffset1.value = get_next(short, insn);
362 insn->moffset1.nbytes = 2;
363 break;
364 case 4:
365 insn->moffset1.value = get_next(int, insn);
366 insn->moffset1.nbytes = 4;
367 break;
368 case 8:
369 insn->moffset1.value = get_next(int, insn);
370 insn->moffset1.nbytes = 4;
371 insn->moffset2.value = get_next(int, insn);
372 insn->moffset2.nbytes = 4;
373 break;
374 }
375 insn->moffset1.got = insn->moffset2.got = 1;
376}
377
378/* Decode imm v32(Iz) */
379static void __get_immv32(struct insn *insn)
380{
381 switch (insn->opnd_bytes) {
382 case 2:
383 insn->immediate.value = get_next(short, insn);
384 insn->immediate.nbytes = 2;
385 break;
386 case 4:
387 case 8:
388 insn->immediate.value = get_next(int, insn);
389 insn->immediate.nbytes = 4;
390 break;
391 }
392}
393
394/* Decode imm v64(Iv/Ov) */
395static void __get_immv(struct insn *insn)
396{
397 switch (insn->opnd_bytes) {
398 case 2:
399 insn->immediate1.value = get_next(short, insn);
400 insn->immediate1.nbytes = 2;
401 break;
402 case 4:
403 insn->immediate1.value = get_next(int, insn);
404 insn->immediate1.nbytes = 4;
405 break;
406 case 8:
407 insn->immediate1.value = get_next(int, insn);
408 insn->immediate1.nbytes = 4;
409 insn->immediate2.value = get_next(int, insn);
410 insn->immediate2.nbytes = 4;
411 break;
412 }
413 insn->immediate1.got = insn->immediate2.got = 1;
414}
415
416/* Decode ptr16:16/32(Ap) */
417static void __get_immptr(struct insn *insn)
418{
419 switch (insn->opnd_bytes) {
420 case 2:
421 insn->immediate1.value = get_next(short, insn);
422 insn->immediate1.nbytes = 2;
423 break;
424 case 4:
425 insn->immediate1.value = get_next(int, insn);
426 insn->immediate1.nbytes = 4;
427 break;
428 case 8:
429 /* ptr16:64 does not exist (no segment) */
430 return;
431 }
432 insn->immediate2.value = get_next(unsigned short, insn);
433 insn->immediate2.nbytes = 2;
434 insn->immediate1.got = insn->immediate2.got = 1;
435}
436
437/**
438 * insn_get_immediate() - Get the immediates of instruction
439 * @insn: &struct insn containing instruction
440 *
441 * If necessary, first collects the instruction up to and including the
442 * displacement bytes.
443 * Most immediates are sign-extended. The unsigned value can be
 444 * obtained by masking with ((1 << (nbytes * 8)) - 1).
445 */
446void insn_get_immediate(struct insn *insn)
447{
448 if (insn->immediate.got)
449 return;
450 if (!insn->displacement.got)
451 insn_get_displacement(insn);
452
453 if (inat_has_moffset(insn->attr)) {
454 __get_moffset(insn);
455 goto done;
456 }
457
458 if (!inat_has_immediate(insn->attr))
459 /* no immediates */
460 goto done;
461
462 switch (inat_immediate_size(insn->attr)) {
463 case INAT_IMM_BYTE:
464 insn->immediate.value = get_next(char, insn);
465 insn->immediate.nbytes = 1;
466 break;
467 case INAT_IMM_WORD:
468 insn->immediate.value = get_next(short, insn);
469 insn->immediate.nbytes = 2;
470 break;
471 case INAT_IMM_DWORD:
472 insn->immediate.value = get_next(int, insn);
473 insn->immediate.nbytes = 4;
474 break;
475 case INAT_IMM_QWORD:
476 insn->immediate1.value = get_next(int, insn);
477 insn->immediate1.nbytes = 4;
478 insn->immediate2.value = get_next(int, insn);
479 insn->immediate2.nbytes = 4;
480 break;
481 case INAT_IMM_PTR:
482 __get_immptr(insn);
483 break;
484 case INAT_IMM_VWORD32:
485 __get_immv32(insn);
486 break;
487 case INAT_IMM_VWORD:
488 __get_immv(insn);
489 break;
490 default:
491 break;
492 }
493 if (inat_has_second_immediate(insn->attr)) {
494 insn->immediate2.value = get_next(char, insn);
495 insn->immediate2.nbytes = 1;
496 }
497done:
498 insn->immediate.got = 1;
499}
500
501/**
502 * insn_get_length() - Get the length of instruction
503 * @insn: &struct insn containing instruction
504 *
505 * If necessary, first collects the instruction up to and including the
506 * immediates bytes.
507 */
508void insn_get_length(struct insn *insn)
509{
510 if (insn->length)
511 return;
512 if (!insn->immediate.got)
513 insn_get_immediate(insn);
514 insn->length = (unsigned char)((unsigned long)insn->next_byte
515 - (unsigned long)insn->kaddr);
516}
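Thanks to the lazy insn_get_*() chain, a caller only asks for the deepest field it needs and everything earlier is collected on demand. A minimal, hypothetical consumer that decodes one 64-bit instruction and reports its length:

static void dump_insn(const void *kaddr)
{
	struct insn insn;

	insn_init(&insn, kaddr, 1);	/* 64-bit code */
	insn_get_length(&insn);		/* pulls in prefixes..immediates */
	printk(KERN_DEBUG "len=%d opcode=0x%02x\n",
	       insn.length, insn.opcode.bytes[0]);
}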
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
new file mode 100644
index 000000000000..a793da5e560e
--- /dev/null
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -0,0 +1,893 @@
1# x86 Opcode Maps
2#
3#<Opcode maps>
4# Table: table-name
5# Referrer: escaped-name
6# AVXcode: avx-code
7# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
8# (or)
9# opcode: escape # escaped-name
10# EndTable
11#
12#<group maps>
13# GrpTable: GrpXXX
14# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
15# EndTable
16#
17# AVX Superscripts
18# (VEX): this opcode can accept VEX prefix.
19# (oVEX): this opcode requires VEX prefix.
20# (o128): this opcode only supports 128bit VEX.
21# (o256): this opcode only supports 256bit VEX.
22#
23
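Reading the entries below: in a line such as "05: ADD rAX,Iz", 0x05 is the opcode, ADD the mnemonic, and the operand codes follow Intel's notation (Iz is a word/dword immediate, Ev a ModRM-addressed operand, and so on); alternatives separated by '|' are selected by the last legacy prefix named in parentheses, as in "10: movups Vps,Wps (VEX) | movss Vss,Wss (F3),(VEX),(o128)". Roughly speaking, the awk generator named in the Makefile above turns each entry into an inat attribute recording whether a ModRM byte, an immediate, a group, or a further escape follows.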
24Table: one byte opcode
25Referrer:
26AVXcode:
27# 0x00 - 0x0f
2800: ADD Eb,Gb
2901: ADD Ev,Gv
3002: ADD Gb,Eb
3103: ADD Gv,Ev
3204: ADD AL,Ib
3305: ADD rAX,Iz
3406: PUSH ES (i64)
3507: POP ES (i64)
3608: OR Eb,Gb
3709: OR Ev,Gv
380a: OR Gb,Eb
390b: OR Gv,Ev
400c: OR AL,Ib
410d: OR rAX,Iz
420e: PUSH CS (i64)
430f: escape # 2-byte escape
44# 0x10 - 0x1f
4510: ADC Eb,Gb
4611: ADC Ev,Gv
4712: ADC Gb,Eb
4813: ADC Gv,Ev
4914: ADC AL,Ib
5015: ADC rAX,Iz
5116: PUSH SS (i64)
5217: POP SS (i64)
5318: SBB Eb,Gb
5419: SBB Ev,Gv
551a: SBB Gb,Eb
561b: SBB Gv,Ev
571c: SBB AL,Ib
581d: SBB rAX,Iz
591e: PUSH DS (i64)
601f: POP DS (i64)
61# 0x20 - 0x2f
6220: AND Eb,Gb
6321: AND Ev,Gv
6422: AND Gb,Eb
6523: AND Gv,Ev
6624: AND AL,Ib
6725: AND rAx,Iz
6826: SEG=ES (Prefix)
6927: DAA (i64)
7028: SUB Eb,Gb
7129: SUB Ev,Gv
722a: SUB Gb,Eb
732b: SUB Gv,Ev
742c: SUB AL,Ib
752d: SUB rAX,Iz
762e: SEG=CS (Prefix)
772f: DAS (i64)
78# 0x30 - 0x3f
7930: XOR Eb,Gb
8031: XOR Ev,Gv
8132: XOR Gb,Eb
8233: XOR Gv,Ev
8334: XOR AL,Ib
8435: XOR rAX,Iz
8536: SEG=SS (Prefix)
8637: AAA (i64)
8738: CMP Eb,Gb
8839: CMP Ev,Gv
893a: CMP Gb,Eb
903b: CMP Gv,Ev
913c: CMP AL,Ib
923d: CMP rAX,Iz
933e: SEG=DS (Prefix)
943f: AAS (i64)
95# 0x40 - 0x4f
9640: INC eAX (i64) | REX (o64)
9741: INC eCX (i64) | REX.B (o64)
9842: INC eDX (i64) | REX.X (o64)
9943: INC eBX (i64) | REX.XB (o64)
10044: INC eSP (i64) | REX.R (o64)
10145: INC eBP (i64) | REX.RB (o64)
10246: INC eSI (i64) | REX.RX (o64)
10347: INC eDI (i64) | REX.RXB (o64)
10448: DEC eAX (i64) | REX.W (o64)
10549: DEC eCX (i64) | REX.WB (o64)
1064a: DEC eDX (i64) | REX.WX (o64)
1074b: DEC eBX (i64) | REX.WXB (o64)
1084c: DEC eSP (i64) | REX.WR (o64)
1094d: DEC eBP (i64) | REX.WRB (o64)
1104e: DEC eSI (i64) | REX.WRX (o64)
1114f: DEC eDI (i64) | REX.WRXB (o64)
112# 0x50 - 0x5f
11350: PUSH rAX/r8 (d64)
11451: PUSH rCX/r9 (d64)
11552: PUSH rDX/r10 (d64)
11653: PUSH rBX/r11 (d64)
11754: PUSH rSP/r12 (d64)
11855: PUSH rBP/r13 (d64)
11956: PUSH rSI/r14 (d64)
12057: PUSH rDI/r15 (d64)
12158: POP rAX/r8 (d64)
12259: POP rCX/r9 (d64)
1235a: POP rDX/r10 (d64)
1245b: POP rBX/r11 (d64)
1255c: POP rSP/r12 (d64)
1265d: POP rBP/r13 (d64)
1275e: POP rSI/r14 (d64)
1285f: POP rDI/r15 (d64)
129# 0x60 - 0x6f
13060: PUSHA/PUSHAD (i64)
13161: POPA/POPAD (i64)
13262: BOUND Gv,Ma (i64)
13363: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64)
13464: SEG=FS (Prefix)
13565: SEG=GS (Prefix)
13666: Operand-Size (Prefix)
13767: Address-Size (Prefix)
13868: PUSH Iz (d64)
13969: IMUL Gv,Ev,Iz
1406a: PUSH Ib (d64)
1416b: IMUL Gv,Ev,Ib
1426c: INS/INSB Yb,DX
1436d: INS/INSW/INSD Yz,DX
1446e: OUTS/OUTSB DX,Xb
1456f: OUTS/OUTSW/OUTSD DX,Xz
146# 0x70 - 0x7f
14770: JO Jb
14871: JNO Jb
14972: JB/JNAE/JC Jb
15073: JNB/JAE/JNC Jb
15174: JZ/JE Jb
15275: JNZ/JNE Jb
15376: JBE/JNA Jb
15477: JNBE/JA Jb
15578: JS Jb
15679: JNS Jb
1577a: JP/JPE Jb
1587b: JNP/JPO Jb
1597c: JL/JNGE Jb
1607d: JNL/JGE Jb
1617e: JLE/JNG Jb
1627f: JNLE/JG Jb
163# 0x80 - 0x8f
16480: Grp1 Eb,Ib (1A)
16581: Grp1 Ev,Iz (1A)
16682: Grp1 Eb,Ib (1A),(i64)
16783: Grp1 Ev,Ib (1A)
16884: TEST Eb,Gb
16985: TEST Ev,Gv
17086: XCHG Eb,Gb
17187: XCHG Ev,Gv
17288: MOV Eb,Gb
17389: MOV Ev,Gv
1748a: MOV Gb,Eb
1758b: MOV Gv,Ev
1768c: MOV Ev,Sw
1778d: LEA Gv,M
1788e: MOV Sw,Ew
1798f: Grp1A (1A) | POP Ev (d64)
180# 0x90 - 0x9f
18190: NOP | PAUSE (F3) | XCHG r8,rAX
18291: XCHG rCX/r9,rAX
18392: XCHG rDX/r10,rAX
18493: XCHG rBX/r11,rAX
18594: XCHG rSP/r12,rAX
18695: XCHG rBP/r13,rAX
18796: XCHG rSI/r14,rAX
18897: XCHG rDI/r15,rAX
18998: CBW/CWDE/CDQE
19099: CWD/CDQ/CQO
1919a: CALLF Ap (i64)
1929b: FWAIT/WAIT
1939c: PUSHF/D/Q Fv (d64)
1949d: POPF/D/Q Fv (d64)
1959e: SAHF
1969f: LAHF
197# 0xa0 - 0xaf
198a0: MOV AL,Ob
199a1: MOV rAX,Ov
200a2: MOV Ob,AL
201a3: MOV Ov,rAX
202a4: MOVS/B Xb,Yb
203a5: MOVS/W/D/Q Xv,Yv
204a6: CMPS/B Xb,Yb
205a7: CMPS/W/D Xv,Yv
206a8: TEST AL,Ib
207a9: TEST rAX,Iz
208aa: STOS/B Yb,AL
209ab: STOS/W/D/Q Yv,rAX
210ac: LODS/B AL,Xb
211ad: LODS/W/D/Q rAX,Xv
212ae: SCAS/B AL,Yb
213af: SCAS/W/D/Q rAX,Xv
214# 0xb0 - 0xbf
215b0: MOV AL/R8L,Ib
216b1: MOV CL/R9L,Ib
217b2: MOV DL/R10L,Ib
218b3: MOV BL/R11L,Ib
219b4: MOV AH/R12L,Ib
220b5: MOV CH/R13L,Ib
221b6: MOV DH/R14L,Ib
222b7: MOV BH/R15L,Ib
223b8: MOV rAX/r8,Iv
224b9: MOV rCX/r9,Iv
225ba: MOV rDX/r10,Iv
226bb: MOV rBX/r11,Iv
227bc: MOV rSP/r12,Iv
228bd: MOV rBP/r13,Iv
229be: MOV rSI/r14,Iv
230bf: MOV rDI/r15,Iv
231# 0xc0 - 0xcf
232c0: Grp2 Eb,Ib (1A)
233c1: Grp2 Ev,Ib (1A)
234c2: RETN Iw (f64)
235c3: RETN
236c4: LES Gz,Mp (i64) | 3bytes-VEX (Prefix)
237c5: LDS Gz,Mp (i64) | 2bytes-VEX (Prefix)
238c6: Grp11 Eb,Ib (1A)
239c7: Grp11 Ev,Iz (1A)
240c8: ENTER Iw,Ib
241c9: LEAVE (d64)
242ca: RETF Iw
243cb: RETF
244cc: INT3
245cd: INT Ib
246ce: INTO (i64)
247cf: IRET/D/Q
248# 0xd0 - 0xdf
249d0: Grp2 Eb,1 (1A)
250d1: Grp2 Ev,1 (1A)
251d2: Grp2 Eb,CL (1A)
252d3: Grp2 Ev,CL (1A)
253d4: AAM Ib (i64)
254d5: AAD Ib (i64)
255d6:
256d7: XLAT/XLATB
257d8: ESC
258d9: ESC
259da: ESC
260db: ESC
261dc: ESC
262dd: ESC
263de: ESC
264df: ESC
265# 0xe0 - 0xef
266e0: LOOPNE/LOOPNZ Jb (f64)
267e1: LOOPE/LOOPZ Jb (f64)
268e2: LOOP Jb (f64)
269e3: JrCXZ Jb (f64)
270e4: IN AL,Ib
271e5: IN eAX,Ib
272e6: OUT Ib,AL
273e7: OUT Ib,eAX
274e8: CALL Jz (f64)
275e9: JMP-near Jz (f64)
276ea: JMP-far Ap (i64)
277eb: JMP-short Jb (f64)
278ec: IN AL,DX
279ed: IN eAX,DX
280ee: OUT DX,AL
281ef: OUT DX,eAX
282# 0xf0 - 0xff
283f0: LOCK (Prefix)
284f1:
285f2: REPNE (Prefix)
286f3: REP/REPE (Prefix)
287f4: HLT
288f5: CMC
289f6: Grp3_1 Eb (1A)
290f7: Grp3_2 Ev (1A)
291f8: CLC
292f9: STC
293fa: CLI
294fb: STI
295fc: CLD
296fd: STD
297fe: Grp4 (1A)
298ff: Grp5 (1A)
299EndTable
300
301Table: 2-byte opcode (0x0f)
302Referrer: 2-byte escape
303AVXcode: 1
304# 0x0f 0x00-0x0f
30500: Grp6 (1A)
30601: Grp7 (1A)
30702: LAR Gv,Ew
30803: LSL Gv,Ew
30904:
31005: SYSCALL (o64)
31106: CLTS
31207: SYSRET (o64)
31308: INVD
31409: WBINVD
3150a:
3160b: UD2 (1B)
3170c:
3180d: NOP Ev | GrpP
3190e: FEMMS
320# 3DNow! uses the last imm byte as opcode extension.
3210f: 3DNow! Pq,Qq,Ib
322# 0x0f 0x10-0x1f
32310: movups Vps,Wps (VEX) | movss Vss,Wss (F3),(VEX),(o128) | movupd Vpd,Wpd (66),(VEX) | movsd Vsd,Wsd (F2),(VEX),(o128)
32411: movups Wps,Vps (VEX) | movss Wss,Vss (F3),(VEX),(o128) | movupd Wpd,Vpd (66),(VEX) | movsd Wsd,Vsd (F2),(VEX),(o128)
32512: movlps Vq,Mq (VEX),(o128) | movlpd Vq,Mq (66),(VEX),(o128) | movhlps Vq,Uq (VEX),(o128) | movddup Vq,Wq (F2),(VEX) | movsldup Vq,Wq (F3),(VEX)
32613: movlps Mq,Vq (VEX),(o128) | movlpd Mq,Vq (66),(VEX),(o128)
32714: unpcklps Vps,Wq (VEX) | unpcklpd Vpd,Wq (66),(VEX)
32815: unpckhps Vps,Wq (VEX) | unpckhpd Vpd,Wq (66),(VEX)
32916: movhps Vq,Mq (VEX),(o128) | movhpd Vq,Mq (66),(VEX),(o128) | movlhps Vq,Uq (VEX),(o128) | movshdup Vq,Wq (F3),(VEX)
33017: movhps Mq,Vq (VEX),(o128) | movhpd Mq,Vq (66),(VEX),(o128)
33118: Grp16 (1A)
33219:
3331a:
3341b:
3351c:
3361d:
3371e:
3381f: NOP Ev
339# 0x0f 0x20-0x2f
34020: MOV Rd,Cd
34121: MOV Rd,Dd
34222: MOV Cd,Rd
34323: MOV Dd,Rd
34424:
34525:
34626:
34727:
34828: movaps Vps,Wps (VEX) | movapd Vpd,Wpd (66),(VEX)
34929: movaps Wps,Vps (VEX) | movapd Wpd,Vpd (66),(VEX)
3502a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3),(VEX),(o128) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2),(VEX),(o128)
3512b: movntps Mps,Vps (VEX) | movntpd Mpd,Vpd (66),(VEX)
3522c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3),(VEX),(o128) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2),(VEX),(o128)
3532d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3),(VEX),(o128) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2),(VEX),(o128)
3542e: ucomiss Vss,Wss (VEX),(o128) | ucomisd Vsd,Wsd (66),(VEX),(o128)
3552f: comiss Vss,Wss (VEX),(o128) | comisd Vsd,Wsd (66),(VEX),(o128)
356# 0x0f 0x30-0x3f
35730: WRMSR
35831: RDTSC
35932: RDMSR
36033: RDPMC
36134: SYSENTER
36235: SYSEXIT
36336:
36437: GETSEC
36538: escape # 3-byte escape 1
36639:
3673a: escape # 3-byte escape 2
3683b:
3693c:
3703d:
3713e:
3723f:
373# 0x0f 0x40-0x4f
37440: CMOVO Gv,Ev
37541: CMOVNO Gv,Ev
37642: CMOVB/C/NAE Gv,Ev
37743: CMOVAE/NB/NC Gv,Ev
37844: CMOVE/Z Gv,Ev
37945: CMOVNE/NZ Gv,Ev
38046: CMOVBE/NA Gv,Ev
38147: CMOVA/NBE Gv,Ev
38248: CMOVS Gv,Ev
38349: CMOVNS Gv,Ev
3844a: CMOVP/PE Gv,Ev
3854b: CMOVNP/PO Gv,Ev
3864c: CMOVL/NGE Gv,Ev
3874d: CMOVNL/GE Gv,Ev
3884e: CMOVLE/NG Gv,Ev
3894f: CMOVNLE/G Gv,Ev
390# 0x0f 0x50-0x5f
39150: movmskps Gd/q,Ups (VEX) | movmskpd Gd/q,Upd (66),(VEX)
39251: sqrtps Vps,Wps (VEX) | sqrtss Vss,Wss (F3),(VEX),(o128) | sqrtpd Vpd,Wpd (66),(VEX) | sqrtsd Vsd,Wsd (F2),(VEX),(o128)
39352: rsqrtps Vps,Wps (VEX) | rsqrtss Vss,Wss (F3),(VEX),(o128)
39453: rcpps Vps,Wps (VEX) | rcpss Vss,Wss (F3),(VEX),(o128)
39554: andps Vps,Wps (VEX) | andpd Vpd,Wpd (66),(VEX)
39655: andnps Vps,Wps (VEX) | andnpd Vpd,Wpd (66),(VEX)
39756: orps Vps,Wps (VEX) | orpd Vpd,Wpd (66),(VEX)
39857: xorps Vps,Wps (VEX) | xorpd Vpd,Wpd (66),(VEX)
39958: addps Vps,Wps (VEX) | addss Vss,Wss (F3),(VEX),(o128) | addpd Vpd,Wpd (66),(VEX) | addsd Vsd,Wsd (F2),(VEX),(o128)
40059: mulps Vps,Wps (VEX) | mulss Vss,Wss (F3),(VEX),(o128) | mulpd Vpd,Wpd (66),(VEX) | mulsd Vsd,Wsd (F2),(VEX),(o128)
4015a: cvtps2pd Vpd,Wps (VEX) | cvtss2sd Vsd,Wss (F3),(VEX),(o128) | cvtpd2ps Vps,Wpd (66),(VEX) | cvtsd2ss Vsd,Wsd (F2),(VEX),(o128)
4025b: cvtdq2ps Vps,Wdq (VEX) | cvtps2dq Vdq,Wps (66),(VEX) | cvttps2dq Vdq,Wps (F3),(VEX)
4035c: subps Vps,Wps (VEX) | subss Vss,Wss (F3),(VEX),(o128) | subpd Vpd,Wpd (66),(VEX) | subsd Vsd,Wsd (F2),(VEX),(o128)
4045d: minps Vps,Wps (VEX) | minss Vss,Wss (F3),(VEX),(o128) | minpd Vpd,Wpd (66),(VEX) | minsd Vsd,Wsd (F2),(VEX),(o128)
4055e: divps Vps,Wps (VEX) | divss Vss,Wss (F3),(VEX),(o128) | divpd Vpd,Wpd (66),(VEX) | divsd Vsd,Wsd (F2),(VEX),(o128)
4065f: maxps Vps,Wps (VEX) | maxss Vss,Wss (F3),(VEX),(o128) | maxpd Vpd,Wpd (66),(VEX) | maxsd Vsd,Wsd (F2),(VEX),(o128)
407# 0x0f 0x60-0x6f
40860: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66),(VEX),(o128)
40961: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66),(VEX),(o128)
41062: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66),(VEX),(o128)
41163: packsswb Pq,Qq | packsswb Vdq,Wdq (66),(VEX),(o128)
41264: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66),(VEX),(o128)
41365: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66),(VEX),(o128)
41466: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66),(VEX),(o128)
41567: packuswb Pq,Qq | packuswb Vdq,Wdq (66),(VEX),(o128)
41668: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66),(VEX),(o128)
41769: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66),(VEX),(o128)
4186a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66),(VEX),(o128)
4196b: packssdw Pq,Qd | packssdw Vdq,Wdq (66),(VEX),(o128)
4206c: punpcklqdq Vdq,Wdq (66),(VEX),(o128)
4216d: punpckhqdq Vdq,Wdq (66),(VEX),(o128)
4226e: movd/q Pd,Ed/q | movd/q Vdq,Ed/q (66),(VEX),(o128)
4236f: movq Pq,Qq | movdqa Vdq,Wdq (66),(VEX) | movdqu Vdq,Wdq (F3),(VEX)
424# 0x0f 0x70-0x7f
42570: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66),(VEX),(o128) | pshufhw Vdq,Wdq,Ib (F3),(VEX),(o128) | pshuflw Vdq,Wdq,Ib (F2),(VEX),(o128)
42671: Grp12 (1A)
42772: Grp13 (1A)
42873: Grp14 (1A)
42974: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66),(VEX),(o128)
43075: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66),(VEX),(o128)
43176: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66),(VEX),(o128)
43277: emms/vzeroupper/vzeroall (VEX)
43378: VMREAD Ed/q,Gd/q
43479: VMWRITE Gd/q,Ed/q
4357a:
4367b:
4377c: haddps Vps,Wps (F2),(VEX) | haddpd Vpd,Wpd (66),(VEX)
4387d: hsubps Vps,Wps (F2),(VEX) | hsubpd Vpd,Wpd (66),(VEX)
4397e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66),(VEX),(o128) | movq Vq,Wq (F3),(VEX),(o128)
4407f: movq Qq,Pq | movdqa Wdq,Vdq (66),(VEX) | movdqu Wdq,Vdq (F3),(VEX)
441# 0x0f 0x80-0x8f
44280: JO Jz (f64)
44381: JNO Jz (f64)
44482: JB/JNAE/JC Jz (f64)
44583: JNB/JAE/JNC Jz (f64)
44684: JZ/JE Jz (f64)
44785: JNZ/JNE Jz (f64)
44886: JBE/JNA Jz (f64)
44987: JNBE/JA Jz (f64)
45088: JS Jz (f64)
45189: JNS Jz (f64)
4528a: JP/JPE Jz (f64)
4538b: JNP/JPO Jz (f64)
4548c: JL/JNGE Jz (f64)
4558d: JNL/JGE Jz (f64)
4568e: JLE/JNG Jz (f64)
4578f: JNLE/JG Jz (f64)
458# 0x0f 0x90-0x9f
45990: SETO Eb
46091: SETNO Eb
46192: SETB/C/NAE Eb
46293: SETAE/NB/NC Eb
46394: SETE/Z Eb
46495: SETNE/NZ Eb
46596: SETBE/NA Eb
46697: SETA/NBE Eb
46798: SETS Eb
46899: SETNS Eb
4699a: SETP/PE Eb
4709b: SETNP/PO Eb
4719c: SETL/NGE Eb
4729d: SETNL/GE Eb
4739e: SETLE/NG Eb
4749f: SETNLE/G Eb
475# 0x0f 0xa0-0xaf
476a0: PUSH FS (d64)
477a1: POP FS (d64)
478a2: CPUID
479a3: BT Ev,Gv
480a4: SHLD Ev,Gv,Ib
481a5: SHLD Ev,Gv,CL
482a6: GrpPDLK
483a7: GrpRNG
484a8: PUSH GS (d64)
485a9: POP GS (d64)
486aa: RSM
487ab: BTS Ev,Gv
488ac: SHRD Ev,Gv,Ib
489ad: SHRD Ev,Gv,CL
490ae: Grp15 (1A),(1C)
491af: IMUL Gv,Ev
492# 0x0f 0xb0-0xbf
493b0: CMPXCHG Eb,Gb
494b1: CMPXCHG Ev,Gv
495b2: LSS Gv,Mp
496b3: BTR Ev,Gv
497b4: LFS Gv,Mp
498b5: LGS Gv,Mp
499b6: MOVZX Gv,Eb
500b7: MOVZX Gv,Ew
501b8: JMPE | POPCNT Gv,Ev (F3)
502b9: Grp10 (1A)
503ba: Grp8 Ev,Ib (1A)
504bb: BTC Ev,Gv
505bc: BSF Gv,Ev
506bd: BSR Gv,Ev
507be: MOVSX Gv,Eb
508bf: MOVSX Gv,Ew
509# 0x0f 0xc0-0xcf
510c0: XADD Eb,Gb
511c1: XADD Ev,Gv
512c2: cmpps Vps,Wps,Ib (VEX) | cmpss Vss,Wss,Ib (F3),(VEX),(o128) | cmppd Vpd,Wpd,Ib (66),(VEX) | cmpsd Vsd,Wsd,Ib (F2),(VEX)
513c3: movnti Md/q,Gd/q
514c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66),(VEX),(o128)
515c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66),(VEX),(o128)
516c6: shufps Vps,Wps,Ib (VEX) | shufpd Vpd,Wpd,Ib (66),(VEX)
517c7: Grp9 (1A)
518c8: BSWAP RAX/EAX/R8/R8D
519c9: BSWAP RCX/ECX/R9/R9D
520ca: BSWAP RDX/EDX/R10/R10D
521cb: BSWAP RBX/EBX/R11/R11D
522cc: BSWAP RSP/ESP/R12/R12D
523cd: BSWAP RBP/EBP/R13/R13D
524ce: BSWAP RSI/ESI/R14/R14D
525cf: BSWAP RDI/EDI/R15/R15D
526# 0x0f 0xd0-0xdf
527d0: addsubps Vps,Wps (F2),(VEX) | addsubpd Vpd,Wpd (66),(VEX)
528d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66),(VEX),(o128)
529d2: psrld Pq,Qq | psrld Vdq,Wdq (66),(VEX),(o128)
530d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66),(VEX),(o128)
531d4: paddq Pq,Qq | paddq Vdq,Wdq (66),(VEX),(o128)
532d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66),(VEX),(o128)
533d6: movq Wq,Vq (66),(VEX),(o128) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2)
534d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66),(VEX),(o128)
535d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66),(VEX),(o128)
536d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66),(VEX),(o128)
537da: pminub Pq,Qq | pminub Vdq,Wdq (66),(VEX),(o128)
538db: pand Pq,Qq | pand Vdq,Wdq (66),(VEX),(o128)
539dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66),(VEX),(o128)
540dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66),(VEX),(o128)
541de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66),(VEX),(o128)
542df: pandn Pq,Qq | pandn Vdq,Wdq (66),(VEX),(o128)
543# 0x0f 0xe0-0xef
544e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66),(VEX),(o128)
545e1: psraw Pq,Qq | psraw Vdq,Wdq (66),(VEX),(o128)
546e2: psrad Pq,Qq | psrad Vdq,Wdq (66),(VEX),(o128)
547e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66),(VEX),(o128)
548e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66),(VEX),(o128)
549e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66),(VEX),(o128)
550e6: cvtpd2dq Vdq,Wpd (F2),(VEX) | cvttpd2dq Vdq,Wpd (66),(VEX) | cvtdq2pd Vpd,Wdq (F3),(VEX)
551e7: movntq Mq,Pq | movntdq Mdq,Vdq (66),(VEX)
552e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66),(VEX),(o128)
553e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66),(VEX),(o128)
554ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66),(VEX),(o128)
555eb: por Pq,Qq | por Vdq,Wdq (66),(VEX),(o128)
556ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66),(VEX),(o128)
557ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66),(VEX),(o128)
558ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66),(VEX),(o128)
559ef: pxor Pq,Qq | pxor Vdq,Wdq (66),(VEX),(o128)
560# 0x0f 0xf0-0xff
561f0: lddqu Vdq,Mdq (F2),(VEX)
562f1: psllw Pq,Qq | psllw Vdq,Wdq (66),(VEX),(o128)
563f2: pslld Pq,Qq | pslld Vdq,Wdq (66),(VEX),(o128)
564f3: psllq Pq,Qq | psllq Vdq,Wdq (66),(VEX),(o128)
565f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66),(VEX),(o128)
566f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66),(VEX),(o128)
567f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66),(VEX),(o128)
568f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66),(VEX),(o128)
569f8: psubb Pq,Qq | psubb Vdq,Wdq (66),(VEX),(o128)
570f9: psubw Pq,Qq | psubw Vdq,Wdq (66),(VEX),(o128)
571fa: psubd Pq,Qq | psubd Vdq,Wdq (66),(VEX),(o128)
572fb: psubq Pq,Qq | psubq Vdq,Wdq (66),(VEX),(o128)
573fc: paddb Pq,Qq | paddb Vdq,Wdq (66),(VEX),(o128)
574fd: paddw Pq,Qq | paddw Vdq,Wdq (66),(VEX),(o128)
575fe: paddd Pq,Qq | paddd Vdq,Wdq (66),(VEX),(o128)
576ff:
577EndTable
578
579Table: 3-byte opcode 1 (0x0f 0x38)
580Referrer: 3-byte escape 1
581AVXcode: 2
582# 0x0f 0x38 0x00-0x0f
58300: pshufb Pq,Qq | pshufb Vdq,Wdq (66),(VEX),(o128)
58401: phaddw Pq,Qq | phaddw Vdq,Wdq (66),(VEX),(o128)
58502: phaddd Pq,Qq | phaddd Vdq,Wdq (66),(VEX),(o128)
58603: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66),(VEX),(o128)
58704: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66),(VEX),(o128)
58805: phsubw Pq,Qq | phsubw Vdq,Wdq (66),(VEX),(o128)
58906: phsubd Pq,Qq | phsubd Vdq,Wdq (66),(VEX),(o128)
59007: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66),(VEX),(o128)
59108: psignb Pq,Qq | psignb Vdq,Wdq (66),(VEX),(o128)
59209: psignw Pq,Qq | psignw Vdq,Wdq (66),(VEX),(o128)
5930a: psignd Pq,Qq | psignd Vdq,Wdq (66),(VEX),(o128)
5940b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66),(VEX),(o128)
5950c: vpermilps /r (66),(oVEX)
5960d: vpermilpd /r (66),(oVEX)
5970e: vtestps /r (66),(oVEX)
5980f: vtestpd /r (66),(oVEX)
599# 0x0f 0x38 0x10-0x1f
60010: pblendvb Vdq,Wdq (66)
60111:
60212:
60313:
60414: blendvps Vdq,Wdq (66)
60515: blendvpd Vdq,Wdq (66)
60616:
60717: ptest Vdq,Wdq (66),(VEX)
60818: vbroadcastss /r (66),(oVEX)
60919: vbroadcastsd /r (66),(oVEX),(o256)
6101a: vbroadcastf128 /r (66),(oVEX),(o256)
6111b:
6121c: pabsb Pq,Qq | pabsb Vdq,Wdq (66),(VEX),(o128)
6131d: pabsw Pq,Qq | pabsw Vdq,Wdq (66),(VEX),(o128)
6141e: pabsd Pq,Qq | pabsd Vdq,Wdq (66),(VEX),(o128)
6151f:
616# 0x0f 0x38 0x20-0x2f
61720: pmovsxbw Vdq,Udq/Mq (66),(VEX),(o128)
61821: pmovsxbd Vdq,Udq/Md (66),(VEX),(o128)
61922: pmovsxbq Vdq,Udq/Mw (66),(VEX),(o128)
62023: pmovsxwd Vdq,Udq/Mq (66),(VEX),(o128)
62124: pmovsxwq Vdq,Udq/Md (66),(VEX),(o128)
62225: pmovsxdq Vdq,Udq/Mq (66),(VEX),(o128)
62326:
62427:
62528: pmuldq Vdq,Wdq (66),(VEX),(o128)
62629: pcmpeqq Vdq,Wdq (66),(VEX),(o128)
6272a: movntdqa Vdq,Mdq (66),(VEX),(o128)
6282b: packusdw Vdq,Wdq (66),(VEX),(o128)
6292c: vmaskmovps(ld) /r (66),(oVEX)
6302d: vmaskmovpd(ld) /r (66),(oVEX)
6312e: vmaskmovps(st) /r (66),(oVEX)
6322f: vmaskmovpd(st) /r (66),(oVEX)
633# 0x0f 0x38 0x30-0x3f
63430: pmovzxbw Vdq,Udq/Mq (66),(VEX),(o128)
63531: pmovzxbd Vdq,Udq/Md (66),(VEX),(o128)
63632: pmovzxbq Vdq,Udq/Mw (66),(VEX),(o128)
63733: pmovzxwd Vdq,Udq/Mq (66),(VEX),(o128)
63834: pmovzxwq Vdq,Udq/Md (66),(VEX),(o128)
63935: pmovzxdq Vdq,Udq/Mq (66),(VEX),(o128)
64036:
64137: pcmpgtq Vdq,Wdq (66),(VEX),(o128)
64238: pminsb Vdq,Wdq (66),(VEX),(o128)
64339: pminsd Vdq,Wdq (66),(VEX),(o128)
6443a: pminuw Vdq,Wdq (66),(VEX),(o128)
6453b: pminud Vdq,Wdq (66),(VEX),(o128)
6463c: pmaxsb Vdq,Wdq (66),(VEX),(o128)
6473d: pmaxsd Vdq,Wdq (66),(VEX),(o128)
6483e: pmaxuw Vdq,Wdq (66),(VEX),(o128)
6493f: pmaxud Vdq,Wdq (66),(VEX),(o128)
650# 0x0f 0x38 0x40-0x8f
65140: pmulld Vdq,Wdq (66),(VEX),(o128)
65241: phminposuw Vdq,Wdq (66),(VEX),(o128)
65380: INVEPT Gd/q,Mdq (66)
65381: INVVPID Gd/q,Mdq (66)
655# 0x0f 0x38 0x90-0xbf (FMA)
65696: vfmaddsub132pd/ps /r (66),(VEX)
65797: vfmsubadd132pd/ps /r (66),(VEX)
65898: vfmadd132pd/ps /r (66),(VEX)
65999: vfmadd132sd/ss /r (66),(VEX),(o128)
6609a: vfmsub132pd/ps /r (66),(VEX)
6619b: vfmsub132sd/ss /r (66),(VEX),(o128)
6629c: vfnmadd132pd/ps /r (66),(VEX)
6639d: vfnmadd132sd/ss /r (66),(VEX),(o128)
6649e: vfnmsub132pd/ps /r (66),(VEX)
6659f: vfnmsub132sd/ss /r (66),(VEX),(o128)
666a6: vfmaddsub213pd/ps /r (66),(VEX)
667a7: vfmsubadd213pd/ps /r (66),(VEX)
668a8: vfmadd213pd/ps /r (66),(VEX)
669a9: vfmadd213sd/ss /r (66),(VEX),(o128)
670aa: vfmsub213pd/ps /r (66),(VEX)
671ab: vfmsub213sd/ss /r (66),(VEX),(o128)
672ac: vfnmadd213pd/ps /r (66),(VEX)
673ad: vfnmadd213sd/ss /r (66),(VEX),(o128)
674ae: vfnmsub213pd/ps /r (66),(VEX)
675af: vfnmsub213sd/ss /r (66),(VEX),(o128)
676b6: vfmaddsub231pd/ps /r (66),(VEX)
677b7: vfmsubadd231pd/ps /r (66),(VEX)
678b8: vfmadd231pd/ps /r (66),(VEX)
679b9: vfmadd231sd/ss /r (66),(VEX),(o128)
680ba: vfmsub231pd/ps /r (66),(VEX)
681bb: vfmsub231sd/ss /r (66),(VEX),(o128)
682bc: vfnmadd231pd/ps /r (66),(VEX)
683bd: vfnmadd231sd/ss /r (66),(VEX),(o128)
684be: vfnmsub231pd/ps /r (66),(VEX)
685bf: vfnmsub231sd/ss /r (66),(VEX),(o128)
686# 0x0f 0x38 0xc0-0xff
687db: aesimc Vdq,Wdq (66),(VEX),(o128)
688dc: aesenc Vdq,Wdq (66),(VEX),(o128)
689dd: aesenclast Vdq,Wdq (66),(VEX),(o128)
690de: aesdec Vdq,Wdq (66),(VEX),(o128)
691df: aesdeclast Vdq,Wdq (66),(VEX),(o128)
692f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2)
693f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2)
694EndTable
695
696Table: 3-byte opcode 2 (0x0f 0x3a)
697Referrer: 3-byte escape 2
698AVXcode: 3
699# 0x0f 0x3a 0x00-0xff
70004: vpermilps /r,Ib (66),(oVEX)
70105: vpermilpd /r,Ib (66),(oVEX)
70206: vperm2f128 /r,Ib (66),(oVEX),(o256)
70308: roundps Vdq,Wdq,Ib (66),(VEX)
70409: roundpd Vdq,Wdq,Ib (66),(VEX)
7050a: roundss Vss,Wss,Ib (66),(VEX),(o128)
7060b: roundsd Vsd,Wsd,Ib (66),(VEX),(o128)
7070c: blendps Vdq,Wdq,Ib (66),(VEX)
7080d: blendpd Vdq,Wdq,Ib (66),(VEX)
7090e: pblendw Vdq,Wdq,Ib (66),(VEX),(o128)
7100f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66),(VEX),(o128)
71114: pextrb Rd/Mb,Vdq,Ib (66),(VEX),(o128)
71215: pextrw Rd/Mw,Vdq,Ib (66),(VEX),(o128)
71316: pextrd/pextrq Ed/q,Vdq,Ib (66),(VEX),(o128)
71417: extractps Ed,Vdq,Ib (66),(VEX),(o128)
71518: vinsertf128 /r,Ib (66),(oVEX),(o256)
71619: vextractf128 /r,Ib (66),(oVEX),(o256)
71720: pinsrb Vdq,Rd/q/Mb,Ib (66),(VEX),(o128)
71821: insertps Vdq,Udq/Md,Ib (66),(VEX),(o128)
71922: pinsrd/pinsrq Vdq,Ed/q,Ib (66),(VEX),(o128)
72040: dpps Vdq,Wdq,Ib (66),(VEX)
72141: dppd Vdq,Wdq,Ib (66),(VEX),(o128)
72242: mpsadbw Vdq,Wdq,Ib (66),(VEX),(o128)
72344: pclmulq Vdq,Wdq,Ib (66),(VEX),(o128)
7244a: vblendvps /r,Ib (66),(oVEX)
7254b: vblendvpd /r,Ib (66),(oVEX)
7264c: vpblendvb /r,Ib (66),(oVEX),(o128)
72760: pcmpestrm Vdq,Wdq,Ib (66),(VEX),(o128)
72861: pcmpestri Vdq,Wdq,Ib (66),(VEX),(o128)
72962: pcmpistrm Vdq,Wdq,Ib (66),(VEX),(o128)
73063: pcmpistri Vdq,Wdq,Ib (66),(VEX),(o128)
731df: aeskeygenassist Vdq,Wdq,Ib (66),(VEX),(o128)
732EndTable
733
734GrpTable: Grp1
7350: ADD
7361: OR
7372: ADC
7383: SBB
7394: AND
7405: SUB
7416: XOR
7427: CMP
743EndTable
744
745GrpTable: Grp1A
7460: POP
747EndTable
748
749GrpTable: Grp2
7500: ROL
7511: ROR
7522: RCL
7533: RCR
7544: SHL/SAL
7555: SHR
7566:
7577: SAR
758EndTable
759
760GrpTable: Grp3_1
7610: TEST Eb,Ib
7621:
7632: NOT Eb
7643: NEG Eb
7654: MUL AL,Eb
7665: IMUL AL,Eb
7676: DIV AL,Eb
7687: IDIV AL,Eb
769EndTable
770
771GrpTable: Grp3_2
7720: TEST Ev,Iz
7731:
7742: NOT Ev
7753: NEG Ev
7764: MUL rAX,Ev
7775: IMUL rAX,Ev
7786: DIV rAX,Ev
7797: IDIV rAX,Ev
780EndTable
781
782GrpTable: Grp4
7830: INC Eb
7841: DEC Eb
785EndTable
786
787GrpTable: Grp5
7880: INC Ev
7891: DEC Ev
7902: CALLN Ev (f64)
7913: CALLF Ep
7924: JMPN Ev (f64)
7935: JMPF Ep
7946: PUSH Ev (d64)
7957:
796EndTable
797
798GrpTable: Grp6
7990: SLDT Rv/Mw
8001: STR Rv/Mw
8012: LLDT Ew
8023: LTR Ew
8034: VERR Ew
8045: VERW Ew
805EndTable
806
807GrpTable: Grp7
8080: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
8091: SIDT Ms | MONITOR (000),(11B) | MWAIT (001)
8102: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B)
8113: LIDT Ms
8124: SMSW Mw/Rv
8135:
8146: LMSW Ew
8157: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
816EndTable
817
818GrpTable: Grp8
8194: BT
8205: BTS
8216: BTR
8227: BTC
823EndTable
824
825GrpTable: Grp9
8261: CMPXCHG8B/16B Mq/Mdq
8276: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3)
8287: VMPTRST Mq
829EndTable
830
831GrpTable: Grp10
832EndTable
833
834GrpTable: Grp11
8350: MOV
836EndTable
837
838GrpTable: Grp12
8392: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B),(VEX),(o128)
8404: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B),(VEX),(o128)
8416: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B),(VEX),(o128)
842EndTable
843
844GrpTable: Grp13
8452: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B),(VEX),(o128)
8464: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B),(VEX),(o128)
8476: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B),(VEX),(o128)
848EndTable
849
850GrpTable: Grp14
8512: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B),(VEX),(o128)
8523: psrldq Udq,Ib (66),(11B),(VEX),(o128)
8536: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B),(VEX),(o128)
8547: pslldq Udq,Ib (66),(11B),(VEX),(o128)
855EndTable
856
857GrpTable: Grp15
8580: fxsave
8591: fxrstor
8602: ldmxcsr (VEX)
8613: stmxcsr (VEX)
8624: XSAVE
8635: XRSTOR | lfence (11B)
8646: mfence (11B)
8657: clflush | sfence (11B)
866EndTable
867
868GrpTable: Grp16
8690: prefetch NTA
8701: prefetch T0
8712: prefetch T1
8723: prefetch T2
873EndTable
874
875# AMD's Prefetch Group
876GrpTable: GrpP
8770: PREFETCH
8781: PREFETCHW
879EndTable
880
881GrpTable: GrpPDLK
8820: MONTMUL
8831: XSHA1
8842: XSHA2
885EndTable
886
887GrpTable: GrpRNG
8880: xstore-rng
8891: xcrypt-ecb
8902: xcrypt-cbc
8914: xcrypt-cfb
8925: xcrypt-ofb
893EndTable
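For readers new to the map syntax: each GrpTable above is indexed by the 3-bit "reg" field (bits 5:3) of the ModRM byte. A minimal user-space sketch of how a decoder would select a Grp5 entry; the mnemonic strings are illustrative only, not the kernel decoder's data:

    #include <stdio.h>

    /* Illustrative copy of the Grp5 rows above, indexed by ModRM.reg */
    static const char * const grp5[8] = {
    	"INC Ev", "DEC Ev", "CALLN Ev", "CALLF Ep",
    	"JMPN Ev", "JMPF Ep", "PUSH Ev", "(undefined)"
    };

    int main(void)
    {
    	unsigned char modrm = 0xd0;	/* ff d0 = call *%rax */
    	unsigned int reg = (modrm >> 3) & 0x7;

    	printf("Grp5 /%u: %s\n", reg, grp5[reg]);	/* prints CALLN Ev */
    	return 0;
    }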
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index f4cee9028cf0..8f4e2ac93928 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -38,7 +38,8 @@ enum x86_pf_error_code {
  * Returns 0 if mmiotrace is disabled, or if the fault is not
  * handled by mmiotrace:
  */
-static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
+static inline int __kprobes
+kmmio_fault(struct pt_regs *regs, unsigned long addr)
 {
 	if (unlikely(is_kmmio_active()))
 		if (kmmio_handler(regs, addr) == 1)
@@ -46,7 +47,7 @@ static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
 	return 0;
 }
 
-static inline int notify_page_fault(struct pt_regs *regs)
+static inline int __kprobes notify_page_fault(struct pt_regs *regs)
 {
 	int ret = 0;
 
@@ -240,7 +241,7 @@ void vmalloc_sync_all(void)
  *
  * Handle a fault on the vmalloc or module mapping area
  */
-static noinline int vmalloc_fault(unsigned long address)
+static noinline __kprobes int vmalloc_fault(unsigned long address)
 {
 	unsigned long pgd_paddr;
 	pmd_t *pmd_k;
@@ -357,7 +358,7 @@ void vmalloc_sync_all(void)
  *
  * This assumes no large pages in there.
  */
-static noinline int vmalloc_fault(unsigned long address)
+static noinline __kprobes int vmalloc_fault(unsigned long address)
 {
 	pgd_t *pgd, *pgd_ref;
 	pud_t *pud, *pud_ref;
@@ -860,7 +861,7 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
  * There are no security implications to leaving a stale TLB when
  * increasing the permissions on a page.
  */
-static noinline int
+static noinline __kprobes int
 spurious_fault(unsigned long error_code, unsigned long address)
 {
 	pgd_t *pgd;
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 16ccbd77917f..11a4ad4d6253 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -540,8 +540,14 @@ kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
 	struct die_args *arg = args;
 
 	if (val == DIE_DEBUG && (arg->err & DR_STEP))
-		if (post_kmmio_handler(arg->err, arg->regs) == 1)
+		if (post_kmmio_handler(arg->err, arg->regs) == 1) {
+			/*
+			 * Reset the BS bit in dr6 (pointed by args->err) to
+			 * denote completion of processing
+			 */
+			(*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP;
 			return NOTIFY_STOP;
+		}
 
 	return NOTIFY_DONE;
 }
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 8aa85f17667e..0a979f3e5b8a 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -18,6 +18,7 @@
 #include <asm/mce.h>
 #include <asm/xcr.h>
 #include <asm/suspend.h>
+#include <asm/debugreg.h>
 
 #ifdef CONFIG_X86_32
 static struct saved_context saved_context;
@@ -142,31 +143,6 @@ static void fix_processor_context(void)
 #endif
 	load_TR_desc();				/* This does ltr */
 	load_LDT(&current->active_mm->context);	/* This does lldt */
-
-	/*
-	 * Now maybe reload the debug registers
-	 */
-	if (current->thread.debugreg7) {
-#ifdef CONFIG_X86_32
-		set_debugreg(current->thread.debugreg0, 0);
-		set_debugreg(current->thread.debugreg1, 1);
-		set_debugreg(current->thread.debugreg2, 2);
-		set_debugreg(current->thread.debugreg3, 3);
-		/* no 4 and 5 */
-		set_debugreg(current->thread.debugreg6, 6);
-		set_debugreg(current->thread.debugreg7, 7);
-#else
-		/* CONFIG_X86_64 */
-		loaddebug(&current->thread, 0);
-		loaddebug(&current->thread, 1);
-		loaddebug(&current->thread, 2);
-		loaddebug(&current->thread, 3);
-		/* no 4 and 5 */
-		loaddebug(&current->thread, 6);
-		loaddebug(&current->thread, 7);
-#endif
-	}
-
 }
 
 /**
diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile
new file mode 100644
index 000000000000..f82082677337
--- /dev/null
+++ b/arch/x86/tools/Makefile
@@ -0,0 +1,31 @@
+PHONY += posttest
+
+ifeq ($(KBUILD_VERBOSE),1)
+  posttest_verbose = -v
+else
+  posttest_verbose =
+endif
+
+ifeq ($(CONFIG_64BIT),y)
+  posttest_64bit = -y
+else
+  posttest_64bit = -n
+endif
+
+distill_awk = $(srctree)/arch/x86/tools/distill.awk
+chkobjdump = $(srctree)/arch/x86/tools/chkobjdump.awk
+
+quiet_cmd_posttest = TEST    $@
+      cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose)
+
+posttest: $(obj)/test_get_len vmlinux
+	$(call cmd,posttest)
+
+hostprogs-y := test_get_len
+
+# -I is needed for the generated C source and for C source in the kernel tree.
+HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/
+
+# Dependencies are also needed.
+$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
+
diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk
new file mode 100644
index 000000000000..0d13cd9fdcff
--- /dev/null
+++ b/arch/x86/tools/chkobjdump.awk
@@ -0,0 +1,23 @@
+# GNU objdump version checker
+#
+# Usage:
+# objdump -v | awk -f chkobjdump.awk
+BEGIN {
+	# objdump version 2.19 or later is OK for the test.
+	od_ver = 2;
+	od_sver = 19;
+}
+
+/^GNU/ {
+	split($4, ver, ".");
+	if (ver[1] > od_ver ||
+	    (ver[1] == od_ver && ver[2] >= od_sver)) {
+		exit 1;
+	} else {
+		printf("Warning: objdump version %s is older than %d.%d\n",
+		       $4, od_ver, od_sver);
+		print("Warning: Skipping posttest.");
+		# Logic is inverted, because we just skip the test without error.
+		exit 0;
+	}
+}
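Note that the script above inverts its exit status on purpose: a too-old objdump makes posttest skip quietly instead of failing the build. A hedged C restatement of the same version predicate (the function name is illustrative, not kernel API):

    /* True when objdump's "major.minor" is at least 2.19. */
    static int objdump_is_recent_enough(int major, int minor)
    {
    	return major > 2 || (major == 2 && minor >= 19);
    }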
diff --git a/arch/x86/tools/distill.awk b/arch/x86/tools/distill.awk
new file mode 100644
index 000000000000..c13c0ee48ab4
--- /dev/null
+++ b/arch/x86/tools/distill.awk
@@ -0,0 +1,47 @@
+#!/bin/awk -f
+# Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len
+# Distills the disassembly as follows:
+# - Removes all lines except the disassembled instructions.
+# - For instructions that exceed 1 line (7 bytes), crams all the hex bytes
+# into a single line.
+# - Removes bad (or prefix-only) instructions.
+
+BEGIN {
+	prev_addr = ""
+	prev_hex = ""
+	prev_mnemonic = ""
+	bad_expr = "(\\(bad\\)|^rex|^.byte|^rep(z|nz)$|^lock$|^es$|^cs$|^ss$|^ds$|^fs$|^gs$|^data(16|32)$|^addr(16|32|64))"
+	fwait_expr = "^9b "
+	fwait_str="9b\tfwait"
+}
+
+/^ *[0-9a-f]+ <[^>]*>:/ {
+	# Symbol entry
+	printf("%s%s\n", $2, $1)
+}
+
+/^ *[0-9a-f]+:/ {
+	if (split($0, field, "\t") < 3) {
+		# This is a continuation of the same insn.
+		prev_hex = prev_hex field[2]
+	} else {
+		# Skip bad instructions
+		if (match(prev_mnemonic, bad_expr))
+			prev_addr = ""
+		# Split fwait from other f* instructions
+		if (match(prev_hex, fwait_expr) && prev_mnemonic != "fwait") {
+			printf "%s\t%s\n", prev_addr, fwait_str
+			sub(fwait_expr, "", prev_hex)
+		}
+		if (prev_addr != "")
+			printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic
+		prev_addr = field[1]
+		prev_hex = field[2]
+		prev_mnemonic = field[3]
+	}
+}
+
+END {
+	if (prev_addr != "")
+		printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic
+}
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk
new file mode 100644
index 000000000000..e34e92a28eb6
--- /dev/null
+++ b/arch/x86/tools/gen-insn-attr-x86.awk
@@ -0,0 +1,380 @@
+#!/bin/awk -f
+# gen-insn-attr-x86.awk: Instruction attribute table generator
+# Written by Masami Hiramatsu <mhiramat@redhat.com>
+#
+# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
+
+# Awk implementation sanity check
+function check_awk_implement() {
+	if (!match("abc", "[[:lower:]]+"))
+		return "Your awk doesn't support character classes."
+	if (sprintf("%x", 0) != "0")
+		return "Your awk has a printf-format problem."
+	return ""
+}
+
+# Clear working vars
+function clear_vars() {
+	delete table
+	delete lptable2
+	delete lptable1
+	delete lptable3
+	eid = -1 # escape id
+	gid = -1 # group id
+	aid = -1 # AVX id
+	tname = ""
+}
+
+BEGIN {
+	# Implementation error checking
+	awkchecked = check_awk_implement()
+	if (awkchecked != "") {
+		print "Error: " awkchecked > "/dev/stderr"
+		print "Please try to use gawk." > "/dev/stderr"
+		exit 1
+	}
+
+	# Setup generating tables
+	print "/* x86 opcode map generated from x86-opcode-map.txt */"
+	print "/* Do not change this code. */\n"
+	ggid = 1
+	geid = 1
+	gaid = 0
+	delete etable
+	delete gtable
+	delete atable
+
+	opnd_expr = "^[[:alpha:]/]"
+	ext_expr = "^\\("
+	sep_expr = "^\\|$"
+	group_expr = "^Grp[[:alnum:]]+"
+
+	imm_expr = "^[IJAO][[:lower:]]"
+	imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+	imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+	imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
+	imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
+	imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
+	imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
+	imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
+	imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
+	imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
+	imm_flag["Ob"] = "INAT_MOFFSET"
+	imm_flag["Ov"] = "INAT_MOFFSET"
+
+	modrm_expr = "^([CDEGMNPQRSUVW/][[:lower:]]+|NTA|T[012])"
+	force64_expr = "\\([df]64\\)"
+	rex_expr = "^REX(\\.[XRWB]+)*"
+	fpu_expr = "^ESC" # TODO
+
+	lprefix1_expr = "\\(66\\)"
+	lprefix2_expr = "\\(F3\\)"
+	lprefix3_expr = "\\(F2\\)"
+	max_lprefix = 4
+
+	vexok_expr = "\\(VEX\\)"
+	vexonly_expr = "\\(oVEX\\)"
+
+	prefix_expr = "\\(Prefix\\)"
+	prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
+	prefix_num["REPNE"] = "INAT_PFX_REPNE"
+	prefix_num["REP/REPE"] = "INAT_PFX_REPE"
+	prefix_num["LOCK"] = "INAT_PFX_LOCK"
+	prefix_num["SEG=CS"] = "INAT_PFX_CS"
+	prefix_num["SEG=DS"] = "INAT_PFX_DS"
+	prefix_num["SEG=ES"] = "INAT_PFX_ES"
+	prefix_num["SEG=FS"] = "INAT_PFX_FS"
+	prefix_num["SEG=GS"] = "INAT_PFX_GS"
+	prefix_num["SEG=SS"] = "INAT_PFX_SS"
+	prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
+	prefix_num["2bytes-VEX"] = "INAT_PFX_VEX2"
+	prefix_num["3bytes-VEX"] = "INAT_PFX_VEX3"
+
+	clear_vars()
+}
+
+function semantic_error(msg) {
+	print "Semantic error at " NR ": " msg > "/dev/stderr"
+	exit 1
+}
+
+function debug(msg) {
+	print "DEBUG: " msg
+}
+
+function array_size(arr,   i,c) {
+	c = 0
+	for (i in arr)
+		c++
+	return c
+}
+
+/^Table:/ {
+	print "/* " $0 " */"
+	if (tname != "")
+		semantic_error("Hit Table: before EndTable:.");
+}
+
+/^Referrer:/ {
+	if (NF != 1) {
+		# escape opcode table
+		ref = ""
+		for (i = 2; i <= NF; i++)
+			ref = ref $i
+		eid = escape[ref]
+		tname = sprintf("inat_escape_table_%d", eid)
+	}
+}
+
+/^AVXcode:/ {
+	if (NF != 1) {
+		# AVX/escape opcode table
+		aid = $2
+		if (gaid <= aid)
+			gaid = aid + 1
+		if (tname == "")	# AVX only opcode table
+			tname = sprintf("inat_avx_table_%d", $2)
+	}
+	if (aid == -1 && eid == -1)	# primary opcode table
+		tname = "inat_primary_table"
+}
+
+/^GrpTable:/ {
+	print "/* " $0 " */"
+	if (!($2 in group))
+		semantic_error("No group: " $2 )
+	gid = group[$2]
+	tname = "inat_group_table_" gid
+}
+
+function print_table(tbl,name,fmt,n)
+{
+	print "const insn_attr_t " name " = {"
+	for (i = 0; i < n; i++) {
+		id = sprintf(fmt, i)
+		if (tbl[id])
+			print "	[" id "] = " tbl[id] ","
+	}
+	print "};"
+}
+
+/^EndTable/ {
+	if (gid != -1) {
+		# print group tables
+		if (array_size(table) != 0) {
+			print_table(table, tname "[INAT_GROUP_TABLE_SIZE]",
+				    "0x%x", 8)
+			gtable[gid,0] = tname
+		}
+		if (array_size(lptable1) != 0) {
+			print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]",
+				    "0x%x", 8)
+			gtable[gid,1] = tname "_1"
+		}
+		if (array_size(lptable2) != 0) {
+			print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]",
+				    "0x%x", 8)
+			gtable[gid,2] = tname "_2"
+		}
+		if (array_size(lptable3) != 0) {
+			print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]",
+				    "0x%x", 8)
+			gtable[gid,3] = tname "_3"
+		}
+	} else {
+		# print primary/escaped tables
+		if (array_size(table) != 0) {
+			print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]",
+				    "0x%02x", 256)
+			etable[eid,0] = tname
+			if (aid >= 0)
+				atable[aid,0] = tname
+		}
+		if (array_size(lptable1) != 0) {
+			print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]",
+				    "0x%02x", 256)
+			etable[eid,1] = tname "_1"
+			if (aid >= 0)
+				atable[aid,1] = tname "_1"
+		}
+		if (array_size(lptable2) != 0) {
+			print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]",
+				    "0x%02x", 256)
+			etable[eid,2] = tname "_2"
+			if (aid >= 0)
+				atable[aid,2] = tname "_2"
+		}
+		if (array_size(lptable3) != 0) {
+			print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]",
+				    "0x%02x", 256)
+			etable[eid,3] = tname "_3"
+			if (aid >= 0)
+				atable[aid,3] = tname "_3"
+		}
+	}
+	print ""
+	clear_vars()
+}
+
+function add_flags(old,new) {
+	if (old && new)
+		return old " | " new
+	else if (old)
+		return old
+	else
+		return new
+}
+
+# convert operands to flags.
+function convert_operands(opnd,       i,imm,mod)
+{
+	imm = null
+	mod = null
+	for (i in opnd) {
+		i = opnd[i]
+		if (match(i, imm_expr) == 1) {
+			if (!imm_flag[i])
+				semantic_error("Unknown imm opnd: " i)
+			if (imm) {
+				if (i != "Ib")
+					semantic_error("Second IMM error")
+				imm = add_flags(imm, "INAT_SCNDIMM")
+			} else
+				imm = imm_flag[i]
+		} else if (match(i, modrm_expr))
+			mod = "INAT_MODRM"
+	}
+	return add_flags(imm, mod)
+}
+
+/^[0-9a-f]+\:/ {
+	if (NR == 1)
+		next
+	# get index
+	idx = "0x" substr($1, 1, index($1,":") - 1)
+	if (idx in table)
+		semantic_error("Redefine " idx " in " tname)
+
+	# check if escaped opcode
+	if ("escape" == $2) {
+		if ($3 != "#")
+			semantic_error("No escaped name")
+		ref = ""
+		for (i = 4; i <= NF; i++)
+			ref = ref $i
+		if (ref in escape)
+			semantic_error("Redefine escape (" ref ")")
+		escape[ref] = geid
+		geid++
+		table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
+		next
+	}
+
+	variant = null
+	# converts
+	i = 2
+	while (i <= NF) {
+		opcode = $(i++)
+		delete opnds
+		ext = null
+		flags = null
+		opnd = null
+		# parse one opcode
+		if (match($i, opnd_expr)) {
+			opnd = $i
+			split($(i++), opnds, ",")
+			flags = convert_operands(opnds)
+		}
+		if (match($i, ext_expr))
+			ext = $(i++)
+		if (match($i, sep_expr))
+			i++
+		else if (i < NF)
+			semantic_error($i " is not a separator")
+
+		# check if group opcode
+		if (match(opcode, group_expr)) {
+			if (!(opcode in group)) {
+				group[opcode] = ggid
+				ggid++
+			}
+			flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
+		}
+		# check force(or default) 64bit
+		if (match(ext, force64_expr))
+			flags = add_flags(flags, "INAT_FORCE64")
+
+		# check REX prefix
+		if (match(opcode, rex_expr))
+			flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")
+
+		# check coprocessor escape : TODO
+		if (match(opcode, fpu_expr))
+			flags = add_flags(flags, "INAT_MODRM")
+
+		# check VEX-only code
+		if (match(ext, vexonly_expr))
+			flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
+
+		# check VEX-OK code
+		if (match(ext, vexok_expr))
+			flags = add_flags(flags, "INAT_VEXOK")
+
+		# check prefixes
+		if (match(ext, prefix_expr)) {
+			if (!prefix_num[opcode])
+				semantic_error("Unknown prefix: " opcode)
+			flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
+		}
+		if (length(flags) == 0)
+			continue
+		# check if last prefix
+		if (match(ext, lprefix1_expr)) {
+			lptable1[idx] = add_flags(lptable1[idx],flags)
+			variant = "INAT_VARIANT"
+		} else if (match(ext, lprefix2_expr)) {
+			lptable2[idx] = add_flags(lptable2[idx],flags)
+			variant = "INAT_VARIANT"
+		} else if (match(ext, lprefix3_expr)) {
+			lptable3[idx] = add_flags(lptable3[idx],flags)
+			variant = "INAT_VARIANT"
+		} else {
+			table[idx] = add_flags(table[idx],flags)
+		}
+	}
+	if (variant)
+		table[idx] = add_flags(table[idx],variant)
+}
+
+END {
+	if (awkchecked != "")
+		exit 1
+	# print escape opcode map's array
+	print "/* Escape opcode map array */"
+	print "const insn_attr_t const *inat_escape_tables[INAT_ESC_MAX + 1]" \
+	      "[INAT_LSTPFX_MAX + 1] = {"
+	for (i = 0; i < geid; i++)
+		for (j = 0; j < max_lprefix; j++)
+			if (etable[i,j])
+				print "	["i"]["j"] = "etable[i,j]","
+	print "};\n"
+	# print group opcode map's array
+	print "/* Group opcode map array */"
+	print "const insn_attr_t const *inat_group_tables[INAT_GRP_MAX + 1]"\
+	      "[INAT_LSTPFX_MAX + 1] = {"
+	for (i = 0; i < ggid; i++)
+		for (j = 0; j < max_lprefix; j++)
+			if (gtable[i,j])
+				print "	["i"]["j"] = "gtable[i,j]","
+	print "};\n"
+	# print AVX opcode map's array
+	print "/* AVX opcode map array */"
+	print "const insn_attr_t const *inat_avx_tables[X86_VEX_M_MAX + 1]"\
+	      "[INAT_LSTPFX_MAX + 1] = {"
+	for (i = 0; i < gaid; i++)
+		for (j = 0; j < max_lprefix; j++)
+			if (atable[i,j])
+				print "	["i"]["j"] = "atable[i,j]","
+	print "};"
+}
+
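To make the generator's output concrete, here is a hedged, hand-written sketch of the shape of an emitted inat-tables.c slice. The entries are illustrative examples chosen by the editor, not actual generator output (e.g. opcode 0x80 is Grp1 with a ModRM byte and a byte immediate):

    /* Table: one byte opcode */
    const insn_attr_t inat_primary_table[INAT_OPCODE_TABLE_SIZE] = {
    	[0x04] = INAT_MAKE_IMM(INAT_IMM_BYTE),
    	[0x05] = INAT_MAKE_IMM(INAT_IMM_VWORD32),
    	[0x80] = INAT_MAKE_IMM(INAT_IMM_BYTE) | INAT_MODRM | INAT_MAKE_GROUP(1),
    };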
diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c
new file mode 100644
index 000000000000..d8214dc03fa7
--- /dev/null
+++ b/arch/x86/tools/test_get_len.c
@@ -0,0 +1,173 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <unistd.h>
+
+#define unlikely(cond) (cond)
+
+#include <asm/insn.h>
+#include <inat.c>
+#include <insn.c>
+
+/*
+ * Test of instruction analysis in general and insn_get_length() in
+ * particular.  See if insn_get_length() and the disassembler agree
+ * on the length of each instruction in an elf disassembly.
+ *
+ * Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len
+ */
+
+const char *prog;
+static int verbose;
+static int x86_64;
+
+static void usage(void)
+{
+	fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |"
+		" %s [-y|-n] [-v]\n", prog);
+	fprintf(stderr, "\t-y	64bit mode\n");
+	fprintf(stderr, "\t-n	32bit mode\n");
+	fprintf(stderr, "\t-v	verbose mode\n");
+	exit(1);
+}
+
+static void malformed_line(const char *line, int line_nr)
+{
+	fprintf(stderr, "%s: malformed line %d:\n%s", prog, line_nr, line);
+	exit(3);
+}
+
+static void dump_field(FILE *fp, const char *name, const char *indent,
+		       struct insn_field *field)
+{
+	fprintf(fp, "%s.%s = {\n", indent, name);
+	fprintf(fp, "%s\t.value = %d, bytes[] = {%x, %x, %x, %x},\n",
+		indent, field->value, field->bytes[0], field->bytes[1],
+		field->bytes[2], field->bytes[3]);
+	fprintf(fp, "%s\t.got = %d, .nbytes = %d},\n", indent,
+		field->got, field->nbytes);
+}
+
+static void dump_insn(FILE *fp, struct insn *insn)
+{
+	fprintf(fp, "Instruction = {\n");
+	dump_field(fp, "prefixes", "\t",	&insn->prefixes);
+	dump_field(fp, "rex_prefix", "\t",	&insn->rex_prefix);
+	dump_field(fp, "vex_prefix", "\t",	&insn->vex_prefix);
+	dump_field(fp, "opcode", "\t",		&insn->opcode);
+	dump_field(fp, "modrm", "\t",		&insn->modrm);
+	dump_field(fp, "sib", "\t",		&insn->sib);
+	dump_field(fp, "displacement", "\t",	&insn->displacement);
+	dump_field(fp, "immediate1", "\t",	&insn->immediate1);
+	dump_field(fp, "immediate2", "\t",	&insn->immediate2);
+	fprintf(fp, "\t.attr = %x, .opnd_bytes = %d, .addr_bytes = %d,\n",
+		insn->attr, insn->opnd_bytes, insn->addr_bytes);
+	fprintf(fp, "\t.length = %d, .x86_64 = %d, .kaddr = %p}\n",
+		insn->length, insn->x86_64, insn->kaddr);
+}
+
+static void parse_args(int argc, char **argv)
+{
+	int c;
+	prog = argv[0];
+	while ((c = getopt(argc, argv, "ynv")) != -1) {
+		switch (c) {
+		case 'y':
+			x86_64 = 1;
+			break;
+		case 'n':
+			x86_64 = 0;
+			break;
+		case 'v':
+			verbose = 1;
+			break;
+		default:
+			usage();
+		}
+	}
+}
+
+#define BUFSIZE 256
+
+int main(int argc, char **argv)
+{
+	char line[BUFSIZE], sym[BUFSIZE] = "<unknown>";
+	unsigned char insn_buf[16];
+	struct insn insn;
+	int insns = 0, c;
+	int warnings = 0;
+
+	parse_args(argc, argv);
+
+	while (fgets(line, BUFSIZE, stdin)) {
+		char copy[BUFSIZE], *s, *tab1, *tab2;
+		int nb = 0;
+		unsigned int b;
+
+		if (line[0] == '<') {
+			/* Symbol line */
+			strcpy(sym, line);
+			continue;
+		}
+
+		insns++;
+		memset(insn_buf, 0, 16);
+		strcpy(copy, line);
+		tab1 = strchr(copy, '\t');
+		if (!tab1)
+			malformed_line(line, insns);
+		s = tab1 + 1;
+		s += strspn(s, " ");
+		tab2 = strchr(s, '\t');
+		if (!tab2)
+			malformed_line(line, insns);
+		*tab2 = '\0';	/* Characters beyond tab2 aren't examined */
+		while (s < tab2) {
+			if (sscanf(s, "%x", &b) == 1) {
+				insn_buf[nb++] = (unsigned char) b;
+				s += 3;
+			} else
+				break;
+		}
+		/* Decode an instruction */
+		insn_init(&insn, insn_buf, x86_64);
+		insn_get_length(&insn);
+		if (insn.length != nb) {
+			warnings++;
+			fprintf(stderr, "Warning: %s found a difference at %s\n",
+				prog, sym);
+			fprintf(stderr, "Warning: %s", line);
+			fprintf(stderr, "Warning: objdump says %d bytes, but "
+				"insn_get_length() says %d\n", nb,
+				insn.length);
+			if (verbose)
+				dump_insn(stderr, &insn);
+		}
+	}
+	if (warnings)
+		fprintf(stderr, "Warning: decoded and checked %d"
+			" instructions with %d warnings\n", insns, warnings);
+	else
+		fprintf(stderr, "Success: decoded and checked %d"
+			" instructions\n", insns);
+	return 0;
+}
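A minimal sketch of driving the same decoder API outside the test harness, assuming the include paths set up by the tools Makefile above; the instruction bytes are an arbitrary example:

    #include <stdio.h>

    #define unlikely(cond) (cond)

    #include <asm/insn.h>
    #include <inat.c>
    #include <insn.c>

    int main(void)
    {
    	/* 48 89 e5 = mov %rsp,%rbp in 64-bit mode */
    	unsigned char buf[16] = { 0x48, 0x89, 0xe5 };
    	struct insn insn;

    	insn_init(&insn, buf, 1);	/* 1 = x86-64 mode */
    	insn_get_length(&insn);
    	printf("length = %d\n", insn.length);	/* expect 3 */
    	return 0;
    }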
diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c
index 713ed7d37247..689cc6a6214d 100644
--- a/drivers/edac/edac_mce_amd.c
+++ b/drivers/edac/edac_mce_amd.c
@@ -3,7 +3,6 @@
 
 static bool report_gart_errors;
 static void (*nb_bus_decoder)(int node_id, struct err_regs *regs);
-static void (*orig_mce_callback)(struct mce *m);
 
 void amd_report_gart_errors(bool v)
 {
@@ -363,8 +362,10 @@ static inline void amd_decode_err_code(unsigned int ec)
 		pr_warning("Huh? Unknown MCE error 0x%x\n", ec);
 }
 
-static void amd_decode_mce(struct mce *m)
+static int amd_decode_mce(struct notifier_block *nb, unsigned long val,
+			  void *data)
 {
+	struct mce *m = (struct mce *)data;
 	struct err_regs regs;
 	int node, ecc;
 
@@ -420,20 +421,22 @@ static void amd_decode_mce(struct mce *m)
 	}
 
 	amd_decode_err_code(m->status & 0xffff);
+
+	return NOTIFY_STOP;
 }
 
+static struct notifier_block amd_mce_dec_nb = {
+	.notifier_call	= amd_decode_mce,
+};
+
 static int __init mce_amd_init(void)
 {
 	/*
 	 * We can decode MCEs for Opteron and later CPUs:
 	 */
 	if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
-	    (boot_cpu_data.x86 >= 0xf)) {
-		/* safe the default decode mce callback */
-		orig_mce_callback = x86_mce_decode_callback;
-
-		x86_mce_decode_callback = amd_decode_mce;
-	}
+	    (boot_cpu_data.x86 >= 0xf))
+		atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb);
 
 	return 0;
 }
@@ -442,7 +445,7 @@ early_initcall(mce_amd_init);
 #ifdef MODULE
 static void __exit mce_amd_exit(void)
 {
-	x86_mce_decode_callback = orig_mce_callback;
+	atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb);
 }
 
 MODULE_DESCRIPTION("AMD MCE decoder");
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 4ec5e67e18cf..47bbdf9c38d0 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -117,12 +117,12 @@ struct ftrace_event_call {
 	struct dentry		*dir;
 	struct trace_event	*event;
 	int			enabled;
-	int			(*regfunc)(void *);
-	void			(*unregfunc)(void *);
+	int			(*regfunc)(struct ftrace_event_call *);
+	void			(*unregfunc)(struct ftrace_event_call *);
 	int			id;
-	int			(*raw_init)(void);
-	int			(*show_format)(struct ftrace_event_call *call,
-					       struct trace_seq *s);
+	int			(*raw_init)(struct ftrace_event_call *);
+	int			(*show_format)(struct ftrace_event_call *,
+					       struct trace_seq *);
 	int			(*define_fields)(struct ftrace_event_call *);
 	struct list_head	fields;
 	int			filter_active;
@@ -131,20 +131,20 @@ struct ftrace_event_call {
 	void			*data;
 
 	atomic_t		profile_count;
-	int			(*profile_enable)(void);
-	void			(*profile_disable)(void);
+	int			(*profile_enable)(struct ftrace_event_call *);
+	void			(*profile_disable)(struct ftrace_event_call *);
 };
 
 #define FTRACE_MAX_PROFILE_SIZE	2048
 
-extern char *trace_profile_buf;
-extern char *trace_profile_buf_nmi;
+extern char *perf_trace_buf;
+extern char *perf_trace_buf_nmi;
 
 #define MAX_FILTER_PRED		32
 #define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */
 
 extern void destroy_preds(struct ftrace_event_call *call);
-extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
+extern int filter_match_preds(struct event_filter *filter, void *rec);
 extern int filter_current_check_discard(struct ring_buffer *buffer,
 					struct ftrace_event_call *call,
 					void *rec,
@@ -157,11 +157,12 @@ enum {
 	FILTER_PTR_STRING,
 };
 
-extern int trace_define_field(struct ftrace_event_call *call,
-			      const char *type, const char *name,
-			      int offset, int size, int is_signed,
-			      int filter_type);
 extern int trace_define_common_fields(struct ftrace_event_call *call);
+extern int trace_define_field(struct ftrace_event_call *call, const char *type,
+			      const char *name, int offset, int size,
+			      int is_signed, int filter_type);
+extern int trace_add_event_call(struct ftrace_event_call *call);
+extern void trace_remove_event_call(struct ftrace_event_call *call);
 
 #define is_signed_type(type)	(((type)(-1)) < 0)
 
@@ -186,4 +187,13 @@ do { \
 	__trace_printk(ip, fmt, ##args);	\
 } while (0)
 
+#ifdef CONFIG_EVENT_PROFILE
+struct perf_event;
+extern int ftrace_profile_enable(int event_id);
+extern void ftrace_profile_disable(int event_id);
+extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
+				     char *filter_str);
+extern void ftrace_profile_free_filter(struct perf_event *event);
+#endif
+
 #endif /* _LINUX_FTRACE_EVENT_H */
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
new file mode 100644
index 000000000000..a03daed08c59
--- /dev/null
+++ b/include/linux/hw_breakpoint.h
@@ -0,0 +1,131 @@
+#ifndef _LINUX_HW_BREAKPOINT_H
+#define _LINUX_HW_BREAKPOINT_H
+
+enum {
+	HW_BREAKPOINT_LEN_1 = 1,
+	HW_BREAKPOINT_LEN_2 = 2,
+	HW_BREAKPOINT_LEN_4 = 4,
+	HW_BREAKPOINT_LEN_8 = 8,
+};
+
+enum {
+	HW_BREAKPOINT_R = 1,
+	HW_BREAKPOINT_W = 2,
+	HW_BREAKPOINT_X = 4,
+};
+
+#ifdef __KERNEL__
+
+#include <linux/perf_event.h>
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+
+/* As it's for in-kernel or ptrace use, we want it to be pinned */
+#define DEFINE_BREAKPOINT_ATTR(name)	\
+struct perf_event_attr name = {		\
+	.type = PERF_TYPE_BREAKPOINT,	\
+	.size = sizeof(name),		\
+	.pinned = 1,			\
+};
+
+static inline void hw_breakpoint_init(struct perf_event_attr *attr)
+{
+	attr->type = PERF_TYPE_BREAKPOINT;
+	attr->size = sizeof(*attr);
+	attr->pinned = 1;
+}
+
+static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
+{
+	return bp->attr.bp_addr;
+}
+
+static inline int hw_breakpoint_type(struct perf_event *bp)
+{
+	return bp->attr.bp_type;
+}
+
+static inline int hw_breakpoint_len(struct perf_event *bp)
+{
+	return bp->attr.bp_len;
+}
+
+extern struct perf_event *
+register_user_hw_breakpoint(struct perf_event_attr *attr,
+			    perf_callback_t triggered,
+			    struct task_struct *tsk);
+
+/* FIXME: only change from the attr, and don't unregister */
+extern struct perf_event *
+modify_user_hw_breakpoint(struct perf_event *bp,
+			  struct perf_event_attr *attr,
+			  perf_callback_t triggered,
+			  struct task_struct *tsk);
+
+/*
+ * Kernel breakpoints are not associated with any particular thread.
+ */
+extern struct perf_event *
+register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
+				perf_callback_t triggered,
+				int cpu);
+
+extern struct perf_event **
+register_wide_hw_breakpoint(struct perf_event_attr *attr,
+			    perf_callback_t triggered);
+
+extern int register_perf_hw_breakpoint(struct perf_event *bp);
+extern int __register_perf_hw_breakpoint(struct perf_event *bp);
+extern void unregister_hw_breakpoint(struct perf_event *bp);
+extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events);
+
+extern int reserve_bp_slot(struct perf_event *bp);
+extern void release_bp_slot(struct perf_event *bp);
+
+extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
+
+static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
+{
+	return &bp->hw.info;
+}
+
+#else /* !CONFIG_HAVE_HW_BREAKPOINT */
+
+static inline struct perf_event *
+register_user_hw_breakpoint(struct perf_event_attr *attr,
+			    perf_callback_t triggered,
+			    struct task_struct *tsk)	{ return NULL; }
+static inline struct perf_event *
+modify_user_hw_breakpoint(struct perf_event *bp,
+			  struct perf_event_attr *attr,
+			  perf_callback_t triggered,
+			  struct task_struct *tsk)	{ return NULL; }
+static inline struct perf_event *
+register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
+				perf_callback_t triggered,
+				int cpu)		{ return NULL; }
+static inline struct perf_event **
+register_wide_hw_breakpoint(struct perf_event_attr *attr,
+			    perf_callback_t triggered)	{ return NULL; }
+static inline int
+register_perf_hw_breakpoint(struct perf_event *bp)	{ return -ENOSYS; }
+static inline int
+__register_perf_hw_breakpoint(struct perf_event *bp)	{ return -ENOSYS; }
+static inline void unregister_hw_breakpoint(struct perf_event *bp)	{ }
+static inline void
+unregister_wide_hw_breakpoint(struct perf_event **cpu_events)		{ }
+static inline int
+reserve_bp_slot(struct perf_event *bp)			{ return -ENOSYS; }
+static inline void release_bp_slot(struct perf_event *bp)		{ }
+
+static inline void flush_ptrace_hw_breakpoint(struct task_struct *tsk)	{ }
+
+static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
+{
+	return NULL;
+}
+
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_HW_BREAKPOINT_H */
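A hedged kernel-side sketch of the interface above: placing a system-wide write watchpoint on a kernel variable. The variable, handler, and init function names are hypothetical, chosen for the example:

    #include <linux/init.h>
    #include <linux/err.h>
    #include <linux/kernel.h>
    #include <linux/hw_breakpoint.h>

    static unsigned long watched;	/* hypothetical variable to watch */
    static struct perf_event **wp;

    static void wp_handler(struct perf_event *bp, void *data)
    {
    	pr_info("hw-breakpoint: 'watched' was written to\n");
    }

    static int __init wp_init(void)
    {
    	struct perf_event_attr attr;

    	hw_breakpoint_init(&attr);	/* PERF_TYPE_BREAKPOINT, pinned */
    	attr.bp_addr = (unsigned long)&watched;
    	attr.bp_len  = HW_BREAKPOINT_LEN_8;
    	attr.bp_type = HW_BREAKPOINT_W;

    	wp = register_wide_hw_breakpoint(&attr, wp_handler);
    	return IS_ERR(wp) ? PTR_ERR(wp) : 0;
    }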
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 3a46b7b7abb2..1b672f74a32f 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -296,6 +296,8 @@ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head);
 int disable_kprobe(struct kprobe *kp);
 int enable_kprobe(struct kprobe *kp);
 
+void dump_kprobe(struct kprobe *kp);
+
 #else /* !CONFIG_KPROBES: */
 
 static inline int kprobes_built_in(void)
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 7b7fbf433cff..e3fb25606706 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -106,6 +106,8 @@ enum perf_sw_ids {
 	PERF_COUNT_SW_CPU_MIGRATIONS		= 4,
 	PERF_COUNT_SW_PAGE_FAULTS_MIN		= 5,
 	PERF_COUNT_SW_PAGE_FAULTS_MAJ		= 6,
+	PERF_COUNT_SW_ALIGNMENT_FAULTS		= 7,
+	PERF_COUNT_SW_EMULATION_FAULTS		= 8,
 
 	PERF_COUNT_SW_MAX,			/* non-ABI */
 };
@@ -225,6 +227,7 @@ struct perf_counter_attr {
 #define PERF_COUNTER_IOC_RESET		_IO ('$', 3)
 #define PERF_COUNTER_IOC_PERIOD	_IOW('$', 4, u64)
 #define PERF_COUNTER_IOC_SET_OUTPUT	_IO ('$', 5)
+#define PERF_COUNTER_IOC_SET_FILTER	_IOW('$', 6, char *)
 
 enum perf_counter_ioc_flags {
 	PERF_IOC_FLAG_GROUP		= 1U << 0,
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 9e7012689a84..43adbd7f0010 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -18,6 +18,10 @@
 #include <linux/ioctl.h>
 #include <asm/byteorder.h>
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+#include <asm/hw_breakpoint.h>
+#endif
+
 /*
  * User-space ABI bits:
  */
@@ -31,6 +35,7 @@ enum perf_type_id {
 	PERF_TYPE_TRACEPOINT			= 2,
 	PERF_TYPE_HW_CACHE			= 3,
 	PERF_TYPE_RAW				= 4,
+	PERF_TYPE_BREAKPOINT			= 5,
 
 	PERF_TYPE_MAX,				/* non-ABI */
 };
@@ -102,6 +107,8 @@ enum perf_sw_ids {
 	PERF_COUNT_SW_CPU_MIGRATIONS		= 4,
 	PERF_COUNT_SW_PAGE_FAULTS_MIN		= 5,
 	PERF_COUNT_SW_PAGE_FAULTS_MAJ		= 6,
+	PERF_COUNT_SW_ALIGNMENT_FAULTS		= 7,
+	PERF_COUNT_SW_EMULATION_FAULTS		= 8,
 
 	PERF_COUNT_SW_MAX,			/* non-ABI */
 };
@@ -207,6 +214,15 @@ struct perf_event_attr {
 	__u32			wakeup_events;	  /* wakeup every n events */
 	__u32			wakeup_watermark; /* bytes before wakeup   */
 	};
+
+	union {
+		struct { /* Hardware breakpoint info */
+			__u64	bp_addr;
+			__u32	bp_type;
+			__u32	bp_len;
+		};
+	};
+
 	__u32			__reserved_2;
 
 	__u64			__reserved_3;
@@ -219,8 +235,9 @@ struct perf_event_attr {
 #define PERF_EVENT_IOC_DISABLE		_IO ('$', 1)
 #define PERF_EVENT_IOC_REFRESH		_IO ('$', 2)
 #define PERF_EVENT_IOC_RESET		_IO ('$', 3)
-#define PERF_EVENT_IOC_PERIOD		_IOW('$', 4, u64)
+#define PERF_EVENT_IOC_PERIOD		_IOW('$', 4, __u64)
 #define PERF_EVENT_IOC_SET_OUTPUT	_IO ('$', 5)
+#define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
 
 enum perf_event_ioc_flags {
 	PERF_IOC_FLAG_GROUP		= 1U << 0,
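A hedged user-space sketch of the new SET_FILTER ioctl on an already-open perf event fd; the filter expression is illustrative and assumes the tracing-filter syntax used for tracepoint events:

    #include <sys/ioctl.h>
    #include <linux/perf_event.h>

    /* Attach a filter to an open perf event fd; returns 0 or -1. */
    static int set_filter(int perf_fd)
    {
    	return ioctl(perf_fd, PERF_EVENT_IOC_SET_FILTER, "common_pid != 0");
    }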
@@ -475,6 +492,11 @@ struct hw_perf_event {
 		s64		remaining;
 		struct hrtimer	hrtimer;
 	};
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	union { /* breakpoint */
+		struct arch_hw_breakpoint	info;
+	};
+#endif
 	};
 	atomic64_t			prev_count;
 	u64				sample_period;
@@ -543,6 +565,10 @@ struct perf_pending_entry {
 	void			(*func)(struct perf_pending_entry *);
 };
 
+typedef void (*perf_callback_t)(struct perf_event *, void *);
+
+struct perf_sample_data;
+
 /**
  * struct perf_event - performance event kernel representation:
  */
@@ -585,7 +611,7 @@ struct perf_event {
 	u64				tstamp_running;
 	u64				tstamp_stopped;
 
-	struct perf_event_attr	attr;
+	struct perf_event_attr		attr;
 	struct hw_perf_event		hw;
 
 	struct perf_event_context	*ctx;
@@ -633,7 +659,20 @@ struct perf_event {
 
 	struct pid_namespace		*ns;
 	u64				id;
+
+	void (*overflow_handler)(struct perf_event *event,
+				 int nmi, struct perf_sample_data *data,
+				 struct pt_regs *regs);
+
+#ifdef CONFIG_EVENT_PROFILE
+	struct event_filter		*filter;
 #endif
+
+	perf_callback_t			callback;
+
+	perf_callback_t			event_callback;
+
+#endif /* CONFIG_PERF_EVENTS */
 };
 
 /**
639/** 678/**
@@ -706,7 +745,6 @@ struct perf_output_handle {
 	int				nmi;
 	int				sample;
 	int				locked;
-	unsigned long			flags;
 };
 
 #ifdef CONFIG_PERF_EVENTS
@@ -738,6 +776,14 @@ extern int hw_perf_group_sched_in(struct perf_event *group_leader,
 				struct perf_cpu_context *cpuctx,
 				struct perf_event_context *ctx, int cpu);
 extern void perf_event_update_userpage(struct perf_event *event);
+extern int perf_event_release_kernel(struct perf_event *event);
+extern struct perf_event *
+perf_event_create_kernel_counter(struct perf_event_attr *attr,
+				 int cpu,
+				 pid_t pid,
+				 perf_callback_t callback);
+extern u64 perf_event_read_value(struct perf_event *event,
+				 u64 *enabled, u64 *running);
 
 struct perf_sample_data {
 	u64				type;
@@ -814,6 +860,7 @@ extern int sysctl_perf_event_sample_rate;
 extern void perf_event_init(void);
 extern void perf_tp_event(int event_id, u64 addr, u64 count,
 			  void *record, int entry_size);
+extern void perf_bp_event(struct perf_event *event, void *data);
 
 #ifndef perf_misc_flags
 #define perf_misc_flags(regs)	(user_mode(regs) ? PERF_RECORD_MISC_USER : \
@@ -827,6 +874,8 @@ extern int perf_output_begin(struct perf_output_handle *handle,
 extern void perf_output_end(struct perf_output_handle *handle);
 extern void perf_output_copy(struct perf_output_handle *handle,
 			     const void *buf, unsigned int len);
+extern int perf_swevent_get_recursion_context(void);
+extern void perf_swevent_put_recursion_context(int rctx);
 #else
 static inline void
 perf_event_task_sched_in(struct task_struct *task, int cpu)		{ }
@@ -848,11 +897,15 @@ static inline int perf_event_task_enable(void) { return -EINVAL; }
 static inline void
 perf_sw_event(u32 event_id, u64 nr, int nmi,
 	      struct pt_regs *regs, u64 addr)				{ }
+static inline void
+perf_bp_event(struct perf_event *event, void *data)			{ }
 
 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
 static inline void perf_event_comm(struct task_struct *tsk)		{ }
 static inline void perf_event_fork(struct task_struct *tsk)		{ }
 static inline void perf_event_init(void)				{ }
+static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
+static inline void perf_swevent_put_recursion_context(int rctx)	{ }
 
 #endif
 
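A minimal kernel-side sketch using perf_event_create_kernel_counter() as declared above; the attr values and function name are illustrative:

    #include <linux/err.h>
    #include <linux/perf_event.h>

    static struct perf_event *cycle_ev;

    static int make_cycle_counter(void)
    {
    	struct perf_event_attr attr = {
    		.type	= PERF_TYPE_HARDWARE,
    		.config	= PERF_COUNT_HW_CPU_CYCLES,
    		.size	= sizeof(attr),
    		.pinned	= 1,
    	};

    	/* cpu 0, not tied to a task (pid -1), no callback */
    	cycle_ev = perf_event_create_kernel_counter(&attr, 0, -1, NULL);
    	return IS_ERR(cycle_ev) ? PTR_ERR(cycle_ev) : 0;
    }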
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a990ace1a838..e79e2f3ccc51 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -99,37 +99,16 @@ struct perf_event_attr;
 #define __SC_TEST6(t6, a6, ...)	__SC_TEST(t6); __SC_TEST5(__VA_ARGS__)
 
 #ifdef CONFIG_EVENT_PROFILE
-#define TRACE_SYS_ENTER_PROFILE(sname)					\
-static int prof_sysenter_enable_##sname(void)				\
-{									\
-	return reg_prof_syscall_enter("sys"#sname);			\
-}									\
-									\
-static void prof_sysenter_disable_##sname(void)			\
-{									\
-	unreg_prof_syscall_enter("sys"#sname);				\
-}
-
-#define TRACE_SYS_EXIT_PROFILE(sname)					\
-static int prof_sysexit_enable_##sname(void)				\
-{									\
-	return reg_prof_syscall_exit("sys"#sname);			\
-}									\
-									\
-static void prof_sysexit_disable_##sname(void)				\
-{									\
-	unreg_prof_syscall_exit("sys"#sname);				\
-}
 
 #define TRACE_SYS_ENTER_PROFILE_INIT(sname)				\
 	.profile_count = ATOMIC_INIT(-1),				\
-	.profile_enable = prof_sysenter_enable_##sname,			\
-	.profile_disable = prof_sysenter_disable_##sname,
+	.profile_enable = prof_sysenter_enable,				\
+	.profile_disable = prof_sysenter_disable,
 
 #define TRACE_SYS_EXIT_PROFILE_INIT(sname)				\
 	.profile_count = ATOMIC_INIT(-1),				\
-	.profile_enable = prof_sysexit_enable_##sname,			\
-	.profile_disable = prof_sysexit_disable_##sname,
+	.profile_enable = prof_sysexit_enable,				\
+	.profile_disable = prof_sysexit_disable,
 #else
 #define TRACE_SYS_ENTER_PROFILE(sname)
 #define TRACE_SYS_ENTER_PROFILE_INIT(sname)
@@ -153,74 +132,46 @@ static void prof_sysexit_disable_##sname(void) \
 #define __SC_STR_TDECL6(t, a, ...)	#t, __SC_STR_TDECL5(__VA_ARGS__)
 
 #define SYSCALL_TRACE_ENTER_EVENT(sname)				\
+	static const struct syscall_metadata __syscall_meta_##sname;	\
 	static struct ftrace_event_call event_enter_##sname;		\
-	struct trace_event enter_syscall_print_##sname = {		\
+	static struct trace_event enter_syscall_print_##sname = {	\
 		.trace                  = print_syscall_enter,		\
 	};								\
-	static int init_enter_##sname(void)				\
-	{								\
-		int num, id;						\
-		num = syscall_name_to_nr("sys"#sname);			\
-		if (num < 0)						\
-			return -ENOSYS;					\
-		id = register_ftrace_event(&enter_syscall_print_##sname);\
-		if (!id)						\
-			return -ENODEV;					\
-		event_enter_##sname.id = id;				\
-		set_syscall_enter_id(num, id);				\
-		INIT_LIST_HEAD(&event_enter_##sname.fields);		\
-		return 0;						\
-	}								\
-	TRACE_SYS_ENTER_PROFILE(sname);					\
 	static struct ftrace_event_call __used				\
 	  __attribute__((__aligned__(4)))				\
 	  __attribute__((section("_ftrace_events")))			\
 	  event_enter_##sname = {					\
 		.name                   = "sys_enter"#sname,		\
 		.system                 = "syscalls",			\
-		.event                  = &event_syscall_enter,	\
-		.raw_init		= init_enter_##sname,		\
+		.event                  = &enter_syscall_print_##sname,\
+		.raw_init		= init_syscall_trace,		\
 		.show_format		= syscall_enter_format,		\
 		.define_fields		= syscall_enter_define_fields,	\
 		.regfunc		= reg_event_syscall_enter,	\
 		.unregfunc		= unreg_event_syscall_enter,	\
-		.data			= "sys"#sname,			\
+		.data			= (void *)&__syscall_meta_##sname,\
 		TRACE_SYS_ENTER_PROFILE_INIT(sname)			\
 	}
 
 #define SYSCALL_TRACE_EXIT_EVENT(sname)					\
+	static const struct syscall_metadata __syscall_meta_##sname;	\
 	static struct ftrace_event_call event_exit_##sname;		\
-	struct trace_event exit_syscall_print_##sname = {		\
+	static struct trace_event exit_syscall_print_##sname = {	\
 		.trace                  = print_syscall_exit,		\
 	};								\
-	static int init_exit_##sname(void)				\
-	{								\
-		int num, id;						\
-		num = syscall_name_to_nr("sys"#sname);			\
-		if (num < 0)						\
-			return -ENOSYS;					\
-		id = register_ftrace_event(&exit_syscall_print_##sname);\
-		if (!id)						\
-			return -ENODEV;					\
-		event_exit_##sname.id = id;				\
-		set_syscall_exit_id(num, id);				\
-		INIT_LIST_HEAD(&event_exit_##sname.fields);		\
-		return 0;						\
-	}								\
-	TRACE_SYS_EXIT_PROFILE(sname);					\
 	static struct ftrace_event_call __used				\
 	  __attribute__((__aligned__(4)))				\
 	  __attribute__((section("_ftrace_events")))			\
 	  event_exit_##sname = {					\
 		.name                   = "sys_exit"#sname,		\
 		.system                 = "syscalls",			\
-		.event                  = &event_syscall_exit,		\
-		.raw_init		= init_exit_##sname,		\
+		.event                  = &exit_syscall_print_##sname,	\
+		.raw_init		= init_syscall_trace,		\
 		.show_format		= syscall_exit_format,		\
 		.define_fields		= syscall_exit_define_fields,	\
 		.regfunc		= reg_event_syscall_exit,	\
 		.unregfunc		= unreg_event_syscall_exit,	\
-		.data			= "sys"#sname,			\
+		.data			= (void *)&__syscall_meta_##sname,\
 		TRACE_SYS_EXIT_PROFILE_INIT(sname)			\
 	}
 
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 2aac8a83e89b..f59604ed0ec6 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -280,6 +280,12 @@ static inline void tracepoint_synchronize_unregister(void)
  * TRACE_EVENT_FN to perform any (un)registration work.
  */
 
+#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)
+#define DEFINE_EVENT(template, name, proto, args)		\
+	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+
 #define TRACE_EVENT(name, proto, args, struct, assign, print)	\
 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
 #define TRACE_EVENT_FN(name, proto, args, struct,		\
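A hedged sketch of the class/event split these macros enable (the event names here are illustrative): one DECLARE_EVENT_CLASS supplies the record layout and print format, and each DEFINE_EVENT stamps out a tracepoint that shares them. The block.h conversion below applies the same pattern:

    DECLARE_EVENT_CLASS(sample_class,

    	TP_PROTO(int value),

    	TP_ARGS(value),

    	TP_STRUCT__entry(
    		__field(	int,	value	)
    	),

    	TP_fast_assign(
    		__entry->value = value;
    	),

    	TP_printk("value=%d", __entry->value)
    );

    DEFINE_EVENT(sample_class, sample_event,

    	TP_PROTO(int value),

    	TP_ARGS(value)
    );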
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index 2a4b3bf74033..5acfb1eb4df9 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -31,6 +31,14 @@
 			assign, print, reg, unreg)		\
 	DEFINE_TRACE_FN(name, reg, unreg)
 
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args) \
+	DEFINE_TRACE(name)
+
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_TRACE(name)
+
 #undef DECLARE_TRACE
 #define DECLARE_TRACE(name, proto, args) \
 	DEFINE_TRACE(name)
@@ -63,6 +71,9 @@
 
 #undef TRACE_EVENT
 #undef TRACE_EVENT_FN
+#undef DECLARE_EVENT_CLASS
+#undef DEFINE_EVENT
+#undef DEFINE_EVENT_PRINT
 #undef TRACE_HEADER_MULTI_READ
 
 /* Only undef what we defined in this file */
diff --git a/include/trace/events/bkl.h b/include/trace/events/bkl.h
index 8abd620a490e..1af72dc24278 100644
--- a/include/trace/events/bkl.h
+++ b/include/trace/events/bkl.h
@@ -13,7 +13,7 @@ TRACE_EVENT(lock_kernel,
 	TP_ARGS(func, file, line),
 
 	TP_STRUCT__entry(
-		__field(	int,		lock_depth		)
+		__field(	int,		depth			)
 		__field_ext(	const char *,	func, FILTER_PTR_STRING	)
 		__field_ext(	const char *,	file, FILTER_PTR_STRING	)
 		__field(	int,		line			)
@@ -21,13 +21,13 @@ TRACE_EVENT(lock_kernel,
 
 	TP_fast_assign(
 		/* We want to record the lock_depth after lock is acquired */
-		__entry->lock_depth = current->lock_depth + 1;
+		__entry->depth = current->lock_depth + 1;
 		__entry->func = func;
 		__entry->file = file;
 		__entry->line = line;
 	),
 
-	TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth,
+	TP_printk("depth=%d file:line=%s:%d func=%s()", __entry->depth,
 		  __entry->file, __entry->line, __entry->func)
 );
 
@@ -38,20 +38,20 @@ TRACE_EVENT(unlock_kernel,
 	TP_ARGS(func, file, line),
 
 	TP_STRUCT__entry(
-		__field(int,		lock_depth)
-		__field(const char *,	func)
-		__field(const char *,	file)
-		__field(int,		line)
+		__field(int,		depth		)
+		__field(const char *,	func		)
+		__field(const char *,	file		)
+		__field(int,		line		)
 	),
 
 	TP_fast_assign(
-		__entry->lock_depth = current->lock_depth;
+		__entry->depth = current->lock_depth;
 		__entry->func = func;
 		__entry->file = file;
 		__entry->line = line;
 	),
 
-	TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth,
+	TP_printk("depth=%d file:line=%s:%d func=%s()", __entry->depth,
 		  __entry->file, __entry->line, __entry->func)
 );
 
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 00405b5f624a..5fb72733331e 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -8,7 +8,7 @@
 #include <linux/blkdev.h>
 #include <linux/tracepoint.h>
 
-TRACE_EVENT(block_rq_abort,
+DECLARE_EVENT_CLASS(block_rq_with_error,
 
 	TP_PROTO(struct request_queue *q, struct request *rq),
 
@@ -40,41 +40,28 @@ TRACE_EVENT(block_rq_abort,
 		 __entry->nr_sector, __entry->errors)
 );
 
-TRACE_EVENT(block_rq_insert,
+DEFINE_EVENT(block_rq_with_error, block_rq_abort,
 
 	TP_PROTO(struct request_queue *q, struct request *rq),
 
-	TP_ARGS(q, rq),
+	TP_ARGS(q, rq)
+);
 
-	TP_STRUCT__entry(
-		__field(  dev_t,	dev			)
-		__field(  sector_t,	sector			)
-		__field(  unsigned int,	nr_sector		)
-		__field(  unsigned int,	bytes			)
-		__array(  char,		rwbs,	6		)
-		__array(  char,		comm,	TASK_COMM_LEN	)
-		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
-	),
+DEFINE_EVENT(block_rq_with_error, block_rq_requeue,
 
-	TP_fast_assign(
-		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
-		__entry->sector    = blk_pc_request(rq) ? 0 : blk_rq_pos(rq);
-		__entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq);
-		__entry->bytes     = blk_pc_request(rq) ? blk_rq_bytes(rq) : 0;
+	TP_PROTO(struct request_queue *q, struct request *rq),
 
-		blk_fill_rwbs_rq(__entry->rwbs, rq);
-		blk_dump_cmd(__get_str(cmd), rq);
-		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
-	),
+	TP_ARGS(q, rq)
+);
 
-	TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->rwbs, __entry->bytes, __get_str(cmd),
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, __entry->comm)
+DEFINE_EVENT(block_rq_with_error, block_rq_complete,
+
+	TP_PROTO(struct request_queue *q, struct request *rq),
+
+	TP_ARGS(q, rq)
 );
 
-TRACE_EVENT(block_rq_issue,
+DECLARE_EVENT_CLASS(block_rq,
 
 	TP_PROTO(struct request_queue *q, struct request *rq),
 
@@ -86,7 +73,7 @@ TRACE_EVENT(block_rq_issue,
 		__field(  unsigned int,	nr_sector		)
 		__field(  unsigned int,	bytes			)
 		__array(  char,		rwbs,	6		)
-		__array(  char,         comm,   TASK_COMM_LEN   )
+		__array(  char,		comm,	TASK_COMM_LEN	)
 		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
 	),
 
@@ -108,68 +95,18 @@ TRACE_EVENT(block_rq_issue,
 		 __entry->nr_sector, __entry->comm)
 );
 
-TRACE_EVENT(block_rq_requeue,
+DEFINE_EVENT(block_rq, block_rq_insert,
 
 	TP_PROTO(struct request_queue *q, struct request *rq),
 
-	TP_ARGS(q, rq),
-
-	TP_STRUCT__entry(
-		__field(  dev_t,	dev			)
-		__field(  sector_t,	sector			)
-		__field(  unsigned int,	nr_sector		)
-		__field(  int,		errors			)
-		__array(  char,		rwbs,	6		)
-		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
-	),
-
-	TP_fast_assign(
-		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
-		__entry->sector    = blk_pc_request(rq) ? 0 : blk_rq_pos(rq);
-		__entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq);
-		__entry->errors    = rq->errors;
-
-		blk_fill_rwbs_rq(__entry->rwbs, rq);
-		blk_dump_cmd(__get_str(cmd), rq);
-	),
-
-	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->rwbs, __get_str(cmd),
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, __entry->errors)
+	TP_ARGS(q, rq)
 );
 
-TRACE_EVENT(block_rq_complete,
+DEFINE_EVENT(block_rq, block_rq_issue,
 
 	TP_PROTO(struct request_queue *q, struct request *rq),
 
-	TP_ARGS(q, rq),
-
-	TP_STRUCT__entry(
-		__field(  dev_t,	dev			)
-		__field(  sector_t,	sector			)
-		__field(  unsigned int,	nr_sector		)
-		__field(  int,		errors			)
-		__array(  char,		rwbs,	6		)
-		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
-	),
-
-	TP_fast_assign(
-		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
-		__entry->sector    = blk_pc_request(rq) ? 0 : blk_rq_pos(rq);
-		__entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq);
-		__entry->errors    = rq->errors;
-
-		blk_fill_rwbs_rq(__entry->rwbs, rq);
-		blk_dump_cmd(__get_str(cmd), rq);
-	),
-
-	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->rwbs, __get_str(cmd),
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, __entry->errors)
+	TP_ARGS(q, rq)
 );
 
 TRACE_EVENT(block_bio_bounce,
@@ -228,7 +165,7 @@ TRACE_EVENT(block_bio_complete,
228 __entry->nr_sector, __entry->error) 165 __entry->nr_sector, __entry->error)
229); 166);
230 167
231TRACE_EVENT(block_bio_backmerge, 168DECLARE_EVENT_CLASS(block_bio,
232 169
233 TP_PROTO(struct request_queue *q, struct bio *bio), 170 TP_PROTO(struct request_queue *q, struct bio *bio),
234 171
@@ -256,63 +193,28 @@ TRACE_EVENT(block_bio_backmerge,
256 __entry->nr_sector, __entry->comm) 193 __entry->nr_sector, __entry->comm)
257); 194);
258 195
259TRACE_EVENT(block_bio_frontmerge, 196DEFINE_EVENT(block_bio, block_bio_backmerge,
260 197
261 TP_PROTO(struct request_queue *q, struct bio *bio), 198 TP_PROTO(struct request_queue *q, struct bio *bio),
262 199
263 TP_ARGS(q, bio), 200 TP_ARGS(q, bio)
264
265 TP_STRUCT__entry(
266 __field( dev_t, dev )
267 __field( sector_t, sector )
268 __field( unsigned, nr_sector )
269 __array( char, rwbs, 6 )
270 __array( char, comm, TASK_COMM_LEN )
271 ),
272
273 TP_fast_assign(
274 __entry->dev = bio->bi_bdev->bd_dev;
275 __entry->sector = bio->bi_sector;
276 __entry->nr_sector = bio->bi_size >> 9;
277 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
278 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
279 ),
280
281 TP_printk("%d,%d %s %llu + %u [%s]",
282 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
283 (unsigned long long)__entry->sector,
284 __entry->nr_sector, __entry->comm)
285); 201);
286 202
287TRACE_EVENT(block_bio_queue, 203DEFINE_EVENT(block_bio, block_bio_frontmerge,
288 204
289 TP_PROTO(struct request_queue *q, struct bio *bio), 205 TP_PROTO(struct request_queue *q, struct bio *bio),
290 206
291 TP_ARGS(q, bio), 207 TP_ARGS(q, bio)
208);
292 209
293 TP_STRUCT__entry( 210DEFINE_EVENT(block_bio, block_bio_queue,
294 __field( dev_t, dev )
295 __field( sector_t, sector )
296 __field( unsigned int, nr_sector )
297 __array( char, rwbs, 6 )
298 __array( char, comm, TASK_COMM_LEN )
299 ),
300 211
301 TP_fast_assign( 212 TP_PROTO(struct request_queue *q, struct bio *bio),
302 __entry->dev = bio->bi_bdev->bd_dev;
303 __entry->sector = bio->bi_sector;
304 __entry->nr_sector = bio->bi_size >> 9;
305 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
306 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
307 ),
308 213
309 TP_printk("%d,%d %s %llu + %u [%s]", 214 TP_ARGS(q, bio)
310 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
311 (unsigned long long)__entry->sector,
312 __entry->nr_sector, __entry->comm)
313); 215);
314 216
315TRACE_EVENT(block_getrq, 217DECLARE_EVENT_CLASS(block_get_rq,
316 218
317 TP_PROTO(struct request_queue *q, struct bio *bio, int rw), 219 TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
318 220
@@ -341,33 +243,18 @@ TRACE_EVENT(block_getrq,
341 __entry->nr_sector, __entry->comm) 243 __entry->nr_sector, __entry->comm)
342); 244);
343 245
344TRACE_EVENT(block_sleeprq, 246DEFINE_EVENT(block_get_rq, block_getrq,
345 247
346 TP_PROTO(struct request_queue *q, struct bio *bio, int rw), 248 TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
347 249
348 TP_ARGS(q, bio, rw), 250 TP_ARGS(q, bio, rw)
251);
349 252
350 TP_STRUCT__entry( 253DEFINE_EVENT(block_get_rq, block_sleeprq,
351 __field( dev_t, dev )
352 __field( sector_t, sector )
353 __field( unsigned int, nr_sector )
354 __array( char, rwbs, 6 )
355 __array( char, comm, TASK_COMM_LEN )
356 ),
357 254
358 TP_fast_assign( 255 TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
359 __entry->dev = bio ? bio->bi_bdev->bd_dev : 0;
360 __entry->sector = bio ? bio->bi_sector : 0;
361 __entry->nr_sector = bio ? bio->bi_size >> 9 : 0;
362 blk_fill_rwbs(__entry->rwbs,
363 bio ? bio->bi_rw : 0, __entry->nr_sector);
364 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
365 ),
366 256
367 TP_printk("%d,%d %s %llu + %u [%s]", 257 TP_ARGS(q, bio, rw)
368 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
369 (unsigned long long)__entry->sector,
370 __entry->nr_sector, __entry->comm)
371); 258);
372 259
373TRACE_EVENT(block_plug, 260TRACE_EVENT(block_plug,
@@ -387,7 +274,7 @@ TRACE_EVENT(block_plug,
387 TP_printk("[%s]", __entry->comm) 274 TP_printk("[%s]", __entry->comm)
388); 275);
389 276
390TRACE_EVENT(block_unplug_timer, 277DECLARE_EVENT_CLASS(block_unplug,
391 278
392 TP_PROTO(struct request_queue *q), 279 TP_PROTO(struct request_queue *q),
393 280
@@ -406,23 +293,18 @@ TRACE_EVENT(block_unplug_timer,
406 TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) 293 TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
407); 294);
408 295
409TRACE_EVENT(block_unplug_io, 296DEFINE_EVENT(block_unplug, block_unplug_timer,
410 297
411 TP_PROTO(struct request_queue *q), 298 TP_PROTO(struct request_queue *q),
412 299
413 TP_ARGS(q), 300 TP_ARGS(q)
301);
414 302
415 TP_STRUCT__entry( 303DEFINE_EVENT(block_unplug, block_unplug_io,
416 __field( int, nr_rq )
417 __array( char, comm, TASK_COMM_LEN )
418 ),
419 304
420 TP_fast_assign( 305 TP_PROTO(struct request_queue *q),
421 __entry->nr_rq = q->rq.count[READ] + q->rq.count[WRITE];
422 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
423 ),
424 306
425 TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) 307 TP_ARGS(q)
426); 308);
427 309
428TRACE_EVENT(block_split, 310TRACE_EVENT(block_split,
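The block.h conversion above is the template for the rest of this series: DECLARE_EVENT_CLASS() carries TP_STRUCT__entry, TP_fast_assign and TP_printk exactly once, and each DEFINE_EVENT() binds a named tracepoint to that class while repeating the class's TP_PROTO/TP_ARGS verbatim. A minimal sketch of the shape, assuming a hypothetical class and events (a trace-header fragment; the request types come from <linux/blkdev.h>):

	DECLARE_EVENT_CLASS(foo_class,

		TP_PROTO(struct request_queue *q, struct request *rq),

		TP_ARGS(q, rq),

		TP_STRUCT__entry(
			__field( dev_t, dev )
		),

		TP_fast_assign(
			__entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
		),

		TP_printk("%d,%d", MAJOR(__entry->dev), MINOR(__entry->dev))
	);

	/* Each event reuses the class body; only the event name differs. */
	DEFINE_EVENT(foo_class, foo_start,
		TP_PROTO(struct request_queue *q, struct request *rq),
		TP_ARGS(q, rq)
	);

	DEFINE_EVENT(foo_class, foo_done,
		TP_PROTO(struct request_queue *q, struct request *rq),
		TP_ARGS(q, rq)
	);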
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index d09550bf3f95..318f76535bd4 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -90,7 +90,7 @@ TRACE_EVENT(ext4_allocate_inode,
90 (unsigned long) __entry->dir, __entry->mode) 90 (unsigned long) __entry->dir, __entry->mode)
91); 91);
92 92
93TRACE_EVENT(ext4_write_begin, 93DECLARE_EVENT_CLASS(ext4__write_begin,
94 94
95 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, 95 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
96 unsigned int flags), 96 unsigned int flags),
@@ -118,7 +118,23 @@ TRACE_EVENT(ext4_write_begin,
118 __entry->pos, __entry->len, __entry->flags) 118 __entry->pos, __entry->len, __entry->flags)
119); 119);
120 120
121TRACE_EVENT(ext4_ordered_write_end, 121DEFINE_EVENT(ext4__write_begin, ext4_write_begin,
122
123 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
124 unsigned int flags),
125
126 TP_ARGS(inode, pos, len, flags)
127);
128
129DEFINE_EVENT(ext4__write_begin, ext4_da_write_begin,
130
131 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
132 unsigned int flags),
133
134 TP_ARGS(inode, pos, len, flags)
135);
136
137DECLARE_EVENT_CLASS(ext4__write_end,
122 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, 138 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
123 unsigned int copied), 139 unsigned int copied),
124 140
@@ -145,57 +161,36 @@ TRACE_EVENT(ext4_ordered_write_end,
145 __entry->pos, __entry->len, __entry->copied) 161 __entry->pos, __entry->len, __entry->copied)
146); 162);
147 163
148TRACE_EVENT(ext4_writeback_write_end, 164DEFINE_EVENT(ext4__write_end, ext4_ordered_write_end,
165
149 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, 166 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
150 unsigned int copied), 167 unsigned int copied),
151 168
152 TP_ARGS(inode, pos, len, copied), 169 TP_ARGS(inode, pos, len, copied)
170);
153 171
154 TP_STRUCT__entry( 172DEFINE_EVENT(ext4__write_end, ext4_writeback_write_end,
155 __field( dev_t, dev )
156 __field( ino_t, ino )
157 __field( loff_t, pos )
158 __field( unsigned int, len )
159 __field( unsigned int, copied )
160 ),
161 173
162 TP_fast_assign( 174 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
163 __entry->dev = inode->i_sb->s_dev; 175 unsigned int copied),
164 __entry->ino = inode->i_ino;
165 __entry->pos = pos;
166 __entry->len = len;
167 __entry->copied = copied;
168 ),
169 176
170 TP_printk("dev %s ino %lu pos %llu len %u copied %u", 177 TP_ARGS(inode, pos, len, copied)
171 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
172 __entry->pos, __entry->len, __entry->copied)
173); 178);
174 179
175TRACE_EVENT(ext4_journalled_write_end, 180DEFINE_EVENT(ext4__write_end, ext4_journalled_write_end,
181
176 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, 182 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
177 unsigned int copied), 183 unsigned int copied),
178 TP_ARGS(inode, pos, len, copied),
179 184
180 TP_STRUCT__entry( 185 TP_ARGS(inode, pos, len, copied)
181 __field( dev_t, dev ) 186);
182 __field( ino_t, ino )
183 __field( loff_t, pos )
184 __field( unsigned int, len )
185 __field( unsigned int, copied )
186 ),
187 187
188 TP_fast_assign( 188DEFINE_EVENT(ext4__write_end, ext4_da_write_end,
189 __entry->dev = inode->i_sb->s_dev;
190 __entry->ino = inode->i_ino;
191 __entry->pos = pos;
192 __entry->len = len;
193 __entry->copied = copied;
194 ),
195 189
196 TP_printk("dev %s ino %lu pos %llu len %u copied %u", 190 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
197 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, 191 unsigned int copied),
198 __entry->pos, __entry->len, __entry->copied) 192
193 TP_ARGS(inode, pos, len, copied)
199); 194);
200 195
201TRACE_EVENT(ext4_writepage, 196TRACE_EVENT(ext4_writepage,
@@ -337,60 +332,6 @@ TRACE_EVENT(ext4_da_writepages_result,
337 (unsigned long) __entry->writeback_index) 332 (unsigned long) __entry->writeback_index)
338); 333);
339 334
340TRACE_EVENT(ext4_da_write_begin,
341 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
342 unsigned int flags),
343
344 TP_ARGS(inode, pos, len, flags),
345
346 TP_STRUCT__entry(
347 __field( dev_t, dev )
348 __field( ino_t, ino )
349 __field( loff_t, pos )
350 __field( unsigned int, len )
351 __field( unsigned int, flags )
352 ),
353
354 TP_fast_assign(
355 __entry->dev = inode->i_sb->s_dev;
356 __entry->ino = inode->i_ino;
357 __entry->pos = pos;
358 __entry->len = len;
359 __entry->flags = flags;
360 ),
361
362 TP_printk("dev %s ino %lu pos %llu len %u flags %u",
363 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
364 __entry->pos, __entry->len, __entry->flags)
365);
366
367TRACE_EVENT(ext4_da_write_end,
368 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
369 unsigned int copied),
370
371 TP_ARGS(inode, pos, len, copied),
372
373 TP_STRUCT__entry(
374 __field( dev_t, dev )
375 __field( ino_t, ino )
376 __field( loff_t, pos )
377 __field( unsigned int, len )
378 __field( unsigned int, copied )
379 ),
380
381 TP_fast_assign(
382 __entry->dev = inode->i_sb->s_dev;
383 __entry->ino = inode->i_ino;
384 __entry->pos = pos;
385 __entry->len = len;
386 __entry->copied = copied;
387 ),
388
389 TP_printk("dev %s ino %lu pos %llu len %u copied %u",
390 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
391 __entry->pos, __entry->len, __entry->copied)
392);
393
394TRACE_EVENT(ext4_discard_blocks, 335TRACE_EVENT(ext4_discard_blocks,
395 TP_PROTO(struct super_block *sb, unsigned long long blk, 336 TP_PROTO(struct super_block *sb, unsigned long long blk,
396 unsigned long long count), 337 unsigned long long count),
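Note that none of these ext4 conversions touch the call sites: DEFINE_EVENT(ext4__write_begin, ext4_write_begin, ...) still generates the same trace_ext4_write_begin() static inline that TRACE_EVENT did, so fs/ext4 keeps calling the identical hook. A hedged sketch of such a call site (the wrapper function here is hypothetical):

	#include <trace/events/ext4.h>	/* declares trace_ext4_write_begin() */

	static int example_write_begin(struct inode *inode, loff_t pos,
				       unsigned int len, unsigned int flags)
	{
		trace_ext4_write_begin(inode, pos, len, flags);	/* fires the event */
		/* ... the real ext4_write_begin() work would follow ... */
		return 0;
	}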
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index b89f9db4a404..0e4cfb694fe7 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -48,7 +48,7 @@ TRACE_EVENT(irq_handler_entry,
48 __assign_str(name, action->name); 48 __assign_str(name, action->name);
49 ), 49 ),
50 50
51 TP_printk("irq=%d handler=%s", __entry->irq, __get_str(name)) 51 TP_printk("irq=%d name=%s", __entry->irq, __get_str(name))
52); 52);
53 53
54/** 54/**
@@ -78,22 +78,11 @@ TRACE_EVENT(irq_handler_exit,
78 __entry->ret = ret; 78 __entry->ret = ret;
79 ), 79 ),
80 80
81 TP_printk("irq=%d return=%s", 81 TP_printk("irq=%d ret=%s",
82 __entry->irq, __entry->ret ? "handled" : "unhandled") 82 __entry->irq, __entry->ret ? "handled" : "unhandled")
83); 83);
84 84
85/** 85DECLARE_EVENT_CLASS(softirq,
86 * softirq_entry - called immediately before the softirq handler
87 * @h: pointer to struct softirq_action
88 * @vec: pointer to first struct softirq_action in softirq_vec array
89 *
 90 * The @h parameter contains a pointer to the struct softirq_action
91 * which has a pointer to the action handler that is called. By subtracting
92 * the @vec pointer from the @h pointer, we can determine the softirq
93 * number. Also, when used in combination with the softirq_exit tracepoint
94 * we can determine the softirq latency.
95 */
96TRACE_EVENT(softirq_entry,
97 86
98 TP_PROTO(struct softirq_action *h, struct softirq_action *vec), 87 TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
99 88
@@ -107,11 +96,29 @@ TRACE_EVENT(softirq_entry,
107 __entry->vec = (int)(h - vec); 96 __entry->vec = (int)(h - vec);
108 ), 97 ),
109 98
110 TP_printk("softirq=%d action=%s", __entry->vec, 99 TP_printk("vec=%d [action=%s]", __entry->vec,
111 show_softirq_name(__entry->vec)) 100 show_softirq_name(__entry->vec))
112); 101);
113 102
114/** 103/**
104 * softirq_entry - called immediately before the softirq handler
105 * @h: pointer to struct softirq_action
106 * @vec: pointer to first struct softirq_action in softirq_vec array
107 *
108 * The @h parameter contains a pointer to the struct softirq_action
109 * which has a pointer to the action handler that is called. By subtracting
110 * the @vec pointer from the @h pointer, we can determine the softirq
111 * number. Also, when used in combination with the softirq_exit tracepoint
112 * we can determine the softirq latency.
113 */
114DEFINE_EVENT(softirq, softirq_entry,
115
116 TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
117
118 TP_ARGS(h, vec)
119);
120
121/**
115 * softirq_exit - called immediately after the softirq handler returns 122 * softirq_exit - called immediately after the softirq handler returns
116 * @h: pointer to struct softirq_action 123 * @h: pointer to struct softirq_action
117 * @vec: pointer to first struct softirq_action in softirq_vec array 124 * @vec: pointer to first struct softirq_action in softirq_vec array
@@ -122,22 +129,11 @@ TRACE_EVENT(softirq_entry,
122 * combination with the softirq_entry tracepoint we can determine the softirq 129 * combination with the softirq_entry tracepoint we can determine the softirq
123 * latency. 130 * latency.
124 */ 131 */
125TRACE_EVENT(softirq_exit, 132DEFINE_EVENT(softirq, softirq_exit,
126 133
127 TP_PROTO(struct softirq_action *h, struct softirq_action *vec), 134 TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
128 135
129 TP_ARGS(h, vec), 136 TP_ARGS(h, vec)
130
131 TP_STRUCT__entry(
132 __field( int, vec )
133 ),
134
135 TP_fast_assign(
136 __entry->vec = (int)(h - vec);
137 ),
138
139 TP_printk("softirq=%d action=%s", __entry->vec,
140 show_softirq_name(__entry->vec))
141); 137);
142 138
143#endif /* _TRACE_IRQ_H */ 139#endif /* _TRACE_IRQ_H */
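The softirq class kept above recovers the softirq number without storing it: softirq_vec[] is a flat array, so subtracting the array base @vec from the handler pointer @h yields the index. A self-contained sketch of that pointer arithmetic (the array and its size are illustrative, not the kernel's):

	struct softirq_action {
		void (*action)(struct softirq_action *);
	};

	static struct softirq_action softirq_vec[10];

	static int softirq_nr(struct softirq_action *h)
	{
		/* pointer difference counts elements, not bytes:
		 * h == &softirq_vec[3] makes this return 3 */
		return (int)(h - softirq_vec);
	}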
diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h
index 3c60b75adb9e..96b370a050de 100644
--- a/include/trace/events/jbd2.h
+++ b/include/trace/events/jbd2.h
@@ -30,7 +30,7 @@ TRACE_EVENT(jbd2_checkpoint,
30 jbd2_dev_to_name(__entry->dev), __entry->result) 30 jbd2_dev_to_name(__entry->dev), __entry->result)
31); 31);
32 32
33TRACE_EVENT(jbd2_start_commit, 33DECLARE_EVENT_CLASS(jbd2_commit,
34 34
35 TP_PROTO(journal_t *journal, transaction_t *commit_transaction), 35 TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
36 36
@@ -53,73 +53,32 @@ TRACE_EVENT(jbd2_start_commit,
53 __entry->sync_commit) 53 __entry->sync_commit)
54); 54);
55 55
56TRACE_EVENT(jbd2_commit_locking, 56DEFINE_EVENT(jbd2_commit, jbd2_start_commit,
57 57
58 TP_PROTO(journal_t *journal, transaction_t *commit_transaction), 58 TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
59 59
60 TP_ARGS(journal, commit_transaction), 60 TP_ARGS(journal, commit_transaction)
61
62 TP_STRUCT__entry(
63 __field( dev_t, dev )
64 __field( char, sync_commit )
65 __field( int, transaction )
66 ),
67
68 TP_fast_assign(
69 __entry->dev = journal->j_fs_dev->bd_dev;
70 __entry->sync_commit = commit_transaction->t_synchronous_commit;
71 __entry->transaction = commit_transaction->t_tid;
72 ),
73
74 TP_printk("dev %s transaction %d sync %d",
75 jbd2_dev_to_name(__entry->dev), __entry->transaction,
76 __entry->sync_commit)
77); 61);
78 62
79TRACE_EVENT(jbd2_commit_flushing, 63DEFINE_EVENT(jbd2_commit, jbd2_commit_locking,
80 64
81 TP_PROTO(journal_t *journal, transaction_t *commit_transaction), 65 TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
82 66
83 TP_ARGS(journal, commit_transaction), 67 TP_ARGS(journal, commit_transaction)
84
85 TP_STRUCT__entry(
86 __field( dev_t, dev )
87 __field( char, sync_commit )
88 __field( int, transaction )
89 ),
90
91 TP_fast_assign(
92 __entry->dev = journal->j_fs_dev->bd_dev;
93 __entry->sync_commit = commit_transaction->t_synchronous_commit;
94 __entry->transaction = commit_transaction->t_tid;
95 ),
96
97 TP_printk("dev %s transaction %d sync %d",
98 jbd2_dev_to_name(__entry->dev), __entry->transaction,
99 __entry->sync_commit)
100); 68);
101 69
102TRACE_EVENT(jbd2_commit_logging, 70DEFINE_EVENT(jbd2_commit, jbd2_commit_flushing,
103 71
104 TP_PROTO(journal_t *journal, transaction_t *commit_transaction), 72 TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
105 73
106 TP_ARGS(journal, commit_transaction), 74 TP_ARGS(journal, commit_transaction)
75);
107 76
108 TP_STRUCT__entry( 77DEFINE_EVENT(jbd2_commit, jbd2_commit_logging,
109 __field( dev_t, dev )
110 __field( char, sync_commit )
111 __field( int, transaction )
112 ),
113 78
114 TP_fast_assign( 79 TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
115 __entry->dev = journal->j_fs_dev->bd_dev;
116 __entry->sync_commit = commit_transaction->t_synchronous_commit;
117 __entry->transaction = commit_transaction->t_tid;
118 ),
119 80
120 TP_printk("dev %s transaction %d sync %d", 81 TP_ARGS(journal, commit_transaction)
121 jbd2_dev_to_name(__entry->dev), __entry->transaction,
122 __entry->sync_commit)
123); 82);
124 83
125TRACE_EVENT(jbd2_end_commit, 84TRACE_EVENT(jbd2_end_commit,
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index eaf46bdd18a5..3adca0ca9dbe 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -44,7 +44,7 @@
44 {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"} \ 44 {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"} \
45 ) : "GFP_NOWAIT" 45 ) : "GFP_NOWAIT"
46 46
47TRACE_EVENT(kmalloc, 47DECLARE_EVENT_CLASS(kmem_alloc,
48 48
49 TP_PROTO(unsigned long call_site, 49 TP_PROTO(unsigned long call_site,
50 const void *ptr, 50 const void *ptr,
@@ -78,41 +78,23 @@ TRACE_EVENT(kmalloc,
78 show_gfp_flags(__entry->gfp_flags)) 78 show_gfp_flags(__entry->gfp_flags))
79); 79);
80 80
81TRACE_EVENT(kmem_cache_alloc, 81DEFINE_EVENT(kmem_alloc, kmalloc,
82 82
83 TP_PROTO(unsigned long call_site, 83 TP_PROTO(unsigned long call_site, const void *ptr,
84 const void *ptr, 84 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
85 size_t bytes_req,
86 size_t bytes_alloc,
87 gfp_t gfp_flags),
88 85
89 TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), 86 TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
87);
90 88
91 TP_STRUCT__entry( 89DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,
92 __field( unsigned long, call_site )
93 __field( const void *, ptr )
94 __field( size_t, bytes_req )
95 __field( size_t, bytes_alloc )
96 __field( gfp_t, gfp_flags )
97 ),
98 90
99 TP_fast_assign( 91 TP_PROTO(unsigned long call_site, const void *ptr,
100 __entry->call_site = call_site; 92 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
101 __entry->ptr = ptr;
102 __entry->bytes_req = bytes_req;
103 __entry->bytes_alloc = bytes_alloc;
104 __entry->gfp_flags = gfp_flags;
105 ),
106 93
107 TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s", 94 TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
108 __entry->call_site,
109 __entry->ptr,
110 __entry->bytes_req,
111 __entry->bytes_alloc,
112 show_gfp_flags(__entry->gfp_flags))
113); 95);
114 96
115TRACE_EVENT(kmalloc_node, 97DECLARE_EVENT_CLASS(kmem_alloc_node,
116 98
117 TP_PROTO(unsigned long call_site, 99 TP_PROTO(unsigned long call_site,
118 const void *ptr, 100 const void *ptr,
@@ -150,45 +132,25 @@ TRACE_EVENT(kmalloc_node,
150 __entry->node) 132 __entry->node)
151); 133);
152 134
153TRACE_EVENT(kmem_cache_alloc_node, 135DEFINE_EVENT(kmem_alloc_node, kmalloc_node,
154 136
155 TP_PROTO(unsigned long call_site, 137 TP_PROTO(unsigned long call_site, const void *ptr,
156 const void *ptr, 138 size_t bytes_req, size_t bytes_alloc,
157 size_t bytes_req, 139 gfp_t gfp_flags, int node),
158 size_t bytes_alloc,
159 gfp_t gfp_flags,
160 int node),
161 140
162 TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), 141 TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
142);
163 143
164 TP_STRUCT__entry( 144DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node,
165 __field( unsigned long, call_site )
166 __field( const void *, ptr )
167 __field( size_t, bytes_req )
168 __field( size_t, bytes_alloc )
169 __field( gfp_t, gfp_flags )
170 __field( int, node )
171 ),
172 145
173 TP_fast_assign( 146 TP_PROTO(unsigned long call_site, const void *ptr,
174 __entry->call_site = call_site; 147 size_t bytes_req, size_t bytes_alloc,
175 __entry->ptr = ptr; 148 gfp_t gfp_flags, int node),
176 __entry->bytes_req = bytes_req;
177 __entry->bytes_alloc = bytes_alloc;
178 __entry->gfp_flags = gfp_flags;
179 __entry->node = node;
180 ),
181 149
182 TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d", 150 TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
183 __entry->call_site,
184 __entry->ptr,
185 __entry->bytes_req,
186 __entry->bytes_alloc,
187 show_gfp_flags(__entry->gfp_flags),
188 __entry->node)
189); 151);
190 152
191TRACE_EVENT(kfree, 153DECLARE_EVENT_CLASS(kmem_free,
192 154
193 TP_PROTO(unsigned long call_site, const void *ptr), 155 TP_PROTO(unsigned long call_site, const void *ptr),
194 156
@@ -207,23 +169,18 @@ TRACE_EVENT(kfree,
207 TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) 169 TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
208); 170);
209 171
210TRACE_EVENT(kmem_cache_free, 172DEFINE_EVENT(kmem_free, kfree,
211 173
212 TP_PROTO(unsigned long call_site, const void *ptr), 174 TP_PROTO(unsigned long call_site, const void *ptr),
213 175
214 TP_ARGS(call_site, ptr), 176 TP_ARGS(call_site, ptr)
177);
215 178
216 TP_STRUCT__entry( 179DEFINE_EVENT(kmem_free, kmem_cache_free,
217 __field( unsigned long, call_site )
218 __field( const void *, ptr )
219 ),
220 180
221 TP_fast_assign( 181 TP_PROTO(unsigned long call_site, const void *ptr),
222 __entry->call_site = call_site;
223 __entry->ptr = ptr;
224 ),
225 182
226 TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) 183 TP_ARGS(call_site, ptr)
227); 184);
228 185
229TRACE_EVENT(mm_page_free_direct, 186TRACE_EVENT(mm_page_free_direct,
@@ -299,7 +256,7 @@ TRACE_EVENT(mm_page_alloc,
299 show_gfp_flags(__entry->gfp_flags)) 256 show_gfp_flags(__entry->gfp_flags))
300); 257);
301 258
302TRACE_EVENT(mm_page_alloc_zone_locked, 259DECLARE_EVENT_CLASS(mm_page,
303 260
304 TP_PROTO(struct page *page, unsigned int order, int migratetype), 261 TP_PROTO(struct page *page, unsigned int order, int migratetype),
305 262
@@ -325,29 +282,22 @@ TRACE_EVENT(mm_page_alloc_zone_locked,
325 __entry->order == 0) 282 __entry->order == 0)
326); 283);
327 284
328TRACE_EVENT(mm_page_pcpu_drain, 285DEFINE_EVENT(mm_page, mm_page_alloc_zone_locked,
329 286
330 TP_PROTO(struct page *page, int order, int migratetype), 287 TP_PROTO(struct page *page, unsigned int order, int migratetype),
331 288
332 TP_ARGS(page, order, migratetype), 289 TP_ARGS(page, order, migratetype)
290);
333 291
334 TP_STRUCT__entry( 292DEFINE_EVENT_PRINT(mm_page, mm_page_pcpu_drain,
335 __field( struct page *, page )
336 __field( int, order )
337 __field( int, migratetype )
338 ),
339 293
340 TP_fast_assign( 294 TP_PROTO(struct page *page, unsigned int order, int migratetype),
341 __entry->page = page; 295
342 __entry->order = order; 296 TP_ARGS(page, order, migratetype),
343 __entry->migratetype = migratetype;
344 ),
345 297
346 TP_printk("page=%p pfn=%lu order=%d migratetype=%d", 298 TP_printk("page=%p pfn=%lu order=%d migratetype=%d",
347 __entry->page, 299 __entry->page, page_to_pfn(__entry->page),
348 page_to_pfn(__entry->page), 300 __entry->order, __entry->migratetype)
349 __entry->order,
350 __entry->migratetype)
351); 301);
352 302
353TRACE_EVENT(mm_page_alloc_extfrag, 303TRACE_EVENT(mm_page_alloc_extfrag,
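mm_page_pcpu_drain above is the one event in this file defined with DEFINE_EVENT_PRINT() rather than DEFINE_EVENT(): it inherits the class's fields and fast-assign but supplies its own TP_printk. A minimal sketch of the distinction, with a hypothetical class and events:

	DECLARE_EVENT_CLASS(bar_class,
		TP_PROTO(struct page *page, unsigned int order),
		TP_ARGS(page, order),
		TP_STRUCT__entry(
			__field( struct page *, page )
			__field( unsigned int, order )
		),
		TP_fast_assign(
			__entry->page = page;
			__entry->order = order;
		),
		TP_printk("page=%p order=%u", __entry->page, __entry->order)
	);

	/* Inherits everything, including the class's TP_printk. */
	DEFINE_EVENT(bar_class, bar_alloc,
		TP_PROTO(struct page *page, unsigned int order),
		TP_ARGS(page, order)
	);

	/* Inherits the fields and assignment, but overrides the output. */
	DEFINE_EVENT_PRINT(bar_class, bar_drain,
		TP_PROTO(struct page *page, unsigned int order),
		TP_ARGS(page, order),
		TP_printk("page=%p pfn=%lu order=%u", __entry->page,
			  page_to_pfn(__entry->page), __entry->order)
	);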
diff --git a/include/trace/events/lockdep.h b/include/trace/events/lock.h
index bcf1d209a00d..a870ba125aa8 100644
--- a/include/trace/events/lockdep.h
+++ b/include/trace/events/lock.h
@@ -1,8 +1,8 @@
1#undef TRACE_SYSTEM 1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM lockdep 2#define TRACE_SYSTEM lock
3 3
4#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ) 4#if !defined(_TRACE_LOCK_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_LOCKDEP_H 5#define _TRACE_LOCK_H
6 6
7#include <linux/lockdep.h> 7#include <linux/lockdep.h>
8#include <linux/tracepoint.h> 8#include <linux/tracepoint.h>
@@ -90,7 +90,7 @@ TRACE_EVENT(lock_acquired,
90#endif 90#endif
91#endif 91#endif
92 92
93#endif /* _TRACE_LOCKDEP_H */ 93#endif /* _TRACE_LOCK_H */
94 94
95/* This part must be outside protection */ 95/* This part must be outside protection */
96#include <trace/define_trace.h> 96#include <trace/define_trace.h>
diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h
new file mode 100644
index 000000000000..7eee77895cb3
--- /dev/null
+++ b/include/trace/events/mce.h
@@ -0,0 +1,69 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM mce
3
4#if !defined(_TRACE_MCE_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_MCE_H
6
7#include <linux/ktime.h>
8#include <linux/tracepoint.h>
9#include <asm/mce.h>
10
11TRACE_EVENT(mce_record,
12
13 TP_PROTO(struct mce *m),
14
15 TP_ARGS(m),
16
17 TP_STRUCT__entry(
18 __field( u64, mcgcap )
19 __field( u64, mcgstatus )
20 __field( u8, bank )
21 __field( u64, status )
22 __field( u64, addr )
23 __field( u64, misc )
24 __field( u64, ip )
25 __field( u8, cs )
26 __field( u64, tsc )
27 __field( u64, walltime )
28 __field( u32, cpu )
29 __field( u32, cpuid )
30 __field( u32, apicid )
31 __field( u32, socketid )
32 __field( u8, cpuvendor )
33 ),
34
35 TP_fast_assign(
36 __entry->mcgcap = m->mcgcap;
37 __entry->mcgstatus = m->mcgstatus;
38 __entry->bank = m->bank;
39 __entry->status = m->status;
40 __entry->addr = m->addr;
41 __entry->misc = m->misc;
42 __entry->ip = m->ip;
43 __entry->cs = m->cs;
44 __entry->tsc = m->tsc;
45 __entry->walltime = m->time;
46 __entry->cpu = m->extcpu;
47 __entry->cpuid = m->cpuid;
48 __entry->apicid = m->apicid;
49 __entry->socketid = m->socketid;
50 __entry->cpuvendor = m->cpuvendor;
51 ),
52
53 TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC: %016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x",
54 __entry->cpu,
55 __entry->mcgcap, __entry->mcgstatus,
56 __entry->bank, __entry->status,
57 __entry->addr, __entry->misc,
58 __entry->cs, __entry->ip,
59 __entry->tsc,
60 __entry->cpuvendor, __entry->cpuid,
61 __entry->walltime,
62 __entry->socketid,
63 __entry->apicid)
64);
65
66#endif /* _TRACE_MCE_H */
67
68/* This part must be outside protection */
69#include <trace/define_trace.h>
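The new mce.h follows the standard layout for an events header: TRACE_SYSTEM names the directory under events/, the guard tolerates TRACE_HEADER_MULTI_READ so the header can be re-read, and the define_trace.h include sits outside the guard. Exactly one compilation unit then instantiates the tracepoints by defining CREATE_TRACE_POINTS before including the header. A sketch of that consumer side (the function here is hypothetical):

	#define CREATE_TRACE_POINTS	/* expand the header into definitions */
	#include <trace/events/mce.h>

	void example_report(struct mce *m)
	{
		trace_mce_record(m);	/* generated by TRACE_EVENT(mce_record, ...) */
	}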
diff --git a/include/trace/events/module.h b/include/trace/events/module.h
index 84160fb18478..4b0f48ba16a6 100644
--- a/include/trace/events/module.h
+++ b/include/trace/events/module.h
@@ -51,7 +51,7 @@ TRACE_EVENT(module_free,
51 TP_printk("%s", __get_str(name)) 51 TP_printk("%s", __get_str(name))
52); 52);
53 53
54TRACE_EVENT(module_get, 54DECLARE_EVENT_CLASS(module_refcnt,
55 55
56 TP_PROTO(struct module *mod, unsigned long ip, int refcnt), 56 TP_PROTO(struct module *mod, unsigned long ip, int refcnt),
57 57
@@ -73,26 +73,18 @@ TRACE_EVENT(module_get,
73 __get_str(name), (void *)__entry->ip, __entry->refcnt) 73 __get_str(name), (void *)__entry->ip, __entry->refcnt)
74); 74);
75 75
76TRACE_EVENT(module_put, 76DEFINE_EVENT(module_refcnt, module_get,
77 77
78 TP_PROTO(struct module *mod, unsigned long ip, int refcnt), 78 TP_PROTO(struct module *mod, unsigned long ip, int refcnt),
79 79
80 TP_ARGS(mod, ip, refcnt), 80 TP_ARGS(mod, ip, refcnt)
81);
81 82
82 TP_STRUCT__entry( 83DEFINE_EVENT(module_refcnt, module_put,
83 __field( unsigned long, ip )
84 __field( int, refcnt )
85 __string( name, mod->name )
86 ),
87 84
88 TP_fast_assign( 85 TP_PROTO(struct module *mod, unsigned long ip, int refcnt),
89 __entry->ip = ip;
90 __entry->refcnt = refcnt;
91 __assign_str(name, mod->name);
92 ),
93 86
94 TP_printk("%s call_site=%pf refcnt=%d", 87 TP_ARGS(mod, ip, refcnt)
95 __get_str(name), (void *)__entry->ip, __entry->refcnt)
96); 88);
97 89
98TRACE_EVENT(module_request, 90TRACE_EVENT(module_request,
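The module_refcnt class above stores the module name with __string()/__assign_str() rather than a fixed char array, so each ring-buffer record only carries as many bytes as the name actually needs. A minimal trace-header fragment showing the three dynamic-string helpers together (the event is hypothetical):

	TRACE_EVENT(mod_example,

		TP_PROTO(struct module *mod),

		TP_ARGS(mod),

		TP_STRUCT__entry(
			__string( name, mod->name )	/* reserve a variable-length slot */
		),

		TP_fast_assign(
			__assign_str(name, mod->name);	/* copy the string into the slot */
		),

		TP_printk("%s", __get_str(name))	/* read it back at print time */
	);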
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index ea6d579261ad..c4efe9b8280d 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -16,9 +16,7 @@ enum {
16}; 16};
17#endif 17#endif
18 18
19 19DECLARE_EVENT_CLASS(power,
20
21TRACE_EVENT(power_start,
22 20
23 TP_PROTO(unsigned int type, unsigned int state), 21 TP_PROTO(unsigned int type, unsigned int state),
24 22
@@ -37,42 +35,36 @@ TRACE_EVENT(power_start,
37 TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long)__entry->state) 35 TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long)__entry->state)
38); 36);
39 37
40TRACE_EVENT(power_end, 38DEFINE_EVENT(power, power_start,
41
42 TP_PROTO(int dummy),
43 39
44 TP_ARGS(dummy), 40 TP_PROTO(unsigned int type, unsigned int state),
45 41
46 TP_STRUCT__entry( 42 TP_ARGS(type, state)
47 __field( u64, dummy ) 43);
48 ),
49 44
50 TP_fast_assign( 45DEFINE_EVENT(power, power_frequency,
51 __entry->dummy = 0xffff;
52 ),
53 46
54 TP_printk("dummy=%lu", (unsigned long)__entry->dummy) 47 TP_PROTO(unsigned int type, unsigned int state),
55 48
49 TP_ARGS(type, state)
56); 50);
57 51
52TRACE_EVENT(power_end,
58 53
59TRACE_EVENT(power_frequency, 54 TP_PROTO(int dummy),
60
61 TP_PROTO(unsigned int type, unsigned int state),
62 55
63 TP_ARGS(type, state), 56 TP_ARGS(dummy),
64 57
65 TP_STRUCT__entry( 58 TP_STRUCT__entry(
66 __field( u64, type ) 59 __field( u64, dummy )
67 __field( u64, state )
68 ), 60 ),
69 61
70 TP_fast_assign( 62 TP_fast_assign(
71 __entry->type = type; 63 __entry->dummy = 0xffff;
72 __entry->state = state;
73 ), 64 ),
74 65
75 TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long) __entry->state) 66 TP_printk("dummy=%lu", (unsigned long)__entry->dummy)
67
76); 68);
77 69
78#endif /* _TRACE_POWER_H */ 70#endif /* _TRACE_POWER_H */
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 4069c43f4187..cfceb0b73e20 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -26,7 +26,7 @@ TRACE_EVENT(sched_kthread_stop,
26 __entry->pid = t->pid; 26 __entry->pid = t->pid;
27 ), 27 ),
28 28
29 TP_printk("task %s:%d", __entry->comm, __entry->pid) 29 TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
30); 30);
31 31
32/* 32/*
@@ -46,7 +46,7 @@ TRACE_EVENT(sched_kthread_stop_ret,
46 __entry->ret = ret; 46 __entry->ret = ret;
47 ), 47 ),
48 48
49 TP_printk("ret %d", __entry->ret) 49 TP_printk("ret=%d", __entry->ret)
50); 50);
51 51
52/* 52/*
@@ -73,7 +73,7 @@ TRACE_EVENT(sched_wait_task,
73 __entry->prio = p->prio; 73 __entry->prio = p->prio;
74 ), 74 ),
75 75
76 TP_printk("task %s:%d [%d]", 76 TP_printk("comm=%s pid=%d prio=%d",
77 __entry->comm, __entry->pid, __entry->prio) 77 __entry->comm, __entry->pid, __entry->prio)
78); 78);
79 79
@@ -83,7 +83,7 @@ TRACE_EVENT(sched_wait_task,
83 * (NOTE: the 'rq' argument is not used by generic trace events, 83 * (NOTE: the 'rq' argument is not used by generic trace events,
84 * but used by the latency tracer plugin. ) 84 * but used by the latency tracer plugin. )
85 */ 85 */
86TRACE_EVENT(sched_wakeup, 86DECLARE_EVENT_CLASS(sched_wakeup_template,
87 87
88 TP_PROTO(struct rq *rq, struct task_struct *p, int success), 88 TP_PROTO(struct rq *rq, struct task_struct *p, int success),
89 89
@@ -94,7 +94,7 @@ TRACE_EVENT(sched_wakeup,
94 __field( pid_t, pid ) 94 __field( pid_t, pid )
95 __field( int, prio ) 95 __field( int, prio )
96 __field( int, success ) 96 __field( int, success )
97 __field( int, cpu ) 97 __field( int, target_cpu )
98 ), 98 ),
99 99
100 TP_fast_assign( 100 TP_fast_assign(
@@ -102,46 +102,27 @@ TRACE_EVENT(sched_wakeup,
102 __entry->pid = p->pid; 102 __entry->pid = p->pid;
103 __entry->prio = p->prio; 103 __entry->prio = p->prio;
104 __entry->success = success; 104 __entry->success = success;
105 __entry->cpu = task_cpu(p); 105 __entry->target_cpu = task_cpu(p);
106 ), 106 ),
107 107
108 TP_printk("task %s:%d [%d] success=%d [%03d]", 108 TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d",
109 __entry->comm, __entry->pid, __entry->prio, 109 __entry->comm, __entry->pid, __entry->prio,
110 __entry->success, __entry->cpu) 110 __entry->success, __entry->target_cpu)
111); 111);
112 112
113DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
114 TP_PROTO(struct rq *rq, struct task_struct *p, int success),
115 TP_ARGS(rq, p, success));
116
113/* 117/*
114 * Tracepoint for waking up a new task: 118 * Tracepoint for waking up a new task:
115 * 119 *
116 * (NOTE: the 'rq' argument is not used by generic trace events, 120 * (NOTE: the 'rq' argument is not used by generic trace events,
117 * but used by the latency tracer plugin. ) 121 * but used by the latency tracer plugin. )
118 */ 122 */
119TRACE_EVENT(sched_wakeup_new, 123DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
120 124 TP_PROTO(struct rq *rq, struct task_struct *p, int success),
121 TP_PROTO(struct rq *rq, struct task_struct *p, int success), 125 TP_ARGS(rq, p, success));
122
123 TP_ARGS(rq, p, success),
124
125 TP_STRUCT__entry(
126 __array( char, comm, TASK_COMM_LEN )
127 __field( pid_t, pid )
128 __field( int, prio )
129 __field( int, success )
130 __field( int, cpu )
131 ),
132
133 TP_fast_assign(
134 memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
135 __entry->pid = p->pid;
136 __entry->prio = p->prio;
137 __entry->success = success;
138 __entry->cpu = task_cpu(p);
139 ),
140
141 TP_printk("task %s:%d [%d] success=%d [%03d]",
142 __entry->comm, __entry->pid, __entry->prio,
143 __entry->success, __entry->cpu)
144);
145 126
146/* 127/*
147 * Tracepoint for task switches, performed by the scheduler: 128 * Tracepoint for task switches, performed by the scheduler:
@@ -176,7 +157,7 @@ TRACE_EVENT(sched_switch,
176 __entry->next_prio = next->prio; 157 __entry->next_prio = next->prio;
177 ), 158 ),
178 159
179 TP_printk("task %s:%d [%d] (%s) ==> %s:%d [%d]", 160 TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s ==> next_comm=%s next_pid=%d next_prio=%d",
180 __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, 161 __entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
181 __entry->prev_state ? 162 __entry->prev_state ?
182 __print_flags(__entry->prev_state, "|", 163 __print_flags(__entry->prev_state, "|",
@@ -211,15 +192,12 @@ TRACE_EVENT(sched_migrate_task,
211 __entry->dest_cpu = dest_cpu; 192 __entry->dest_cpu = dest_cpu;
212 ), 193 ),
213 194
214 TP_printk("task %s:%d [%d] from: %d to: %d", 195 TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d",
215 __entry->comm, __entry->pid, __entry->prio, 196 __entry->comm, __entry->pid, __entry->prio,
216 __entry->orig_cpu, __entry->dest_cpu) 197 __entry->orig_cpu, __entry->dest_cpu)
217); 198);
218 199
219/* 200DECLARE_EVENT_CLASS(sched_process_template,
220 * Tracepoint for freeing a task:
221 */
222TRACE_EVENT(sched_process_free,
223 201
224 TP_PROTO(struct task_struct *p), 202 TP_PROTO(struct task_struct *p),
225 203
@@ -237,34 +215,24 @@ TRACE_EVENT(sched_process_free,
237 __entry->prio = p->prio; 215 __entry->prio = p->prio;
238 ), 216 ),
239 217
240 TP_printk("task %s:%d [%d]", 218 TP_printk("comm=%s pid=%d prio=%d",
241 __entry->comm, __entry->pid, __entry->prio) 219 __entry->comm, __entry->pid, __entry->prio)
242); 220);
243 221
244/* 222/*
245 * Tracepoint for a task exiting: 223 * Tracepoint for freeing a task:
246 */ 224 */
247TRACE_EVENT(sched_process_exit, 225DEFINE_EVENT(sched_process_template, sched_process_free,
226 TP_PROTO(struct task_struct *p),
227 TP_ARGS(p));
228
248 229
249 TP_PROTO(struct task_struct *p), 230/*
250 231 * Tracepoint for a task exiting:
251 TP_ARGS(p), 232 */
252 233DEFINE_EVENT(sched_process_template, sched_process_exit,
253 TP_STRUCT__entry( 234 TP_PROTO(struct task_struct *p),
254 __array( char, comm, TASK_COMM_LEN ) 235 TP_ARGS(p));
255 __field( pid_t, pid )
256 __field( int, prio )
257 ),
258
259 TP_fast_assign(
260 memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
261 __entry->pid = p->pid;
262 __entry->prio = p->prio;
263 ),
264
265 TP_printk("task %s:%d [%d]",
266 __entry->comm, __entry->pid, __entry->prio)
267);
268 236
269/* 237/*
270 * Tracepoint for a waiting task: 238 * Tracepoint for a waiting task:
@@ -287,7 +255,7 @@ TRACE_EVENT(sched_process_wait,
287 __entry->prio = current->prio; 255 __entry->prio = current->prio;
288 ), 256 ),
289 257
290 TP_printk("task %s:%d [%d]", 258 TP_printk("comm=%s pid=%d prio=%d",
291 __entry->comm, __entry->pid, __entry->prio) 259 __entry->comm, __entry->pid, __entry->prio)
292); 260);
293 261
@@ -314,46 +282,16 @@ TRACE_EVENT(sched_process_fork,
314 __entry->child_pid = child->pid; 282 __entry->child_pid = child->pid;
315 ), 283 ),
316 284
317 TP_printk("parent %s:%d child %s:%d", 285 TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d",
318 __entry->parent_comm, __entry->parent_pid, 286 __entry->parent_comm, __entry->parent_pid,
319 __entry->child_comm, __entry->child_pid) 287 __entry->child_comm, __entry->child_pid)
320); 288);
321 289
322/* 290/*
323 * Tracepoint for sending a signal:
324 */
325TRACE_EVENT(sched_signal_send,
326
327 TP_PROTO(int sig, struct task_struct *p),
328
329 TP_ARGS(sig, p),
330
331 TP_STRUCT__entry(
332 __field( int, sig )
333 __array( char, comm, TASK_COMM_LEN )
334 __field( pid_t, pid )
335 ),
336
337 TP_fast_assign(
338 memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
339 __entry->pid = p->pid;
340 __entry->sig = sig;
341 ),
342
343 TP_printk("sig: %d task %s:%d",
344 __entry->sig, __entry->comm, __entry->pid)
345);
346
347/*
348 * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE 291 * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
349 * adding sched_stat support to SCHED_FIFO/RR would be welcome. 292 * adding sched_stat support to SCHED_FIFO/RR would be welcome.
350 */ 293 */
351 294DECLARE_EVENT_CLASS(sched_stat_template,
352/*
353 * Tracepoint for accounting wait time (time the task is runnable
354 * but not actually running due to scheduler contention).
355 */
356TRACE_EVENT(sched_stat_wait,
357 295
358 TP_PROTO(struct task_struct *tsk, u64 delay), 296 TP_PROTO(struct task_struct *tsk, u64 delay),
359 297
@@ -374,11 +312,36 @@ TRACE_EVENT(sched_stat_wait,
374 __perf_count(delay); 312 __perf_count(delay);
375 ), 313 ),
376 314
377 TP_printk("task: %s:%d wait: %Lu [ns]", 315 TP_printk("comm=%s pid=%d delay=%Lu [ns]",
378 __entry->comm, __entry->pid, 316 __entry->comm, __entry->pid,
379 (unsigned long long)__entry->delay) 317 (unsigned long long)__entry->delay)
380); 318);
381 319
320
321/*
322 * Tracepoint for accounting wait time (time the task is runnable
323 * but not actually running due to scheduler contention).
324 */
325DEFINE_EVENT(sched_stat_template, sched_stat_wait,
326 TP_PROTO(struct task_struct *tsk, u64 delay),
327 TP_ARGS(tsk, delay));
328
329/*
330 * Tracepoint for accounting sleep time (time the task is not runnable,
331 * including iowait, see below).
332 */
333DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
334 TP_PROTO(struct task_struct *tsk, u64 delay),
335 TP_ARGS(tsk, delay));
336
337/*
338 * Tracepoint for accounting iowait time (time the task is not runnable
339 * due to waiting on IO to complete).
340 */
341DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
342 TP_PROTO(struct task_struct *tsk, u64 delay),
343 TP_ARGS(tsk, delay));
344
382/* 345/*
383 * Tracepoint for accounting runtime (time the task is executing 346 * Tracepoint for accounting runtime (time the task is executing
384 * on a CPU). 347 * on a CPU).
@@ -406,72 +369,12 @@ TRACE_EVENT(sched_stat_runtime,
406 __perf_count(runtime); 369 __perf_count(runtime);
407 ), 370 ),
408 371
409 TP_printk("task: %s:%d runtime: %Lu [ns], vruntime: %Lu [ns]", 372 TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",
410 __entry->comm, __entry->pid, 373 __entry->comm, __entry->pid,
411 (unsigned long long)__entry->runtime, 374 (unsigned long long)__entry->runtime,
412 (unsigned long long)__entry->vruntime) 375 (unsigned long long)__entry->vruntime)
413); 376);
414 377
415/*
416 * Tracepoint for accounting sleep time (time the task is not runnable,
417 * including iowait, see below).
418 */
419TRACE_EVENT(sched_stat_sleep,
420
421 TP_PROTO(struct task_struct *tsk, u64 delay),
422
423 TP_ARGS(tsk, delay),
424
425 TP_STRUCT__entry(
426 __array( char, comm, TASK_COMM_LEN )
427 __field( pid_t, pid )
428 __field( u64, delay )
429 ),
430
431 TP_fast_assign(
432 memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
433 __entry->pid = tsk->pid;
434 __entry->delay = delay;
435 )
436 TP_perf_assign(
437 __perf_count(delay);
438 ),
439
440 TP_printk("task: %s:%d sleep: %Lu [ns]",
441 __entry->comm, __entry->pid,
442 (unsigned long long)__entry->delay)
443);
444
445/*
446 * Tracepoint for accounting iowait time (time the task is not runnable
447 * due to waiting on IO to complete).
448 */
449TRACE_EVENT(sched_stat_iowait,
450
451 TP_PROTO(struct task_struct *tsk, u64 delay),
452
453 TP_ARGS(tsk, delay),
454
455 TP_STRUCT__entry(
456 __array( char, comm, TASK_COMM_LEN )
457 __field( pid_t, pid )
458 __field( u64, delay )
459 ),
460
461 TP_fast_assign(
462 memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
463 __entry->pid = tsk->pid;
464 __entry->delay = delay;
465 )
466 TP_perf_assign(
467 __perf_count(delay);
468 ),
469
470 TP_printk("task: %s:%d iowait: %Lu [ns]",
471 __entry->comm, __entry->pid,
472 (unsigned long long)__entry->delay)
473);
474
475#endif /* _TRACE_SCHED_H */ 378#endif /* _TRACE_SCHED_H */
476 379
477/* This part must be outside protection */ 380/* This part must be outside protection */
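The sched_stat_template consolidation also shows the contract DEFINE_EVENT() imposes: every event of a class must repeat the class's TP_PROTO/TP_ARGS exactly, and extending the family later is then a three-line change. A hedged sketch of adding one more statistic (the event name here is hypothetical):

	/* Proto and args must match sched_stat_template verbatim. */
	DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
		TP_PROTO(struct task_struct *tsk, u64 delay),
		TP_ARGS(tsk, delay));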
diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h
new file mode 100644
index 000000000000..a510b75ac304
--- /dev/null
+++ b/include/trace/events/signal.h
@@ -0,0 +1,173 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM signal
3
4#if !defined(_TRACE_SIGNAL_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_SIGNAL_H
6
7#include <linux/signal.h>
8#include <linux/sched.h>
9#include <linux/tracepoint.h>
10
11#define TP_STORE_SIGINFO(__entry, info) \
12 do { \
13 if (info == SEND_SIG_NOINFO) { \
14 __entry->errno = 0; \
15 __entry->code = SI_USER; \
16 } else if (info == SEND_SIG_PRIV) { \
17 __entry->errno = 0; \
18 __entry->code = SI_KERNEL; \
19 } else { \
20 __entry->errno = info->si_errno; \
21 __entry->code = info->si_code; \
22 } \
23 } while (0)
24
25/**
26 * signal_generate - called when a signal is generated
27 * @sig: signal number
28 * @info: pointer to struct siginfo
29 * @task: pointer to struct task_struct
30 *
 31 * The current process sends signal 'sig' to the 'task' process with
 32 * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV,
 33 * 'info' is not a real pointer and its fields must not be accessed.
 34 * Instead, SEND_SIG_NOINFO means that si_code is SI_USER, and
 35 * SEND_SIG_PRIV means that si_code is SI_KERNEL.
36 */
37TRACE_EVENT(signal_generate,
38
39 TP_PROTO(int sig, struct siginfo *info, struct task_struct *task),
40
41 TP_ARGS(sig, info, task),
42
43 TP_STRUCT__entry(
44 __field( int, sig )
45 __field( int, errno )
46 __field( int, code )
47 __array( char, comm, TASK_COMM_LEN )
48 __field( pid_t, pid )
49 ),
50
51 TP_fast_assign(
52 __entry->sig = sig;
53 TP_STORE_SIGINFO(__entry, info);
54 memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
55 __entry->pid = task->pid;
56 ),
57
58 TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d",
59 __entry->sig, __entry->errno, __entry->code,
60 __entry->comm, __entry->pid)
61);
62
63/**
64 * signal_deliver - called when a signal is delivered
65 * @sig: signal number
66 * @info: pointer to struct siginfo
67 * @ka: pointer to struct k_sigaction
68 *
 69 * A 'sig' signal is delivered to the current process with 'info' siginfo,
 70 * and it will be handled by 'ka'. ka->sa.sa_handler can be SIG_IGN or
 71 * SIG_DFL.
 72 * Note that some signals reported by the signal_generate tracepoint can be
 73 * lost, ignored or modified (by a debugger) before hitting this tracepoint.
 74 * This means the tracepoint shows which signals are actually delivered,
 75 * but matching generated signals to delivered signals may not be exact.
76 */
77TRACE_EVENT(signal_deliver,
78
79 TP_PROTO(int sig, struct siginfo *info, struct k_sigaction *ka),
80
81 TP_ARGS(sig, info, ka),
82
83 TP_STRUCT__entry(
84 __field( int, sig )
85 __field( int, errno )
86 __field( int, code )
87 __field( unsigned long, sa_handler )
88 __field( unsigned long, sa_flags )
89 ),
90
91 TP_fast_assign(
92 __entry->sig = sig;
93 TP_STORE_SIGINFO(__entry, info);
94 __entry->sa_handler = (unsigned long)ka->sa.sa_handler;
95 __entry->sa_flags = ka->sa.sa_flags;
96 ),
97
98 TP_printk("sig=%d errno=%d code=%d sa_handler=%lx sa_flags=%lx",
99 __entry->sig, __entry->errno, __entry->code,
100 __entry->sa_handler, __entry->sa_flags)
101);
102
103/**
104 * signal_overflow_fail - called when the signal queue overflows
105 * @sig: signal number
106 * @group: whether the signal is sent to a process group (bool)
107 * @info: pointer to struct siginfo
108 *
109 * The kernel fails to generate signal 'sig' with 'info' siginfo because
110 * the siginfo queue has overflowed, and the signal is dropped.
111 * 'group' is not 0 if the signal will be sent to a process group.
112 * 'sig' is always one of the RT signals.
113 */
114TRACE_EVENT(signal_overflow_fail,
115
116 TP_PROTO(int sig, int group, struct siginfo *info),
117
118 TP_ARGS(sig, group, info),
119
120 TP_STRUCT__entry(
121 __field( int, sig )
122 __field( int, group )
123 __field( int, errno )
124 __field( int, code )
125 ),
126
127 TP_fast_assign(
128 __entry->sig = sig;
129 __entry->group = group;
130 TP_STORE_SIGINFO(__entry, info);
131 ),
132
133 TP_printk("sig=%d group=%d errno=%d code=%d",
134 __entry->sig, __entry->group, __entry->errno, __entry->code)
135);
136
137/**
138 * signal_lose_info - called when siginfo is lost
139 * @sig: signal number
140 * @group: whether the signal is sent to a process group (bool)
141 * @info: pointer to struct siginfo
142 *
143 * The kernel generates signal 'sig' but loses its 'info' siginfo because
144 * the siginfo queue has overflowed.
145 * 'group' is not 0 if the signal will be sent to a process group.
146 * 'sig' is always one of the non-RT signals.
147 */
148TRACE_EVENT(signal_lose_info,
149
150 TP_PROTO(int sig, int group, struct siginfo *info),
151
152 TP_ARGS(sig, group, info),
153
154 TP_STRUCT__entry(
155 __field( int, sig )
156 __field( int, group )
157 __field( int, errno )
158 __field( int, code )
159 ),
160
161 TP_fast_assign(
162 __entry->sig = sig;
163 __entry->group = group;
164 TP_STORE_SIGINFO(__entry, info);
165 ),
166
167 TP_printk("sig=%d group=%d errno=%d code=%d",
168 __entry->sig, __entry->group, __entry->errno, __entry->code)
169);
170#endif /* _TRACE_SIGNAL_H */
171
172/* This part must be outside protection */
173#include <trace/define_trace.h>
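TP_STORE_SIGINFO exists because SEND_SIG_NOINFO and SEND_SIG_PRIV are sentinel values smuggled through the struct siginfo pointer; dereferencing either would be a bug, so the macro decodes them before touching any field. A self-contained sketch of the same sentinel-pointer pattern (types, names and constants are illustrative, not the kernel's):

	struct ex_info {
		int si_errno;
		int si_code;
	};

	#define EX_SIG_NOINFO	((struct ex_info *) 0)	/* "no siginfo supplied" */
	#define EX_SIG_PRIV	((struct ex_info *) 1)	/* "kernel-internal signal" */

	static void ex_store(int *errno_out, int *code_out, struct ex_info *info)
	{
		if (info == EX_SIG_NOINFO) {
			*errno_out = 0;
			*code_out = 0;		/* stands in for SI_USER */
		} else if (info == EX_SIG_PRIV) {
			*errno_out = 0;
			*code_out = 0x80;	/* stands in for SI_KERNEL */
		} else {			/* a real pointer: safe to read */
			*errno_out = info->si_errno;
			*code_out = info->si_code;
		}
	}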
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 1844c48d640e..e5ce87a0498d 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -26,7 +26,7 @@ TRACE_EVENT(timer_init,
26 __entry->timer = timer; 26 __entry->timer = timer;
27 ), 27 ),
28 28
29 TP_printk("timer %p", __entry->timer) 29 TP_printk("timer=%p", __entry->timer)
30); 30);
31 31
32/** 32/**
@@ -54,7 +54,7 @@ TRACE_EVENT(timer_start,
54 __entry->now = jiffies; 54 __entry->now = jiffies;
55 ), 55 ),
56 56
57 TP_printk("timer %p: func %pf, expires %lu, timeout %ld", 57 TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld]",
58 __entry->timer, __entry->function, __entry->expires, 58 __entry->timer, __entry->function, __entry->expires,
59 (long)__entry->expires - __entry->now) 59 (long)__entry->expires - __entry->now)
60); 60);
@@ -81,7 +81,7 @@ TRACE_EVENT(timer_expire_entry,
81 __entry->now = jiffies; 81 __entry->now = jiffies;
82 ), 82 ),
83 83
84 TP_printk("timer %p: now %lu", __entry->timer, __entry->now) 84 TP_printk("timer=%p now=%lu", __entry->timer, __entry->now)
85); 85);
86 86
87/** 87/**
@@ -108,7 +108,7 @@ TRACE_EVENT(timer_expire_exit,
108 __entry->timer = timer; 108 __entry->timer = timer;
109 ), 109 ),
110 110
111 TP_printk("timer %p", __entry->timer) 111 TP_printk("timer=%p", __entry->timer)
112); 112);
113 113
114/** 114/**
@@ -129,7 +129,7 @@ TRACE_EVENT(timer_cancel,
129 __entry->timer = timer; 129 __entry->timer = timer;
130 ), 130 ),
131 131
132 TP_printk("timer %p", __entry->timer) 132 TP_printk("timer=%p", __entry->timer)
133); 133);
134 134
135/** 135/**
@@ -140,24 +140,24 @@ TRACE_EVENT(timer_cancel,
140 */ 140 */
141TRACE_EVENT(hrtimer_init, 141TRACE_EVENT(hrtimer_init,
142 142
143 TP_PROTO(struct hrtimer *timer, clockid_t clockid, 143 TP_PROTO(struct hrtimer *hrtimer, clockid_t clockid,
144 enum hrtimer_mode mode), 144 enum hrtimer_mode mode),
145 145
146 TP_ARGS(timer, clockid, mode), 146 TP_ARGS(hrtimer, clockid, mode),
147 147
148 TP_STRUCT__entry( 148 TP_STRUCT__entry(
149 __field( void *, timer ) 149 __field( void *, hrtimer )
150 __field( clockid_t, clockid ) 150 __field( clockid_t, clockid )
151 __field( enum hrtimer_mode, mode ) 151 __field( enum hrtimer_mode, mode )
152 ), 152 ),
153 153
154 TP_fast_assign( 154 TP_fast_assign(
155 __entry->timer = timer; 155 __entry->hrtimer = hrtimer;
156 __entry->clockid = clockid; 156 __entry->clockid = clockid;
157 __entry->mode = mode; 157 __entry->mode = mode;
158 ), 158 ),
159 159
160 TP_printk("hrtimer %p, clockid %s, mode %s", __entry->timer, 160 TP_printk("hrtimer=%p clockid=%s mode=%s", __entry->hrtimer,
161 __entry->clockid == CLOCK_REALTIME ? 161 __entry->clockid == CLOCK_REALTIME ?
162 "CLOCK_REALTIME" : "CLOCK_MONOTONIC", 162 "CLOCK_REALTIME" : "CLOCK_MONOTONIC",
163 __entry->mode == HRTIMER_MODE_ABS ? 163 __entry->mode == HRTIMER_MODE_ABS ?
@@ -170,26 +170,26 @@ TRACE_EVENT(hrtimer_init,
170 */ 170 */
171TRACE_EVENT(hrtimer_start, 171TRACE_EVENT(hrtimer_start,
172 172
173 TP_PROTO(struct hrtimer *timer), 173 TP_PROTO(struct hrtimer *hrtimer),
174 174
175 TP_ARGS(timer), 175 TP_ARGS(hrtimer),
176 176
177 TP_STRUCT__entry( 177 TP_STRUCT__entry(
178 __field( void *, timer ) 178 __field( void *, hrtimer )
179 __field( void *, function ) 179 __field( void *, function )
180 __field( s64, expires ) 180 __field( s64, expires )
181 __field( s64, softexpires ) 181 __field( s64, softexpires )
182 ), 182 ),
183 183
184 TP_fast_assign( 184 TP_fast_assign(
185 __entry->timer = timer; 185 __entry->hrtimer = hrtimer;
186 __entry->function = timer->function; 186 __entry->function = hrtimer->function;
187 __entry->expires = hrtimer_get_expires(timer).tv64; 187 __entry->expires = hrtimer_get_expires(hrtimer).tv64;
188 __entry->softexpires = hrtimer_get_softexpires(timer).tv64; 188 __entry->softexpires = hrtimer_get_softexpires(hrtimer).tv64;
189 ), 189 ),
190 190
191 TP_printk("hrtimer %p, func %pf, expires %llu, softexpires %llu", 191 TP_printk("hrtimer=%p function=%pf expires=%llu softexpires=%llu",
192 __entry->timer, __entry->function, 192 __entry->hrtimer, __entry->function,
193 (unsigned long long)ktime_to_ns((ktime_t) { 193 (unsigned long long)ktime_to_ns((ktime_t) {
194 .tv64 = __entry->expires }), 194 .tv64 = __entry->expires }),
195 (unsigned long long)ktime_to_ns((ktime_t) { 195 (unsigned long long)ktime_to_ns((ktime_t) {
@@ -206,23 +206,22 @@ TRACE_EVENT(hrtimer_start,
206 */ 206 */
207TRACE_EVENT(hrtimer_expire_entry, 207TRACE_EVENT(hrtimer_expire_entry,
208 208
209 TP_PROTO(struct hrtimer *timer, ktime_t *now), 209 TP_PROTO(struct hrtimer *hrtimer, ktime_t *now),
210 210
211 TP_ARGS(timer, now), 211 TP_ARGS(hrtimer, now),
212 212
213 TP_STRUCT__entry( 213 TP_STRUCT__entry(
214 __field( void *, timer ) 214 __field( void *, hrtimer )
215 __field( s64, now ) 215 __field( s64, now )
216 ), 216 ),
217 217
218 TP_fast_assign( 218 TP_fast_assign(
219 __entry->timer = timer; 219 __entry->hrtimer = hrtimer;
220 __entry->now = now->tv64; 220 __entry->now = now->tv64;
221 ), 221 ),
222 222
223 TP_printk("hrtimer %p, now %llu", __entry->timer, 223 TP_printk("hrtimer=%p now=%llu", __entry->hrtimer,
224 (unsigned long long)ktime_to_ns((ktime_t) { 224 (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now }))
225 .tv64 = __entry->now }))
226 ); 225 );
227 226
228/** 227/**
@@ -234,40 +233,40 @@ TRACE_EVENT(hrtimer_expire_entry,
234 */ 233 */
235TRACE_EVENT(hrtimer_expire_exit, 234TRACE_EVENT(hrtimer_expire_exit,
236 235
237 TP_PROTO(struct hrtimer *timer), 236 TP_PROTO(struct hrtimer *hrtimer),
238 237
239 TP_ARGS(timer), 238 TP_ARGS(hrtimer),
240 239
241 TP_STRUCT__entry( 240 TP_STRUCT__entry(
242 __field( void *, timer ) 241 __field( void *, hrtimer )
243 ), 242 ),
244 243
245 TP_fast_assign( 244 TP_fast_assign(
246 __entry->timer = timer; 245 __entry->hrtimer = hrtimer;
247 ), 246 ),
248 247
249 TP_printk("hrtimer %p", __entry->timer) 248 TP_printk("hrtimer=%p", __entry->hrtimer)
250); 249);
251 250
252/** 251/**
253 * hrtimer_cancel - called when the hrtimer is canceled 252 * hrtimer_cancel - called when the hrtimer is canceled
254 * @timer: pointer to struct hrtimer 253 * @hrtimer: pointer to struct hrtimer
255 */ 254 */
256TRACE_EVENT(hrtimer_cancel, 255TRACE_EVENT(hrtimer_cancel,
257 256
258 TP_PROTO(struct hrtimer *timer), 257 TP_PROTO(struct hrtimer *hrtimer),
259 258
260 TP_ARGS(timer), 259 TP_ARGS(hrtimer),
261 260
262 TP_STRUCT__entry( 261 TP_STRUCT__entry(
263 __field( void *, timer ) 262 __field( void *, hrtimer )
264 ), 263 ),
265 264
266 TP_fast_assign( 265 TP_fast_assign(
267 __entry->timer = timer; 266 __entry->hrtimer = hrtimer;
268 ), 267 ),
269 268
270 TP_printk("hrtimer %p", __entry->timer) 269 TP_printk("hrtimer=%p", __entry->hrtimer)
271); 270);
272 271
273/** 272/**
@@ -302,7 +301,7 @@ TRACE_EVENT(itimer_state,
302 __entry->interval_usec = value->it_interval.tv_usec; 301 __entry->interval_usec = value->it_interval.tv_usec;
303 ), 302 ),
304 303
305 TP_printk("which %d, expires %lu, it_value %lu.%lu, it_interval %lu.%lu", 304 TP_printk("which=%d expires=%lu it_value=%lu.%lu it_interval=%lu.%lu",
306 __entry->which, __entry->expires, 305 __entry->which, __entry->expires,
307 __entry->value_sec, __entry->value_usec, 306 __entry->value_sec, __entry->value_usec,
308 __entry->interval_sec, __entry->interval_usec) 307 __entry->interval_sec, __entry->interval_usec)
@@ -332,7 +331,7 @@ TRACE_EVENT(itimer_expire,
332 __entry->pid = pid_nr(pid); 331 __entry->pid = pid_nr(pid);
333 ), 332 ),
334 333
335 TP_printk("which %d, pid %d, now %lu", __entry->which, 334 TP_printk("which=%d pid=%d now=%lu", __entry->which,
336 (int) __entry->pid, __entry->now) 335 (int) __entry->pid, __entry->now)
337); 336);
338 337
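
The TP_printk strings above move from comma-separated prose ("hrtimer %p, func %pf, ...") to space-separated key=value pairs, so consumers can tokenize trace lines without a per-event parser. Illustrative output after the change (pointer value and timestamps are invented for the example):

    hrtimer_start: hrtimer=f6788d18 function=hrtimer_wakeup expires=1083308499000 softexpires=1083308499000
    itimer_expire: which=0 pid=1454 now=930
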
diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h
index e4612dbd7ba6..d6c974474e70 100644
--- a/include/trace/events/workqueue.h
+++ b/include/trace/events/workqueue.h
@@ -8,7 +8,7 @@
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <linux/tracepoint.h> 9#include <linux/tracepoint.h>
10 10
11TRACE_EVENT(workqueue_insertion, 11DECLARE_EVENT_CLASS(workqueue,
12 12
13 TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), 13 TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
14 14
@@ -30,26 +30,18 @@ TRACE_EVENT(workqueue_insertion,
30 __entry->thread_pid, __entry->func) 30 __entry->thread_pid, __entry->func)
31); 31);
32 32
33TRACE_EVENT(workqueue_execution, 33DEFINE_EVENT(workqueue, workqueue_insertion,
34 34
35 TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), 35 TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
36 36
37 TP_ARGS(wq_thread, work), 37 TP_ARGS(wq_thread, work)
38);
38 39
39 TP_STRUCT__entry( 40DEFINE_EVENT(workqueue, workqueue_execution,
40 __array(char, thread_comm, TASK_COMM_LEN)
41 __field(pid_t, thread_pid)
42 __field(work_func_t, func)
43 ),
44 41
45 TP_fast_assign( 42 TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
46 memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
47 __entry->thread_pid = wq_thread->pid;
48 __entry->func = work->func;
49 ),
50 43
51 TP_printk("thread=%s:%d func=%pf", __entry->thread_comm, 44 TP_ARGS(wq_thread, work)
52 __entry->thread_pid, __entry->func)
53); 45);
54 46
55/* Trace the creation of one workqueue thread on a cpu */ 47/* Trace the creation of one workqueue thread on a cpu */
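
With workqueue_insertion and workqueue_execution now sharing one class, further events of the same shape no longer duplicate the entry struct, assign, and print logic. A sketch of what a third event would cost (workqueue_cancellation is hypothetical, not part of this patch):

DEFINE_EVENT(workqueue, workqueue_cancellation,

	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),

	TP_ARGS(wq_thread, work)
);
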
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index dacb8ef67000..d1b3de9c1a71 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -18,6 +18,26 @@
18 18
19#include <linux/ftrace_event.h> 19#include <linux/ftrace_event.h>
20 20
21/*
22 * DECLARE_EVENT_CLASS can be used to add generic function
23 * handlers for events. That is, if all events have the same
24 * parameters and just have distinct trace points.
25 * Each tracepoint can be defined with DEFINE_EVENT and that
26 * will map the DECLARE_EVENT_CLASS to the tracepoint.
27 *
28 * TRACE_EVENT is a one-to-one mapping between tracepoint and template.
29 */
30#undef TRACE_EVENT
31#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
32 DECLARE_EVENT_CLASS(name, \
33 PARAMS(proto), \
34 PARAMS(args), \
35 PARAMS(tstruct), \
36 PARAMS(assign), \
37 PARAMS(print)); \
38 DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));
39
40
21#undef __field 41#undef __field
22#define __field(type, item) type item; 42#define __field(type, item) type item;
23 43
@@ -36,15 +56,21 @@
36#undef TP_STRUCT__entry 56#undef TP_STRUCT__entry
37#define TP_STRUCT__entry(args...) args 57#define TP_STRUCT__entry(args...) args
38 58
39#undef TRACE_EVENT 59#undef DECLARE_EVENT_CLASS
40#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ 60#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) \
41 struct ftrace_raw_##name { \ 61 struct ftrace_raw_##name { \
42 struct trace_entry ent; \ 62 struct trace_entry ent; \
43 tstruct \ 63 tstruct \
44 char __data[0]; \ 64 char __data[0]; \
45 }; \ 65 };
66#undef DEFINE_EVENT
67#define DEFINE_EVENT(template, name, proto, args) \
46 static struct ftrace_event_call event_##name 68 static struct ftrace_event_call event_##name
47 69
70#undef DEFINE_EVENT_PRINT
71#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
72 DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
73
48#undef __cpparg 74#undef __cpparg
49#define __cpparg(arg...) arg 75#define __cpparg(arg...) arg
50 76
@@ -89,12 +115,19 @@
89#undef __string 115#undef __string
90#define __string(item, src) __dynamic_array(char, item, -1) 116#define __string(item, src) __dynamic_array(char, item, -1)
91 117
92#undef TRACE_EVENT 118#undef DECLARE_EVENT_CLASS
93#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ 119#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
94 struct ftrace_data_offsets_##call { \ 120 struct ftrace_data_offsets_##call { \
95 tstruct; \ 121 tstruct; \
96 }; 122 };
97 123
124#undef DEFINE_EVENT
125#define DEFINE_EVENT(template, name, proto, args)
126
127#undef DEFINE_EVENT_PRINT
128#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
129 DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
130
98#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) 131#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
99 132
100/* 133/*
@@ -120,9 +153,10 @@
120#undef __field 153#undef __field
121#define __field(type, item) \ 154#define __field(type, item) \
122 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ 155 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
123 "offset:%u;\tsize:%u;\n", \ 156 "offset:%u;\tsize:%u;\tsigned:%u;\n", \
124 (unsigned int)offsetof(typeof(field), item), \ 157 (unsigned int)offsetof(typeof(field), item), \
125 (unsigned int)sizeof(field.item)); \ 158 (unsigned int)sizeof(field.item), \
159 (unsigned int)is_signed_type(type)); \
126 if (!ret) \ 160 if (!ret) \
127 return 0; 161 return 0;
128 162
@@ -132,19 +166,21 @@
132#undef __array 166#undef __array
133#define __array(type, item, len) \ 167#define __array(type, item, len) \
134 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ 168 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
135 "offset:%u;\tsize:%u;\n", \ 169 "offset:%u;\tsize:%u;\tsigned:%u;\n", \
136 (unsigned int)offsetof(typeof(field), item), \ 170 (unsigned int)offsetof(typeof(field), item), \
137 (unsigned int)sizeof(field.item)); \ 171 (unsigned int)sizeof(field.item), \
172 (unsigned int)is_signed_type(type)); \
138 if (!ret) \ 173 if (!ret) \
139 return 0; 174 return 0;
140 175
141#undef __dynamic_array 176#undef __dynamic_array
142#define __dynamic_array(type, item, len) \ 177#define __dynamic_array(type, item, len) \
143 ret = trace_seq_printf(s, "\tfield:__data_loc " #type "[] " #item ";\t"\ 178 ret = trace_seq_printf(s, "\tfield:__data_loc " #type "[] " #item ";\t"\
144 "offset:%u;\tsize:%u;\n", \ 179 "offset:%u;\tsize:%u;\tsigned:%u;\n", \
145 (unsigned int)offsetof(typeof(field), \ 180 (unsigned int)offsetof(typeof(field), \
146 __data_loc_##item), \ 181 __data_loc_##item), \
147 (unsigned int)sizeof(field.__data_loc_##item)); \ 182 (unsigned int)sizeof(field.__data_loc_##item), \
183 (unsigned int)is_signed_type(type)); \
148 if (!ret) \ 184 if (!ret) \
149 return 0; 185 return 0;
150 186
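
Each field description in the debugfs format files now carries a signed: attribute so parsers can sign-extend values correctly. is_signed_type() is the usual compile-time sign probe; it is essentially the following (quoted from memory, so treat the exact definition as an assumption), shown with an illustrative resulting format line:

#define is_signed_type(type)	(((type)(-1)) < (type)1)

/*
 * Illustrative line from a format file, e.g.
 * /sys/kernel/debug/tracing/events/timer/hrtimer_start/format:
 *
 *	field:s64 expires;	offset:24;	size:8;	signed:1;
 */
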
@@ -167,17 +203,50 @@
167#undef TP_perf_assign 203#undef TP_perf_assign
168#define TP_perf_assign(args...) 204#define TP_perf_assign(args...)
169 205
170#undef TRACE_EVENT 206#undef DECLARE_EVENT_CLASS
171#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ 207#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \
172static int \ 208static int \
173ftrace_format_##call(struct ftrace_event_call *unused, \ 209ftrace_format_setup_##call(struct ftrace_event_call *unused, \
174 struct trace_seq *s) \ 210 struct trace_seq *s) \
175{ \ 211{ \
176 struct ftrace_raw_##call field __attribute__((unused)); \ 212 struct ftrace_raw_##call field __attribute__((unused)); \
177 int ret = 0; \ 213 int ret = 0; \
178 \ 214 \
179 tstruct; \ 215 tstruct; \
180 \ 216 \
217 return ret; \
218} \
219 \
220static int \
221ftrace_format_##call(struct ftrace_event_call *unused, \
222 struct trace_seq *s) \
223{ \
224 int ret = 0; \
225 \
226 ret = ftrace_format_setup_##call(unused, s); \
227 if (!ret) \
228 return ret; \
229 \
230 ret = trace_seq_printf(s, "\nprint fmt: " print); \
231 \
232 return ret; \
233}
234
235#undef DEFINE_EVENT
236#define DEFINE_EVENT(template, name, proto, args)
237
238#undef DEFINE_EVENT_PRINT
239#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
240static int \
241ftrace_format_##name(struct ftrace_event_call *unused, \
242 struct trace_seq *s) \
243{ \
244 int ret = 0; \
245 \
246 ret = ftrace_format_setup_##template(unused, s); \
247 if (!ret) \
248 return ret; \
249 \
181 trace_seq_printf(s, "\nprint fmt: " print); \ 250 trace_seq_printf(s, "\nprint fmt: " print); \
182 \ 251 \
183 return ret; \ 252 return ret; \
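
Splitting ftrace_format_setup_##call out of ftrace_format_##call lets DEFINE_EVENT_PRINT reuse the template's field listing while appending its own print fmt. A hypothetical use, reusing the workqueue class from above (the event name and format string are invented for illustration):

DEFINE_EVENT_PRINT(workqueue, workqueue_insertion_verbose,

	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),

	TP_ARGS(wq_thread, work),

	TP_printk("inserted func=%pf by thread=%s:%d", __entry->func,
		  __entry->thread_comm, __entry->thread_pid)
);
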
@@ -252,10 +321,11 @@ ftrace_format_##call(struct ftrace_event_call *unused, \
252 ftrace_print_symbols_seq(p, value, symbols); \ 321 ftrace_print_symbols_seq(p, value, symbols); \
253 }) 322 })
254 323
255#undef TRACE_EVENT 324#undef DECLARE_EVENT_CLASS
256#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ 325#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
257static enum print_line_t \ 326static enum print_line_t \
258ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ 327ftrace_raw_output_id_##call(int event_id, const char *name, \
328 struct trace_iterator *iter, int flags) \
259{ \ 329{ \
260 struct trace_seq *s = &iter->seq; \ 330 struct trace_seq *s = &iter->seq; \
261 struct ftrace_raw_##call *field; \ 331 struct ftrace_raw_##call *field; \
@@ -265,6 +335,47 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \
265 \ 335 \
266 entry = iter->ent; \ 336 entry = iter->ent; \
267 \ 337 \
338 if (entry->type != event_id) { \
339 WARN_ON_ONCE(1); \
340 return TRACE_TYPE_UNHANDLED; \
341 } \
342 \
343 field = (typeof(field))entry; \
344 \
345 p = &get_cpu_var(ftrace_event_seq); \
346 trace_seq_init(p); \
347 ret = trace_seq_printf(s, "%s: ", name); \
348 if (ret) \
349 ret = trace_seq_printf(s, print); \
350 put_cpu(); \
351 if (!ret) \
352 return TRACE_TYPE_PARTIAL_LINE; \
353 \
354 return TRACE_TYPE_HANDLED; \
355}
356
357#undef DEFINE_EVENT
358#define DEFINE_EVENT(template, name, proto, args) \
359static enum print_line_t \
360ftrace_raw_output_##name(struct trace_iterator *iter, int flags) \
361{ \
362 return ftrace_raw_output_id_##template(event_##name.id, \
363 #name, iter, flags); \
364}
365
366#undef DEFINE_EVENT_PRINT
367#define DEFINE_EVENT_PRINT(template, call, proto, args, print) \
368static enum print_line_t \
369ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \
370{ \
371 struct trace_seq *s = &iter->seq; \
372 struct ftrace_raw_##template *field; \
373 struct trace_entry *entry; \
374 struct trace_seq *p; \
375 int ret; \
376 \
377 entry = iter->ent; \
378 \
268 if (entry->type != event_##call.id) { \ 379 if (entry->type != event_##call.id) { \
269 WARN_ON_ONCE(1); \ 380 WARN_ON_ONCE(1); \
270 return TRACE_TYPE_UNHANDLED; \ 381 return TRACE_TYPE_UNHANDLED; \
@@ -274,14 +385,16 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \
274 \ 385 \
275 p = &get_cpu_var(ftrace_event_seq); \ 386 p = &get_cpu_var(ftrace_event_seq); \
276 trace_seq_init(p); \ 387 trace_seq_init(p); \
277 ret = trace_seq_printf(s, #call ": " print); \ 388 ret = trace_seq_printf(s, "%s: ", #call); \
389 if (ret) \
390 ret = trace_seq_printf(s, print); \
278 put_cpu(); \ 391 put_cpu(); \
279 if (!ret) \ 392 if (!ret) \
280 return TRACE_TYPE_PARTIAL_LINE; \ 393 return TRACE_TYPE_PARTIAL_LINE; \
281 \ 394 \
282 return TRACE_TYPE_HANDLED; \ 395 return TRACE_TYPE_HANDLED; \
283} 396}
284 397
285#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) 398#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
286 399
287#undef __field_ext 400#undef __field_ext
@@ -315,8 +428,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \
315#undef __string 428#undef __string
316#define __string(item, src) __dynamic_array(char, item, -1) 429#define __string(item, src) __dynamic_array(char, item, -1)
317 430
318#undef TRACE_EVENT 431#undef DECLARE_EVENT_CLASS
319#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ 432#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \
320static int \ 433static int \
321ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ 434ftrace_define_fields_##call(struct ftrace_event_call *event_call) \
322{ \ 435{ \
@@ -332,6 +445,13 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \
332 return ret; \ 445 return ret; \
333} 446}
334 447
448#undef DEFINE_EVENT
449#define DEFINE_EVENT(template, name, proto, args)
450
451#undef DEFINE_EVENT_PRINT
452#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
453 DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
454
335#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) 455#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
336 456
337/* 457/*
@@ -358,10 +478,10 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \
358 __data_size += (len) * sizeof(type); 478 __data_size += (len) * sizeof(type);
359 479
360#undef __string 480#undef __string
361#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1) \ 481#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1)
362 482
363#undef TRACE_EVENT 483#undef DECLARE_EVENT_CLASS
364#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ 484#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
365static inline int ftrace_get_offsets_##call( \ 485static inline int ftrace_get_offsets_##call( \
366 struct ftrace_data_offsets_##call *__data_offsets, proto) \ 486 struct ftrace_data_offsets_##call *__data_offsets, proto) \
367{ \ 487{ \
@@ -373,6 +493,13 @@ static inline int ftrace_get_offsets_##call( \
373 return __data_size; \ 493 return __data_size; \
374} 494}
375 495
496#undef DEFINE_EVENT
497#define DEFINE_EVENT(template, name, proto, args)
498
499#undef DEFINE_EVENT_PRINT
500#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
501 DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
502
376#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) 503#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
377 504
378#ifdef CONFIG_EVENT_PROFILE 505#ifdef CONFIG_EVENT_PROFILE
@@ -394,21 +521,28 @@ static inline int ftrace_get_offsets_##call( \
394 * 521 *
395 */ 522 */
396 523
397#undef TRACE_EVENT 524#undef DECLARE_EVENT_CLASS
398#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ 525#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)
526
527#undef DEFINE_EVENT
528#define DEFINE_EVENT(template, name, proto, args) \
399 \ 529 \
400static void ftrace_profile_##call(proto); \ 530static void ftrace_profile_##name(proto); \
401 \ 531 \
402static int ftrace_profile_enable_##call(void) \ 532static int ftrace_profile_enable_##name(struct ftrace_event_call *unused)\
403{ \ 533{ \
404 return register_trace_##call(ftrace_profile_##call); \ 534 return register_trace_##name(ftrace_profile_##name); \
405} \ 535} \
406 \ 536 \
407static void ftrace_profile_disable_##call(void) \ 537static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
408{ \ 538{ \
409 unregister_trace_##call(ftrace_profile_##call); \ 539 unregister_trace_##name(ftrace_profile_##name); \
410} 540}
411 541
542#undef DEFINE_EVENT_PRINT
543#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
544 DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
545
412#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) 546#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
413 547
414#endif 548#endif
@@ -423,7 +557,7 @@ static void ftrace_profile_disable_##call(void) \
423 * event_trace_printk(_RET_IP_, "<call>: " <fmt>); 557 * event_trace_printk(_RET_IP_, "<call>: " <fmt>);
424 * } 558 * }
425 * 559 *
426 * static int ftrace_reg_event_<call>(void) 560 * static int ftrace_reg_event_<call>(struct ftrace_event_call *unused)
427 * { 561 * {
428 * int ret; 562 * int ret;
429 * 563 *
@@ -434,7 +568,7 @@ static void ftrace_profile_disable_##call(void) \
434 * return ret; 568 * return ret;
435 * } 569 * }
436 * 570 *
437 * static void ftrace_unreg_event_<call>(void) 571 * static void ftrace_unreg_event_<call>(struct ftrace_event_call *unused)
438 * { 572 * {
439 * unregister_trace_<call>(ftrace_event_<call>); 573 * unregister_trace_<call>(ftrace_event_<call>);
440 * } 574 * }
@@ -469,7 +603,7 @@ static void ftrace_profile_disable_##call(void) \
469 * trace_current_buffer_unlock_commit(buffer, event, irq_flags, pc); 603 * trace_current_buffer_unlock_commit(buffer, event, irq_flags, pc);
470 * } 604 * }
471 * 605 *
472 * static int ftrace_raw_reg_event_<call>(void) 606 * static int ftrace_raw_reg_event_<call>(struct ftrace_event_call *unused)
473 * { 607 * {
474 * int ret; 608 * int ret;
475 * 609 *
@@ -480,7 +614,7 @@ static void ftrace_profile_disable_##call(void) \
480 * return ret; 614 * return ret;
481 * } 615 * }
482 * 616 *
483 * static void ftrace_unreg_event_<call>(void) 617 * static void ftrace_unreg_event_<call>(struct ftrace_event_call *unused)
484 * { 618 * {
485 * unregister_trace_<call>(ftrace_raw_event_<call>); 619 * unregister_trace_<call>(ftrace_raw_event_<call>);
486 * } 620 * }
@@ -489,7 +623,7 @@ static void ftrace_profile_disable_##call(void) \
489 * .trace = ftrace_raw_output_<call>, <-- stage 2 623 * .trace = ftrace_raw_output_<call>, <-- stage 2
490 * }; 624 * };
491 * 625 *
492 * static int ftrace_raw_init_event_<call>(void) 626 * static int ftrace_raw_init_event_<call>(struct ftrace_event_call *unused)
493 * { 627 * {
494 * int id; 628 * int id;
495 * 629 *
@@ -547,15 +681,13 @@ static void ftrace_profile_disable_##call(void) \
547#define __assign_str(dst, src) \ 681#define __assign_str(dst, src) \
548 strcpy(__get_str(dst), src); 682 strcpy(__get_str(dst), src);
549 683
550#undef TRACE_EVENT 684#undef DECLARE_EVENT_CLASS
551#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ 685#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
552 \ 686 \
553static struct ftrace_event_call event_##call; \ 687static void ftrace_raw_event_id_##call(struct ftrace_event_call *event_call, \
554 \ 688 proto) \
555static void ftrace_raw_event_##call(proto) \
556{ \ 689{ \
557 struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ 690 struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
558 struct ftrace_event_call *event_call = &event_##call; \
559 struct ring_buffer_event *event; \ 691 struct ring_buffer_event *event; \
560 struct ftrace_raw_##call *entry; \ 692 struct ftrace_raw_##call *entry; \
561 struct ring_buffer *buffer; \ 693 struct ring_buffer *buffer; \
@@ -569,7 +701,7 @@ static void ftrace_raw_event_##call(proto) \
569 __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ 701 __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
570 \ 702 \
571 event = trace_current_buffer_lock_reserve(&buffer, \ 703 event = trace_current_buffer_lock_reserve(&buffer, \
572 event_##call.id, \ 704 event_call->id, \
573 sizeof(*entry) + __data_size, \ 705 sizeof(*entry) + __data_size, \
574 irq_flags, pc); \ 706 irq_flags, pc); \
575 if (!event) \ 707 if (!event) \
@@ -584,9 +716,17 @@ static void ftrace_raw_event_##call(proto) \
584 if (!filter_current_check_discard(buffer, event_call, entry, event)) \ 716 if (!filter_current_check_discard(buffer, event_call, entry, event)) \
585 trace_nowake_buffer_unlock_commit(buffer, \ 717 trace_nowake_buffer_unlock_commit(buffer, \
586 event, irq_flags, pc); \ 718 event, irq_flags, pc); \
719}
720
721#undef DEFINE_EVENT
722#define DEFINE_EVENT(template, call, proto, args) \
723 \
724static void ftrace_raw_event_##call(proto) \
725{ \
726 ftrace_raw_event_id_##template(&event_##call, args); \
587} \ 727} \
588 \ 728 \
589static int ftrace_raw_reg_event_##call(void *ptr) \ 729static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\
590{ \ 730{ \
591 int ret; \ 731 int ret; \
592 \ 732 \
@@ -597,7 +737,7 @@ static int ftrace_raw_reg_event_##call(void *ptr) \
597 return ret; \ 737 return ret; \
598} \ 738} \
599 \ 739 \
600static void ftrace_raw_unreg_event_##call(void *ptr) \ 740static void ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused)\
601{ \ 741{ \
602 unregister_trace_##call(ftrace_raw_event_##call); \ 742 unregister_trace_##call(ftrace_raw_event_##call); \
603} \ 743} \
@@ -606,7 +746,7 @@ static struct trace_event ftrace_event_type_##call = { \
606 .trace = ftrace_raw_output_##call, \ 746 .trace = ftrace_raw_output_##call, \
607}; \ 747}; \
608 \ 748 \
609static int ftrace_raw_init_event_##call(void) \ 749static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\
610{ \ 750{ \
611 int id; \ 751 int id; \
612 \ 752 \
@@ -616,7 +756,36 @@ static int ftrace_raw_init_event_##call(void) \
616 event_##call.id = id; \ 756 event_##call.id = id; \
617 INIT_LIST_HEAD(&event_##call.fields); \ 757 INIT_LIST_HEAD(&event_##call.fields); \
618 return 0; \ 758 return 0; \
619} \ 759}
760
761#undef DEFINE_EVENT_PRINT
762#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
763 DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
764
765#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
766
767#undef DECLARE_EVENT_CLASS
768#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)
769
770#undef DEFINE_EVENT
771#define DEFINE_EVENT(template, call, proto, args) \
772 \
773static struct ftrace_event_call __used \
774__attribute__((__aligned__(4))) \
775__attribute__((section("_ftrace_events"))) event_##call = { \
776 .name = #call, \
777 .system = __stringify(TRACE_SYSTEM), \
778 .event = &ftrace_event_type_##call, \
779 .raw_init = ftrace_raw_init_event_##call, \
780 .regfunc = ftrace_raw_reg_event_##call, \
781 .unregfunc = ftrace_raw_unreg_event_##call, \
782 .show_format = ftrace_format_##template, \
783 .define_fields = ftrace_define_fields_##template, \
784 _TRACE_PROFILE_INIT(call) \
785}
786
787#undef DEFINE_EVENT_PRINT
788#define DEFINE_EVENT_PRINT(template, call, proto, args, print) \
620 \ 789 \
621static struct ftrace_event_call __used \ 790static struct ftrace_event_call __used \
622__attribute__((__aligned__(4))) \ 791__attribute__((__aligned__(4))) \
@@ -628,7 +797,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
628 .regfunc = ftrace_raw_reg_event_##call, \ 797 .regfunc = ftrace_raw_reg_event_##call, \
629 .unregfunc = ftrace_raw_unreg_event_##call, \ 798 .unregfunc = ftrace_raw_unreg_event_##call, \
630 .show_format = ftrace_format_##call, \ 799 .show_format = ftrace_format_##call, \
631 .define_fields = ftrace_define_fields_##call, \ 800 .define_fields = ftrace_define_fields_##template, \
632 _TRACE_PROFILE_INIT(call) \ 801 _TRACE_PROFILE_INIT(call) \
633} 802}
634 803
@@ -646,6 +815,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
646 * struct ftrace_event_call *event_call = &event_<call>; 815 * struct ftrace_event_call *event_call = &event_<call>;
647 * extern void perf_tp_event(int, u64, u64, void *, int); 816 * extern void perf_tp_event(int, u64, u64, void *, int);
648 * struct ftrace_raw_##call *entry; 817 * struct ftrace_raw_##call *entry;
818 * struct perf_trace_buf *trace_buf;
649 * u64 __addr = 0, __count = 1; 819 * u64 __addr = 0, __count = 1;
650 * unsigned long irq_flags; 820 * unsigned long irq_flags;
651 * struct trace_entry *ent; 821 * struct trace_entry *ent;
@@ -670,14 +840,25 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
670 * __cpu = smp_processor_id(); 840 * __cpu = smp_processor_id();
671 * 841 *
672 * if (in_nmi()) 842 * if (in_nmi())
673 * raw_data = rcu_dereference(trace_profile_buf_nmi); 843 * trace_buf = rcu_dereference(perf_trace_buf_nmi);
674 * else 844 * else
675 * raw_data = rcu_dereference(trace_profile_buf); 845 * trace_buf = rcu_dereference(perf_trace_buf);
676 * 846 *
677 * if (!raw_data) 847 * if (!trace_buf)
678 * goto end; 848 * goto end;
679 * 849 *
680 * raw_data = per_cpu_ptr(raw_data, __cpu); 850 * trace_buf = per_cpu_ptr(trace_buf, __cpu);
851 *
852 * // Avoid recursion from perf that could mess up the buffer
853 * if (trace_buf->recursion++)
854 * goto end_recursion;
855 *
856 * raw_data = trace_buf->buf;
857 *
858 * // Make recursion update visible before entering perf_tp_event
859 * // so that we protect from perf recursions.
860 *
861 * barrier();
681 * 862 *
682 * //zero dead bytes from alignment to avoid stack leak to userspace: 863 * //zero dead bytes from alignment to avoid stack leak to userspace:
683 * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; 864 * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
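
The recursion counter in the commented walkthrough above guards the per-cpu buffer against reentry, e.g. a perf software event firing while the buffer is still being filled. A standalone sketch of the pattern, simplified from the comment (the buffer size and function name are illustrative):

struct perf_trace_buf {
	char	buf[8192];
	int	recursion;
};

static char *claim_trace_buf(struct perf_trace_buf *trace_buf)
{
	if (trace_buf->recursion++)
		return NULL;	/* buffer already in use on this cpu */

	/* Make the recursion update visible before perf can recurse. */
	barrier();

	return trace_buf->buf;
}
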
@@ -704,21 +885,26 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
704#undef __perf_count 885#undef __perf_count
705#define __perf_count(c) __count = (c) 886#define __perf_count(c) __count = (c)
706 887
707#undef TRACE_EVENT 888#undef DECLARE_EVENT_CLASS
708#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ 889#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
709static void ftrace_profile_##call(proto) \ 890static void \
891ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \
892 proto) \
710{ \ 893{ \
711 struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ 894 struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
712 struct ftrace_event_call *event_call = &event_##call; \ 895 extern int perf_swevent_get_recursion_context(void); \
713 extern void perf_tp_event(int, u64, u64, void *, int); \ 896 extern void perf_swevent_put_recursion_context(int rctx); \
897 extern void perf_tp_event(int, u64, u64, void *, int); \
714 struct ftrace_raw_##call *entry; \ 898 struct ftrace_raw_##call *entry; \
715 u64 __addr = 0, __count = 1; \ 899 u64 __addr = 0, __count = 1; \
716 unsigned long irq_flags; \ 900 unsigned long irq_flags; \
717 struct trace_entry *ent; \ 901 struct trace_entry *ent; \
718 int __entry_size; \ 902 int __entry_size; \
719 int __data_size; \ 903 int __data_size; \
904 char *trace_buf; \
720 char *raw_data; \ 905 char *raw_data; \
721 int __cpu; \ 906 int __cpu; \
907 int rctx; \
722 int pc; \ 908 int pc; \
723 \ 909 \
724 pc = preempt_count(); \ 910 pc = preempt_count(); \
@@ -733,17 +919,22 @@ static void ftrace_profile_##call(proto) \
733 return; \ 919 return; \
734 \ 920 \
735 local_irq_save(irq_flags); \ 921 local_irq_save(irq_flags); \
922 \
923 rctx = perf_swevent_get_recursion_context(); \
924 if (rctx < 0) \
925 goto end_recursion; \
926 \
736 __cpu = smp_processor_id(); \ 927 __cpu = smp_processor_id(); \
737 \ 928 \
738 if (in_nmi()) \ 929 if (in_nmi()) \
739 raw_data = rcu_dereference(trace_profile_buf_nmi); \ 930 trace_buf = rcu_dereference(perf_trace_buf_nmi); \
740 else \ 931 else \
741 raw_data = rcu_dereference(trace_profile_buf); \ 932 trace_buf = rcu_dereference(perf_trace_buf); \
742 \ 933 \
743 if (!raw_data) \ 934 if (!trace_buf) \
744 goto end; \ 935 goto end; \
745 \ 936 \
746 raw_data = per_cpu_ptr(raw_data, __cpu); \ 937 raw_data = per_cpu_ptr(trace_buf, __cpu); \
747 \ 938 \
748 *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ 939 *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \
749 entry = (struct ftrace_raw_##call *)raw_data; \ 940 entry = (struct ftrace_raw_##call *)raw_data; \
@@ -759,10 +950,25 @@ static void ftrace_profile_##call(proto) \
759 __entry_size); \ 950 __entry_size); \
760 \ 951 \
761end: \ 952end: \
953 perf_swevent_put_recursion_context(rctx); \
954end_recursion: \
762 local_irq_restore(irq_flags); \ 955 local_irq_restore(irq_flags); \
763 \ 956 \
764} 957}
765 958
959#undef DEFINE_EVENT
960#define DEFINE_EVENT(template, call, proto, args) \
961static void ftrace_profile_##call(proto) \
962{ \
963 struct ftrace_event_call *event_call = &event_##call; \
964 \
965 ftrace_profile_templ_##template(event_call, args); \
966}
967
968#undef DEFINE_EVENT_PRINT
969#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
970 DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
971
766#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) 972#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
767#endif /* CONFIG_EVENT_PROFILE */ 973#endif /* CONFIG_EVENT_PROFILE */
768 974
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index e972f0a40f8d..961fda3556bb 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -12,21 +12,19 @@
12 * A syscall entry in the ftrace syscalls array. 12 * A syscall entry in the ftrace syscalls array.
13 * 13 *
14 * @name: name of the syscall 14 * @name: name of the syscall
15 * @syscall_nr: number of the syscall
15 * @nb_args: number of parameters it takes 16 * @nb_args: number of parameters it takes
16 * @types: list of types as strings 17 * @types: list of types as strings
17 * @args: list of args as strings (args[i] matches types[i]) 18 * @args: list of args as strings (args[i] matches types[i])
18 * @enter_id: associated ftrace enter event id
19 * @exit_id: associated ftrace exit event id
20 * @enter_event: associated syscall_enter trace event 19 * @enter_event: associated syscall_enter trace event
21 * @exit_event: associated syscall_exit trace event 20 * @exit_event: associated syscall_exit trace event
22 */ 21 */
23struct syscall_metadata { 22struct syscall_metadata {
24 const char *name; 23 const char *name;
24 int syscall_nr;
25 int nb_args; 25 int nb_args;
26 const char **types; 26 const char **types;
27 const char **args; 27 const char **args;
28 int enter_id;
29 int exit_id;
30 28
31 struct ftrace_event_call *enter_event; 29 struct ftrace_event_call *enter_event;
32 struct ftrace_event_call *exit_event; 30 struct ftrace_event_call *exit_event;
@@ -34,29 +32,28 @@ struct syscall_metadata {
34 32
35#ifdef CONFIG_FTRACE_SYSCALLS 33#ifdef CONFIG_FTRACE_SYSCALLS
36extern unsigned long arch_syscall_addr(int nr); 34extern unsigned long arch_syscall_addr(int nr);
37extern int syscall_name_to_nr(char *name); 35extern int init_syscall_trace(struct ftrace_event_call *call);
38void set_syscall_enter_id(int num, int id); 36
39void set_syscall_exit_id(int num, int id);
40extern struct trace_event event_syscall_enter;
41extern struct trace_event event_syscall_exit;
42extern int reg_event_syscall_enter(void *ptr);
43extern void unreg_event_syscall_enter(void *ptr);
44extern int reg_event_syscall_exit(void *ptr);
45extern void unreg_event_syscall_exit(void *ptr);
46extern int syscall_enter_format(struct ftrace_event_call *call, 37extern int syscall_enter_format(struct ftrace_event_call *call,
47 struct trace_seq *s); 38 struct trace_seq *s);
48extern int syscall_exit_format(struct ftrace_event_call *call, 39extern int syscall_exit_format(struct ftrace_event_call *call,
49 struct trace_seq *s); 40 struct trace_seq *s);
50extern int syscall_enter_define_fields(struct ftrace_event_call *call); 41extern int syscall_enter_define_fields(struct ftrace_event_call *call);
51extern int syscall_exit_define_fields(struct ftrace_event_call *call); 42extern int syscall_exit_define_fields(struct ftrace_event_call *call);
43extern int reg_event_syscall_enter(struct ftrace_event_call *call);
44extern void unreg_event_syscall_enter(struct ftrace_event_call *call);
45extern int reg_event_syscall_exit(struct ftrace_event_call *call);
46extern void unreg_event_syscall_exit(struct ftrace_event_call *call);
47extern int
48ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s);
52enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); 49enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags);
53enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); 50enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags);
54#endif 51#endif
55#ifdef CONFIG_EVENT_PROFILE 52#ifdef CONFIG_EVENT_PROFILE
56int reg_prof_syscall_enter(char *name); 53int prof_sysenter_enable(struct ftrace_event_call *call);
57void unreg_prof_syscall_enter(char *name); 54void prof_sysenter_disable(struct ftrace_event_call *call);
58int reg_prof_syscall_exit(char *name); 55int prof_sysexit_enable(struct ftrace_event_call *call);
59void unreg_prof_syscall_exit(char *name); 56void prof_sysexit_disable(struct ftrace_event_call *call);
60 57
61#endif 58#endif
62 59
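
Caching syscall_nr in the metadata replaces the removed syscall_name_to_nr()/enter_id plumbing: the number is resolved once at init time and reused by every later callback. A sketch of the consuming side (it assumes, per this series' changelog, that call->data points at the event's syscall_metadata):

static int example_syscall_reg(struct ftrace_event_call *call)
{
	struct syscall_metadata *meta = call->data;	/* set at init time */

	if (meta->syscall_nr < 0)
		return -ENOSYS;	/* syscall not instrumented on this arch */

	return reg_event_syscall_enter(call);
}
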
diff --git a/kernel/Makefile b/kernel/Makefile
index dcf6789bf547..982c50e2ce53 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -21,6 +21,7 @@ CFLAGS_REMOVE_mutex-debug.o = -pg
21CFLAGS_REMOVE_rtmutex-debug.o = -pg 21CFLAGS_REMOVE_rtmutex-debug.o = -pg
22CFLAGS_REMOVE_cgroup-debug.o = -pg 22CFLAGS_REMOVE_cgroup-debug.o = -pg
23CFLAGS_REMOVE_sched_clock.o = -pg 23CFLAGS_REMOVE_sched_clock.o = -pg
24CFLAGS_REMOVE_perf_event.o = -pg
24endif 25endif
25 26
26obj-$(CONFIG_FREEZER) += freezer.o 27obj-$(CONFIG_FREEZER) += freezer.o
@@ -97,6 +98,7 @@ obj-$(CONFIG_SMP) += sched_cpupri.o
97obj-$(CONFIG_SLOW_WORK) += slow-work.o 98obj-$(CONFIG_SLOW_WORK) += slow-work.o
98obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o 99obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o
99obj-$(CONFIG_PERF_EVENTS) += perf_event.o 100obj-$(CONFIG_PERF_EVENTS) += perf_event.o
101obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
100 102
101ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) 103ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
102# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is 104# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/exit.c b/kernel/exit.c
index f7864ac2ecc1..3f45e3cf931d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -49,6 +49,7 @@
49#include <linux/init_task.h> 49#include <linux/init_task.h>
50#include <linux/perf_event.h> 50#include <linux/perf_event.h>
51#include <trace/events/sched.h> 51#include <trace/events/sched.h>
52#include <linux/hw_breakpoint.h>
52 53
53#include <asm/uaccess.h> 54#include <asm/uaccess.h>
54#include <asm/unistd.h> 55#include <asm/unistd.h>
@@ -978,6 +979,10 @@ NORET_TYPE void do_exit(long code)
978 proc_exit_connector(tsk); 979 proc_exit_connector(tsk);
979 980
980 /* 981 /*
982 * FIXME: do that only when needed, using sched_exit tracepoint
983 */
984 flush_ptrace_hw_breakpoint(tsk);
985 /*
981 * Flush inherited counters to the parent - before the parent 986 * Flush inherited counters to the parent - before the parent
982 * gets woken up by child-exit notifications. 987 * gets woken up by child-exit notifications.
983 */ 988 */
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
new file mode 100644
index 000000000000..cf5ee1628411
--- /dev/null
+++ b/kernel/hw_breakpoint.c
@@ -0,0 +1,423 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) 2007 Alan Stern
17 * Copyright (C) IBM Corporation, 2009
18 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
19 *
20 * Thanks to Ingo Molnar for his many suggestions.
21 *
22 * Authors: Alan Stern <stern@rowland.harvard.edu>
23 * K.Prasad <prasad@linux.vnet.ibm.com>
24 * Frederic Weisbecker <fweisbec@gmail.com>
25 */
26
27/*
28 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
29 * using the CPU's debug registers.
30 * This file contains the arch-independent routines.
31 */
32
33#include <linux/irqflags.h>
34#include <linux/kallsyms.h>
35#include <linux/notifier.h>
36#include <linux/kprobes.h>
37#include <linux/kdebug.h>
38#include <linux/kernel.h>
39#include <linux/module.h>
40#include <linux/percpu.h>
41#include <linux/sched.h>
42#include <linux/init.h>
43#include <linux/smp.h>
44
45#include <linux/hw_breakpoint.h>
46
47/*
48 * Constraints data
49 */
50
51/* Number of pinned cpu breakpoints in a cpu */
52static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);
53
54/* Number of pinned task breakpoints in a cpu */
55static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]);
56
57/* Number of non-pinned cpu/task breakpoints in a cpu */
58static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);
59
60/* Gather the total number of pinned and un-pinned bp in a cpuset */
61struct bp_busy_slots {
62 unsigned int pinned;
63 unsigned int flexible;
64};
65
66/* Serialize accesses to the above constraints */
67static DEFINE_MUTEX(nr_bp_mutex);
68
69/*
70 * Report the maximum number of pinned breakpoints a task
71 * has in this cpu
72 */
73static unsigned int max_task_bp_pinned(int cpu)
74{
75 int i;
76 unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu);
77
78 for (i = HBP_NUM - 1; i >= 0; i--) {
79 if (tsk_pinned[i] > 0)
80 return i + 1;
81 }
82
83 return 0;
84}
85
86/*
87 * Report the number of pinned/un-pinned breakpoints we have in
88 * a given cpu (cpu > -1) or in all of them (cpu = -1).
89 */
90static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
91{
92 if (cpu >= 0) {
93 slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
94 slots->pinned += max_task_bp_pinned(cpu);
95 slots->flexible = per_cpu(nr_bp_flexible, cpu);
96
97 return;
98 }
99
100 for_each_online_cpu(cpu) {
101 unsigned int nr;
102
103 nr = per_cpu(nr_cpu_bp_pinned, cpu);
104 nr += max_task_bp_pinned(cpu);
105
106 if (nr > slots->pinned)
107 slots->pinned = nr;
108
109 nr = per_cpu(nr_bp_flexible, cpu);
110
111 if (nr > slots->flexible)
112 slots->flexible = nr;
113 }
114}
115
116/*
117 * Add or remove a pinned breakpoint for the given task in our constraint table
118 */
119static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
120{
121 int count = 0;
122 struct perf_event *bp;
123 struct perf_event_context *ctx = tsk->perf_event_ctxp;
124 unsigned int *tsk_pinned;
125 struct list_head *list;
126 unsigned long flags;
127
128 if (WARN_ONCE(!ctx, "No perf context for this task"))
129 return;
130
131 list = &ctx->event_list;
132
133 spin_lock_irqsave(&ctx->lock, flags);
134
135 /*
136 * The current breakpoint counter is not included in the list
137 * at the open() callback time
138 */
139 list_for_each_entry(bp, list, event_entry) {
140 if (bp->attr.type == PERF_TYPE_BREAKPOINT)
141 count++;
142 }
143
144 spin_unlock_irqrestore(&ctx->lock, flags);
145
146 if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list"))
147 return;
148
149 tsk_pinned = per_cpu(task_bp_pinned, cpu);
150 if (enable) {
151 tsk_pinned[count]++;
152 if (count > 0)
153 tsk_pinned[count-1]--;
154 } else {
155 tsk_pinned[count]--;
156 if (count > 0)
157 tsk_pinned[count-1]++;
158 }
159}
160
161/*
162 * Add/remove the given breakpoint in our constraint table
163 */
164static void toggle_bp_slot(struct perf_event *bp, bool enable)
165{
166 int cpu = bp->cpu;
167 struct task_struct *tsk = bp->ctx->task;
168
169 /* Pinned counter task profiling */
170 if (tsk) {
171 if (cpu >= 0) {
172 toggle_bp_task_slot(tsk, cpu, enable);
173 return;
174 }
175
176 for_each_online_cpu(cpu)
177 toggle_bp_task_slot(tsk, cpu, enable);
178 return;
179 }
180
181 /* Pinned counter cpu profiling */
182 if (enable)
183 per_cpu(nr_cpu_bp_pinned, bp->cpu)++;
184 else
185 per_cpu(nr_cpu_bp_pinned, bp->cpu)--;
186}
187
188/*
189 * Constraints to check before allowing this new breakpoint counter:
190 *
191 * == Non-pinned counter == (Considered as pinned for now)
192 *
193 * - If attached to a single cpu, check:
194 *
195 * (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu)
196 * + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM
197 *
198 * -> If there are already non-pinned counters in this cpu, it means
199 * there is already a free slot for them.
200 * Otherwise, we check that the maximum number of per task
201 * breakpoints (for this cpu) plus the number of per cpu breakpoints
202 * (for this cpu) doesn't use up every register.
203 *
204 * - If attached to every cpu, check:
205 *
206 * (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *))
207 * + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM
208 *
209 * -> This is roughly the same, except we check the number of per cpu
210 * bp for every cpu and we keep the max one. Same for the per task
211 * breakpoints.
212 *
213 *
214 * == Pinned counter ==
215 *
216 * - If attached to a single cpu, check:
217 *
218 * ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu)
219 * + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM
220 *
221 * -> Same checks as before. But now the nr_bp_flexible, if any, must keep
222 * at least one register (or they will never be fed).
223 *
224 * - If attached to every cpu, check:
225 *
226 * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
227 * + max(per_cpu(task_bp_pinned, *))) < HBP_NUM
228 */
229int reserve_bp_slot(struct perf_event *bp)
230{
231 struct bp_busy_slots slots = {0};
232 int ret = 0;
233
234 mutex_lock(&nr_bp_mutex);
235
236 fetch_bp_busy_slots(&slots, bp->cpu);
237
238 /* Flexible counters need to keep at least one slot */
239 if (slots.pinned + (!!slots.flexible) == HBP_NUM) {
240 ret = -ENOSPC;
241 goto end;
242 }
243
244 toggle_bp_slot(bp, true);
245
246end:
247 mutex_unlock(&nr_bp_mutex);
248
249 return ret;
250}
251
252void release_bp_slot(struct perf_event *bp)
253{
254 mutex_lock(&nr_bp_mutex);
255
256 toggle_bp_slot(bp, false);
257
258 mutex_unlock(&nr_bp_mutex);
259}
260
261
262int __register_perf_hw_breakpoint(struct perf_event *bp)
263{
264 int ret;
265
266 ret = reserve_bp_slot(bp);
267 if (ret)
268 return ret;
269
270 /*
271 * Ptrace breakpoints can be temporary perf events only
272 * meant to reserve a slot. In this case, it is created disabled and
273 * we don't want to check the params right now (as we put a null addr).
274 * But perf tools create events as disabled and we want to check
275 * the params for them.
276 * This is a quick hack that will be removed soon, once we remove
277 * the tmp breakpoints from ptrace
278 */
279 if (!bp->attr.disabled || bp->callback == perf_bp_event)
280 ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
281
282 return ret;
283}
284
285int register_perf_hw_breakpoint(struct perf_event *bp)
286{
287 bp->callback = perf_bp_event;
288
289 return __register_perf_hw_breakpoint(bp);
290}
291
292/**
293 * register_user_hw_breakpoint - register a hardware breakpoint for user space
294 * @attr: breakpoint attributes
295 * @triggered: callback to trigger when we hit the breakpoint
296 * @tsk: pointer to 'task_struct' of the process to which the address belongs
297 */
298struct perf_event *
299register_user_hw_breakpoint(struct perf_event_attr *attr,
300 perf_callback_t triggered,
301 struct task_struct *tsk)
302{
303 return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
304}
305EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
306
307/**
308 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
309 * @bp: the breakpoint structure to modify
310 * @attr: new breakpoint attributes
311 * @triggered: callback to trigger when we hit the breakpoint
312 * @tsk: pointer to 'task_struct' of the process to which the address belongs
313 */
314struct perf_event *
315modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr,
316 perf_callback_t triggered,
317 struct task_struct *tsk)
318{
319 /*
320 * FIXME: do it without unregistering
321 * - We don't want to lose our slot
322 * - If the new bp is incorrect, don't lose the older one
323 */
324 unregister_hw_breakpoint(bp);
325
326 return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
327}
328EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
329
330/**
331 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
332 * @bp: the breakpoint structure to unregister
333 */
334void unregister_hw_breakpoint(struct perf_event *bp)
335{
336 if (!bp)
337 return;
338 perf_event_release_kernel(bp);
339}
340EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
341
342/**
343 * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
344 * @attr: breakpoint attributes
345 * @triggered: callback to trigger when we hit the breakpoint
346 *
347 * @return a set of per_cpu pointers to perf events
348 */
349struct perf_event **
350register_wide_hw_breakpoint(struct perf_event_attr *attr,
351 perf_callback_t triggered)
352{
353 struct perf_event **cpu_events, **pevent, *bp;
354 long err;
355 int cpu;
356
357 cpu_events = alloc_percpu(typeof(*cpu_events));
358 if (!cpu_events)
359 return ERR_PTR(-ENOMEM);
360
361 for_each_possible_cpu(cpu) {
362 pevent = per_cpu_ptr(cpu_events, cpu);
363 bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered);
364
365 *pevent = bp;
366
367 if (IS_ERR(bp)) {
368 err = PTR_ERR(bp);
369 goto fail;
370 }
371 }
372
373 return cpu_events;
374
375fail:
376 for_each_possible_cpu(cpu) {
377 pevent = per_cpu_ptr(cpu_events, cpu);
378 if (IS_ERR(*pevent))
379 break;
380 unregister_hw_breakpoint(*pevent);
381 }
382 free_percpu(cpu_events);
383 /* return the error if any */
384 return ERR_PTR(err);
385}
386EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
387
388/**
389 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
390 * @cpu_events: the per cpu set of events to unregister
391 */
392void unregister_wide_hw_breakpoint(struct perf_event **cpu_events)
393{
394 int cpu;
395 struct perf_event **pevent;
396
397 for_each_possible_cpu(cpu) {
398 pevent = per_cpu_ptr(cpu_events, cpu);
399 unregister_hw_breakpoint(*pevent);
400 }
401 free_percpu(cpu_events);
402}
403EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);
404
405static struct notifier_block hw_breakpoint_exceptions_nb = {
406 .notifier_call = hw_breakpoint_exceptions_notify,
407 /* we need to be notified first */
408 .priority = 0x7fffffff
409};
410
411static int __init init_hw_breakpoint(void)
412{
413 return register_die_notifier(&hw_breakpoint_exceptions_nb);
414}
415core_initcall(init_hw_breakpoint);
416
417
418struct pmu perf_ops_bp = {
419 .enable = arch_install_hw_breakpoint,
420 .disable = arch_uninstall_hw_breakpoint,
421 .read = hw_breakpoint_pmu_read,
422 .unthrottle = hw_breakpoint_pmu_unthrottle
423};
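
A sketch of driving the cpu-wide breakpoint API defined above; the error handling mirrors the implementation, while perf_event_attr setup happens elsewhere and the callback is passed through untouched:

static int example_wide_bp(struct perf_event_attr *attr,
			   perf_callback_t triggered)
{
	struct perf_event **cpu_events;

	cpu_events = register_wide_hw_breakpoint(attr, triggered);
	if (IS_ERR(cpu_events))
		return PTR_ERR(cpu_events);

	/* ... the breakpoint is now armed on every possible cpu ... */

	unregister_wide_hw_breakpoint(cpu_events);
	return 0;
}
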
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 8b6b8b697c68..8e5288a8a355 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -181,6 +181,7 @@ unsigned long kallsyms_lookup_name(const char *name)
181 } 181 }
182 return module_kallsyms_lookup_name(name); 182 return module_kallsyms_lookup_name(name);
183} 183}
184EXPORT_SYMBOL_GPL(kallsyms_lookup_name);
184 185
185int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, 186int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
186 unsigned long), 187 unsigned long),
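
kallsyms_lookup_name() is now exported (GPL-only) so modular users such as the kprobe tooling can resolve kernel symbols at runtime. Minimal sketch (the symbol name is only an example):

static unsigned long example_resolve(void)
{
	unsigned long addr;

	addr = kallsyms_lookup_name("jiffies");	/* example symbol */
	if (!addr)
		printk(KERN_WARNING "jiffies not found\n");

	return addr;
}
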
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1494e85b35f2..e5342a344c43 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -90,6 +90,9 @@ static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
90 */ 90 */
91static struct kprobe_blackpoint kprobe_blacklist[] = { 91static struct kprobe_blackpoint kprobe_blacklist[] = {
92 {"preempt_schedule",}, 92 {"preempt_schedule",},
93 {"native_get_debugreg",},
94 {"irq_entries_start",},
95 {"common_interrupt",},
93 {NULL} /* Terminator */ 96 {NULL} /* Terminator */
94}; 97};
95 98
@@ -673,6 +676,40 @@ static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
673 return (kprobe_opcode_t *)(((char *)addr) + p->offset); 676 return (kprobe_opcode_t *)(((char *)addr) + p->offset);
674} 677}
675 678
679/* Check passed kprobe is valid and return kprobe in kprobe_table. */
680static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
681{
682 struct kprobe *old_p, *list_p;
683
684 old_p = get_kprobe(p->addr);
685 if (unlikely(!old_p))
686 return NULL;
687
688 if (p != old_p) {
689 list_for_each_entry_rcu(list_p, &old_p->list, list)
690 if (list_p == p)
691 /* kprobe p is a valid probe */
692 goto valid;
693 return NULL;
694 }
695valid:
696 return old_p;
697}
698
699/* Return error if the kprobe is being re-registered */
700static inline int check_kprobe_rereg(struct kprobe *p)
701{
702 int ret = 0;
703 struct kprobe *old_p;
704
705 mutex_lock(&kprobe_mutex);
706 old_p = __get_valid_kprobe(p);
707 if (old_p)
708 ret = -EINVAL;
709 mutex_unlock(&kprobe_mutex);
710 return ret;
711}
712
676int __kprobes register_kprobe(struct kprobe *p) 713int __kprobes register_kprobe(struct kprobe *p)
677{ 714{
678 int ret = 0; 715 int ret = 0;
@@ -685,6 +722,10 @@ int __kprobes register_kprobe(struct kprobe *p)
685 return -EINVAL; 722 return -EINVAL;
686 p->addr = addr; 723 p->addr = addr;
687 724
725 ret = check_kprobe_rereg(p);
726 if (ret)
727 return ret;
728
688 preempt_disable(); 729 preempt_disable();
689 if (!kernel_text_address((unsigned long) p->addr) || 730 if (!kernel_text_address((unsigned long) p->addr) ||
690 in_kprobes_functions((unsigned long) p->addr)) { 731 in_kprobes_functions((unsigned long) p->addr)) {
@@ -754,26 +795,6 @@ out:
754} 795}
755EXPORT_SYMBOL_GPL(register_kprobe); 796EXPORT_SYMBOL_GPL(register_kprobe);
756 797
757/* Check passed kprobe is valid and return kprobe in kprobe_table. */
758static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
759{
760 struct kprobe *old_p, *list_p;
761
762 old_p = get_kprobe(p->addr);
763 if (unlikely(!old_p))
764 return NULL;
765
766 if (p != old_p) {
767 list_for_each_entry_rcu(list_p, &old_p->list, list)
768 if (list_p == p)
769 /* kprobe p is a valid probe */
770 goto valid;
771 return NULL;
772 }
773valid:
774 return old_p;
775}
776
777/* 798/*
778 * Unregister a kprobe without a scheduler synchronization. 799 * Unregister a kprobe without a scheduler synchronization.
779 */ 800 */
@@ -1141,6 +1162,13 @@ static void __kprobes kill_kprobe(struct kprobe *p)
1141 arch_remove_kprobe(p); 1162 arch_remove_kprobe(p);
1142} 1163}
1143 1164
1165void __kprobes dump_kprobe(struct kprobe *kp)
1166{
1167 printk(KERN_WARNING "Dumping kprobe:\n");
1168 printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n",
1169 kp->symbol_name, kp->addr, kp->offset);
1170}
1171
1144/* Module notifier call back, checking kprobes on the module */ 1172/* Module notifier call back, checking kprobes on the module */
1145static int __kprobes kprobes_module_callback(struct notifier_block *nb, 1173static int __kprobes kprobes_module_callback(struct notifier_block *nb,
1146 unsigned long val, void *data) 1174 unsigned long val, void *data)
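
With check_kprobe_rereg() in place, double registration of the same kprobe object fails fast instead of corrupting the probe lists. Sketch (the target symbol and handler are illustrative):

static int example_pre(struct kprobe *p, struct pt_regs *regs)
{
	return 0;	/* let execution continue */
}

static struct kprobe example_kp = {
	.symbol_name	= "do_fork",	/* example target */
	.pre_handler	= example_pre,
};

/*
 * The first register_kprobe(&example_kp) succeeds; a second call on
 * the same object now returns -EINVAL instead of re-linking the probe.
 */
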
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 9af56723c096..f5dcd36d3151 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -49,7 +49,7 @@
49#include "lockdep_internals.h" 49#include "lockdep_internals.h"
50 50
51#define CREATE_TRACE_POINTS 51#define CREATE_TRACE_POINTS
52#include <trace/events/lockdep.h> 52#include <trace/events/lock.h>
53 53
54#ifdef CONFIG_PROVE_LOCKING 54#ifdef CONFIG_PROVE_LOCKING
55int prove_locking = 1; 55int prove_locking = 1;
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 61d5aa5eced3..acd24e7643eb 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -558,7 +558,7 @@ EXPORT_SYMBOL(unregister_reboot_notifier);
558 558
559static ATOMIC_NOTIFIER_HEAD(die_chain); 559static ATOMIC_NOTIFIER_HEAD(die_chain);
560 560
561int notrace notify_die(enum die_val val, const char *str, 561int notrace __kprobes notify_die(enum die_val val, const char *str,
562 struct pt_regs *regs, long err, int trap, int sig) 562 struct pt_regs *regs, long err, int trap, int sig)
563{ 563{
564 struct die_args args = { 564 struct die_args args = {
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 7f29643c8985..6b7ddba1dd64 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -28,6 +28,8 @@
28#include <linux/anon_inodes.h> 28#include <linux/anon_inodes.h>
29#include <linux/kernel_stat.h> 29#include <linux/kernel_stat.h>
30#include <linux/perf_event.h> 30#include <linux/perf_event.h>
31#include <linux/ftrace_event.h>
32#include <linux/hw_breakpoint.h>
31 33
32#include <asm/irq_regs.h> 34#include <asm/irq_regs.h>
33 35
@@ -244,6 +246,49 @@ static void perf_unpin_context(struct perf_event_context *ctx)
244 put_ctx(ctx); 246 put_ctx(ctx);
245} 247}
246 248
249static inline u64 perf_clock(void)
250{
251 return cpu_clock(smp_processor_id());
252}
253
254/*
255 * Update the record of the current time in a context.
256 */
257static void update_context_time(struct perf_event_context *ctx)
258{
259 u64 now = perf_clock();
260
261 ctx->time += now - ctx->timestamp;
262 ctx->timestamp = now;
263}
264
265/*
266 * Update the total_time_enabled and total_time_running fields for an event.
267 */
268static void update_event_times(struct perf_event *event)
269{
270 struct perf_event_context *ctx = event->ctx;
271 u64 run_end;
272
273 if (event->state < PERF_EVENT_STATE_INACTIVE ||
274 event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
275 return;
276
277 if (ctx->is_active)
278 run_end = ctx->time;
279 else
280 run_end = event->tstamp_stopped;
281
282 event->total_time_enabled = run_end - event->tstamp_enabled;
283
284 if (event->state == PERF_EVENT_STATE_INACTIVE)
285 run_end = event->tstamp_stopped;
286 else
287 run_end = ctx->time;
288
289 event->total_time_running = run_end - event->tstamp_running;
290}
291
247/* 292/*
248 * Add an event to the lists for its context. 293 * Add an event to the lists for its context.
249 * Must be called with ctx->mutex and ctx->lock held. 294 * Must be called with ctx->mutex and ctx->lock held.
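
Worked example for the update_event_times() rework above (numbers invented): with an active context where ctx->time = 1000 and tstamp_enabled = 200, total_time_enabled = 800; if the event itself is INACTIVE with tstamp_stopped = 700 and tstamp_running = 300, total_time_running = 700 - 300 = 400. The removed version, by contrast, charged total_time_enabled up to ctx->time even when the whole context had been scheduled out.
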
@@ -292,6 +337,18 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
292 if (event->group_leader != event) 337 if (event->group_leader != event)
293 event->group_leader->nr_siblings--; 338 event->group_leader->nr_siblings--;
294 339
340 update_event_times(event);
341
342 /*
343 * If event was in error state, then keep it
344 * that way, otherwise bogus counts will be
345 * returned on read(). The only way to get out
346 * of error state is by explicit re-enabling
347 * of the event
348 */
349 if (event->state > PERF_EVENT_STATE_OFF)
350 event->state = PERF_EVENT_STATE_OFF;
351
295 /* 352 /*
296 * If this was a group event with sibling events then 353 * If this was a group event with sibling events then
297 * upgrade the siblings to singleton events by adding them 354 * upgrade the siblings to singleton events by adding them
@@ -445,50 +502,11 @@ retry:
445 * can remove the event safely, if the call above did not 502 * can remove the event safely, if the call above did not
446 * succeed. 503 * succeed.
447 */ 504 */
448 if (!list_empty(&event->group_entry)) { 505 if (!list_empty(&event->group_entry))
449 list_del_event(event, ctx); 506 list_del_event(event, ctx);
450 }
451 spin_unlock_irq(&ctx->lock); 507 spin_unlock_irq(&ctx->lock);
452} 508}
453 509
454static inline u64 perf_clock(void)
455{
456 return cpu_clock(smp_processor_id());
457}
458
459/*
460 * Update the record of the current time in a context.
461 */
462static void update_context_time(struct perf_event_context *ctx)
463{
464 u64 now = perf_clock();
465
466 ctx->time += now - ctx->timestamp;
467 ctx->timestamp = now;
468}
469
470/*
471 * Update the total_time_enabled and total_time_running fields for an event.
472 */
473static void update_event_times(struct perf_event *event)
474{
475 struct perf_event_context *ctx = event->ctx;
476 u64 run_end;
477
478 if (event->state < PERF_EVENT_STATE_INACTIVE ||
479 event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
480 return;
481
482 event->total_time_enabled = ctx->time - event->tstamp_enabled;
483
484 if (event->state == PERF_EVENT_STATE_INACTIVE)
485 run_end = event->tstamp_stopped;
486 else
487 run_end = ctx->time;
488
489 event->total_time_running = run_end - event->tstamp_running;
490}
491
492/* 510/*
493 * Update total_time_enabled and total_time_running for all events in a group. 511 * Update total_time_enabled and total_time_running for all events in a group.
494 */ 512 */
@@ -1031,10 +1049,10 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
1031 update_context_time(ctx); 1049 update_context_time(ctx);
1032 1050
1033 perf_disable(); 1051 perf_disable();
1034 if (ctx->nr_active) 1052 if (ctx->nr_active) {
1035 list_for_each_entry(event, &ctx->group_list, group_entry) 1053 list_for_each_entry(event, &ctx->group_list, group_entry)
1036 group_sched_out(event, cpuctx, ctx); 1054 group_sched_out(event, cpuctx, ctx);
1037 1055 }
1038 perf_enable(); 1056 perf_enable();
1039 out: 1057 out:
1040 spin_unlock(&ctx->lock); 1058 spin_unlock(&ctx->lock);
@@ -1059,8 +1077,6 @@ static int context_equiv(struct perf_event_context *ctx1,
1059 && !ctx1->pin_count && !ctx2->pin_count; 1077 && !ctx1->pin_count && !ctx2->pin_count;
1060} 1078}
1061 1079
1062static void __perf_event_read(void *event);
1063
1064static void __perf_event_sync_stat(struct perf_event *event, 1080static void __perf_event_sync_stat(struct perf_event *event,
1065 struct perf_event *next_event) 1081 struct perf_event *next_event)
1066{ 1082{
@@ -1078,8 +1094,8 @@ static void __perf_event_sync_stat(struct perf_event *event,
1078 */ 1094 */
1079 switch (event->state) { 1095 switch (event->state) {
1080 case PERF_EVENT_STATE_ACTIVE: 1096 case PERF_EVENT_STATE_ACTIVE:
1081 __perf_event_read(event); 1097 event->pmu->read(event);
1082 break; 1098 /* fall-through */
1083 1099
1084 case PERF_EVENT_STATE_INACTIVE: 1100 case PERF_EVENT_STATE_INACTIVE:
1085 update_event_times(event); 1101 update_event_times(event);
@@ -1118,6 +1134,8 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
1118 if (!ctx->nr_stat) 1134 if (!ctx->nr_stat)
1119 return; 1135 return;
1120 1136
1137 update_context_time(ctx);
1138
1121 event = list_first_entry(&ctx->event_list, 1139 event = list_first_entry(&ctx->event_list,
1122 struct perf_event, event_entry); 1140 struct perf_event, event_entry);
1123 1141
@@ -1161,8 +1179,6 @@ void perf_event_task_sched_out(struct task_struct *task,
1161 if (likely(!ctx || !cpuctx->task_ctx)) 1179 if (likely(!ctx || !cpuctx->task_ctx))
1162 return; 1180 return;
1163 1181
1164 update_context_time(ctx);
1165
1166 rcu_read_lock(); 1182 rcu_read_lock();
1167 parent = rcu_dereference(ctx->parent_ctx); 1183 parent = rcu_dereference(ctx->parent_ctx);
1168 next_ctx = next->perf_event_ctxp; 1184 next_ctx = next->perf_event_ctxp;
@@ -1515,7 +1531,6 @@ static void __perf_event_read(void *info)
1515 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 1531 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
1516 struct perf_event *event = info; 1532 struct perf_event *event = info;
1517 struct perf_event_context *ctx = event->ctx; 1533 struct perf_event_context *ctx = event->ctx;
1518 unsigned long flags;
1519 1534
1520 /* 1535 /*
1521 * If this is a task context, we need to check whether it is 1536 * If this is a task context, we need to check whether it is
@@ -1527,12 +1542,12 @@ static void __perf_event_read(void *info)
1527 if (ctx->task && cpuctx->task_ctx != ctx) 1542 if (ctx->task && cpuctx->task_ctx != ctx)
1528 return; 1543 return;
1529 1544
1530 local_irq_save(flags); 1545 spin_lock(&ctx->lock);
1531 if (ctx->is_active) 1546 update_context_time(ctx);
1532 update_context_time(ctx);
1533 event->pmu->read(event);
1534 update_event_times(event); 1547 update_event_times(event);
1535 local_irq_restore(flags); 1548 spin_unlock(&ctx->lock);
1549
1550 event->pmu->read(event);
1536} 1551}
1537 1552
1538static u64 perf_event_read(struct perf_event *event) 1553static u64 perf_event_read(struct perf_event *event)
@@ -1545,7 +1560,13 @@ static u64 perf_event_read(struct perf_event *event)
1545 smp_call_function_single(event->oncpu, 1560 smp_call_function_single(event->oncpu,
1546 __perf_event_read, event, 1); 1561 __perf_event_read, event, 1);
1547 } else if (event->state == PERF_EVENT_STATE_INACTIVE) { 1562 } else if (event->state == PERF_EVENT_STATE_INACTIVE) {
1563 struct perf_event_context *ctx = event->ctx;
1564 unsigned long flags;
1565
1566 spin_lock_irqsave(&ctx->lock, flags);
1567 update_context_time(ctx);
1548 update_event_times(event); 1568 update_event_times(event);
1569 spin_unlock_irqrestore(&ctx->lock, flags);
1549 } 1570 }
1550 1571
1551 return atomic64_read(&event->count); 1572 return atomic64_read(&event->count);
@@ -1658,6 +1679,8 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1658 return ERR_PTR(err); 1679 return ERR_PTR(err);
1659} 1680}
1660 1681
1682static void perf_event_free_filter(struct perf_event *event);
1683
1661static void free_event_rcu(struct rcu_head *head) 1684static void free_event_rcu(struct rcu_head *head)
1662{ 1685{
1663 struct perf_event *event; 1686 struct perf_event *event;
@@ -1665,6 +1688,7 @@ static void free_event_rcu(struct rcu_head *head)
1665 event = container_of(head, struct perf_event, rcu_head); 1688 event = container_of(head, struct perf_event, rcu_head);
1666 if (event->ns) 1689 if (event->ns)
1667 put_pid_ns(event->ns); 1690 put_pid_ns(event->ns);
1691 perf_event_free_filter(event);
1668 kfree(event); 1692 kfree(event);
1669} 1693}
1670 1694
@@ -1696,16 +1720,10 @@ static void free_event(struct perf_event *event)
1696 call_rcu(&event->rcu_head, free_event_rcu); 1720 call_rcu(&event->rcu_head, free_event_rcu);
1697} 1721}
1698 1722
1699/* 1723int perf_event_release_kernel(struct perf_event *event)
1700 * Called when the last reference to the file is gone.
1701 */
1702static int perf_release(struct inode *inode, struct file *file)
1703{ 1724{
1704 struct perf_event *event = file->private_data;
1705 struct perf_event_context *ctx = event->ctx; 1725 struct perf_event_context *ctx = event->ctx;
1706 1726
1707 file->private_data = NULL;
1708
1709 WARN_ON_ONCE(ctx->parent_ctx); 1727 WARN_ON_ONCE(ctx->parent_ctx);
1710 mutex_lock(&ctx->mutex); 1728 mutex_lock(&ctx->mutex);
1711 perf_event_remove_from_context(event); 1729 perf_event_remove_from_context(event);
@@ -1720,6 +1738,19 @@ static int perf_release(struct inode *inode, struct file *file)
1720 1738
1721 return 0; 1739 return 0;
1722} 1740}
1741EXPORT_SYMBOL_GPL(perf_event_release_kernel);
1742
1743/*
1744 * Called when the last reference to the file is gone.
1745 */
1746static int perf_release(struct inode *inode, struct file *file)
1747{
1748 struct perf_event *event = file->private_data;
1749
1750 file->private_data = NULL;
1751
1752 return perf_event_release_kernel(event);
1753}
1723 1754
1724static int perf_event_read_size(struct perf_event *event) 1755static int perf_event_read_size(struct perf_event *event)
1725{ 1756{
@@ -1746,91 +1777,94 @@ static int perf_event_read_size(struct perf_event *event)
1746 return size; 1777 return size;
1747} 1778}
1748 1779
1749static u64 perf_event_read_value(struct perf_event *event) 1780u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
1750{ 1781{
1751 struct perf_event *child; 1782 struct perf_event *child;
1752 u64 total = 0; 1783 u64 total = 0;
1753 1784
1785 *enabled = 0;
1786 *running = 0;
1787
1788 mutex_lock(&event->child_mutex);
1754 total += perf_event_read(event); 1789 total += perf_event_read(event);
1755 list_for_each_entry(child, &event->child_list, child_list) 1790 *enabled += event->total_time_enabled +
1791 atomic64_read(&event->child_total_time_enabled);
1792 *running += event->total_time_running +
1793 atomic64_read(&event->child_total_time_running);
1794
1795 list_for_each_entry(child, &event->child_list, child_list) {
1756 total += perf_event_read(child); 1796 total += perf_event_read(child);
1797 *enabled += child->total_time_enabled;
1798 *running += child->total_time_running;
1799 }
1800 mutex_unlock(&event->child_mutex);
1757 1801
1758 return total; 1802 return total;
1759} 1803}
1760 1804EXPORT_SYMBOL_GPL(perf_event_read_value);
1761static int perf_event_read_entry(struct perf_event *event,
1762 u64 read_format, char __user *buf)
1763{
1764 int n = 0, count = 0;
1765 u64 values[2];
1766
1767 values[n++] = perf_event_read_value(event);
1768 if (read_format & PERF_FORMAT_ID)
1769 values[n++] = primary_event_id(event);
1770
1771 count = n * sizeof(u64);
1772
1773 if (copy_to_user(buf, values, count))
1774 return -EFAULT;
1775
1776 return count;
1777}
1778 1805
1779static int perf_event_read_group(struct perf_event *event, 1806static int perf_event_read_group(struct perf_event *event,
1780 u64 read_format, char __user *buf) 1807 u64 read_format, char __user *buf)
1781{ 1808{
1782 struct perf_event *leader = event->group_leader, *sub; 1809 struct perf_event *leader = event->group_leader, *sub;
1783 int n = 0, size = 0, err = -EFAULT; 1810 int n = 0, size = 0, ret = -EFAULT;
1784 u64 values[3]; 1811 struct perf_event_context *ctx = leader->ctx;
1812 u64 values[5];
1813 u64 count, enabled, running;
1814
1815 mutex_lock(&ctx->mutex);
1816 count = perf_event_read_value(leader, &enabled, &running);
1785 1817
1786 values[n++] = 1 + leader->nr_siblings; 1818 values[n++] = 1 + leader->nr_siblings;
1787 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { 1819 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1788 values[n++] = leader->total_time_enabled + 1820 values[n++] = enabled;
1789 atomic64_read(&leader->child_total_time_enabled); 1821 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1790 } 1822 values[n++] = running;
1791 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { 1823 values[n++] = count;
1792 values[n++] = leader->total_time_running + 1824 if (read_format & PERF_FORMAT_ID)
1793 atomic64_read(&leader->child_total_time_running); 1825 values[n++] = primary_event_id(leader);
1794 }
1795 1826
1796 size = n * sizeof(u64); 1827 size = n * sizeof(u64);
1797 1828
1798 if (copy_to_user(buf, values, size)) 1829 if (copy_to_user(buf, values, size))
1799 return -EFAULT; 1830 goto unlock;
1800
1801 err = perf_event_read_entry(leader, read_format, buf + size);
1802 if (err < 0)
1803 return err;
1804 1831
1805 size += err; 1832 ret = size;
1806 1833
1807 list_for_each_entry(sub, &leader->sibling_list, group_entry) { 1834 list_for_each_entry(sub, &leader->sibling_list, group_entry) {
1808 err = perf_event_read_entry(sub, read_format, 1835 n = 0;
1809 buf + size); 1836
1810 if (err < 0) 1837 values[n++] = perf_event_read_value(sub, &enabled, &running);
1811 return err; 1838 if (read_format & PERF_FORMAT_ID)
1839 values[n++] = primary_event_id(sub);
1840
1841 size = n * sizeof(u64);
1812 1842
1813 size += err; 1843 if (copy_to_user(buf + ret, values, size)) {
1844 ret = -EFAULT;
1845 goto unlock;
1846 }
1847
1848 ret += size;
1814 } 1849 }
1850unlock:
1851 mutex_unlock(&ctx->mutex);
1815 1852
1816 return size; 1853 return ret;
1817} 1854}
1818 1855
1819static int perf_event_read_one(struct perf_event *event, 1856static int perf_event_read_one(struct perf_event *event,
1820 u64 read_format, char __user *buf) 1857 u64 read_format, char __user *buf)
1821{ 1858{
1859 u64 enabled, running;
1822 u64 values[4]; 1860 u64 values[4];
1823 int n = 0; 1861 int n = 0;
1824 1862
1825 values[n++] = perf_event_read_value(event); 1863 values[n++] = perf_event_read_value(event, &enabled, &running);
1826 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { 1864 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1827 values[n++] = event->total_time_enabled + 1865 values[n++] = enabled;
1828 atomic64_read(&event->child_total_time_enabled); 1866 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1829 } 1867 values[n++] = running;
1830 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
1831 values[n++] = event->total_time_running +
1832 atomic64_read(&event->child_total_time_running);
1833 }
1834 if (read_format & PERF_FORMAT_ID) 1868 if (read_format & PERF_FORMAT_ID)
1835 values[n++] = primary_event_id(event); 1869 values[n++] = primary_event_id(event);
1836 1870
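
The rewritten group read path above streams the whole group through one buffer. As a reading aid (the struct below is illustrative, not part of the patch), the layout perf_event_read_group() produces for a leader read with PERF_FORMAT_GROUP plus the optional bits is:

struct read_format_group {
	u64 nr;			/* 1 + leader->nr_siblings */
	u64 time_enabled;	/* iff PERF_FORMAT_TOTAL_TIME_ENABLED */
	u64 time_running;	/* iff PERF_FORMAT_TOTAL_TIME_RUNNING */
	struct {
		u64 value;	/* perf_event_read_value() result */
		u64 id;		/* iff PERF_FORMAT_ID */
	} cnt[];		/* cnt[0] is the leader, then each sibling */
};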
@@ -1861,12 +1895,10 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
1861 return -ENOSPC; 1895 return -ENOSPC;
1862 1896
1863 WARN_ON_ONCE(event->ctx->parent_ctx); 1897 WARN_ON_ONCE(event->ctx->parent_ctx);
1864 mutex_lock(&event->child_mutex);
1865 if (read_format & PERF_FORMAT_GROUP) 1898 if (read_format & PERF_FORMAT_GROUP)
1866 ret = perf_event_read_group(event, read_format, buf); 1899 ret = perf_event_read_group(event, read_format, buf);
1867 else 1900 else
1868 ret = perf_event_read_one(event, read_format, buf); 1901 ret = perf_event_read_one(event, read_format, buf);
1869 mutex_unlock(&event->child_mutex);
1870 1902
1871 return ret; 1903 return ret;
1872} 1904}
@@ -1974,7 +2006,8 @@ unlock:
1974 return ret; 2006 return ret;
1975} 2007}
1976 2008
1977int perf_event_set_output(struct perf_event *event, int output_fd); 2009static int perf_event_set_output(struct perf_event *event, int output_fd);
2010static int perf_event_set_filter(struct perf_event *event, void __user *arg);
1978 2011
1979static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 2012static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1980{ 2013{
@@ -2002,6 +2035,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2002 case PERF_EVENT_IOC_SET_OUTPUT: 2035 case PERF_EVENT_IOC_SET_OUTPUT:
2003 return perf_event_set_output(event, arg); 2036 return perf_event_set_output(event, arg);
2004 2037
2038 case PERF_EVENT_IOC_SET_FILTER:
2039 return perf_event_set_filter(event, (void __user *)arg);
2040
2005 default: 2041 default:
2006 return -ENOTTY; 2042 return -ENOTTY;
2007 } 2043 }
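
For context, the new PERF_EVENT_IOC_SET_FILTER ioctl is driven from user space roughly as follows. This is a hedged sketch: the tracepoint id (attr.config) and the filter string are placeholders, and __NR_perf_event_open must be available from your kernel headers.

#include <linux/perf_event.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.type   = PERF_TYPE_TRACEPOINT;
	attr.size   = sizeof(attr);
	attr.config = 123;	/* tracepoint id from debugfs; placeholder */

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;

	/* Only PERF_TYPE_TRACEPOINT events accept a filter (else -EINVAL). */
	if (ioctl(fd, PERF_EVENT_IOC_SET_FILTER, "common_pid != 0") < 0)
		return 1;

	close(fd);
	return 0;
}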
@@ -2174,6 +2210,7 @@ static void perf_mmap_data_free(struct perf_mmap_data *data)
2174 perf_mmap_free_page((unsigned long)data->user_page); 2210 perf_mmap_free_page((unsigned long)data->user_page);
2175 for (i = 0; i < data->nr_pages; i++) 2211 for (i = 0; i < data->nr_pages; i++)
2176 perf_mmap_free_page((unsigned long)data->data_pages[i]); 2212 perf_mmap_free_page((unsigned long)data->data_pages[i]);
2213 kfree(data);
2177} 2214}
2178 2215
2179#else 2216#else
@@ -2214,6 +2251,7 @@ static void perf_mmap_data_free_work(struct work_struct *work)
2214 perf_mmap_unmark_page(base + (i * PAGE_SIZE)); 2251 perf_mmap_unmark_page(base + (i * PAGE_SIZE));
2215 2252
2216 vfree(base); 2253 vfree(base);
2254 kfree(data);
2217} 2255}
2218 2256
2219static void perf_mmap_data_free(struct perf_mmap_data *data) 2257static void perf_mmap_data_free(struct perf_mmap_data *data)
@@ -2307,7 +2345,7 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
2307 } 2345 }
2308 2346
2309 if (!data->watermark) 2347 if (!data->watermark)
2310 data->watermark = max_t(long, PAGE_SIZE, max_size / 2); 2348 data->watermark = max_size / 2;
2311 2349
2312 2350
2313 rcu_assign_pointer(event->data, data); 2351 rcu_assign_pointer(event->data, data);
@@ -2319,7 +2357,6 @@ static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
2319 2357
2320 data = container_of(rcu_head, struct perf_mmap_data, rcu_head); 2358 data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
2321 perf_mmap_data_free(data); 2359 perf_mmap_data_free(data);
2322 kfree(data);
2323} 2360}
2324 2361
2325static void perf_mmap_data_release(struct perf_event *event) 2362static void perf_mmap_data_release(struct perf_event *event)
@@ -2666,20 +2703,21 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
2666static void perf_output_lock(struct perf_output_handle *handle) 2703static void perf_output_lock(struct perf_output_handle *handle)
2667{ 2704{
2668 struct perf_mmap_data *data = handle->data; 2705 struct perf_mmap_data *data = handle->data;
2669 int cpu; 2706 int cur, cpu = get_cpu();
2670 2707
2671 handle->locked = 0; 2708 handle->locked = 0;
2672 2709
2673 local_irq_save(handle->flags); 2710 for (;;) {
2674 cpu = smp_processor_id(); 2711 cur = atomic_cmpxchg(&data->lock, -1, cpu);
2675 2712 if (cur == -1) {
2676 if (in_nmi() && atomic_read(&data->lock) == cpu) 2713 handle->locked = 1;
2677 return; 2714 break;
2715 }
2716 if (cur == cpu)
2717 break;
2678 2718
2679 while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
2680 cpu_relax(); 2719 cpu_relax();
2681 2720 }
2682 handle->locked = 1;
2683} 2721}
2684 2722
2685static void perf_output_unlock(struct perf_output_handle *handle) 2723static void perf_output_unlock(struct perf_output_handle *handle)
@@ -2725,7 +2763,7 @@ again:
2725 if (atomic_xchg(&data->wakeup, 0)) 2763 if (atomic_xchg(&data->wakeup, 0))
2726 perf_output_wakeup(handle); 2764 perf_output_wakeup(handle);
2727out: 2765out:
2728 local_irq_restore(handle->flags); 2766 put_cpu();
2729} 2767}
2730 2768
2731void perf_output_copy(struct perf_output_handle *handle, 2769void perf_output_copy(struct perf_output_handle *handle,
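
The locking rework above drops the IRQ save/restore and takes the lock with a cmpxchg loop: swing data->lock from -1 to the current CPU, and treat "already held by this CPU" as a recursive acquisition instead of spinning forever, which is what keeps nested NMI output safe. A standalone user-space rendition of the idiom, using C11 atomics purely for illustration:

#include <stdatomic.h>

static atomic_int lock = ATOMIC_VAR_INIT(-1);	/* -1 == unlocked */

/* Returns 1 if this call took the lock (and must release it later),
 * 0 if the same id already held it (a nested, e.g. NMI, entry). */
static int output_lock(int id)
{
	int cur;

	for (;;) {
		cur = -1;
		if (atomic_compare_exchange_strong(&lock, &cur, id))
			return 1;	/* -1 -> id: we own it */
		if (cur == id)
			return 0;	/* recursion on the same id */
		/* spin; the kernel version calls cpu_relax() here */
	}
}

static void output_unlock(int locked)
{
	if (locked)
		atomic_store(&lock, -1);
}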
@@ -3236,15 +3274,10 @@ static void perf_event_task_ctx(struct perf_event_context *ctx,
3236{ 3274{
3237 struct perf_event *event; 3275 struct perf_event *event;
3238 3276
3239 if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
3240 return;
3241
3242 rcu_read_lock();
3243 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3277 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3244 if (perf_event_task_match(event)) 3278 if (perf_event_task_match(event))
3245 perf_event_task_output(event, task_event); 3279 perf_event_task_output(event, task_event);
3246 } 3280 }
3247 rcu_read_unlock();
3248} 3281}
3249 3282
3250static void perf_event_task_event(struct perf_task_event *task_event) 3283static void perf_event_task_event(struct perf_task_event *task_event)
@@ -3252,11 +3285,11 @@ static void perf_event_task_event(struct perf_task_event *task_event)
3252 struct perf_cpu_context *cpuctx; 3285 struct perf_cpu_context *cpuctx;
3253 struct perf_event_context *ctx = task_event->task_ctx; 3286 struct perf_event_context *ctx = task_event->task_ctx;
3254 3287
3288 rcu_read_lock();
3255 cpuctx = &get_cpu_var(perf_cpu_context); 3289 cpuctx = &get_cpu_var(perf_cpu_context);
3256 perf_event_task_ctx(&cpuctx->ctx, task_event); 3290 perf_event_task_ctx(&cpuctx->ctx, task_event);
3257 put_cpu_var(perf_cpu_context); 3291 put_cpu_var(perf_cpu_context);
3258 3292
3259 rcu_read_lock();
3260 if (!ctx) 3293 if (!ctx)
3261 ctx = rcu_dereference(task_event->task->perf_event_ctxp); 3294 ctx = rcu_dereference(task_event->task->perf_event_ctxp);
3262 if (ctx) 3295 if (ctx)
@@ -3348,15 +3381,10 @@ static void perf_event_comm_ctx(struct perf_event_context *ctx,
3348{ 3381{
3349 struct perf_event *event; 3382 struct perf_event *event;
3350 3383
3351 if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
3352 return;
3353
3354 rcu_read_lock();
3355 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3384 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3356 if (perf_event_comm_match(event)) 3385 if (perf_event_comm_match(event))
3357 perf_event_comm_output(event, comm_event); 3386 perf_event_comm_output(event, comm_event);
3358 } 3387 }
3359 rcu_read_unlock();
3360} 3388}
3361 3389
3362static void perf_event_comm_event(struct perf_comm_event *comm_event) 3390static void perf_event_comm_event(struct perf_comm_event *comm_event)
@@ -3367,7 +3395,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
3367 char comm[TASK_COMM_LEN]; 3395 char comm[TASK_COMM_LEN];
3368 3396
3369 memset(comm, 0, sizeof(comm)); 3397 memset(comm, 0, sizeof(comm));
3370 strncpy(comm, comm_event->task->comm, sizeof(comm)); 3398 strlcpy(comm, comm_event->task->comm, sizeof(comm));
3371 size = ALIGN(strlen(comm)+1, sizeof(u64)); 3399 size = ALIGN(strlen(comm)+1, sizeof(u64));
3372 3400
3373 comm_event->comm = comm; 3401 comm_event->comm = comm;
@@ -3375,11 +3403,11 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
3375 3403
3376 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; 3404 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
3377 3405
3406 rcu_read_lock();
3378 cpuctx = &get_cpu_var(perf_cpu_context); 3407 cpuctx = &get_cpu_var(perf_cpu_context);
3379 perf_event_comm_ctx(&cpuctx->ctx, comm_event); 3408 perf_event_comm_ctx(&cpuctx->ctx, comm_event);
3380 put_cpu_var(perf_cpu_context); 3409 put_cpu_var(perf_cpu_context);
3381 3410
3382 rcu_read_lock();
3383 /* 3411 /*
3384 * doesn't really matter which of the child contexts the 3412 * doesn't really matter which of the child contexts the
3385 * event ends up in. 3413 * event ends up in.
@@ -3472,15 +3500,10 @@ static void perf_event_mmap_ctx(struct perf_event_context *ctx,
3472{ 3500{
3473 struct perf_event *event; 3501 struct perf_event *event;
3474 3502
3475 if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
3476 return;
3477
3478 rcu_read_lock();
3479 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3503 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3480 if (perf_event_mmap_match(event, mmap_event)) 3504 if (perf_event_mmap_match(event, mmap_event))
3481 perf_event_mmap_output(event, mmap_event); 3505 perf_event_mmap_output(event, mmap_event);
3482 } 3506 }
3483 rcu_read_unlock();
3484} 3507}
3485 3508
3486static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) 3509static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
@@ -3536,11 +3559,11 @@ got_name:
3536 3559
3537 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; 3560 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
3538 3561
3562 rcu_read_lock();
3539 cpuctx = &get_cpu_var(perf_cpu_context); 3563 cpuctx = &get_cpu_var(perf_cpu_context);
3540 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); 3564 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event);
3541 put_cpu_var(perf_cpu_context); 3565 put_cpu_var(perf_cpu_context);
3542 3566
3543 rcu_read_lock();
3544 /* 3567 /*
3545 * doesn't really matter which of the child contexts the 3568 * doesn't really matter which of the child contexts the
3546 * event ends up in. 3569 * event ends up in.
@@ -3679,7 +3702,11 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
3679 perf_event_disable(event); 3702 perf_event_disable(event);
3680 } 3703 }
3681 3704
3682 perf_event_output(event, nmi, data, regs); 3705 if (event->overflow_handler)
3706 event->overflow_handler(event, nmi, data, regs);
3707 else
3708 perf_event_output(event, nmi, data, regs);
3709
3683 return ret; 3710 return ret;
3684} 3711}
3685 3712
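
The new event->overflow_handler hook lets in-kernel users intercept overflows instead of feeding the mmap buffer. Its shape can be read off the call site above; the handler below is a hypothetical sketch (name and body are not from the patch):

#include <linux/perf_event.h>

static void my_overflow_handler(struct perf_event *event, int nmi,
				struct perf_sample_data *data,
				struct pt_regs *regs)
{
	/* Runs in place of perf_event_output(); must stay NMI-safe. */
	if (!nmi)
		pr_debug("overflow: period=%llu\n",
			 (unsigned long long)data->period);
}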
@@ -3724,16 +3751,16 @@ again:
3724 return nr; 3751 return nr;
3725} 3752}
3726 3753
3727static void perf_swevent_overflow(struct perf_event *event, 3754static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
3728 int nmi, struct perf_sample_data *data, 3755 int nmi, struct perf_sample_data *data,
3729 struct pt_regs *regs) 3756 struct pt_regs *regs)
3730{ 3757{
3731 struct hw_perf_event *hwc = &event->hw; 3758 struct hw_perf_event *hwc = &event->hw;
3732 int throttle = 0; 3759 int throttle = 0;
3733 u64 overflow;
3734 3760
3735 data->period = event->hw.last_period; 3761 data->period = event->hw.last_period;
3736 overflow = perf_swevent_set_period(event); 3762 if (!overflow)
3763 overflow = perf_swevent_set_period(event);
3737 3764
3738 if (hwc->interrupts == MAX_INTERRUPTS) 3765 if (hwc->interrupts == MAX_INTERRUPTS)
3739 return; 3766 return;
@@ -3766,14 +3793,19 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
3766 3793
3767 atomic64_add(nr, &event->count); 3794 atomic64_add(nr, &event->count);
3768 3795
3796 if (!regs)
3797 return;
3798
3769 if (!hwc->sample_period) 3799 if (!hwc->sample_period)
3770 return; 3800 return;
3771 3801
3772 if (!regs) 3802 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
3803 return perf_swevent_overflow(event, 1, nmi, data, regs);
3804
3805 if (atomic64_add_negative(nr, &hwc->period_left))
3773 return; 3806 return;
3774 3807
3775 if (!atomic64_add_negative(nr, &hwc->period_left)) 3808 perf_swevent_overflow(event, 0, nmi, data, regs);
3776 perf_swevent_overflow(event, nmi, data, regs);
3777} 3809}
3778 3810
3779static int perf_swevent_is_counting(struct perf_event *event) 3811static int perf_swevent_is_counting(struct perf_event *event)
@@ -3806,25 +3838,44 @@ static int perf_swevent_is_counting(struct perf_event *event)
3806 return 1; 3838 return 1;
3807} 3839}
3808 3840
3841static int perf_tp_event_match(struct perf_event *event,
3842 struct perf_sample_data *data);
3843
3844static int perf_exclude_event(struct perf_event *event,
3845 struct pt_regs *regs)
3846{
3847 if (regs) {
3848 if (event->attr.exclude_user && user_mode(regs))
3849 return 1;
3850
3851 if (event->attr.exclude_kernel && !user_mode(regs))
3852 return 1;
3853 }
3854
3855 return 0;
3856}
3857
3809static int perf_swevent_match(struct perf_event *event, 3858static int perf_swevent_match(struct perf_event *event,
3810 enum perf_type_id type, 3859 enum perf_type_id type,
3811 u32 event_id, struct pt_regs *regs) 3860 u32 event_id,
3861 struct perf_sample_data *data,
3862 struct pt_regs *regs)
3812{ 3863{
3813 if (!perf_swevent_is_counting(event)) 3864 if (!perf_swevent_is_counting(event))
3814 return 0; 3865 return 0;
3815 3866
3816 if (event->attr.type != type) 3867 if (event->attr.type != type)
3817 return 0; 3868 return 0;
3869
3818 if (event->attr.config != event_id) 3870 if (event->attr.config != event_id)
3819 return 0; 3871 return 0;
3820 3872
3821 if (regs) { 3873 if (perf_exclude_event(event, regs))
3822 if (event->attr.exclude_user && user_mode(regs)) 3874 return 0;
3823 return 0;
3824 3875
3825 if (event->attr.exclude_kernel && !user_mode(regs)) 3876 if (event->attr.type == PERF_TYPE_TRACEPOINT &&
3826 return 0; 3877 !perf_tp_event_match(event, data))
3827 } 3878 return 0;
3828 3879
3829 return 1; 3880 return 1;
3830} 3881}
@@ -3837,49 +3888,59 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx,
3837{ 3888{
3838 struct perf_event *event; 3889 struct perf_event *event;
3839 3890
3840 if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
3841 return;
3842
3843 rcu_read_lock();
3844 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3891 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3845 if (perf_swevent_match(event, type, event_id, regs)) 3892 if (perf_swevent_match(event, type, event_id, data, regs))
3846 perf_swevent_add(event, nr, nmi, data, regs); 3893 perf_swevent_add(event, nr, nmi, data, regs);
3847 } 3894 }
3848 rcu_read_unlock();
3849} 3895}
3850 3896
3851static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx) 3897int perf_swevent_get_recursion_context(void)
3852{ 3898{
3899 struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
3900 int rctx;
3901
3853 if (in_nmi()) 3902 if (in_nmi())
3854 return &cpuctx->recursion[3]; 3903 rctx = 3;
3904 else if (in_irq())
3905 rctx = 2;
3906 else if (in_softirq())
3907 rctx = 1;
3908 else
3909 rctx = 0;
3910
3911 if (cpuctx->recursion[rctx]) {
3912 put_cpu_var(perf_cpu_context);
3913 return -1;
3914 }
3855 3915
3856 if (in_irq()) 3916 cpuctx->recursion[rctx]++;
3857 return &cpuctx->recursion[2]; 3917 barrier();
3858 3918
3859 if (in_softirq()) 3919 return rctx;
3860 return &cpuctx->recursion[1]; 3920}
3921EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
3861 3922
3862 return &cpuctx->recursion[0]; 3923void perf_swevent_put_recursion_context(int rctx)
3924{
3925 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
3926 barrier();
3927 cpuctx->recursion[rctx]--;
3928 put_cpu_var(perf_cpu_context);
3863} 3929}
3930EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
3864 3931
3865static void do_perf_sw_event(enum perf_type_id type, u32 event_id, 3932static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
3866 u64 nr, int nmi, 3933 u64 nr, int nmi,
3867 struct perf_sample_data *data, 3934 struct perf_sample_data *data,
3868 struct pt_regs *regs) 3935 struct pt_regs *regs)
3869{ 3936{
3870 struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); 3937 struct perf_cpu_context *cpuctx;
3871 int *recursion = perf_swevent_recursion_context(cpuctx);
3872 struct perf_event_context *ctx; 3938 struct perf_event_context *ctx;
3873 3939
3874 if (*recursion) 3940 cpuctx = &__get_cpu_var(perf_cpu_context);
3875 goto out; 3941 rcu_read_lock();
3876
3877 (*recursion)++;
3878 barrier();
3879
3880 perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, 3942 perf_swevent_ctx_event(&cpuctx->ctx, type, event_id,
3881 nr, nmi, data, regs); 3943 nr, nmi, data, regs);
3882 rcu_read_lock();
3883 /* 3944 /*
3884 * doesn't really matter which of the child contexts the 3945 * doesn't really matter which of the child contexts the
3885 * event ends up in. 3946 * event ends up in.
@@ -3888,23 +3949,24 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
3888 if (ctx) 3949 if (ctx)
3889 perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs); 3950 perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs);
3890 rcu_read_unlock(); 3951 rcu_read_unlock();
3891
3892 barrier();
3893 (*recursion)--;
3894
3895out:
3896 put_cpu_var(perf_cpu_context);
3897} 3952}
3898 3953
3899void __perf_sw_event(u32 event_id, u64 nr, int nmi, 3954void __perf_sw_event(u32 event_id, u64 nr, int nmi,
3900 struct pt_regs *regs, u64 addr) 3955 struct pt_regs *regs, u64 addr)
3901{ 3956{
3902 struct perf_sample_data data = { 3957 struct perf_sample_data data;
3903 .addr = addr, 3958 int rctx;
3904 };
3905 3959
3906 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, 3960 rctx = perf_swevent_get_recursion_context();
3907 &data, regs); 3961 if (rctx < 0)
3962 return;
3963
3964 data.addr = addr;
3965 data.raw = NULL;
3966
3967 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs);
3968
3969 perf_swevent_put_recursion_context(rctx);
3908} 3970}
3909 3971
3910static void perf_swevent_read(struct perf_event *event) 3972static void perf_swevent_read(struct perf_event *event)
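
The recursion bookkeeping is now a small exported API: grab a slot for the current context (task, softirq, hardirq or NMI), bail out if that context is already emitting an event, and release the slot afterwards. A caller follows the same pattern as __perf_sw_event() above; the function below is a sketch, not part of the patch:

#include <linux/perf_event.h>

static void emit_my_swevent(u64 nr, struct pt_regs *regs, u64 addr)
{
	struct perf_sample_data data;
	int rctx;

	rctx = perf_swevent_get_recursion_context();
	if (rctx < 0)
		return;		/* this context is already inside an event */

	data.addr = addr;
	data.raw  = NULL;

	/* ... count or sample the event here ... */

	perf_swevent_put_recursion_context(rctx);
}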
@@ -3949,6 +4011,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
3949 event->pmu->read(event); 4011 event->pmu->read(event);
3950 4012
3951 data.addr = 0; 4013 data.addr = 0;
4014 data.period = event->hw.last_period;
3952 regs = get_irq_regs(); 4015 regs = get_irq_regs();
3953 /* 4016 /*
3954 * In case we exclude kernel IPs or are somehow not in interrupt 4017 * In case we exclude kernel IPs or are somehow not in interrupt
@@ -4108,6 +4171,7 @@ static const struct pmu perf_ops_task_clock = {
4108}; 4171};
4109 4172
4110#ifdef CONFIG_EVENT_PROFILE 4173#ifdef CONFIG_EVENT_PROFILE
4174
4111void perf_tp_event(int event_id, u64 addr, u64 count, void *record, 4175void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
4112 int entry_size) 4176 int entry_size)
4113{ 4177{
@@ -4126,13 +4190,21 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
4126 if (!regs) 4190 if (!regs)
4127 regs = task_pt_regs(current); 4191 regs = task_pt_regs(current);
4128 4192
4193 /* Trace events already protected against recursion */
4129 do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, 4194 do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
4130 &data, regs); 4195 &data, regs);
4131} 4196}
4132EXPORT_SYMBOL_GPL(perf_tp_event); 4197EXPORT_SYMBOL_GPL(perf_tp_event);
4133 4198
4134extern int ftrace_profile_enable(int); 4199static int perf_tp_event_match(struct perf_event *event,
4135extern void ftrace_profile_disable(int); 4200 struct perf_sample_data *data)
4201{
4202 void *record = data->raw->data;
4203
4204 if (likely(!event->filter) || filter_match_preds(event->filter, record))
4205 return 1;
4206 return 0;
4207}
4136 4208
4137static void tp_perf_event_destroy(struct perf_event *event) 4209static void tp_perf_event_destroy(struct perf_event *event)
4138{ 4210{
@@ -4157,11 +4229,99 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
4157 4229
4158 return &perf_ops_generic; 4230 return &perf_ops_generic;
4159} 4231}
4232
4233static int perf_event_set_filter(struct perf_event *event, void __user *arg)
4234{
4235 char *filter_str;
4236 int ret;
4237
4238 if (event->attr.type != PERF_TYPE_TRACEPOINT)
4239 return -EINVAL;
4240
4241 filter_str = strndup_user(arg, PAGE_SIZE);
4242 if (IS_ERR(filter_str))
4243 return PTR_ERR(filter_str);
4244
4245 ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
4246
4247 kfree(filter_str);
4248 return ret;
4249}
4250
4251static void perf_event_free_filter(struct perf_event *event)
4252{
4253 ftrace_profile_free_filter(event);
4254}
4255
4160#else 4256#else
4257
4258static int perf_tp_event_match(struct perf_event *event,
4259 struct perf_sample_data *data)
4260{
4261 return 1;
4262}
4263
4161static const struct pmu *tp_perf_event_init(struct perf_event *event) 4264static const struct pmu *tp_perf_event_init(struct perf_event *event)
4162{ 4265{
4163 return NULL; 4266 return NULL;
4164} 4267}
4268
4269static int perf_event_set_filter(struct perf_event *event, void __user *arg)
4270{
4271 return -ENOENT;
4272}
4273
4274static void perf_event_free_filter(struct perf_event *event)
4275{
4276}
4277
4278#endif /* CONFIG_EVENT_PROFILE */
4279
4280#ifdef CONFIG_HAVE_HW_BREAKPOINT
4281static void bp_perf_event_destroy(struct perf_event *event)
4282{
4283 release_bp_slot(event);
4284}
4285
4286static const struct pmu *bp_perf_event_init(struct perf_event *bp)
4287{
4288 int err;
4289 /*
4290 * The breakpoint is already filled in if we haven't created the counter
4291 * through the perf syscall.
4292 * FIXME: manage to get triggered to NULL if it comes from syscalls
4293 */
4294 if (!bp->callback)
4295 err = register_perf_hw_breakpoint(bp);
4296 else
4297 err = __register_perf_hw_breakpoint(bp);
4298 if (err)
4299 return ERR_PTR(err);
4300
4301 bp->destroy = bp_perf_event_destroy;
4302
4303 return &perf_ops_bp;
4304}
4305
4306void perf_bp_event(struct perf_event *bp, void *data)
4307{
4308 struct perf_sample_data sample;
4309 struct pt_regs *regs = data;
4310
4311 sample.addr = bp->attr.bp_addr;
4312
4313 if (!perf_exclude_event(bp, regs))
4314 perf_swevent_add(bp, 1, 1, &sample, regs);
4315}
4316#else
4317static const struct pmu *bp_perf_event_init(struct perf_event *bp)
4318{
4319 return NULL;
4320}
4321
4322void perf_bp_event(struct perf_event *bp, void *regs)
4323{
4324}
4165#endif 4325#endif
4166 4326
4167atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; 4327atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
@@ -4208,6 +4368,8 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event)
4208 case PERF_COUNT_SW_PAGE_FAULTS_MAJ: 4368 case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
4209 case PERF_COUNT_SW_CONTEXT_SWITCHES: 4369 case PERF_COUNT_SW_CONTEXT_SWITCHES:
4210 case PERF_COUNT_SW_CPU_MIGRATIONS: 4370 case PERF_COUNT_SW_CPU_MIGRATIONS:
4371 case PERF_COUNT_SW_ALIGNMENT_FAULTS:
4372 case PERF_COUNT_SW_EMULATION_FAULTS:
4211 if (!event->parent) { 4373 if (!event->parent) {
4212 atomic_inc(&perf_swevent_enabled[event_id]); 4374 atomic_inc(&perf_swevent_enabled[event_id]);
4213 event->destroy = sw_perf_event_destroy; 4375 event->destroy = sw_perf_event_destroy;
@@ -4228,6 +4390,7 @@ perf_event_alloc(struct perf_event_attr *attr,
4228 struct perf_event_context *ctx, 4390 struct perf_event_context *ctx,
4229 struct perf_event *group_leader, 4391 struct perf_event *group_leader,
4230 struct perf_event *parent_event, 4392 struct perf_event *parent_event,
4393 perf_callback_t callback,
4231 gfp_t gfpflags) 4394 gfp_t gfpflags)
4232{ 4395{
4233 const struct pmu *pmu; 4396 const struct pmu *pmu;
@@ -4270,6 +4433,11 @@ perf_event_alloc(struct perf_event_attr *attr,
4270 4433
4271 event->state = PERF_EVENT_STATE_INACTIVE; 4434 event->state = PERF_EVENT_STATE_INACTIVE;
4272 4435
4436 if (!callback && parent_event)
4437 callback = parent_event->callback;
4438
4439 event->callback = callback;
4440
4273 if (attr->disabled) 4441 if (attr->disabled)
4274 event->state = PERF_EVENT_STATE_OFF; 4442 event->state = PERF_EVENT_STATE_OFF;
4275 4443
@@ -4304,6 +4472,11 @@ perf_event_alloc(struct perf_event_attr *attr,
4304 pmu = tp_perf_event_init(event); 4472 pmu = tp_perf_event_init(event);
4305 break; 4473 break;
4306 4474
4475 case PERF_TYPE_BREAKPOINT:
4476 pmu = bp_perf_event_init(event);
4477 break;
4478
4479
4307 default: 4480 default:
4308 break; 4481 break;
4309 } 4482 }
@@ -4416,7 +4589,7 @@ err_size:
4416 goto out; 4589 goto out;
4417} 4590}
4418 4591
4419int perf_event_set_output(struct perf_event *event, int output_fd) 4592static int perf_event_set_output(struct perf_event *event, int output_fd)
4420{ 4593{
4421 struct perf_event *output_event = NULL; 4594 struct perf_event *output_event = NULL;
4422 struct file *output_file = NULL; 4595 struct file *output_file = NULL;
@@ -4546,7 +4719,7 @@ SYSCALL_DEFINE5(perf_event_open,
4546 } 4719 }
4547 4720
4548 event = perf_event_alloc(&attr, cpu, ctx, group_leader, 4721 event = perf_event_alloc(&attr, cpu, ctx, group_leader,
4549 NULL, GFP_KERNEL); 4722 NULL, NULL, GFP_KERNEL);
4550 err = PTR_ERR(event); 4723 err = PTR_ERR(event);
4551 if (IS_ERR(event)) 4724 if (IS_ERR(event))
4552 goto err_put_context; 4725 goto err_put_context;
@@ -4594,6 +4767,60 @@ err_put_context:
4594 return err; 4767 return err;
4595} 4768}
4596 4769
4770/**
4771 * perf_event_create_kernel_counter
4772 *
4773 * @attr: attributes of the counter to create
4774 * @cpu: cpu on which the counter is bound
4775 * @pid: task to profile
4776 */
4777struct perf_event *
4778perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
4779 pid_t pid, perf_callback_t callback)
4780{
4781 struct perf_event *event;
4782 struct perf_event_context *ctx;
4783 int err;
4784
4785 /*
4786 * Get the target context (task or percpu):
4787 */
4788
4789 ctx = find_get_context(pid, cpu);
4790 if (IS_ERR(ctx)) {
4791 err = PTR_ERR(ctx);
4792 goto err_exit;
4793 }
4794
4795 event = perf_event_alloc(attr, cpu, ctx, NULL,
4796 NULL, callback, GFP_KERNEL);
4797 if (IS_ERR(event)) {
4798 err = PTR_ERR(event);
4799 goto err_put_context;
4800 }
4801
4802 event->filp = NULL;
4803 WARN_ON_ONCE(ctx->parent_ctx);
4804 mutex_lock(&ctx->mutex);
4805 perf_install_in_context(ctx, event, cpu);
4806 ++ctx->generation;
4807 mutex_unlock(&ctx->mutex);
4808
4809 event->owner = current;
4810 get_task_struct(current);
4811 mutex_lock(&current->perf_event_mutex);
4812 list_add_tail(&event->owner_entry, &current->perf_event_list);
4813 mutex_unlock(&current->perf_event_mutex);
4814
4815 return event;
4816
4817 err_put_context:
4818 put_ctx(ctx);
4819 err_exit:
4820 return ERR_PTR(err);
4821}
4822EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
4823
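
As a usage illustration, the new API pairs with perf_event_read_value() and perf_event_release_kernel(), both exported earlier in this merge. The module below is hypothetical (a minimal sketch, assuming a CPU-bound cycle counter on CPU 0 is acceptable):

#include <linux/module.h>
#include <linux/perf_event.h>

static struct perf_event *ev;

static int __init kcounter_init(void)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_HARDWARE,
		.config	= PERF_COUNT_HW_CPU_CYCLES,
		.size	= sizeof(attr),
	};

	/* Count cycles on CPU 0 across all tasks; no callback. */
	ev = perf_event_create_kernel_counter(&attr, 0, -1, NULL);
	return IS_ERR(ev) ? PTR_ERR(ev) : 0;
}

static void __exit kcounter_exit(void)
{
	u64 enabled, running;
	u64 count = perf_event_read_value(ev, &enabled, &running);

	pr_info("cycles=%llu (enabled %llu ns, running %llu ns)\n",
		(unsigned long long)count,
		(unsigned long long)enabled,
		(unsigned long long)running);
	perf_event_release_kernel(ev);
}

module_init(kcounter_init);
module_exit(kcounter_exit);
MODULE_LICENSE("GPL");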
4597/* 4824/*
4598 * inherit an event from parent task to child task: 4825 * inherit an event from parent task to child task:
4599 */ 4826 */
@@ -4619,7 +4846,7 @@ inherit_event(struct perf_event *parent_event,
4619 child_event = perf_event_alloc(&parent_event->attr, 4846 child_event = perf_event_alloc(&parent_event->attr,
4620 parent_event->cpu, child_ctx, 4847 parent_event->cpu, child_ctx,
4621 group_leader, parent_event, 4848 group_leader, parent_event,
4622 GFP_KERNEL); 4849 NULL, GFP_KERNEL);
4623 if (IS_ERR(child_event)) 4850 if (IS_ERR(child_event))
4624 return child_event; 4851 return child_event;
4625 get_ctx(child_ctx); 4852 get_ctx(child_ctx);
@@ -4637,6 +4864,8 @@ inherit_event(struct perf_event *parent_event,
4637 if (parent_event->attr.freq) 4864 if (parent_event->attr.freq)
4638 child_event->hw.sample_period = parent_event->hw.sample_period; 4865 child_event->hw.sample_period = parent_event->hw.sample_period;
4639 4866
4867 child_event->overflow_handler = parent_event->overflow_handler;
4868
4640 /* 4869 /*
4641 * Link it up in the child's context: 4870 * Link it up in the child's context:
4642 */ 4871 */
@@ -4726,7 +4955,6 @@ __perf_event_exit_task(struct perf_event *child_event,
4726{ 4955{
4727 struct perf_event *parent_event; 4956 struct perf_event *parent_event;
4728 4957
4729 update_event_times(child_event);
4730 perf_event_remove_from_context(child_event); 4958 perf_event_remove_from_context(child_event);
4731 4959
4732 parent_event = child_event->parent; 4960 parent_event = child_event->parent;
@@ -4778,6 +5006,7 @@ void perf_event_exit_task(struct task_struct *child)
4778 * the events from it. 5006 * the events from it.
4779 */ 5007 */
4780 unclone_ctx(child_ctx); 5008 unclone_ctx(child_ctx);
5009 update_context_time(child_ctx);
4781 spin_unlock_irqrestore(&child_ctx->lock, flags); 5010 spin_unlock_irqrestore(&child_ctx->lock, flags);
4782 5011
4783 /* 5012 /*
diff --git a/kernel/signal.c b/kernel/signal.c
index fe08008133da..6b982f2cf524 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -28,7 +28,8 @@
28#include <linux/freezer.h> 28#include <linux/freezer.h>
29#include <linux/pid_namespace.h> 29#include <linux/pid_namespace.h>
30#include <linux/nsproxy.h> 30#include <linux/nsproxy.h>
31#include <trace/events/sched.h> 31#define CREATE_TRACE_POINTS
32#include <trace/events/signal.h>
32 33
33#include <asm/param.h> 34#include <asm/param.h>
34#include <asm/uaccess.h> 35#include <asm/uaccess.h>
@@ -856,7 +857,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
856 struct sigqueue *q; 857 struct sigqueue *q;
857 int override_rlimit; 858 int override_rlimit;
858 859
859 trace_sched_signal_send(sig, t); 860 trace_signal_generate(sig, info, t);
860 861
861 assert_spin_locked(&t->sighand->siglock); 862 assert_spin_locked(&t->sighand->siglock);
862 863
@@ -918,12 +919,21 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
918 break; 919 break;
919 } 920 }
920 } else if (!is_si_special(info)) { 921 } else if (!is_si_special(info)) {
921 if (sig >= SIGRTMIN && info->si_code != SI_USER) 922 if (sig >= SIGRTMIN && info->si_code != SI_USER) {
922 /* 923 /*
923 * Queue overflow, abort. We may abort if the signal was rt 924 * Queue overflow, abort. We may abort if the
924 * and sent by user using something other than kill(). 925 * signal was rt and sent by user using something
925 */ 926 * other than kill().
927 */
928 trace_signal_overflow_fail(sig, group, info);
926 return -EAGAIN; 929 return -EAGAIN;
930 } else {
931 /*
932 * This is a silent loss of information. We still
933 * send the signal, but the *info bits are lost.
934 */
935 trace_signal_lose_info(sig, group, info);
936 }
927 } 937 }
928 938
929out_set: 939out_set:
@@ -1859,6 +1869,9 @@ relock:
1859 ka = &sighand->action[signr-1]; 1869 ka = &sighand->action[signr-1];
1860 } 1870 }
1861 1871
1872 /* Trace actually delivered signals. */
1873 trace_signal_deliver(signr, info, ka);
1874
1862 if (ka->sa.sa_handler == SIG_IGN) /* Do nothing. */ 1875 if (ka->sa.sa_handler == SIG_IGN) /* Do nothing. */
1863 continue; 1876 continue;
1864 if (ka->sa.sa_handler != SIG_DFL) { 1877 if (ka->sa.sa_handler != SIG_DFL) {
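
The CREATE_TRACE_POINTS change above follows the usual one-definition rule for trace headers: exactly one .c file sets the macro before the include, which instantiates the tracepoints, and every other user includes the header bare. A two-line sketch of the idiom:

/* In exactly one .c file (here kernel/signal.c): */
#define CREATE_TRACE_POINTS
#include <trace/events/signal.h>	/* emits trace_signal_generate() etc. */

/* Everywhere else: include the header without the define. */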
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index b416512ad17f..d006554888dc 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -339,6 +339,27 @@ config POWER_TRACER
339 power management decisions, specifically the C-state and P-state 339 power management decisions, specifically the C-state and P-state
340 behavior. 340 behavior.
341 341
342config KSYM_TRACER
343 bool "Trace read and write access on kernel memory locations"
344 depends on HAVE_HW_BREAKPOINT
345 select TRACING
346 help
347 This tracer helps find read and write operations on any given kernel
348 symbol, i.e. one listed in /proc/kallsyms.
349
350config PROFILE_KSYM_TRACER
351 bool "Profile all kernel memory accesses on 'watched' variables"
352 depends on KSYM_TRACER
353 help
354 This tracer profiles kernel accesses on variables watched through the
355 ksym tracer ftrace plugin. Depending upon the hardware, all read
356 and write operations on kernel variables can be
357 monitored.
358
359 The results will be displayed in:
360 /debugfs/tracing/profile_ksym
361
362 Say N if unsure.
342 363
343config STACK_TRACER 364config STACK_TRACER
344 bool "Trace max stack" 365 bool "Trace max stack"
@@ -428,6 +449,23 @@ config BLK_DEV_IO_TRACE
428 449
429 If unsure, say N. 450 If unsure, say N.
430 451
452config KPROBE_EVENT
453 depends on KPROBES
454 depends on X86
455 bool "Enable kprobes-based dynamic events"
456 select TRACING
457 default y
458 help
459 This allows the user to add tracing events (similar to tracepoints) on the fly
460 via the ftrace interface. See Documentation/trace/kprobetrace.txt
461 for more details.
462
463 Those events can be inserted wherever kprobes can probe, and record
464 various register and memory values.
465
466 This option is also required by the perf-probe subcommand of perf tools. If
467 you want to use perf tools, this option is strongly recommended.
468
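
Concretely, a probe is registered by writing its definition to the kprobe_events control file. A minimal sketch, assuming debugfs is mounted at /sys/kernel/debug and using x86 register names (see Documentation/trace/kprobetrace.txt for the full grammar):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *def = "p:myprobe do_sys_open dfd=%ax flags=%dx\n";
	int fd = open("/sys/kernel/debug/tracing/kprobe_events",
		      O_WRONLY | O_APPEND);

	if (fd < 0) {
		perror("kprobe_events");
		return 1;
	}
	if (write(fd, def, strlen(def)) < 0)
		perror("write");
	close(fd);
	return 0;
}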
431config DYNAMIC_FTRACE 469config DYNAMIC_FTRACE
432 bool "enable/disable ftrace tracepoints dynamically" 470 bool "enable/disable ftrace tracepoints dynamically"
433 depends on FUNCTION_TRACER 471 depends on FUNCTION_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 26f03ac07c2b..cd9ecd89ec77 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -53,6 +53,8 @@ obj-$(CONFIG_EVENT_TRACING) += trace_export.o
53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o 53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
54obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o 54obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
55obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 55obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
56obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
57obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
56obj-$(CONFIG_EVENT_TRACING) += power-traces.o 58obj-$(CONFIG_EVENT_TRACING) += power-traces.o
57 59
58libftrace-y := ftrace.o 60libftrace-y := ftrace.o
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index a72c6e03deec..a1ca4956ab5e 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -397,18 +397,21 @@ int ring_buffer_print_page_header(struct trace_seq *s)
397 int ret; 397 int ret;
398 398
399 ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t" 399 ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t"
400 "offset:0;\tsize:%u;\n", 400 "offset:0;\tsize:%u;\tsigned:%u;\n",
401 (unsigned int)sizeof(field.time_stamp)); 401 (unsigned int)sizeof(field.time_stamp),
402 (unsigned int)is_signed_type(u64));
402 403
403 ret = trace_seq_printf(s, "\tfield: local_t commit;\t" 404 ret = trace_seq_printf(s, "\tfield: local_t commit;\t"
404 "offset:%u;\tsize:%u;\n", 405 "offset:%u;\tsize:%u;\tsigned:%u;\n",
405 (unsigned int)offsetof(typeof(field), commit), 406 (unsigned int)offsetof(typeof(field), commit),
406 (unsigned int)sizeof(field.commit)); 407 (unsigned int)sizeof(field.commit),
408 (unsigned int)is_signed_type(long));
407 409
408 ret = trace_seq_printf(s, "\tfield: char data;\t" 410 ret = trace_seq_printf(s, "\tfield: char data;\t"
409 "offset:%u;\tsize:%u;\n", 411 "offset:%u;\tsize:%u;\tsigned:%u;\n",
410 (unsigned int)offsetof(typeof(field), data), 412 (unsigned int)offsetof(typeof(field), data),
411 (unsigned int)BUF_PAGE_SIZE); 413 (unsigned int)BUF_PAGE_SIZE,
414 (unsigned int)is_signed_type(char));
412 415
413 return ret; 416 return ret;
414} 417}
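
is_signed_type() itself is not shown in this hunk; a plausible definition with the behavior the format strings rely on (an assumption, not quoted from the tree) is:

/* 1 for signed integer types, 0 for unsigned: (type)-1 wraps to the type's
 * maximum for unsigned types, so it only compares below (type)1 if signed. */
#define is_signed_type(type)	(((type)(-1)) < (type)1)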
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index acef8b4636f0..1d7f4830a80d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -11,6 +11,7 @@
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <trace/boot.h> 12#include <trace/boot.h>
13#include <linux/kmemtrace.h> 13#include <linux/kmemtrace.h>
14#include <linux/hw_breakpoint.h>
14 15
15#include <linux/trace_seq.h> 16#include <linux/trace_seq.h>
16#include <linux/ftrace_event.h> 17#include <linux/ftrace_event.h>
@@ -37,6 +38,7 @@ enum trace_type {
37 TRACE_KMEM_ALLOC, 38 TRACE_KMEM_ALLOC,
38 TRACE_KMEM_FREE, 39 TRACE_KMEM_FREE,
39 TRACE_BLK, 40 TRACE_BLK,
41 TRACE_KSYM,
40 42
41 __TRACE_LAST_TYPE, 43 __TRACE_LAST_TYPE,
42}; 44};
@@ -98,9 +100,32 @@ struct syscall_trace_enter {
98struct syscall_trace_exit { 100struct syscall_trace_exit {
99 struct trace_entry ent; 101 struct trace_entry ent;
100 int nr; 102 int nr;
101 unsigned long ret; 103 long ret;
102}; 104};
103 105
106struct kprobe_trace_entry {
107 struct trace_entry ent;
108 unsigned long ip;
109 int nargs;
110 unsigned long args[];
111};
112
113#define SIZEOF_KPROBE_TRACE_ENTRY(n) \
114 (offsetof(struct kprobe_trace_entry, args) + \
115 (sizeof(unsigned long) * (n)))
116
117struct kretprobe_trace_entry {
118 struct trace_entry ent;
119 unsigned long func;
120 unsigned long ret_ip;
121 int nargs;
122 unsigned long args[];
123};
124
125#define SIZEOF_KRETPROBE_TRACE_ENTRY(n) \
126 (offsetof(struct kretprobe_trace_entry, args) + \
127 (sizeof(unsigned long) * (n)))
128
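
The SIZEOF_*_TRACE_ENTRY() macros size a record with a flexible array tail: the header bytes up to args[], plus one word per probed argument. A standalone user-space rendition of the idiom (illustrative struct, not the kernel one):

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
	unsigned long ip;
	int nargs;
	unsigned long args[];		/* flexible array member */
};

#define SIZEOF_ENTRY(n) \
	(offsetof(struct entry, args) + sizeof(unsigned long) * (n))

int main(void)
{
	int n = 3;
	struct entry *e = malloc(SIZEOF_ENTRY(n));	/* header + 3 args */

	if (!e)
		return 1;
	e->nargs = n;
	printf("%zu bytes for %d args\n", SIZEOF_ENTRY(n), n);
	free(e);
	return 0;
}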
104/* 129/*
105 * trace_flag_type is an enumeration that holds different 130 * trace_flag_type is an enumeration that holds different
106 * states when a trace occurs. These are: 131 * states when a trace occurs. These are:
@@ -209,6 +234,7 @@ extern void __ftrace_bad_type(void);
209 TRACE_KMEM_ALLOC); \ 234 TRACE_KMEM_ALLOC); \
210 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ 235 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
211 TRACE_KMEM_FREE); \ 236 TRACE_KMEM_FREE); \
237 IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
212 __ftrace_bad_type(); \ 238 __ftrace_bad_type(); \
213 } while (0) 239 } while (0)
214 240
@@ -364,6 +390,8 @@ int register_tracer(struct tracer *type);
364void unregister_tracer(struct tracer *type); 390void unregister_tracer(struct tracer *type);
365int is_tracing_stopped(void); 391int is_tracing_stopped(void);
366 392
393extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
394
367extern unsigned long nsecs_to_usecs(unsigned long nsecs); 395extern unsigned long nsecs_to_usecs(unsigned long nsecs);
368 396
369#ifdef CONFIG_TRACER_MAX_TRACE 397#ifdef CONFIG_TRACER_MAX_TRACE
@@ -438,6 +466,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
438 struct trace_array *tr); 466 struct trace_array *tr);
439extern int trace_selftest_startup_hw_branches(struct tracer *trace, 467extern int trace_selftest_startup_hw_branches(struct tracer *trace,
440 struct trace_array *tr); 468 struct trace_array *tr);
469extern int trace_selftest_startup_ksym(struct tracer *trace,
470 struct trace_array *tr);
441#endif /* CONFIG_FTRACE_STARTUP_TEST */ 471#endif /* CONFIG_FTRACE_STARTUP_TEST */
442 472
443extern void *head_page(struct trace_array_cpu *data); 473extern void *head_page(struct trace_array_cpu *data);
@@ -683,7 +713,6 @@ struct event_filter {
683 int n_preds; 713 int n_preds;
684 struct filter_pred **preds; 714 struct filter_pred **preds;
685 char *filter_string; 715 char *filter_string;
686 bool no_reset;
687}; 716};
688 717
689struct event_subsystem { 718struct event_subsystem {
@@ -703,7 +732,7 @@ typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event,
703typedef int (*regex_match_func)(char *str, struct regex *r, int len); 732typedef int (*regex_match_func)(char *str, struct regex *r, int len);
704 733
705enum regex_type { 734enum regex_type {
706 MATCH_FULL, 735 MATCH_FULL = 0,
707 MATCH_FRONT_ONLY, 736 MATCH_FRONT_ONLY,
708 MATCH_MIDDLE_ONLY, 737 MATCH_MIDDLE_ONLY,
709 MATCH_END_ONLY, 738 MATCH_END_ONLY,
@@ -744,7 +773,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
744 struct ring_buffer *buffer, 773 struct ring_buffer *buffer,
745 struct ring_buffer_event *event) 774 struct ring_buffer_event *event)
746{ 775{
747 if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) { 776 if (unlikely(call->filter_active) &&
777 !filter_match_preds(call->filter, rec)) {
748 ring_buffer_discard_commit(buffer, event); 778 ring_buffer_discard_commit(buffer, event);
749 return 1; 779 return 1;
750 } 780 }
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index ead3d724599d..c16a08f399df 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -364,3 +364,19 @@ FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
364 F_printk("type:%u call_site:%lx ptr:%p", 364 F_printk("type:%u call_site:%lx ptr:%p",
365 __entry->type_id, __entry->call_site, __entry->ptr) 365 __entry->type_id, __entry->call_site, __entry->ptr)
366); 366);
367
368FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
369
370 TRACE_KSYM,
371
372 F_STRUCT(
373 __field( unsigned long, ip )
374 __field( unsigned char, type )
375 __array( char , cmd, TASK_COMM_LEN )
376 __field( unsigned long, addr )
377 ),
378
379 F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
380 (void *)__entry->ip, (unsigned int)__entry->type,
381 (void *)__entry->addr, __entry->cmd)
382);
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 8d5c171cc998..d9c60f80aa0d 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -8,17 +8,14 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include "trace.h" 9#include "trace.h"
10 10
11/*
12 * We can't use a size but a type in alloc_percpu()
13 * So let's create a dummy type that matches the desired size
14 */
15typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t;
16 11
17char *trace_profile_buf; 12char *perf_trace_buf;
18EXPORT_SYMBOL_GPL(trace_profile_buf); 13EXPORT_SYMBOL_GPL(perf_trace_buf);
14
15char *perf_trace_buf_nmi;
16EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
19 17
20 char *trace_profile_buf; 18 typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t;
21EXPORT_SYMBOL_GPL(trace_profile_buf_nmi);
22 19
23/* Count the events in use (per event id, not per instance) */ 20/* Count the events in use (per event id, not per instance) */
24static int total_profile_count; 21static int total_profile_count;
@@ -32,20 +29,20 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
32 return 0; 29 return 0;
33 30
34 if (!total_profile_count) { 31 if (!total_profile_count) {
35 buf = (char *)alloc_percpu(profile_buf_t); 32 buf = (char *)alloc_percpu(perf_trace_t);
36 if (!buf) 33 if (!buf)
37 goto fail_buf; 34 goto fail_buf;
38 35
39 rcu_assign_pointer(trace_profile_buf, buf); 36 rcu_assign_pointer(perf_trace_buf, buf);
40 37
41 buf = (char *)alloc_percpu(profile_buf_t); 38 buf = (char *)alloc_percpu(perf_trace_t);
42 if (!buf) 39 if (!buf)
43 goto fail_buf_nmi; 40 goto fail_buf_nmi;
44 41
45 rcu_assign_pointer(trace_profile_buf_nmi, buf); 42 rcu_assign_pointer(perf_trace_buf_nmi, buf);
46 } 43 }
47 44
48 ret = event->profile_enable(); 45 ret = event->profile_enable(event);
49 if (!ret) { 46 if (!ret) {
50 total_profile_count++; 47 total_profile_count++;
51 return 0; 48 return 0;
@@ -53,10 +50,10 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
53 50
54fail_buf_nmi: 51fail_buf_nmi:
55 if (!total_profile_count) { 52 if (!total_profile_count) {
56 free_percpu(trace_profile_buf_nmi); 53 free_percpu(perf_trace_buf_nmi);
57 free_percpu(trace_profile_buf); 54 free_percpu(perf_trace_buf);
58 trace_profile_buf_nmi = NULL; 55 perf_trace_buf_nmi = NULL;
59 trace_profile_buf = NULL; 56 perf_trace_buf = NULL;
60 } 57 }
61fail_buf: 58fail_buf:
62 atomic_dec(&event->profile_count); 59 atomic_dec(&event->profile_count);
@@ -89,14 +86,14 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
89 if (!atomic_add_negative(-1, &event->profile_count)) 86 if (!atomic_add_negative(-1, &event->profile_count))
90 return; 87 return;
91 88
92 event->profile_disable(); 89 event->profile_disable(event);
93 90
94 if (!--total_profile_count) { 91 if (!--total_profile_count) {
95 buf = trace_profile_buf; 92 buf = perf_trace_buf;
96 rcu_assign_pointer(trace_profile_buf, NULL); 93 rcu_assign_pointer(perf_trace_buf, NULL);
97 94
98 nmi_buf = trace_profile_buf_nmi; 95 nmi_buf = perf_trace_buf_nmi;
99 rcu_assign_pointer(trace_profile_buf_nmi, NULL); 96 rcu_assign_pointer(perf_trace_buf_nmi, NULL);
100 97
101 /* 98 /*
102 * Ensure every events in profiling have finished before 99 * Ensure every events in profiling have finished before
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 5e9ffc33f6db..1d18315dc836 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -93,9 +93,7 @@ int trace_define_common_fields(struct ftrace_event_call *call)
93} 93}
94EXPORT_SYMBOL_GPL(trace_define_common_fields); 94EXPORT_SYMBOL_GPL(trace_define_common_fields);
95 95
96#ifdef CONFIG_MODULES 96void trace_destroy_fields(struct ftrace_event_call *call)
97
98static void trace_destroy_fields(struct ftrace_event_call *call)
99{ 97{
100 struct ftrace_event_field *field, *next; 98 struct ftrace_event_field *field, *next;
101 99
@@ -107,8 +105,6 @@ static void trace_destroy_fields(struct ftrace_event_call *call)
107 } 105 }
108} 106}
109 107
110#endif /* CONFIG_MODULES */
111
112static void ftrace_event_enable_disable(struct ftrace_event_call *call, 108static void ftrace_event_enable_disable(struct ftrace_event_call *call,
113 int enable) 109 int enable)
114{ 110{
@@ -117,14 +113,14 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call,
117 if (call->enabled) { 113 if (call->enabled) {
118 call->enabled = 0; 114 call->enabled = 0;
119 tracing_stop_cmdline_record(); 115 tracing_stop_cmdline_record();
120 call->unregfunc(call->data); 116 call->unregfunc(call);
121 } 117 }
122 break; 118 break;
123 case 1: 119 case 1:
124 if (!call->enabled) { 120 if (!call->enabled) {
125 call->enabled = 1; 121 call->enabled = 1;
126 tracing_start_cmdline_record(); 122 tracing_start_cmdline_record();
127 call->regfunc(call->data); 123 call->regfunc(call);
128 } 124 }
129 break; 125 break;
130 } 126 }
@@ -507,7 +503,7 @@ extern char *__bad_type_size(void);
507#define FIELD(type, name) \ 503#define FIELD(type, name) \
508 sizeof(type) != sizeof(field.name) ? __bad_type_size() : \ 504 sizeof(type) != sizeof(field.name) ? __bad_type_size() : \
509 #type, "common_" #name, offsetof(typeof(field), name), \ 505 #type, "common_" #name, offsetof(typeof(field), name), \
510 sizeof(field.name) 506 sizeof(field.name), is_signed_type(type)
511 507
512static int trace_write_header(struct trace_seq *s) 508static int trace_write_header(struct trace_seq *s)
513{ 509{
@@ -515,17 +511,17 @@ static int trace_write_header(struct trace_seq *s)
515 511
516 /* struct trace_entry */ 512 /* struct trace_entry */
517 return trace_seq_printf(s, 513 return trace_seq_printf(s,
518 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 514 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
519 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 515 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
520 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 516 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
521 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 517 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
522 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 518 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
523 "\n", 519 "\n",
524 FIELD(unsigned short, type), 520 FIELD(unsigned short, type),
525 FIELD(unsigned char, flags), 521 FIELD(unsigned char, flags),
526 FIELD(unsigned char, preempt_count), 522 FIELD(unsigned char, preempt_count),
527 FIELD(int, pid), 523 FIELD(int, pid),
528 FIELD(int, lock_depth)); 524 FIELD(int, lock_depth));
529} 525}
530 526
531static ssize_t 527static ssize_t
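With is_signed_type() in the mix, each common field in an event's format file now carries a signed: attribute. Illustrative output (offsets and sizes depend on the layout of struct trace_entry):

	field:unsigned short common_type;	offset:0;	size:2;	signed:0;
	field:unsigned char common_flags;	offset:2;	size:1;	signed:0;
	field:unsigned char common_preempt_count;	offset:3;	size:1;	signed:0;
	field:int common_pid;	offset:4;	size:4;	signed:1;
	field:int common_lock_depth;	offset:8;	size:4;	signed:1;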
@@ -937,27 +933,46 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
937 return 0; 933 return 0;
938} 934}
939 935
940#define for_each_event(event, start, end) \ 936static int __trace_add_event_call(struct ftrace_event_call *call)
941 for (event = start; \ 937{
942 (unsigned long)event < (unsigned long)end; \ 938 struct dentry *d_events;
943 event++) 939 int ret;
944 940
945#ifdef CONFIG_MODULES 941 if (!call->name)
942 return -EINVAL;
946 943
947static LIST_HEAD(ftrace_module_file_list); 944 if (call->raw_init) {
945 ret = call->raw_init(call);
946 if (ret < 0) {
947 if (ret != -ENOSYS)
948 pr_warning("Could not initialize trace "
949 "events/%s\n", call->name);
950 return ret;
951 }
952 }
948 953
949/* 954 d_events = event_trace_events_dir();
950 * Modules must own their file_operations to keep up with 955 if (!d_events)
951 * reference counting. 956 return -ENOENT;
952 */ 957
953struct ftrace_module_file_ops { 958 ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
954 struct list_head list; 959 &ftrace_enable_fops, &ftrace_event_filter_fops,
955 struct module *mod; 960 &ftrace_event_format_fops);
956 struct file_operations id; 961 if (!ret)
957 struct file_operations enable; 962 list_add(&call->list, &ftrace_events);
958 struct file_operations format; 963
959 struct file_operations filter; 964 return ret;
960}; 965}
966
967/* Add an additional event_call dynamically */
968int trace_add_event_call(struct ftrace_event_call *call)
969{
970 int ret;
971 mutex_lock(&event_mutex);
972 ret = __trace_add_event_call(call);
973 mutex_unlock(&event_mutex);
974 return ret;
975}
961 976
962static void remove_subsystem_dir(const char *name) 977static void remove_subsystem_dir(const char *name)
963{ 978{
@@ -985,6 +1000,53 @@ static void remove_subsystem_dir(const char *name)
985 } 1000 }
986} 1001}
987 1002
1003/*
1004 * Must be called with both event_mutex and trace_event_mutex held.
1005 */
1006static void __trace_remove_event_call(struct ftrace_event_call *call)
1007{
1008 ftrace_event_enable_disable(call, 0);
1009 if (call->event)
1010 __unregister_ftrace_event(call->event);
1011 debugfs_remove_recursive(call->dir);
1012 list_del(&call->list);
1013 trace_destroy_fields(call);
1014 destroy_preds(call);
1015 remove_subsystem_dir(call->system);
1016}
1017
1018/* Remove an event_call */
1019void trace_remove_event_call(struct ftrace_event_call *call)
1020{
1021 mutex_lock(&event_mutex);
1022 down_write(&trace_event_mutex);
1023 __trace_remove_event_call(call);
1024 up_write(&trace_event_mutex);
1025 mutex_unlock(&event_mutex);
1026}
1027
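Together, trace_add_event_call() and trace_remove_event_call() give dynamically created events (such as the kprobe events added later in this series) a supported entry point. A hypothetical provider might look like the sketch below; every my_* name is illustrative, not part of the patch:

	/* Handler signatures follow struct ftrace_event_call, which now
	 * passes the call itself around instead of call->data. */
	static int my_event_init(struct ftrace_event_call *call)
	{
		INIT_LIST_HEAD(&call->fields);	/* as ftrace_raw_init_event does */
		return 0;
	}

	static struct ftrace_event_call my_event_call = {
		.name     = "my_dynamic_event",
		.system   = "my_system",
		.raw_init = my_event_init,
	};

	static int my_provider_register(void)
	{
		/* takes event_mutex, runs raw_init, creates the event directory */
		return trace_add_event_call(&my_event_call);
	}

	static void my_provider_unregister(void)
	{
		/* disables the event and tears everything down again */
		trace_remove_event_call(&my_event_call);
	}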
1028#define for_each_event(event, start, end) \
1029 for (event = start; \
1030 (unsigned long)event < (unsigned long)end; \
1031 event++)
1032
1033#ifdef CONFIG_MODULES
1034
1035static LIST_HEAD(ftrace_module_file_list);
1036
1037/*
1038 * Modules must own their file_operations to keep up with
1039 * reference counting.
1040 */
1041struct ftrace_module_file_ops {
1042 struct list_head list;
1043 struct module *mod;
1044 struct file_operations id;
1045 struct file_operations enable;
1046 struct file_operations format;
1047 struct file_operations filter;
1048};
1049
988static struct ftrace_module_file_ops * 1050static struct ftrace_module_file_ops *
989trace_create_file_ops(struct module *mod) 1051trace_create_file_ops(struct module *mod)
990{ 1052{
@@ -1042,7 +1104,7 @@ static void trace_module_add_events(struct module *mod)
1042 if (!call->name) 1104 if (!call->name)
1043 continue; 1105 continue;
1044 if (call->raw_init) { 1106 if (call->raw_init) {
1045 ret = call->raw_init(); 1107 ret = call->raw_init(call);
1046 if (ret < 0) { 1108 if (ret < 0) {
1047 if (ret != -ENOSYS) 1109 if (ret != -ENOSYS)
1048 pr_warning("Could not initialize trace " 1110 pr_warning("Could not initialize trace "
@@ -1060,10 +1122,11 @@ static void trace_module_add_events(struct module *mod)
1060 return; 1122 return;
1061 } 1123 }
1062 call->mod = mod; 1124 call->mod = mod;
1063 list_add(&call->list, &ftrace_events); 1125 ret = event_create_dir(call, d_events,
1064 event_create_dir(call, d_events, 1126 &file_ops->id, &file_ops->enable,
1065 &file_ops->id, &file_ops->enable, 1127 &file_ops->filter, &file_ops->format);
1066 &file_ops->filter, &file_ops->format); 1128 if (!ret)
1129 list_add(&call->list, &ftrace_events);
1067 } 1130 }
1068} 1131}
1069 1132
@@ -1077,14 +1140,7 @@ static void trace_module_remove_events(struct module *mod)
1077 list_for_each_entry_safe(call, p, &ftrace_events, list) { 1140 list_for_each_entry_safe(call, p, &ftrace_events, list) {
1078 if (call->mod == mod) { 1141 if (call->mod == mod) {
1079 found = true; 1142 found = true;
1080 ftrace_event_enable_disable(call, 0); 1143 __trace_remove_event_call(call);
1081 if (call->event)
1082 __unregister_ftrace_event(call->event);
1083 debugfs_remove_recursive(call->dir);
1084 list_del(&call->list);
1085 trace_destroy_fields(call);
1086 destroy_preds(call);
1087 remove_subsystem_dir(call->system);
1088 } 1144 }
1089 } 1145 }
1090 1146
@@ -1202,7 +1258,7 @@ static __init int event_trace_init(void)
1202 if (!call->name) 1258 if (!call->name)
1203 continue; 1259 continue;
1204 if (call->raw_init) { 1260 if (call->raw_init) {
1205 ret = call->raw_init(); 1261 ret = call->raw_init(call);
1206 if (ret < 0) { 1262 if (ret < 0) {
1207 if (ret != -ENOSYS) 1263 if (ret != -ENOSYS)
1208 pr_warning("Could not initialize trace " 1264 pr_warning("Could not initialize trace "
@@ -1210,10 +1266,12 @@ static __init int event_trace_init(void)
1210 continue; 1266 continue;
1211 } 1267 }
1212 } 1268 }
1213 list_add(&call->list, &ftrace_events); 1269 ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
1214 event_create_dir(call, d_events, &ftrace_event_id_fops, 1270 &ftrace_enable_fops,
1215 &ftrace_enable_fops, &ftrace_event_filter_fops, 1271 &ftrace_event_filter_fops,
1216 &ftrace_event_format_fops); 1272 &ftrace_event_format_fops);
1273 if (!ret)
1274 list_add(&call->list, &ftrace_events);
1217 } 1275 }
1218 1276
1219 while (true) { 1277 while (true) {
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 92672016da28..50504cb228de 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -21,6 +21,7 @@
21#include <linux/module.h> 21#include <linux/module.h>
22#include <linux/ctype.h> 22#include <linux/ctype.h>
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/perf_event.h>
24 25
25#include "trace.h" 26#include "trace.h"
26#include "trace_output.h" 27#include "trace_output.h"
@@ -29,6 +30,7 @@ enum filter_op_ids
29{ 30{
30 OP_OR, 31 OP_OR,
31 OP_AND, 32 OP_AND,
33 OP_GLOB,
32 OP_NE, 34 OP_NE,
33 OP_EQ, 35 OP_EQ,
34 OP_LT, 36 OP_LT,
@@ -46,16 +48,17 @@ struct filter_op {
46}; 48};
47 49
48static struct filter_op filter_ops[] = { 50static struct filter_op filter_ops[] = {
49 { OP_OR, "||", 1 }, 51 { OP_OR, "||", 1 },
50 { OP_AND, "&&", 2 }, 52 { OP_AND, "&&", 2 },
51 { OP_NE, "!=", 4 }, 53 { OP_GLOB, "~", 4 },
52 { OP_EQ, "==", 4 }, 54 { OP_NE, "!=", 4 },
53 { OP_LT, "<", 5 }, 55 { OP_EQ, "==", 4 },
54 { OP_LE, "<=", 5 }, 56 { OP_LT, "<", 5 },
55 { OP_GT, ">", 5 }, 57 { OP_LE, "<=", 5 },
56 { OP_GE, ">=", 5 }, 58 { OP_GT, ">", 5 },
57 { OP_NONE, "OP_NONE", 0 }, 59 { OP_GE, ">=", 5 },
58 { OP_OPEN_PAREN, "(", 0 }, 60 { OP_NONE, "OP_NONE", 0 },
61 { OP_OPEN_PAREN, "(", 0 },
59}; 62};
60 63
61enum { 64enum {
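The new OP_GLOB entry gives filter expressions a glob-style string match. A minimal user-space sketch of exercising it, assuming debugfs is mounted at /sys/kernel/debug and an event with a string field (irq:irq_handler_entry's name field is used purely for illustration):

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/sys/kernel/debug/tracing/events/irq/"
				"irq_handler_entry/filter", "w");

		if (!f) {
			perror("fopen");
			return 1;
		}
		/* '~' is the new OP_GLOB operator; it is only legal on
		 * string fields (see is_legal_op() below). */
		fprintf(f, "name ~ \"*timer*\"\n");
		return fclose(f) ? 1 : 0;
	}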
@@ -329,22 +332,18 @@ enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not)
329 return type; 332 return type;
330} 333}
331 334
332static int filter_build_regex(struct filter_pred *pred) 335static void filter_build_regex(struct filter_pred *pred)
333{ 336{
334 struct regex *r = &pred->regex; 337 struct regex *r = &pred->regex;
335 char *search, *dup; 338 char *search;
336 enum regex_type type; 339 enum regex_type type = MATCH_FULL;
337 int not; 340 int not = 0;
338 341
339 type = filter_parse_regex(r->pattern, r->len, &search, &not); 342 if (pred->op == OP_GLOB) {
340 dup = kstrdup(search, GFP_KERNEL); 343 type = filter_parse_regex(r->pattern, r->len, &search, &not);
341 if (!dup) 344 r->len = strlen(search);
342 return -ENOMEM; 345 memmove(r->pattern, search, r->len+1);
343 346 }
344 strcpy(r->pattern, dup);
345 kfree(dup);
346
347 r->len = strlen(r->pattern);
348 347
349 switch (type) { 348 switch (type) {
350 case MATCH_FULL: 349 case MATCH_FULL:
@@ -362,14 +361,11 @@ static int filter_build_regex(struct filter_pred *pred)
362 } 361 }
363 362
364 pred->not ^= not; 363 pred->not ^= not;
365
366 return 0;
367} 364}
368 365
369/* return 1 if event matches, 0 otherwise (discard) */ 366/* return 1 if event matches, 0 otherwise (discard) */
370int filter_match_preds(struct ftrace_event_call *call, void *rec) 367int filter_match_preds(struct event_filter *filter, void *rec)
371{ 368{
372 struct event_filter *filter = call->filter;
373 int match, top = 0, val1 = 0, val2 = 0; 369 int match, top = 0, val1 = 0, val2 = 0;
374 int stack[MAX_FILTER_PRED]; 370 int stack[MAX_FILTER_PRED];
375 struct filter_pred *pred; 371 struct filter_pred *pred;
@@ -542,9 +538,8 @@ static void filter_disable_preds(struct ftrace_event_call *call)
542 filter->preds[i]->fn = filter_pred_none; 538 filter->preds[i]->fn = filter_pred_none;
543} 539}
544 540
545void destroy_preds(struct ftrace_event_call *call) 541static void __free_preds(struct event_filter *filter)
546{ 542{
547 struct event_filter *filter = call->filter;
548 int i; 543 int i;
549 544
550 if (!filter) 545 if (!filter)
@@ -557,21 +552,24 @@ void destroy_preds(struct ftrace_event_call *call)
557 kfree(filter->preds); 552 kfree(filter->preds);
558 kfree(filter->filter_string); 553 kfree(filter->filter_string);
559 kfree(filter); 554 kfree(filter);
555}
556
557void destroy_preds(struct ftrace_event_call *call)
558{
559 __free_preds(call->filter);
560 call->filter = NULL; 560 call->filter = NULL;
561 call->filter_active = 0;
561} 562}
562 563
563static int init_preds(struct ftrace_event_call *call) 564static struct event_filter *__alloc_preds(void)
564{ 565{
565 struct event_filter *filter; 566 struct event_filter *filter;
566 struct filter_pred *pred; 567 struct filter_pred *pred;
567 int i; 568 int i;
568 569
569 if (call->filter) 570 filter = kzalloc(sizeof(*filter), GFP_KERNEL);
570 return 0; 571 if (!filter)
571 572 return ERR_PTR(-ENOMEM);
572 filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
573 if (!call->filter)
574 return -ENOMEM;
575 573
576 filter->n_preds = 0; 574 filter->n_preds = 0;
577 575
@@ -587,12 +585,24 @@ static int init_preds(struct ftrace_event_call *call)
587 filter->preds[i] = pred; 585 filter->preds[i] = pred;
588 } 586 }
589 587
590 return 0; 588 return filter;
591 589
592oom: 590oom:
593 destroy_preds(call); 591 __free_preds(filter);
592 return ERR_PTR(-ENOMEM);
593}
594
595static int init_preds(struct ftrace_event_call *call)
596{
597 if (call->filter)
598 return 0;
594 599
595 return -ENOMEM; 600 call->filter_active = 0;
601 call->filter = __alloc_preds();
602 if (IS_ERR(call->filter))
603 return PTR_ERR(call->filter);
604
605 return 0;
596} 606}
597 607
598static int init_subsystem_preds(struct event_subsystem *system) 608static int init_subsystem_preds(struct event_subsystem *system)
@@ -615,14 +625,7 @@ static int init_subsystem_preds(struct event_subsystem *system)
615 return 0; 625 return 0;
616} 626}
617 627
618enum { 628static void filter_free_subsystem_preds(struct event_subsystem *system)
619 FILTER_DISABLE_ALL,
620 FILTER_INIT_NO_RESET,
621 FILTER_SKIP_NO_RESET,
622};
623
624static void filter_free_subsystem_preds(struct event_subsystem *system,
625 int flag)
626{ 629{
627 struct ftrace_event_call *call; 630 struct ftrace_event_call *call;
628 631
@@ -633,14 +636,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system,
633 if (strcmp(call->system, system->name) != 0) 636 if (strcmp(call->system, system->name) != 0)
634 continue; 637 continue;
635 638
636 if (flag == FILTER_INIT_NO_RESET) {
637 call->filter->no_reset = false;
638 continue;
639 }
640
641 if (flag == FILTER_SKIP_NO_RESET && call->filter->no_reset)
642 continue;
643
644 filter_disable_preds(call); 639 filter_disable_preds(call);
645 remove_filter_string(call->filter); 640 remove_filter_string(call->filter);
646 } 641 }
@@ -648,10 +643,10 @@ static void filter_free_subsystem_preds(struct event_subsystem *system,
648 643
649static int filter_add_pred_fn(struct filter_parse_state *ps, 644static int filter_add_pred_fn(struct filter_parse_state *ps,
650 struct ftrace_event_call *call, 645 struct ftrace_event_call *call,
646 struct event_filter *filter,
651 struct filter_pred *pred, 647 struct filter_pred *pred,
652 filter_pred_fn_t fn) 648 filter_pred_fn_t fn)
653{ 649{
654 struct event_filter *filter = call->filter;
655 int idx, err; 650 int idx, err;
656 651
657 if (filter->n_preds == MAX_FILTER_PRED) { 652 if (filter->n_preds == MAX_FILTER_PRED) {
@@ -666,7 +661,6 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
666 return err; 661 return err;
667 662
668 filter->n_preds++; 663 filter->n_preds++;
669 call->filter_active = 1;
670 664
671 return 0; 665 return 0;
672} 666}
@@ -691,7 +685,10 @@ static bool is_string_field(struct ftrace_event_field *field)
691 685
692static int is_legal_op(struct ftrace_event_field *field, int op) 686static int is_legal_op(struct ftrace_event_field *field, int op)
693{ 687{
694 if (is_string_field(field) && (op != OP_EQ && op != OP_NE)) 688 if (is_string_field(field) &&
689 (op != OP_EQ && op != OP_NE && op != OP_GLOB))
690 return 0;
691 if (!is_string_field(field) && op == OP_GLOB)
695 return 0; 692 return 0;
696 693
697 return 1; 694 return 1;
@@ -742,6 +739,7 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size,
742 739
743static int filter_add_pred(struct filter_parse_state *ps, 740static int filter_add_pred(struct filter_parse_state *ps,
744 struct ftrace_event_call *call, 741 struct ftrace_event_call *call,
742 struct event_filter *filter,
745 struct filter_pred *pred, 743 struct filter_pred *pred,
746 bool dry_run) 744 bool dry_run)
747{ 745{
@@ -776,15 +774,13 @@ static int filter_add_pred(struct filter_parse_state *ps,
776 } 774 }
777 775
778 if (is_string_field(field)) { 776 if (is_string_field(field)) {
779 ret = filter_build_regex(pred); 777 filter_build_regex(pred);
780 if (ret)
781 return ret;
782 778
783 if (field->filter_type == FILTER_STATIC_STRING) { 779 if (field->filter_type == FILTER_STATIC_STRING) {
784 fn = filter_pred_string; 780 fn = filter_pred_string;
785 pred->regex.field_len = field->size; 781 pred->regex.field_len = field->size;
786 } else if (field->filter_type == FILTER_DYN_STRING) 782 } else if (field->filter_type == FILTER_DYN_STRING)
787 fn = filter_pred_strloc; 783 fn = filter_pred_strloc;
788 else { 784 else {
789 fn = filter_pred_pchar; 785 fn = filter_pred_pchar;
790 pred->regex.field_len = strlen(pred->regex.pattern); 786 pred->regex.field_len = strlen(pred->regex.pattern);
@@ -813,45 +809,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
813 809
814add_pred_fn: 810add_pred_fn:
815 if (!dry_run) 811 if (!dry_run)
816 return filter_add_pred_fn(ps, call, pred, fn); 812 return filter_add_pred_fn(ps, call, filter, pred, fn);
817 return 0;
818}
819
820static int filter_add_subsystem_pred(struct filter_parse_state *ps,
821 struct event_subsystem *system,
822 struct filter_pred *pred,
823 char *filter_string,
824 bool dry_run)
825{
826 struct ftrace_event_call *call;
827 int err = 0;
828 bool fail = true;
829
830 list_for_each_entry(call, &ftrace_events, list) {
831
832 if (!call->define_fields)
833 continue;
834
835 if (strcmp(call->system, system->name))
836 continue;
837
838 if (call->filter->no_reset)
839 continue;
840
841 err = filter_add_pred(ps, call, pred, dry_run);
842 if (err)
843 call->filter->no_reset = true;
844 else
845 fail = false;
846
847 if (!dry_run)
848 replace_filter_string(call->filter, filter_string);
849 }
850
851 if (fail) {
852 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
853 return err;
854 }
855 return 0; 813 return 0;
856} 814}
857 815
@@ -1209,8 +1167,8 @@ static int check_preds(struct filter_parse_state *ps)
1209 return 0; 1167 return 0;
1210} 1168}
1211 1169
1212static int replace_preds(struct event_subsystem *system, 1170static int replace_preds(struct ftrace_event_call *call,
1213 struct ftrace_event_call *call, 1171 struct event_filter *filter,
1214 struct filter_parse_state *ps, 1172 struct filter_parse_state *ps,
1215 char *filter_string, 1173 char *filter_string,
1216 bool dry_run) 1174 bool dry_run)
@@ -1257,11 +1215,7 @@ static int replace_preds(struct event_subsystem *system,
1257add_pred: 1215add_pred:
1258 if (!pred) 1216 if (!pred)
1259 return -ENOMEM; 1217 return -ENOMEM;
1260 if (call) 1218 err = filter_add_pred(ps, call, filter, pred, dry_run);
1261 err = filter_add_pred(ps, call, pred, false);
1262 else
1263 err = filter_add_subsystem_pred(ps, system, pred,
1264 filter_string, dry_run);
1265 filter_free_pred(pred); 1219 filter_free_pred(pred);
1266 if (err) 1220 if (err)
1267 return err; 1221 return err;
@@ -1272,10 +1226,50 @@ add_pred:
1272 return 0; 1226 return 0;
1273} 1227}
1274 1228
1275int apply_event_filter(struct ftrace_event_call *call, char *filter_string) 1229static int replace_system_preds(struct event_subsystem *system,
1230 struct filter_parse_state *ps,
1231 char *filter_string)
1276{ 1232{
1233 struct ftrace_event_call *call;
1234 bool fail = true;
1277 int err; 1235 int err;
1278 1236
1237 list_for_each_entry(call, &ftrace_events, list) {
1238 struct event_filter *filter = call->filter;
1239
1240 if (!call->define_fields)
1241 continue;
1242
1243 if (strcmp(call->system, system->name) != 0)
1244 continue;
1245
1246 /* try to see if the filter can be applied */
1247 err = replace_preds(call, filter, ps, filter_string, true);
1248 if (err)
1249 continue;
1250
1251 /* really apply the filter */
1252 filter_disable_preds(call);
1253 err = replace_preds(call, filter, ps, filter_string, false);
1254 if (err)
1255 filter_disable_preds(call);
1256 else {
1257 call->filter_active = 1;
1258 replace_filter_string(filter, filter_string);
1259 }
1260 fail = false;
1261 }
1262
1263 if (fail) {
1264 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
1265 return -EINVAL;
1266 }
1267 return 0;
1268}
1269
1270int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1271{
1272 int err;
1279 struct filter_parse_state *ps; 1273 struct filter_parse_state *ps;
1280 1274
1281 mutex_lock(&event_mutex); 1275 mutex_lock(&event_mutex);
@@ -1287,8 +1281,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1287 if (!strcmp(strstrip(filter_string), "0")) { 1281 if (!strcmp(strstrip(filter_string), "0")) {
1288 filter_disable_preds(call); 1282 filter_disable_preds(call);
1289 remove_filter_string(call->filter); 1283 remove_filter_string(call->filter);
1290 mutex_unlock(&event_mutex); 1284 goto out_unlock;
1291 return 0;
1292 } 1285 }
1293 1286
1294 err = -ENOMEM; 1287 err = -ENOMEM;
@@ -1306,10 +1299,11 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1306 goto out; 1299 goto out;
1307 } 1300 }
1308 1301
1309 err = replace_preds(NULL, call, ps, filter_string, false); 1302 err = replace_preds(call, call->filter, ps, filter_string, false);
1310 if (err) 1303 if (err)
1311 append_filter_err(ps, call->filter); 1304 append_filter_err(ps, call->filter);
1312 1305 else
1306 call->filter_active = 1;
1313out: 1307out:
1314 filter_opstack_clear(ps); 1308 filter_opstack_clear(ps);
1315 postfix_clear(ps); 1309 postfix_clear(ps);
@@ -1324,7 +1318,6 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1324 char *filter_string) 1318 char *filter_string)
1325{ 1319{
1326 int err; 1320 int err;
1327
1328 struct filter_parse_state *ps; 1321 struct filter_parse_state *ps;
1329 1322
1330 mutex_lock(&event_mutex); 1323 mutex_lock(&event_mutex);
@@ -1334,10 +1327,9 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1334 goto out_unlock; 1327 goto out_unlock;
1335 1328
1336 if (!strcmp(strstrip(filter_string), "0")) { 1329 if (!strcmp(strstrip(filter_string), "0")) {
1337 filter_free_subsystem_preds(system, FILTER_DISABLE_ALL); 1330 filter_free_subsystem_preds(system);
1338 remove_filter_string(system->filter); 1331 remove_filter_string(system->filter);
1339 mutex_unlock(&event_mutex); 1332 goto out_unlock;
1340 return 0;
1341 } 1333 }
1342 1334
1343 err = -ENOMEM; 1335 err = -ENOMEM;
@@ -1354,31 +1346,87 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1354 goto out; 1346 goto out;
1355 } 1347 }
1356 1348
1357 filter_free_subsystem_preds(system, FILTER_INIT_NO_RESET); 1349 err = replace_system_preds(system, ps, filter_string);
1358 1350 if (err)
1359 /* try to see the filter can be applied to which events */
1360 err = replace_preds(system, NULL, ps, filter_string, true);
1361 if (err) {
1362 append_filter_err(ps, system->filter); 1351 append_filter_err(ps, system->filter);
1363 goto out; 1352
1353out:
1354 filter_opstack_clear(ps);
1355 postfix_clear(ps);
1356 kfree(ps);
1357out_unlock:
1358 mutex_unlock(&event_mutex);
1359
1360 return err;
1361}
1362
1363#ifdef CONFIG_EVENT_PROFILE
1364
1365void ftrace_profile_free_filter(struct perf_event *event)
1366{
1367 struct event_filter *filter = event->filter;
1368
1369 event->filter = NULL;
1370 __free_preds(filter);
1371}
1372
1373int ftrace_profile_set_filter(struct perf_event *event, int event_id,
1374 char *filter_str)
1375{
1376 int err;
1377 struct event_filter *filter;
1378 struct filter_parse_state *ps;
1379 struct ftrace_event_call *call = NULL;
1380
1381 mutex_lock(&event_mutex);
1382
1383 list_for_each_entry(call, &ftrace_events, list) {
1384 if (call->id == event_id)
1385 break;
1364 } 1386 }
1365 1387
1366 filter_free_subsystem_preds(system, FILTER_SKIP_NO_RESET); 1388 err = -EINVAL;
1389 if (!call)
1390 goto out_unlock;
1367 1391
1368 /* really apply the filter to the events */ 1392 err = -EEXIST;
1369 err = replace_preds(system, NULL, ps, filter_string, false); 1393 if (event->filter)
1370 if (err) { 1394 goto out_unlock;
1371 append_filter_err(ps, system->filter); 1395
1372 filter_free_subsystem_preds(system, 2); 1396 filter = __alloc_preds();
1397 if (IS_ERR(filter)) {
1398 err = PTR_ERR(filter);
1399 goto out_unlock;
1373 } 1400 }
1374 1401
1375out: 1402 err = -ENOMEM;
1403 ps = kzalloc(sizeof(*ps), GFP_KERNEL);
1404 if (!ps)
1405 goto free_preds;
1406
1407 parse_init(ps, filter_ops, filter_str);
1408 err = filter_parse(ps);
1409 if (err)
1410 goto free_ps;
1411
1412 err = replace_preds(call, filter, ps, filter_str, false);
1413 if (!err)
1414 event->filter = filter;
1415
1416free_ps:
1376 filter_opstack_clear(ps); 1417 filter_opstack_clear(ps);
1377 postfix_clear(ps); 1418 postfix_clear(ps);
1378 kfree(ps); 1419 kfree(ps);
1420
1421free_preds:
1422 if (err)
1423 __free_preds(filter);
1424
1379out_unlock: 1425out_unlock:
1380 mutex_unlock(&event_mutex); 1426 mutex_unlock(&event_mutex);
1381 1427
1382 return err; 1428 return err;
1383} 1429}
1384 1430
1431#endif /* CONFIG_EVENT_PROFILE */
1432
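ftrace_profile_set_filter() is the kernel half; user space reaches it through a perf event ioctl introduced elsewhere in this series. A hedged sketch, assuming the PERF_EVENT_IOC_SET_FILTER ioctl and a perf fd opened on a tracepoint event:

	#include <sys/ioctl.h>
	#include <linux/perf_event.h>

	/* fd: perf event file descriptor for a PERF_TYPE_TRACEPOINT event */
	static int set_tracepoint_filter(int fd, const char *filter)
	{
		return ioctl(fd, PERF_EVENT_IOC_SET_FILTER, filter);
	}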
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index c74848ddb85a..dff8c84ddf17 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -66,44 +66,47 @@ static void __always_unused ____ftrace_check_##name(void) \
66#undef __field 66#undef __field
67#define __field(type, item) \ 67#define __field(type, item) \
68 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ 68 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
69 "offset:%zu;\tsize:%zu;\n", \ 69 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
70 offsetof(typeof(field), item), \ 70 offsetof(typeof(field), item), \
71 sizeof(field.item)); \ 71 sizeof(field.item), is_signed_type(type)); \
72 if (!ret) \ 72 if (!ret) \
73 return 0; 73 return 0;
74 74
75#undef __field_desc 75#undef __field_desc
76#define __field_desc(type, container, item) \ 76#define __field_desc(type, container, item) \
77 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ 77 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
78 "offset:%zu;\tsize:%zu;\n", \ 78 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
79 offsetof(typeof(field), container.item), \ 79 offsetof(typeof(field), container.item), \
80 sizeof(field.container.item)); \ 80 sizeof(field.container.item), \
81 is_signed_type(type)); \
81 if (!ret) \ 82 if (!ret) \
82 return 0; 83 return 0;
83 84
84#undef __array 85#undef __array
85#define __array(type, item, len) \ 86#define __array(type, item, len) \
86 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ 87 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
87 "offset:%zu;\tsize:%zu;\n", \ 88 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
88 offsetof(typeof(field), item), \ 89 offsetof(typeof(field), item), \
89 sizeof(field.item)); \ 90 sizeof(field.item), is_signed_type(type)); \
90 if (!ret) \ 91 if (!ret) \
91 return 0; 92 return 0;
92 93
93#undef __array_desc 94#undef __array_desc
94#define __array_desc(type, container, item, len) \ 95#define __array_desc(type, container, item, len) \
95 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ 96 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
96 "offset:%zu;\tsize:%zu;\n", \ 97 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
97 offsetof(typeof(field), container.item), \ 98 offsetof(typeof(field), container.item), \
98 sizeof(field.container.item)); \ 99 sizeof(field.container.item), \
100 is_signed_type(type)); \
99 if (!ret) \ 101 if (!ret) \
100 return 0; 102 return 0;
101 103
102#undef __dynamic_array 104#undef __dynamic_array
103#define __dynamic_array(type, item) \ 105#define __dynamic_array(type, item) \
104 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ 106 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
105 "offset:%zu;\tsize:0;\n", \ 107 "offset:%zu;\tsize:0;\tsigned:%u;\n", \
106 offsetof(typeof(field), item)); \ 108 offsetof(typeof(field), item), \
109 is_signed_type(type)); \
107 if (!ret) \ 110 if (!ret) \
108 return 0; 111 return 0;
109 112
@@ -131,7 +134,6 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
131 134
132#include "trace_entries.h" 135#include "trace_entries.h"
133 136
134
135#undef __field 137#undef __field
136#define __field(type, item) \ 138#define __field(type, item) \
137 ret = trace_define_field(event_call, #type, #item, \ 139 ret = trace_define_field(event_call, #type, #item, \
@@ -193,6 +195,11 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
193 195
194#include "trace_entries.h" 196#include "trace_entries.h"
195 197
198static int ftrace_raw_init_event(struct ftrace_event_call *call)
199{
200 INIT_LIST_HEAD(&call->fields);
201 return 0;
202}
196 203
197#undef __field 204#undef __field
198#define __field(type, item) 205#define __field(type, item)
@@ -211,7 +218,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
211 218
212#undef FTRACE_ENTRY 219#undef FTRACE_ENTRY
213#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ 220#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \
214static int ftrace_raw_init_event_##call(void); \
215 \ 221 \
216struct ftrace_event_call __used \ 222struct ftrace_event_call __used \
217__attribute__((__aligned__(4))) \ 223__attribute__((__aligned__(4))) \
@@ -219,14 +225,9 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
219 .name = #call, \ 225 .name = #call, \
220 .id = type, \ 226 .id = type, \
221 .system = __stringify(TRACE_SYSTEM), \ 227 .system = __stringify(TRACE_SYSTEM), \
222 .raw_init = ftrace_raw_init_event_##call, \ 228 .raw_init = ftrace_raw_init_event, \
223 .show_format = ftrace_format_##call, \ 229 .show_format = ftrace_format_##call, \
224 .define_fields = ftrace_define_fields_##call, \ 230 .define_fields = ftrace_define_fields_##call, \
225}; \ 231}; \
226static int ftrace_raw_init_event_##call(void) \
227{ \
228 INIT_LIST_HEAD(&event_##call.fields); \
229 return 0; \
230} \
231 232
232#include "trace_entries.h" 233#include "trace_entries.h"
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
new file mode 100644
index 000000000000..aff5f80b59b8
--- /dev/null
+++ b/kernel/trace/trace_kprobe.c
@@ -0,0 +1,1523 @@
1/*
2 * Kprobes-based tracing events
3 *
4 * Created by Masami Hiramatsu <mhiramat@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/module.h>
21#include <linux/uaccess.h>
22#include <linux/kprobes.h>
23#include <linux/seq_file.h>
24#include <linux/slab.h>
25#include <linux/smp.h>
26#include <linux/debugfs.h>
27#include <linux/types.h>
28#include <linux/string.h>
29#include <linux/ctype.h>
30#include <linux/ptrace.h>
31#include <linux/perf_event.h>
32
33#include "trace.h"
34#include "trace_output.h"
35
36#define MAX_TRACE_ARGS 128
37#define MAX_ARGSTR_LEN 63
38#define MAX_EVENT_NAME_LEN 64
39#define KPROBE_EVENT_SYSTEM "kprobes"
40
41/* Reserved field names */
42#define FIELD_STRING_IP "__probe_ip"
43#define FIELD_STRING_NARGS "__probe_nargs"
44#define FIELD_STRING_RETIP "__probe_ret_ip"
45#define FIELD_STRING_FUNC "__probe_func"
46
47const char *reserved_field_names[] = {
48 "common_type",
49 "common_flags",
50 "common_preempt_count",
51 "common_pid",
52 "common_tgid",
53 "common_lock_depth",
54 FIELD_STRING_IP,
55 FIELD_STRING_NARGS,
56 FIELD_STRING_RETIP,
57 FIELD_STRING_FUNC,
58};
59
60struct fetch_func {
61 unsigned long (*func)(struct pt_regs *, void *);
62 void *data;
63};
64
65static __kprobes unsigned long call_fetch(struct fetch_func *f,
66 struct pt_regs *regs)
67{
68 return f->func(regs, f->data);
69}
70
71/* fetch handlers */
72static __kprobes unsigned long fetch_register(struct pt_regs *regs,
73 void *offset)
74{
75 return regs_get_register(regs, (unsigned int)((unsigned long)offset));
76}
77
78static __kprobes unsigned long fetch_stack(struct pt_regs *regs,
79 void *num)
80{
81 return regs_get_kernel_stack_nth(regs,
82 (unsigned int)((unsigned long)num));
83}
84
85static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
86{
87 unsigned long retval;
88
89 if (probe_kernel_address(addr, retval))
90 return 0;
91 return retval;
92}
93
94static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
95{
96 return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num));
97}
98
99static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
100 void *dummy)
101{
102 return regs_return_value(regs);
103}
104
105static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs,
106 void *dummy)
107{
108 return kernel_stack_pointer(regs);
109}
110
111/* Memory fetching by symbol */
112struct symbol_cache {
113 char *symbol;
114 long offset;
115 unsigned long addr;
116};
117
118static unsigned long update_symbol_cache(struct symbol_cache *sc)
119{
120 sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
121 if (sc->addr)
122 sc->addr += sc->offset;
123 return sc->addr;
124}
125
126static void free_symbol_cache(struct symbol_cache *sc)
127{
128 kfree(sc->symbol);
129 kfree(sc);
130}
131
132static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
133{
134 struct symbol_cache *sc;
135
136 if (!sym || strlen(sym) == 0)
137 return NULL;
138 sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
139 if (!sc)
140 return NULL;
141
142 sc->symbol = kstrdup(sym, GFP_KERNEL);
143 if (!sc->symbol) {
144 kfree(sc);
145 return NULL;
146 }
147 sc->offset = offset;
148
149 update_symbol_cache(sc);
150 return sc;
151}
152
153static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data)
154{
155 struct symbol_cache *sc = data;
156
157 if (sc->addr)
158 return fetch_memory(regs, (void *)sc->addr);
159 else
160 return 0;
161}
162
163/* Special indirect memory access interface */
164struct indirect_fetch_data {
165 struct fetch_func orig;
166 long offset;
167};
168
169static __kprobes unsigned long fetch_indirect(struct pt_regs *regs, void *data)
170{
171 struct indirect_fetch_data *ind = data;
172 unsigned long addr;
173
174 addr = call_fetch(&ind->orig, regs);
175 if (addr) {
176 addr += ind->offset;
177 return fetch_memory(regs, (void *)addr);
178 } else
179 return 0;
180}
181
182static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data)
183{
184 if (data->orig.func == fetch_indirect)
185 free_indirect_fetch_data(data->orig.data);
186 else if (data->orig.func == fetch_symbol)
187 free_symbol_cache(data->orig.data);
188 kfree(data);
189}
190
191/**
192 * Kprobe event core functions
193 */
194
195struct probe_arg {
196 struct fetch_func fetch;
197 const char *name;
198};
199
200/* Flags for trace_probe */
201#define TP_FLAG_TRACE 1
202#define TP_FLAG_PROFILE 2
203
204struct trace_probe {
205 struct list_head list;
206 struct kretprobe rp; /* Use rp.kp for kprobe use */
207 unsigned long nhit;
208 unsigned int flags; /* For TP_FLAG_* */
209 const char *symbol; /* symbol name */
210 struct ftrace_event_call call;
211 struct trace_event event;
212 unsigned int nr_args;
213 struct probe_arg args[];
214};
215
216#define SIZEOF_TRACE_PROBE(n) \
217 (offsetof(struct trace_probe, args) + \
218 (sizeof(struct probe_arg) * (n)))
219
220static __kprobes int probe_is_return(struct trace_probe *tp)
221{
222 return tp->rp.handler != NULL;
223}
224
225static __kprobes const char *probe_symbol(struct trace_probe *tp)
226{
227 return tp->symbol ? tp->symbol : "unknown";
228}
229
230static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
231{
232 int ret = -EINVAL;
233
234 if (ff->func == fetch_argument)
235 ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data);
236 else if (ff->func == fetch_register) {
237 const char *name;
238 name = regs_query_register_name((unsigned int)((long)ff->data));
239 ret = snprintf(buf, n, "%%%s", name);
240 } else if (ff->func == fetch_stack)
241 ret = snprintf(buf, n, "$stack%lu", (unsigned long)ff->data);
242 else if (ff->func == fetch_memory)
243 ret = snprintf(buf, n, "@0x%p", ff->data);
244 else if (ff->func == fetch_symbol) {
245 struct symbol_cache *sc = ff->data;
246 if (sc->offset)
247 ret = snprintf(buf, n, "@%s%+ld", sc->symbol,
248 sc->offset);
249 else
250 ret = snprintf(buf, n, "@%s", sc->symbol);
251 } else if (ff->func == fetch_retvalue)
252 ret = snprintf(buf, n, "$retval");
253 else if (ff->func == fetch_stack_address)
254 ret = snprintf(buf, n, "$stack");
255 else if (ff->func == fetch_indirect) {
256 struct indirect_fetch_data *id = ff->data;
257 size_t l = 0;
258 ret = snprintf(buf, n, "%+ld(", id->offset);
259 if (ret >= n)
260 goto end;
261 l += ret;
262 ret = probe_arg_string(buf + l, n - l, &id->orig);
263 if (ret < 0)
264 goto end;
265 l += ret;
266 ret = snprintf(buf + l, n - l, ")");
267 ret += l;
268 }
269end:
270 if (ret >= n)
271 return -ENOSPC;
272 return ret;
273}
274
275static int register_probe_event(struct trace_probe *tp);
276static void unregister_probe_event(struct trace_probe *tp);
277
278static DEFINE_MUTEX(probe_lock);
279static LIST_HEAD(probe_list);
280
281static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
282static int kretprobe_dispatcher(struct kretprobe_instance *ri,
283 struct pt_regs *regs);
284
285/*
286 * Allocate new trace_probe and initialize it (including kprobes).
287 */
288static struct trace_probe *alloc_trace_probe(const char *group,
289 const char *event,
290 void *addr,
291 const char *symbol,
292 unsigned long offs,
293 int nargs, int is_return)
294{
295 struct trace_probe *tp;
296
297 tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
298 if (!tp)
299 return ERR_PTR(-ENOMEM);
300
301 if (symbol) {
302 tp->symbol = kstrdup(symbol, GFP_KERNEL);
303 if (!tp->symbol)
304 goto error;
305 tp->rp.kp.symbol_name = tp->symbol;
306 tp->rp.kp.offset = offs;
307 } else
308 tp->rp.kp.addr = addr;
309
310 if (is_return)
311 tp->rp.handler = kretprobe_dispatcher;
312 else
313 tp->rp.kp.pre_handler = kprobe_dispatcher;
314
315 if (!event)
316 goto error;
317 tp->call.name = kstrdup(event, GFP_KERNEL);
318 if (!tp->call.name)
319 goto error;
320
321 if (!group)
322 goto error;
323 tp->call.system = kstrdup(group, GFP_KERNEL);
324 if (!tp->call.system)
325 goto error;
326
327 INIT_LIST_HEAD(&tp->list);
328 return tp;
329error:
330 kfree(tp->call.name);
331 kfree(tp->symbol);
332 kfree(tp);
333 return ERR_PTR(-ENOMEM);
334}
335
336static void free_probe_arg(struct probe_arg *arg)
337{
338 if (arg->fetch.func == fetch_symbol)
339 free_symbol_cache(arg->fetch.data);
340 else if (arg->fetch.func == fetch_indirect)
341 free_indirect_fetch_data(arg->fetch.data);
342 kfree(arg->name);
343}
344
345static void free_trace_probe(struct trace_probe *tp)
346{
347 int i;
348
349 for (i = 0; i < tp->nr_args; i++)
350 free_probe_arg(&tp->args[i]);
351
352 kfree(tp->call.system);
353 kfree(tp->call.name);
354 kfree(tp->symbol);
355 kfree(tp);
356}
357
358static struct trace_probe *find_probe_event(const char *event,
359 const char *group)
360{
361 struct trace_probe *tp;
362
363 list_for_each_entry(tp, &probe_list, list)
364 if (strcmp(tp->call.name, event) == 0 &&
365 strcmp(tp->call.system, group) == 0)
366 return tp;
367 return NULL;
368}
369
370/* Unregister a trace_probe and probe_event: call with locking probe_lock */
371static void unregister_trace_probe(struct trace_probe *tp)
372{
373 if (probe_is_return(tp))
374 unregister_kretprobe(&tp->rp);
375 else
376 unregister_kprobe(&tp->rp.kp);
377 list_del(&tp->list);
378 unregister_probe_event(tp);
379}
380
381/* Register a trace_probe and probe_event */
382static int register_trace_probe(struct trace_probe *tp)
383{
384 struct trace_probe *old_tp;
385 int ret;
386
387 mutex_lock(&probe_lock);
388
389 /* register as an event */
390 old_tp = find_probe_event(tp->call.name, tp->call.system);
391 if (old_tp) {
392 /* delete old event */
393 unregister_trace_probe(old_tp);
394 free_trace_probe(old_tp);
395 }
396 ret = register_probe_event(tp);
397 if (ret) {
398 pr_warning("Faild to register probe event(%d)\n", ret);
399 goto end;
400 }
401
402 tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
403 if (probe_is_return(tp))
404 ret = register_kretprobe(&tp->rp);
405 else
406 ret = register_kprobe(&tp->rp.kp);
407
408 if (ret) {
409 pr_warning("Could not insert probe(%d)\n", ret);
410 if (ret == -EILSEQ) {
411 pr_warning("Probing address(0x%p) is not an "
412 "instruction boundary.\n",
413 tp->rp.kp.addr);
414 ret = -EINVAL;
415 }
416 unregister_probe_event(tp);
417 } else
418 list_add_tail(&tp->list, &probe_list);
419end:
420 mutex_unlock(&probe_lock);
421 return ret;
422}
423
424/* Split symbol and offset. */
425static int split_symbol_offset(char *symbol, unsigned long *offset)
426{
427 char *tmp;
428 int ret;
429
430 if (!offset)
431 return -EINVAL;
432
433 tmp = strchr(symbol, '+');
434 if (tmp) {
435 /* skip sign because strict_strtoul doesn't accept '+' */
436 ret = strict_strtoul(tmp + 1, 0, offset);
437 if (ret)
438 return ret;
439 *tmp = '\0';
440 } else
441 *offset = 0;
442 return 0;
443}
444
445#define PARAM_MAX_ARGS 16
446#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
447
448static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)
449{
450 int ret = 0;
451 unsigned long param;
452
453 if (strcmp(arg, "retval") == 0) {
454 if (is_return) {
455 ff->func = fetch_retvalue;
456 ff->data = NULL;
457 } else
458 ret = -EINVAL;
459 } else if (strncmp(arg, "stack", 5) == 0) {
460 if (arg[5] == '\0') {
461 ff->func = fetch_stack_address;
462 ff->data = NULL;
463 } else if (isdigit(arg[5])) {
464 ret = strict_strtoul(arg + 5, 10, &param);
465 if (ret || param > PARAM_MAX_STACK)
466 ret = -EINVAL;
467 else {
468 ff->func = fetch_stack;
469 ff->data = (void *)param;
470 }
471 } else
472 ret = -EINVAL;
473 } else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) {
474 ret = strict_strtoul(arg + 3, 10, &param);
475 if (ret || param > PARAM_MAX_ARGS)
476 ret = -EINVAL;
477 else {
478 ff->func = fetch_argument;
479 ff->data = (void *)param;
480 }
481 } else
482 ret = -EINVAL;
483 return ret;
484}
485
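For quick reference, the special variables handled above map onto fetch handlers as follows (data is what ends up in fetch_func.data):

	$retval  ->  fetch_retvalue        (return probes only)
	$stack   ->  fetch_stack_address
	$stackN  ->  fetch_stack,    data = N  (N <= PARAM_MAX_STACK)
	$argN    ->  fetch_argument, data = N  (N <= PARAM_MAX_ARGS)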
486/* Recursive argument parser */
487static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
488{
489 int ret = 0;
490 unsigned long param;
491 long offset;
492 char *tmp;
493
494 switch (arg[0]) {
495 case '$':
496 ret = parse_probe_vars(arg + 1, ff, is_return);
497 break;
498 case '%': /* named register */
499 ret = regs_query_register_offset(arg + 1);
500 if (ret >= 0) {
501 ff->func = fetch_register;
502 ff->data = (void *)(unsigned long)ret;
503 ret = 0;
504 }
505 break;
506 case '@': /* memory or symbol */
507 if (isdigit(arg[1])) {
508 ret = strict_strtoul(arg + 1, 0, &param);
509 if (ret)
510 break;
511 ff->func = fetch_memory;
512 ff->data = (void *)param;
513 } else {
514 ret = split_symbol_offset(arg + 1, &offset);
515 if (ret)
516 break;
517 ff->data = alloc_symbol_cache(arg + 1, offset);
518 if (ff->data)
519 ff->func = fetch_symbol;
520 else
521 ret = -EINVAL;
522 }
523 break;
524 case '+': /* indirect memory */
525 case '-':
526 tmp = strchr(arg, '(');
527 if (!tmp) {
528 ret = -EINVAL;
529 break;
530 }
531 *tmp = '\0';
532 ret = strict_strtol(arg + 1, 0, &offset);
533 if (ret)
534 break;
535 if (arg[0] == '-')
536 offset = -offset;
537 arg = tmp + 1;
538 tmp = strrchr(arg, ')');
539 if (tmp) {
540 struct indirect_fetch_data *id;
541 *tmp = '\0';
542 id = kzalloc(sizeof(struct indirect_fetch_data),
543 GFP_KERNEL);
544 if (!id)
545 return -ENOMEM;
546 id->offset = offset;
547 ret = __parse_probe_arg(arg, &id->orig, is_return);
548 if (ret)
549 kfree(id);
550 else {
551 ff->func = fetch_indirect;
552 ff->data = (void *)id;
553 }
554 } else
555 ret = -EINVAL;
556 break;
557 default:
558 /* TODO: support custom handler */
559 ret = -EINVAL;
560 }
561 return ret;
562}
563
564/* String length checking wrapper */
565static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
566{
567 if (strlen(arg) > MAX_ARGSTR_LEN) {
568 pr_info("Argument is too long.: %s\n", arg);
569 return -ENOSPC;
570 }
571 return __parse_probe_arg(arg, ff, is_return);
572}
573
574/* Return 1 if name is reserved or already used by another argument */
575static int conflict_field_name(const char *name,
576 struct probe_arg *args, int narg)
577{
578 int i;
579 for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++)
580 if (strcmp(reserved_field_names[i], name) == 0)
581 return 1;
582 for (i = 0; i < narg; i++)
583 if (strcmp(args[i].name, name) == 0)
584 return 1;
585 return 0;
586}
587
588static int create_trace_probe(int argc, char **argv)
589{
590 /*
591 * Argument syntax:
592 * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
593 * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
594 * Fetch args:
595 * $argN : fetch Nth of function argument. (N:0-)
596 * $retval : fetch return value
597 * $stack : fetch stack address
598 * $stackN : fetch Nth of stack (N:0-)
599 * @ADDR : fetch memory at ADDR (ADDR should be in kernel)
600 * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
601 * %REG : fetch register REG
602 * Indirect memory fetch:
603 * +|-offs(ARG) : fetch memory at ARG +|- offs address.
604 * Alias name of args:
605 * NAME=FETCHARG : set NAME as alias of FETCHARG.
606 */
607 struct trace_probe *tp;
608 int i, ret = 0;
609 int is_return = 0;
610 char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL;
611 unsigned long offset = 0;
612 void *addr = NULL;
613 char buf[MAX_EVENT_NAME_LEN];
614
615 if (argc < 2) {
616 pr_info("Probe point is not specified.\n");
617 return -EINVAL;
618 }
619
620 if (argv[0][0] == 'p')
621 is_return = 0;
622 else if (argv[0][0] == 'r')
623 is_return = 1;
624 else {
625 pr_info("Probe definition must be started with 'p' or 'r'.\n");
626 return -EINVAL;
627 }
628
629 if (argv[0][1] == ':') {
630 event = &argv[0][2];
631 if (strchr(event, '/')) {
632 group = event;
633 event = strchr(group, '/') + 1;
634 event[-1] = '\0';
635 if (strlen(group) == 0) {
636 pr_info("Group name is not specifiled\n");
637 return -EINVAL;
638 }
639 }
640 if (strlen(event) == 0) {
641 pr_info("Event name is not specifiled\n");
642 return -EINVAL;
643 }
644 }
645
646 if (isdigit(argv[1][0])) {
647 if (is_return) {
648 pr_info("Return probe point must be a symbol.\n");
649 return -EINVAL;
650 }
651 /* an address specified */
652 ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr);
653 if (ret) {
654 pr_info("Failed to parse address.\n");
655 return ret;
656 }
657 } else {
658 /* a symbol specified */
659 symbol = argv[1];
660 /* TODO: support .init module functions */
661 ret = split_symbol_offset(symbol, &offset);
662 if (ret) {
663 pr_info("Failed to parse symbol.\n");
664 return ret;
665 }
666 if (offset && is_return) {
667 pr_info("Return probe must be used without offset.\n");
668 return -EINVAL;
669 }
670 }
671 argc -= 2; argv += 2;
672
673 /* setup a probe */
674 if (!group)
675 group = KPROBE_EVENT_SYSTEM;
676 if (!event) {
677 /* Make a new event name */
678 if (symbol)
679 snprintf(buf, MAX_EVENT_NAME_LEN, "%c@%s%+ld",
680 is_return ? 'r' : 'p', symbol, offset);
681 else
682 snprintf(buf, MAX_EVENT_NAME_LEN, "%c@0x%p",
683 is_return ? 'r' : 'p', addr);
684 event = buf;
685 }
686 tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
687 is_return);
688 if (IS_ERR(tp)) {
689 pr_info("Failed to allocate trace_probe.(%d)\n",
690 (int)PTR_ERR(tp));
691 return PTR_ERR(tp);
692 }
693
694 /* parse arguments */
695 ret = 0;
696 for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
697 /* Parse argument name */
698 arg = strchr(argv[i], '=');
699 if (arg)
700 *arg++ = '\0';
701 else
702 arg = argv[i];
703
704 if (conflict_field_name(argv[i], tp->args, i)) {
705 pr_info("Argument%d name '%s' conflicts with "
706 "another field.\n", i, argv[i]);
707 ret = -EINVAL;
708 goto error;
709 }
710
711 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
712 if (!tp->args[i].name) {
713 pr_info("Failed to allocate argument%d name '%s'.\n",
714 i, argv[i]);
715 ret = -ENOMEM;
716 goto error;
717 }
718
719 /* Parse fetch argument */
720 ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return);
721 if (ret) {
722 pr_info("Parse error at argument%d. (%d)\n", i, ret);
723 kfree(tp->args[i].name);
724 goto error;
725 }
726
727 tp->nr_args++;
728 }
729
730 ret = register_trace_probe(tp);
731 if (ret)
732 goto error;
733 return 0;
734
735error:
736 free_trace_probe(tp);
737 return ret;
738}
739
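To make the grammar above concrete, here are a few definitions the parser accepts when written to the kprobe_events file (the symbols and argument positions are illustrative, not taken from this patch):

	p:myprobe do_sys_open dfd=$arg0 filename=$arg1
	r:myretprobe do_sys_open $retval
	p:mygrp/stackprobe schedule mem=+4($stack)

The first creates kprobes/myprobe on do_sys_open with two named arguments; the second is a return probe recording the return value; the third lives in group mygrp and dereferences memory at the stack pointer plus 4.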
740static void cleanup_all_probes(void)
741{
742 struct trace_probe *tp;
743
744 mutex_lock(&probe_lock);
745 /* TODO: Use batch unregistration */
746 while (!list_empty(&probe_list)) {
747 tp = list_entry(probe_list.next, struct trace_probe, list);
748 unregister_trace_probe(tp);
749 free_trace_probe(tp);
750 }
751 mutex_unlock(&probe_lock);
752}
753
754
755/* Probes listing interfaces */
756static void *probes_seq_start(struct seq_file *m, loff_t *pos)
757{
758 mutex_lock(&probe_lock);
759 return seq_list_start(&probe_list, *pos);
760}
761
762static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
763{
764 return seq_list_next(v, &probe_list, pos);
765}
766
767static void probes_seq_stop(struct seq_file *m, void *v)
768{
769 mutex_unlock(&probe_lock);
770}
771
772static int probes_seq_show(struct seq_file *m, void *v)
773{
774 struct trace_probe *tp = v;
775 int i, ret;
776 char buf[MAX_ARGSTR_LEN + 1];
777
778 seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
779 seq_printf(m, ":%s/%s", tp->call.system, tp->call.name);
780
781 if (!tp->symbol)
782 seq_printf(m, " 0x%p", tp->rp.kp.addr);
783 else if (tp->rp.kp.offset)
784 seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
785 else
786 seq_printf(m, " %s", probe_symbol(tp));
787
788 for (i = 0; i < tp->nr_args; i++) {
789 ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch);
790 if (ret < 0) {
791 pr_warning("Argument%d decoding error(%d).\n", i, ret);
792 return ret;
793 }
794 seq_printf(m, " %s=%s", tp->args[i].name, buf);
795 }
796 seq_printf(m, "\n");
797 return 0;
798}
799
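Given the first example definition shown after create_trace_probe(), the listing this produces would read (illustratively):

	p:kprobes/myprobe do_sys_open dfd=$arg0 filename=$arg1

Because the output mirrors the input grammar, a listing can be saved and written back later to re-create the probes.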
800static const struct seq_operations probes_seq_op = {
801 .start = probes_seq_start,
802 .next = probes_seq_next,
803 .stop = probes_seq_stop,
804 .show = probes_seq_show
805};
806
807static int probes_open(struct inode *inode, struct file *file)
808{
809 if ((file->f_mode & FMODE_WRITE) &&
810 (file->f_flags & O_TRUNC))
811 cleanup_all_probes();
812
813 return seq_open(file, &probes_seq_op);
814}
815
816static int command_trace_probe(const char *buf)
817{
818 char **argv;
819 int argc = 0, ret = 0;
820
821 argv = argv_split(GFP_KERNEL, buf, &argc);
822 if (!argv)
823 return -ENOMEM;
824
825 if (argc)
826 ret = create_trace_probe(argc, argv);
827
828 argv_free(argv);
829 return ret;
830}
831
832#define WRITE_BUFSIZE 128
833
834static ssize_t probes_write(struct file *file, const char __user *buffer,
835 size_t count, loff_t *ppos)
836{
837 char *kbuf, *tmp;
838 int ret;
839 size_t done;
840 size_t size;
841
842 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
843 if (!kbuf)
844 return -ENOMEM;
845
846 ret = done = 0;
847 while (done < count) {
848 size = count - done;
849 if (size >= WRITE_BUFSIZE)
850 size = WRITE_BUFSIZE - 1;
851 if (copy_from_user(kbuf, buffer + done, size)) {
852 ret = -EFAULT;
853 goto out;
854 }
855 kbuf[size] = '\0';
856 tmp = strchr(kbuf, '\n');
857 if (tmp) {
858 *tmp = '\0';
859 size = tmp - kbuf + 1;
860 } else if (done + size < count) {
861 pr_warning("Line length is too long: "
862 "Should be less than %d.", WRITE_BUFSIZE);
863 ret = -EINVAL;
864 goto out;
865 }
866 done += size;
867 /* Remove comments */
868 tmp = strchr(kbuf, '#');
869 if (tmp)
870 *tmp = '\0';
871
872 ret = command_trace_probe(kbuf);
873 if (ret)
874 goto out;
875 }
876 ret = done;
877out:
878 kfree(kbuf);
879 return ret;
880}
881
882static const struct file_operations kprobe_events_ops = {
883 .owner = THIS_MODULE,
884 .open = probes_open,
885 .read = seq_read,
886 .llseek = seq_lseek,
887 .release = seq_release,
888 .write = probes_write,
889};
890
891/* Probes profiling interfaces */
892static int probes_profile_seq_show(struct seq_file *m, void *v)
893{
894 struct trace_probe *tp = v;
895
896 seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
897 tp->rp.kp.nmissed);
898
899 return 0;
900}
901
902static const struct seq_operations profile_seq_op = {
903 .start = probes_seq_start,
904 .next = probes_seq_next,
905 .stop = probes_seq_stop,
906 .show = probes_profile_seq_show
907};
908
909static int profile_open(struct inode *inode, struct file *file)
910{
911 return seq_open(file, &profile_seq_op);
912}
913
914static const struct file_operations kprobe_profile_ops = {
915 .owner = THIS_MODULE,
916 .open = profile_open,
917 .read = seq_read,
918 .llseek = seq_lseek,
919 .release = seq_release,
920};
921
922/* Kprobe handler */
923static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
924{
925 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
926 struct kprobe_trace_entry *entry;
927 struct ring_buffer_event *event;
928 struct ring_buffer *buffer;
929 int size, i, pc;
930 unsigned long irq_flags;
931 struct ftrace_event_call *call = &tp->call;
932
933 tp->nhit++;
934
935 local_save_flags(irq_flags);
936 pc = preempt_count();
937
938 size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
939
940 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
941 irq_flags, pc);
942 if (!event)
943 return 0;
944
945 entry = ring_buffer_event_data(event);
946 entry->nargs = tp->nr_args;
947 entry->ip = (unsigned long)kp->addr;
948 for (i = 0; i < tp->nr_args; i++)
949 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
950
951 if (!filter_current_check_discard(buffer, call, entry, event))
952 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
953 return 0;
954}
955
956/* Kretprobe handler */
957static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
958 struct pt_regs *regs)
959{
960 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
961 struct kretprobe_trace_entry *entry;
962 struct ring_buffer_event *event;
963 struct ring_buffer *buffer;
964 int size, i, pc;
965 unsigned long irq_flags;
966 struct ftrace_event_call *call = &tp->call;
967
968 local_save_flags(irq_flags);
969 pc = preempt_count();
970
971 size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
972
973 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
974 irq_flags, pc);
975 if (!event)
976 return 0;
977
978 entry = ring_buffer_event_data(event);
979 entry->nargs = tp->nr_args;
980 entry->func = (unsigned long)tp->rp.kp.addr;
981 entry->ret_ip = (unsigned long)ri->ret_addr;
982 for (i = 0; i < tp->nr_args; i++)
983 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
984
985 if (!filter_current_check_discard(buffer, call, entry, event))
986 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
987
988 return 0;
989}
990
991/* Event entry printers */
992enum print_line_t
993print_kprobe_event(struct trace_iterator *iter, int flags)
994{
995 struct kprobe_trace_entry *field;
996 struct trace_seq *s = &iter->seq;
997 struct trace_event *event;
998 struct trace_probe *tp;
999 int i;
1000
1001 field = (struct kprobe_trace_entry *)iter->ent;
1002 event = ftrace_find_event(field->ent.type);
1003 tp = container_of(event, struct trace_probe, event);
1004
1005 if (!trace_seq_printf(s, "%s: (", tp->call.name))
1006 goto partial;
1007
1008 if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
1009 goto partial;
1010
1011 if (!trace_seq_puts(s, ")"))
1012 goto partial;
1013
1014 for (i = 0; i < field->nargs; i++)
1015 if (!trace_seq_printf(s, " %s=%lx",
1016 tp->args[i].name, field->args[i]))
1017 goto partial;
1018
1019 if (!trace_seq_puts(s, "\n"))
1020 goto partial;
1021
1022 return TRACE_TYPE_HANDLED;
1023partial:
1024 return TRACE_TYPE_PARTIAL_LINE;
1025}
1026
1027enum print_line_t
1028print_kretprobe_event(struct trace_iterator *iter, int flags)
1029{
1030 struct kretprobe_trace_entry *field;
1031 struct trace_seq *s = &iter->seq;
1032 struct trace_event *event;
1033 struct trace_probe *tp;
1034 int i;
1035
1036 field = (struct kretprobe_trace_entry *)iter->ent;
1037 event = ftrace_find_event(field->ent.type);
1038 tp = container_of(event, struct trace_probe, event);
1039
1040 if (!trace_seq_printf(s, "%s: (", tp->call.name))
1041 goto partial;
1042
1043 if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
1044 goto partial;
1045
1046 if (!trace_seq_puts(s, " <- "))
1047 goto partial;
1048
1049 if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
1050 goto partial;
1051
1052 if (!trace_seq_puts(s, ")"))
1053 goto partial;
1054
1055 for (i = 0; i < field->nargs; i++)
1056 if (!trace_seq_printf(s, " %s=%lx",
1057 tp->args[i].name, field->args[i]))
1058 goto partial;
1059
1060 if (!trace_seq_puts(s, "\n"))
1061 goto partial;
1062
1063 return TRACE_TYPE_HANDLED;
1064partial:
1065 return TRACE_TYPE_PARTIAL_LINE;
1066}
1067
1068static int probe_event_enable(struct ftrace_event_call *call)
1069{
1070 struct trace_probe *tp = (struct trace_probe *)call->data;
1071
1072 tp->flags |= TP_FLAG_TRACE;
1073 if (probe_is_return(tp))
1074 return enable_kretprobe(&tp->rp);
1075 else
1076 return enable_kprobe(&tp->rp.kp);
1077}
1078
1079static void probe_event_disable(struct ftrace_event_call *call)
1080{
1081 struct trace_probe *tp = (struct trace_probe *)call->data;
1082
1083 tp->flags &= ~TP_FLAG_TRACE;
1084 if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) {
1085 if (probe_is_return(tp))
1086 disable_kretprobe(&tp->rp);
1087 else
1088 disable_kprobe(&tp->rp.kp);
1089 }
1090}
1091
1092static int probe_event_raw_init(struct ftrace_event_call *event_call)
1093{
1094 INIT_LIST_HEAD(&event_call->fields);
1095
1096 return 0;
1097}
1098
1099#undef DEFINE_FIELD
1100#define DEFINE_FIELD(type, item, name, is_signed) \
1101 do { \
1102 ret = trace_define_field(event_call, #type, name, \
1103 offsetof(typeof(field), item), \
1104 sizeof(field.item), is_signed, \
1105 FILTER_OTHER); \
1106 if (ret) \
1107 return ret; \
1108 } while (0)
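For reference, each DEFINE_FIELD() use below expands into one guarded trace_define_field() call; a hand-expanded sketch of DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0) from kprobe_event_define_fields(), where 'field' is a struct kprobe_trace_entry, reads roughly:

	/* sketch of the macro expansion at the kprobe call site */
	ret = trace_define_field(event_call, "unsigned long", FIELD_STRING_IP,
				 offsetof(struct kprobe_trace_entry, ip),
				 sizeof(field.ip), 0, FILTER_OTHER);
	if (ret)
		return ret;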
1109
1110static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1111{
1112 int ret, i;
1113 struct kprobe_trace_entry field;
1114 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1115
1116 ret = trace_define_common_fields(event_call);
1117	if (ret)
1118 return ret;
1119
1120 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
1121 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1122 /* Set argument names as fields */
1123 for (i = 0; i < tp->nr_args; i++)
1124 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
1125 return 0;
1126}
1127
1128static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1129{
1130 int ret, i;
1131 struct kretprobe_trace_entry field;
1132 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1133
1134 ret = trace_define_common_fields(event_call);
1135	if (ret)
1136 return ret;
1137
1138 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
1139 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
1140 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1141 /* Set argument names as fields */
1142 for (i = 0; i < tp->nr_args; i++)
1143 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
1144 return 0;
1145}
1146
1147static int __probe_event_show_format(struct trace_seq *s,
1148 struct trace_probe *tp, const char *fmt,
1149 const char *arg)
1150{
1151 int i;
1152
1153 /* Show format */
1154 if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt))
1155 return 0;
1156
1157 for (i = 0; i < tp->nr_args; i++)
1158 if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name))
1159 return 0;
1160
1161 if (!trace_seq_printf(s, "\", %s", arg))
1162 return 0;
1163
1164 for (i = 0; i < tp->nr_args; i++)
1165 if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name))
1166 return 0;
1167
1168 return trace_seq_puts(s, "\n");
1169}
1170
1171#undef SHOW_FIELD
1172#define SHOW_FIELD(type, item, name) \
1173 do { \
1174 ret = trace_seq_printf(s, "\tfield: " #type " %s;\t" \
1175 "offset:%u;\tsize:%u;\n", name, \
1176 (unsigned int)offsetof(typeof(field), item),\
1177 (unsigned int)sizeof(type)); \
1178 if (!ret) \
1179 return 0; \
1180 } while (0)
1181
1182static int kprobe_event_show_format(struct ftrace_event_call *call,
1183 struct trace_seq *s)
1184{
1185 struct kprobe_trace_entry field __attribute__((unused));
1186 int ret, i;
1187 struct trace_probe *tp = (struct trace_probe *)call->data;
1188
1189 SHOW_FIELD(unsigned long, ip, FIELD_STRING_IP);
1190 SHOW_FIELD(int, nargs, FIELD_STRING_NARGS);
1191
1192 /* Show fields */
1193 for (i = 0; i < tp->nr_args; i++)
1194 SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
1195 trace_seq_puts(s, "\n");
1196
1197 return __probe_event_show_format(s, tp, "(%lx)",
1198 "REC->" FIELD_STRING_IP);
1199}
1200
1201static int kretprobe_event_show_format(struct ftrace_event_call *call,
1202 struct trace_seq *s)
1203{
1204 struct kretprobe_trace_entry field __attribute__((unused));
1205 int ret, i;
1206 struct trace_probe *tp = (struct trace_probe *)call->data;
1207
1208 SHOW_FIELD(unsigned long, func, FIELD_STRING_FUNC);
1209 SHOW_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP);
1210 SHOW_FIELD(int, nargs, FIELD_STRING_NARGS);
1211
1212 /* Show fields */
1213 for (i = 0; i < tp->nr_args; i++)
1214 SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
1215 trace_seq_puts(s, "\n");
1216
1217 return __probe_event_show_format(s, tp, "(%lx <- %lx)",
1218 "REC->" FIELD_STRING_FUNC
1219 ", REC->" FIELD_STRING_RETIP);
1220}
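These two show_format callbacks produce the text read back from an event's debugfs 'format' file. As a rough illustration, a kprobe with two arguments named a0 and a1 would yield output shaped like the following, assuming FIELD_STRING_IP and FIELD_STRING_NARGS expand to __probe_ip and __probe_nargs (offsets and sizes here are hypothetical 32-bit values):

	field: unsigned long __probe_ip;	offset:12;	size:4;
	field: int __probe_nargs;	offset:16;	size:4;
	field: unsigned long a0;	offset:20;	size:4;
	field: unsigned long a1;	offset:24;	size:4;

	print fmt: "(%lx) a0=%lx a1=%lx", REC->__probe_ip, REC->a0, REC->a1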
1221
1222#ifdef CONFIG_EVENT_PROFILE
1223
1224/* Kprobe profile handler */
1225static __kprobes int kprobe_profile_func(struct kprobe *kp,
1226 struct pt_regs *regs)
1227{
1228 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1229 struct ftrace_event_call *call = &tp->call;
1230 struct kprobe_trace_entry *entry;
1231 struct trace_entry *ent;
1232 int size, __size, i, pc, __cpu;
1233 unsigned long irq_flags;
1234 char *trace_buf;
1235 char *raw_data;
1236 int rctx;
1237
1238 pc = preempt_count();
1239 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
1240 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1241 size -= sizeof(u32);
1242 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
1243 "profile buffer not large enough"))
1244 return 0;
1245
1246 /*
1247	 * Protect the non-NMI buffer.
1248	 * This also protects the RCU read side.
1249 */
1250 local_irq_save(irq_flags);
1251
1252 rctx = perf_swevent_get_recursion_context();
1253 if (rctx < 0)
1254 goto end_recursion;
1255
1256 __cpu = smp_processor_id();
1257
1258 if (in_nmi())
1259 trace_buf = rcu_dereference(perf_trace_buf_nmi);
1260 else
1261 trace_buf = rcu_dereference(perf_trace_buf);
1262
1263 if (!trace_buf)
1264 goto end;
1265
1266 raw_data = per_cpu_ptr(trace_buf, __cpu);
1267
1268 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1269 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1270 entry = (struct kprobe_trace_entry *)raw_data;
1271 ent = &entry->ent;
1272
1273 tracing_generic_entry_update(ent, irq_flags, pc);
1274 ent->type = call->id;
1275 entry->nargs = tp->nr_args;
1276 entry->ip = (unsigned long)kp->addr;
1277 for (i = 0; i < tp->nr_args; i++)
1278 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1279 perf_tp_event(call->id, entry->ip, 1, entry, size);
1280
1281end:
1282 perf_swevent_put_recursion_context(rctx);
1283end_recursion:
1284 local_irq_restore(irq_flags);
1285
1286 return 0;
1287}
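The size arithmetic at the top of this handler (and of the kretprobe variant below) ensures that the record, once perf prepends its u32 size header, stays u64-aligned. A worked example, assuming SIZEOF_KPROBE_TRACE_ENTRY() yields 30 bytes for the given argument count:

	__size = 30
	size   = ALIGN(30 + sizeof(u32), sizeof(u64)) - sizeof(u32)
	       = ALIGN(34, 8) - 4 = 40 - 4 = 36

The six padding bytes (30..35) fall inside the final u64 of the record, which is exactly the region the '*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL' store zeroes before the entry is filled in.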
1288
1289/* Kretprobe profile handler */
1290static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
1291 struct pt_regs *regs)
1292{
1293 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1294 struct ftrace_event_call *call = &tp->call;
1295 struct kretprobe_trace_entry *entry;
1296 struct trace_entry *ent;
1297 int size, __size, i, pc, __cpu;
1298 unsigned long irq_flags;
1299 char *trace_buf;
1300 char *raw_data;
1301 int rctx;
1302
1303 pc = preempt_count();
1304 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
1305 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1306 size -= sizeof(u32);
1307 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
1308 "profile buffer not large enough"))
1309 return 0;
1310
1311 /*
1312	 * Protect the non-NMI buffer.
1313	 * This also protects the RCU read side.
1314 */
1315 local_irq_save(irq_flags);
1316
1317 rctx = perf_swevent_get_recursion_context();
1318 if (rctx < 0)
1319 goto end_recursion;
1320
1321 __cpu = smp_processor_id();
1322
1323 if (in_nmi())
1324 trace_buf = rcu_dereference(perf_trace_buf_nmi);
1325 else
1326 trace_buf = rcu_dereference(perf_trace_buf);
1327
1328 if (!trace_buf)
1329 goto end;
1330
1331 raw_data = per_cpu_ptr(trace_buf, __cpu);
1332
1333 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1334 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1335 entry = (struct kretprobe_trace_entry *)raw_data;
1336 ent = &entry->ent;
1337
1338 tracing_generic_entry_update(ent, irq_flags, pc);
1339 ent->type = call->id;
1340 entry->nargs = tp->nr_args;
1341 entry->func = (unsigned long)tp->rp.kp.addr;
1342 entry->ret_ip = (unsigned long)ri->ret_addr;
1343 for (i = 0; i < tp->nr_args; i++)
1344 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1345 perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
1346
1347end:
1348 perf_swevent_put_recursion_context(rctx);
1349end_recursion:
1350 local_irq_restore(irq_flags);
1351
1352 return 0;
1353}
1354
1355static int probe_profile_enable(struct ftrace_event_call *call)
1356{
1357 struct trace_probe *tp = (struct trace_probe *)call->data;
1358
1359 tp->flags |= TP_FLAG_PROFILE;
1360
1361 if (probe_is_return(tp))
1362 return enable_kretprobe(&tp->rp);
1363 else
1364 return enable_kprobe(&tp->rp.kp);
1365}
1366
1367static void probe_profile_disable(struct ftrace_event_call *call)
1368{
1369 struct trace_probe *tp = (struct trace_probe *)call->data;
1370
1371 tp->flags &= ~TP_FLAG_PROFILE;
1372
1373 if (!(tp->flags & TP_FLAG_TRACE)) {
1374 if (probe_is_return(tp))
1375 disable_kretprobe(&tp->rp);
1376 else
1377 disable_kprobe(&tp->rp.kp);
1378 }
1379}
1380#endif /* CONFIG_EVENT_PROFILE */
1381
1382
1383static __kprobes
1384int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1385{
1386 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1387
1388 if (tp->flags & TP_FLAG_TRACE)
1389 kprobe_trace_func(kp, regs);
1390#ifdef CONFIG_EVENT_PROFILE
1391 if (tp->flags & TP_FLAG_PROFILE)
1392 kprobe_profile_func(kp, regs);
1393#endif /* CONFIG_EVENT_PROFILE */
1394	return 0;	/* We don't tweak the kernel, so just return 0 */
1395}
1396
1397static __kprobes
1398int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1399{
1400 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1401
1402 if (tp->flags & TP_FLAG_TRACE)
1403 kretprobe_trace_func(ri, regs);
1404#ifdef CONFIG_EVENT_PROFILE
1405 if (tp->flags & TP_FLAG_PROFILE)
1406 kretprobe_profile_func(ri, regs);
1407#endif /* CONFIG_EVENT_PROFILE */
1408	return 0;	/* We don't tweak the kernel, so just return 0 */
1409}
1410
1411static int register_probe_event(struct trace_probe *tp)
1412{
1413 struct ftrace_event_call *call = &tp->call;
1414 int ret;
1415
1416 /* Initialize ftrace_event_call */
1417 if (probe_is_return(tp)) {
1418 tp->event.trace = print_kretprobe_event;
1419 call->raw_init = probe_event_raw_init;
1420 call->show_format = kretprobe_event_show_format;
1421 call->define_fields = kretprobe_event_define_fields;
1422 } else {
1423 tp->event.trace = print_kprobe_event;
1424 call->raw_init = probe_event_raw_init;
1425 call->show_format = kprobe_event_show_format;
1426 call->define_fields = kprobe_event_define_fields;
1427 }
1428 call->event = &tp->event;
1429 call->id = register_ftrace_event(&tp->event);
1430 if (!call->id)
1431 return -ENODEV;
1432 call->enabled = 0;
1433 call->regfunc = probe_event_enable;
1434 call->unregfunc = probe_event_disable;
1435
1436#ifdef CONFIG_EVENT_PROFILE
1437 atomic_set(&call->profile_count, -1);
1438 call->profile_enable = probe_profile_enable;
1439 call->profile_disable = probe_profile_disable;
1440#endif
1441 call->data = tp;
1442 ret = trace_add_event_call(call);
1443 if (ret) {
1444 pr_info("Failed to register kprobe event: %s\n", call->name);
1445 unregister_ftrace_event(&tp->event);
1446 }
1447 return ret;
1448}
1449
1450static void unregister_probe_event(struct trace_probe *tp)
1451{
1452 /* tp->event is unregistered in trace_remove_event_call() */
1453 trace_remove_event_call(&tp->call);
1454}
1455
1456/* Make a debugfs interface for controlling probe points */
1457static __init int init_kprobe_trace(void)
1458{
1459 struct dentry *d_tracer;
1460 struct dentry *entry;
1461
1462 d_tracer = tracing_init_dentry();
1463 if (!d_tracer)
1464 return 0;
1465
1466 entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
1467 NULL, &kprobe_events_ops);
1468
1469 /* Event list interface */
1470 if (!entry)
1471 pr_warning("Could not create debugfs "
1472 "'kprobe_events' entry\n");
1473
1474 /* Profile interface */
1475 entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
1476 NULL, &kprobe_profile_ops);
1477
1478 if (!entry)
1479 pr_warning("Could not create debugfs "
1480 "'kprobe_profile' entry\n");
1481 return 0;
1482}
1483fs_initcall(init_kprobe_trace);
1484
1485
1486#ifdef CONFIG_FTRACE_STARTUP_TEST
1487
1488static int kprobe_trace_selftest_target(int a1, int a2, int a3,
1489 int a4, int a5, int a6)
1490{
1491 return a1 + a2 + a3 + a4 + a5 + a6;
1492}
1493
1494static __init int kprobe_trace_self_tests_init(void)
1495{
1496 int ret;
1497 int (*target)(int, int, int, int, int, int);
1498
1499 target = kprobe_trace_selftest_target;
1500
1501 pr_info("Testing kprobe tracing: ");
1502
1503 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
1504 "$arg1 $arg2 $arg3 $arg4 $stack $stack0");
1505 if (WARN_ON_ONCE(ret))
1506 pr_warning("error enabling function entry\n");
1507
1508 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
1509 "$retval");
1510 if (WARN_ON_ONCE(ret))
1511 pr_warning("error enabling function return\n");
1512
1513 ret = target(1, 2, 3, 4, 5, 6);
1514
1515 cleanup_all_probes();
1516
1517 pr_cont("OK\n");
1518 return 0;
1519}
1520
1521late_initcall(kprobe_trace_self_tests_init);
1522
1523#endif
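The selftest drives command_trace_probe() with the same strings a user would write to the kprobe_events debugfs file (documented in Documentation/trace/kprobetrace.txt). An equivalent manual session, assuming debugfs is mounted at /sys/kernel/debug:

	echo 'p:testprobe kprobe_trace_selftest_target $arg1 $arg2 $arg3 $arg4 $stack $stack0' > /sys/kernel/debug/tracing/kprobe_events
	echo 'r:testprobe2 kprobe_trace_selftest_target $retval' >> /sys/kernel/debug/tracing/kprobe_events
	cat /sys/kernel/debug/tracing/kprobe_profile	# per-probe hit and miss counts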
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
new file mode 100644
index 000000000000..ddfa0fd43bc0
--- /dev/null
+++ b/kernel/trace/trace_ksym.c
@@ -0,0 +1,550 @@
1/*
2 * trace_ksym.c - Kernel Symbol Tracer
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2009
19 */
20
21#include <linux/kallsyms.h>
22#include <linux/uaccess.h>
23#include <linux/debugfs.h>
24#include <linux/ftrace.h>
25#include <linux/module.h>
26#include <linux/fs.h>
27
28#include "trace_output.h"
29#include "trace_stat.h"
30#include "trace.h"
31
32#include <linux/hw_breakpoint.h>
33#include <asm/hw_breakpoint.h>
34
35/*
36 * For now, restrict the number of symbols traced simultaneously to the
37 * number of available hardware breakpoint registers.
38 */
39#define KSYM_TRACER_MAX HBP_NUM
40
41#define KSYM_TRACER_OP_LEN 3 /* rw- */
42
43struct trace_ksym {
44 struct perf_event **ksym_hbp;
45 struct perf_event_attr attr;
46#ifdef CONFIG_PROFILE_KSYM_TRACER
47 unsigned long counter;
48#endif
49 struct hlist_node ksym_hlist;
50};
51
52static struct trace_array *ksym_trace_array;
53
54static unsigned int ksym_filter_entry_count;
55static unsigned int ksym_tracing_enabled;
56
57static HLIST_HEAD(ksym_filter_head);
58
59static DEFINE_MUTEX(ksym_tracer_mutex);
60
61#ifdef CONFIG_PROFILE_KSYM_TRACER
62
63#define MAX_UL_INT 0xffffffff
64
65void ksym_collect_stats(unsigned long hbp_hit_addr)
66{
67 struct hlist_node *node;
68 struct trace_ksym *entry;
69
70 rcu_read_lock();
71 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
72 if ((entry->attr.bp_addr == hbp_hit_addr) &&
73 (entry->counter <= MAX_UL_INT)) {
74 entry->counter++;
75 break;
76 }
77 }
78 rcu_read_unlock();
79}
80#endif /* CONFIG_PROFILE_KSYM_TRACER */
81
82void ksym_hbp_handler(struct perf_event *hbp, void *data)
83{
84 struct ring_buffer_event *event;
85 struct ksym_trace_entry *entry;
86 struct pt_regs *regs = data;
87 struct ring_buffer *buffer;
88 int pc;
89
90 if (!ksym_tracing_enabled)
91 return;
92
93 buffer = ksym_trace_array->buffer;
94
95 pc = preempt_count();
96
97 event = trace_buffer_lock_reserve(buffer, TRACE_KSYM,
98 sizeof(*entry), 0, pc);
99 if (!event)
100 return;
101
102 entry = ring_buffer_event_data(event);
103 entry->ip = instruction_pointer(regs);
104 entry->type = hw_breakpoint_type(hbp);
105 entry->addr = hw_breakpoint_addr(hbp);
106 strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
107
108#ifdef CONFIG_PROFILE_KSYM_TRACER
109 ksym_collect_stats(hw_breakpoint_addr(hbp));
110#endif /* CONFIG_PROFILE_KSYM_TRACER */
111
112 trace_buffer_unlock_commit(buffer, event, 0, pc);
113}
114
115/* Valid access types are represented as
116 *
117 * rw- : Set Read/Write Access Breakpoint
118 * -w- : Set Write Access Breakpoint
119 * --- : Clear Breakpoints
120 * --x : Set Execution Breakpoint (not available yet)
121 *
122 */
123static int ksym_trace_get_access_type(char *str)
124{
125 int access = 0;
126
127 if (str[0] == 'r')
128 access |= HW_BREAKPOINT_R;
129
130 if (str[1] == 'w')
131 access |= HW_BREAKPOINT_W;
132
133 if (str[2] == 'x')
134 access |= HW_BREAKPOINT_X;
135
136 switch (access) {
137 case HW_BREAKPOINT_R:
138 case HW_BREAKPOINT_W:
139 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
140 return access;
141 default:
142 return -EINVAL;
143 }
144}
145
146/*
147 * There can be several possible malformed requests and we attempt to capture
148 * all of them. We enumerate some of the rules
149 * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
150 *    i.e. multiple ':' symbols are disallowed. Possible uses are of the form
151 * <module>:<ksym_name>:<op>.
152 * 2. No delimiter symbol ':' in the input string
153 * 3. Spurious operator symbols or symbols not in their respective positions
154 * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
155 * 5. Kernel symbol not a part of /proc/kallsyms
156 * 6. Duplicate requests
157 */
158static int parse_ksym_trace_str(char *input_string, char **ksymname,
159 unsigned long *addr)
160{
161 int ret;
162
163 *ksymname = strsep(&input_string, ":");
164 *addr = kallsyms_lookup_name(*ksymname);
165
166 /* Check for malformed request: (2), (1) and (5) */
167 if ((!input_string) ||
168 (strlen(input_string) != KSYM_TRACER_OP_LEN) ||
169 (*addr == 0))
170		return -EINVAL;
171
172 ret = ksym_trace_get_access_type(input_string);
173
174 return ret;
175}
176
177int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
178{
179 struct trace_ksym *entry;
180 int ret = -ENOMEM;
181
182 if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
183		printk(KERN_ERR "ksym_tracer: Maximum limit (%d) reached. No"
184 " new requests for tracing can be accepted now.\n",
185 KSYM_TRACER_MAX);
186 return -ENOSPC;
187 }
188
189 entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
190 if (!entry)
191 return -ENOMEM;
192
193 hw_breakpoint_init(&entry->attr);
194
195 entry->attr.bp_type = op;
196 entry->attr.bp_addr = addr;
197 entry->attr.bp_len = HW_BREAKPOINT_LEN_4;
198
199 ret = -EAGAIN;
200 entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr,
201 ksym_hbp_handler);
202
203 if (IS_ERR(entry->ksym_hbp)) {
204 ret = PTR_ERR(entry->ksym_hbp);
205 printk(KERN_INFO "ksym_tracer request failed. Try again"
206 " later!!\n");
207 goto err;
208 }
209
210 hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
211 ksym_filter_entry_count++;
212
213 return 0;
214
215err:
216 kfree(entry);
217
218 return ret;
219}
220
221static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
222 size_t count, loff_t *ppos)
223{
224 struct trace_ksym *entry;
225 struct hlist_node *node;
226 struct trace_seq *s;
227 ssize_t cnt = 0;
228 int ret;
229
230 s = kmalloc(sizeof(*s), GFP_KERNEL);
231 if (!s)
232 return -ENOMEM;
233 trace_seq_init(s);
234
235 mutex_lock(&ksym_tracer_mutex);
236
237 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
238 ret = trace_seq_printf(s, "%pS:", (void *)entry->attr.bp_addr);
239 if (entry->attr.bp_type == HW_BREAKPOINT_R)
240 ret = trace_seq_puts(s, "r--\n");
241 else if (entry->attr.bp_type == HW_BREAKPOINT_W)
242 ret = trace_seq_puts(s, "-w-\n");
243 else if (entry->attr.bp_type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
244 ret = trace_seq_puts(s, "rw-\n");
245 WARN_ON_ONCE(!ret);
246 }
247
248 cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
249
250 mutex_unlock(&ksym_tracer_mutex);
251
252 kfree(s);
253
254 return cnt;
255}
256
257static void __ksym_trace_reset(void)
258{
259 struct trace_ksym *entry;
260 struct hlist_node *node, *node1;
261
262 mutex_lock(&ksym_tracer_mutex);
263 hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
264 ksym_hlist) {
265 unregister_wide_hw_breakpoint(entry->ksym_hbp);
266 ksym_filter_entry_count--;
267 hlist_del_rcu(&(entry->ksym_hlist));
268 synchronize_rcu();
269 kfree(entry);
270 }
271 mutex_unlock(&ksym_tracer_mutex);
272}
273
274static ssize_t ksym_trace_filter_write(struct file *file,
275 const char __user *buffer,
276 size_t count, loff_t *ppos)
277{
278 struct trace_ksym *entry;
279 struct hlist_node *node;
280 char *input_string, *ksymname = NULL;
281 unsigned long ksym_addr = 0;
282 int ret, op, changed = 0;
283
284 input_string = kzalloc(count + 1, GFP_KERNEL);
285 if (!input_string)
286 return -ENOMEM;
287
288 if (copy_from_user(input_string, buffer, count)) {
289 kfree(input_string);
290 return -EFAULT;
291 }
292 input_string[count] = '\0';
293
294 strstrip(input_string);
295
296 /*
297 * Clear all breakpoints if:
298 * 1: echo > ksym_trace_filter
299 * 2: echo 0 > ksym_trace_filter
300 * 3: echo "*:---" > ksym_trace_filter
301 */
302 if (!input_string[0] || !strcmp(input_string, "0") ||
303 !strcmp(input_string, "*:---")) {
304 __ksym_trace_reset();
305 kfree(input_string);
306 return count;
307 }
308
309 ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
310 if (ret < 0) {
311 kfree(input_string);
312 return ret;
313 }
314
315 mutex_lock(&ksym_tracer_mutex);
316
317 ret = -EINVAL;
318 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
319 if (entry->attr.bp_addr == ksym_addr) {
320 /* Check for malformed request: (6) */
321 if (entry->attr.bp_type != op)
322 changed = 1;
323 else
324 goto out;
325 break;
326 }
327 }
328 if (changed) {
329 unregister_wide_hw_breakpoint(entry->ksym_hbp);
330 entry->attr.bp_type = op;
331 ret = 0;
332 if (op > 0) {
333 entry->ksym_hbp =
334 register_wide_hw_breakpoint(&entry->attr,
335 ksym_hbp_handler);
336 if (IS_ERR(entry->ksym_hbp))
337 ret = PTR_ERR(entry->ksym_hbp);
338 else
339 goto out;
340 }
341 /* Error or "symbol:---" case: drop it */
342 ksym_filter_entry_count--;
343 hlist_del_rcu(&(entry->ksym_hlist));
344 synchronize_rcu();
345 kfree(entry);
346 goto out;
347 } else {
348 /* Check for malformed request: (4) */
349 if (op == 0)
350 goto out;
351 ret = process_new_ksym_entry(ksymname, op, ksym_addr);
352 }
353out:
354 mutex_unlock(&ksym_tracer_mutex);
355
356 kfree(input_string);
357
358 if (!ret)
359 ret = count;
360 return ret;
361}
362
363static const struct file_operations ksym_tracing_fops = {
364 .open = tracing_open_generic,
365 .read = ksym_trace_filter_read,
366 .write = ksym_trace_filter_write,
367};
368
369static void ksym_trace_reset(struct trace_array *tr)
370{
371 ksym_tracing_enabled = 0;
372 __ksym_trace_reset();
373}
374
375static int ksym_trace_init(struct trace_array *tr)
376{
377 int cpu, ret = 0;
378
379 for_each_online_cpu(cpu)
380 tracing_reset(tr, cpu);
381 ksym_tracing_enabled = 1;
382 ksym_trace_array = tr;
383
384 return ret;
385}
386
387static void ksym_trace_print_header(struct seq_file *m)
388{
389 seq_puts(m,
390 "# TASK-PID CPU# Symbol "
391 "Type Function\n");
392 seq_puts(m,
393 "# | | | "
394 " | |\n");
395}
396
397static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
398{
399 struct trace_entry *entry = iter->ent;
400 struct trace_seq *s = &iter->seq;
401 struct ksym_trace_entry *field;
402 char str[KSYM_SYMBOL_LEN];
403 int ret;
404
405 if (entry->type != TRACE_KSYM)
406 return TRACE_TYPE_UNHANDLED;
407
408 trace_assign_type(field, entry);
409
410 ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd,
411 entry->pid, iter->cpu, (char *)field->addr);
412 if (!ret)
413 return TRACE_TYPE_PARTIAL_LINE;
414
415 switch (field->type) {
416 case HW_BREAKPOINT_R:
417 ret = trace_seq_printf(s, " R ");
418 break;
419 case HW_BREAKPOINT_W:
420 ret = trace_seq_printf(s, " W ");
421 break;
422 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
423 ret = trace_seq_printf(s, " RW ");
424 break;
425 default:
426 return TRACE_TYPE_PARTIAL_LINE;
427 }
428
429 if (!ret)
430 return TRACE_TYPE_PARTIAL_LINE;
431
432 sprint_symbol(str, field->ip);
433 ret = trace_seq_printf(s, "%s\n", str);
434 if (!ret)
435 return TRACE_TYPE_PARTIAL_LINE;
436
437 return TRACE_TYPE_HANDLED;
438}
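Combined with ksym_trace_print_header() above, one line of output from this tracer is shaped roughly as follows (task, symbol, and call site are hypothetical):

	       bash-2264  [000] pid_max+0x0/0x4  W   proc_dointvec+0x5a/0x70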
439
440struct tracer ksym_tracer __read_mostly =
441{
442 .name = "ksym_tracer",
443 .init = ksym_trace_init,
444 .reset = ksym_trace_reset,
445#ifdef CONFIG_FTRACE_SELFTEST
446 .selftest = trace_selftest_startup_ksym,
447#endif
448 .print_header = ksym_trace_print_header,
449 .print_line = ksym_trace_output
450};
451
452__init static int init_ksym_trace(void)
453{
454 struct dentry *d_tracer;
455 struct dentry *entry;
456
457 d_tracer = tracing_init_dentry();
458 ksym_filter_entry_count = 0;
459
460 entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer,
461 NULL, &ksym_tracing_fops);
462 if (!entry)
463 pr_warning("Could not create debugfs "
464 "'ksym_trace_filter' file\n");
465
466 return register_tracer(&ksym_tracer);
467}
468device_initcall(init_ksym_trace);
469
470
471#ifdef CONFIG_PROFILE_KSYM_TRACER
472static int ksym_tracer_stat_headers(struct seq_file *m)
473{
474 seq_puts(m, " Access Type ");
475 seq_puts(m, " Symbol Counter\n");
476 seq_puts(m, " ----------- ");
477 seq_puts(m, " ------ -------\n");
478 return 0;
479}
480
481static int ksym_tracer_stat_show(struct seq_file *m, void *v)
482{
483 struct hlist_node *stat = v;
484 struct trace_ksym *entry;
485 int access_type = 0;
486 char fn_name[KSYM_NAME_LEN];
487
488 entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
489
490 access_type = entry->attr.bp_type;
491
492 switch (access_type) {
493 case HW_BREAKPOINT_R:
494 seq_puts(m, " R ");
495 break;
496 case HW_BREAKPOINT_W:
497 seq_puts(m, " W ");
498 break;
499 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
500 seq_puts(m, " RW ");
501 break;
502 default:
503 seq_puts(m, " NA ");
504 }
505
506 if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
507 seq_printf(m, " %-36s", fn_name);
508 else
509 seq_printf(m, " %-36s", "<NA>");
510 seq_printf(m, " %15lu\n", entry->counter);
511
512 return 0;
513}
514
515static void *ksym_tracer_stat_start(struct tracer_stat *trace)
516{
517 return ksym_filter_head.first;
518}
519
520static void *
521ksym_tracer_stat_next(void *v, int idx)
522{
523 struct hlist_node *stat = v;
524
525 return stat->next;
526}
527
528static struct tracer_stat ksym_tracer_stats = {
529 .name = "ksym_tracer",
530 .stat_start = ksym_tracer_stat_start,
531 .stat_next = ksym_tracer_stat_next,
532 .stat_headers = ksym_tracer_stat_headers,
533 .stat_show = ksym_tracer_stat_show
534};
535
536__init static int ksym_tracer_stat_init(void)
537{
538 int ret;
539
540 ret = register_stat_tracer(&ksym_tracer_stats);
541 if (ret) {
542 printk(KERN_WARNING "Warning: could not register "
543 "ksym tracer stats\n");
544 return 1;
545 }
546
547 return 0;
548}
549fs_initcall(ksym_tracer_stat_init);
550#endif /* CONFIG_PROFILE_KSYM_TRACER */
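With CONFIG_PROFILE_KSYM_TRACER enabled, the stat callbacks above expose per-symbol hit counts through the trace_stat interface; a hypothetical session, assuming debugfs is mounted at /sys/kernel/debug:

	echo "pid_max:rw-" > /sys/kernel/debug/tracing/ksym_trace_filter
	cat /sys/kernel/debug/tracing/trace_stat/ksym_tracer
	  Access Type    Symbol                               Counter
	  -----------    ------                               -------
	   RW            pid_max                                   13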
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index d2cdbabb4ead..dc98309e839a 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
17 case TRACE_GRAPH_ENT: 17 case TRACE_GRAPH_ENT:
18 case TRACE_GRAPH_RET: 18 case TRACE_GRAPH_RET:
19 case TRACE_HW_BRANCHES: 19 case TRACE_HW_BRANCHES:
20 case TRACE_KSYM:
20 return 1; 21 return 1;
21 } 22 }
22 return 0; 23 return 0;
@@ -808,3 +809,57 @@ trace_selftest_startup_hw_branches(struct tracer *trace,
808 return ret; 809 return ret;
809} 810}
810#endif /* CONFIG_HW_BRANCH_TRACER */ 811#endif /* CONFIG_HW_BRANCH_TRACER */
812
813#ifdef CONFIG_KSYM_TRACER
814static int ksym_selftest_dummy;
815
816int
817trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
818{
819 unsigned long count;
820 int ret;
821
822 /* start the tracing */
823 ret = tracer_init(trace, tr);
824 if (ret) {
825 warn_failed_init_tracer(trace, ret);
826 return ret;
827 }
828
829 ksym_selftest_dummy = 0;
830 /* Register the read-write tracing request */
831
832 ret = process_new_ksym_entry("ksym_selftest_dummy",
833 HW_BREAKPOINT_R | HW_BREAKPOINT_W,
834 (unsigned long)(&ksym_selftest_dummy));
835
836 if (ret < 0) {
837 printk(KERN_CONT "ksym_trace read-write startup test failed\n");
838 goto ret_path;
839 }
840 /* Perform a read and a write operation over the dummy variable to
841 * trigger the tracer
842 */
843 if (ksym_selftest_dummy == 0)
844 ksym_selftest_dummy++;
845
846 /* stop the tracing. */
847 tracing_stop();
848 /* check the trace buffer */
849 ret = trace_test_buffer(tr, &count);
850 trace->reset(tr);
851 tracing_start();
852
853 /* read & write operations - one each is performed on the dummy variable
854 * triggering two entries in the trace buffer
855 */
856 if (!ret && count != 2) {
857 printk(KERN_CONT "Ksym tracer startup test failed");
858 ret = -1;
859 }
860
861ret_path:
862 return ret;
863}
864#endif /* CONFIG_KSYM_TRACER */
865
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index ddee9c593732..57501d90096a 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -51,32 +51,6 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr)
51 return syscalls_metadata[nr]; 51 return syscalls_metadata[nr];
52} 52}
53 53
54int syscall_name_to_nr(char *name)
55{
56 int i;
57
58 if (!syscalls_metadata)
59 return -1;
60
61 for (i = 0; i < NR_syscalls; i++) {
62 if (syscalls_metadata[i]) {
63 if (!strcmp(syscalls_metadata[i]->name, name))
64 return i;
65 }
66 }
67 return -1;
68}
69
70void set_syscall_enter_id(int num, int id)
71{
72 syscalls_metadata[num]->enter_id = id;
73}
74
75void set_syscall_exit_id(int num, int id)
76{
77 syscalls_metadata[num]->exit_id = id;
78}
79
80enum print_line_t 54enum print_line_t
81print_syscall_enter(struct trace_iterator *iter, int flags) 55print_syscall_enter(struct trace_iterator *iter, int flags)
82{ 56{
@@ -93,7 +67,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags)
93 if (!entry) 67 if (!entry)
94 goto end; 68 goto end;
95 69
96 if (entry->enter_id != ent->type) { 70 if (entry->enter_event->id != ent->type) {
97 WARN_ON_ONCE(1); 71 WARN_ON_ONCE(1);
98 goto end; 72 goto end;
99 } 73 }
@@ -148,7 +122,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
148 return TRACE_TYPE_HANDLED; 122 return TRACE_TYPE_HANDLED;
149 } 123 }
150 124
151 if (entry->exit_id != ent->type) { 125 if (entry->exit_event->id != ent->type) {
152 WARN_ON_ONCE(1); 126 WARN_ON_ONCE(1);
153 return TRACE_TYPE_UNHANDLED; 127 return TRACE_TYPE_UNHANDLED;
154 } 128 }
@@ -166,24 +140,19 @@ extern char *__bad_type_size(void);
166#define SYSCALL_FIELD(type, name) \ 140#define SYSCALL_FIELD(type, name) \
167 sizeof(type) != sizeof(trace.name) ? \ 141 sizeof(type) != sizeof(trace.name) ? \
168 __bad_type_size() : \ 142 __bad_type_size() : \
169 #type, #name, offsetof(typeof(trace), name), sizeof(trace.name) 143 #type, #name, offsetof(typeof(trace), name), \
144 sizeof(trace.name), is_signed_type(type)
170 145
171int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) 146int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
172{ 147{
173 int i; 148 int i;
174 int nr;
175 int ret; 149 int ret;
176 struct syscall_metadata *entry; 150 struct syscall_metadata *entry = call->data;
177 struct syscall_trace_enter trace; 151 struct syscall_trace_enter trace;
178 int offset = offsetof(struct syscall_trace_enter, args); 152 int offset = offsetof(struct syscall_trace_enter, args);
179 153
180 nr = syscall_name_to_nr(call->data); 154 ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
181 entry = syscall_nr_to_meta(nr); 155 "\tsigned:%u;\n",
182
183 if (!entry)
184 return 0;
185
186 ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
187 SYSCALL_FIELD(int, nr)); 156 SYSCALL_FIELD(int, nr));
188 if (!ret) 157 if (!ret)
189 return 0; 158 return 0;
@@ -193,8 +162,10 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
193 entry->args[i]); 162 entry->args[i]);
194 if (!ret) 163 if (!ret)
195 return 0; 164 return 0;
196 ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset, 165 ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
197 sizeof(unsigned long)); 166 "\tsigned:%u;\n", offset,
167 sizeof(unsigned long),
168 is_signed_type(unsigned long));
198 if (!ret) 169 if (!ret)
199 return 0; 170 return 0;
200 offset += sizeof(unsigned long); 171 offset += sizeof(unsigned long);
@@ -226,8 +197,10 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
226 struct syscall_trace_exit trace; 197 struct syscall_trace_exit trace;
227 198
228 ret = trace_seq_printf(s, 199 ret = trace_seq_printf(s,
229 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 200 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
230 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n", 201 "\tsigned:%u;\n"
202 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
203 "\tsigned:%u;\n",
231 SYSCALL_FIELD(int, nr), 204 SYSCALL_FIELD(int, nr),
232 SYSCALL_FIELD(long, ret)); 205 SYSCALL_FIELD(long, ret));
233 if (!ret) 206 if (!ret)
@@ -239,22 +212,19 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
239int syscall_enter_define_fields(struct ftrace_event_call *call) 212int syscall_enter_define_fields(struct ftrace_event_call *call)
240{ 213{
241 struct syscall_trace_enter trace; 214 struct syscall_trace_enter trace;
242 struct syscall_metadata *meta; 215 struct syscall_metadata *meta = call->data;
243 int ret; 216 int ret;
244 int nr;
245 int i; 217 int i;
246 int offset = offsetof(typeof(trace), args); 218 int offset = offsetof(typeof(trace), args);
247 219
248 nr = syscall_name_to_nr(call->data);
249 meta = syscall_nr_to_meta(nr);
250
251 if (!meta)
252 return 0;
253
254 ret = trace_define_common_fields(call); 220 ret = trace_define_common_fields(call);
255 if (ret) 221 if (ret)
256 return ret; 222 return ret;
257 223
224 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
225 if (ret)
226 return ret;
227
258 for (i = 0; i < meta->nb_args; i++) { 228 for (i = 0; i < meta->nb_args; i++) {
259 ret = trace_define_field(call, meta->types[i], 229 ret = trace_define_field(call, meta->types[i],
260 meta->args[i], offset, 230 meta->args[i], offset,
@@ -275,7 +245,11 @@ int syscall_exit_define_fields(struct ftrace_event_call *call)
275 if (ret) 245 if (ret)
276 return ret; 246 return ret;
277 247
278 ret = trace_define_field(call, SYSCALL_FIELD(long, ret), 0, 248 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
249 if (ret)
250 return ret;
251
252 ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
279 FILTER_OTHER); 253 FILTER_OTHER);
280 254
281 return ret; 255 return ret;
@@ -302,8 +276,8 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id)
302 276
303 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; 277 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
304 278
305 event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id, 279 event = trace_current_buffer_lock_reserve(&buffer,
306 size, 0, 0); 280 sys_data->enter_event->id, size, 0, 0);
307 if (!event) 281 if (!event)
308 return; 282 return;
309 283
@@ -334,8 +308,8 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
334 if (!sys_data) 308 if (!sys_data)
335 return; 309 return;
336 310
337 event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id, 311 event = trace_current_buffer_lock_reserve(&buffer,
338 sizeof(*entry), 0, 0); 312 sys_data->exit_event->id, sizeof(*entry), 0, 0);
339 if (!event) 313 if (!event)
340 return; 314 return;
341 315
@@ -348,14 +322,12 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
348 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 322 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
349} 323}
350 324
351int reg_event_syscall_enter(void *ptr) 325int reg_event_syscall_enter(struct ftrace_event_call *call)
352{ 326{
353 int ret = 0; 327 int ret = 0;
354 int num; 328 int num;
355 char *name;
356 329
357 name = (char *)ptr; 330 num = ((struct syscall_metadata *)call->data)->syscall_nr;
358 num = syscall_name_to_nr(name);
359 if (num < 0 || num >= NR_syscalls) 331 if (num < 0 || num >= NR_syscalls)
360 return -ENOSYS; 332 return -ENOSYS;
361 mutex_lock(&syscall_trace_lock); 333 mutex_lock(&syscall_trace_lock);
@@ -372,13 +344,11 @@ int reg_event_syscall_enter(void *ptr)
372 return ret; 344 return ret;
373} 345}
374 346
375void unreg_event_syscall_enter(void *ptr) 347void unreg_event_syscall_enter(struct ftrace_event_call *call)
376{ 348{
377 int num; 349 int num;
378 char *name;
379 350
380 name = (char *)ptr; 351 num = ((struct syscall_metadata *)call->data)->syscall_nr;
381 num = syscall_name_to_nr(name);
382 if (num < 0 || num >= NR_syscalls) 352 if (num < 0 || num >= NR_syscalls)
383 return; 353 return;
384 mutex_lock(&syscall_trace_lock); 354 mutex_lock(&syscall_trace_lock);
@@ -389,14 +359,12 @@ void unreg_event_syscall_enter(void *ptr)
389 mutex_unlock(&syscall_trace_lock); 359 mutex_unlock(&syscall_trace_lock);
390} 360}
391 361
392int reg_event_syscall_exit(void *ptr) 362int reg_event_syscall_exit(struct ftrace_event_call *call)
393{ 363{
394 int ret = 0; 364 int ret = 0;
395 int num; 365 int num;
396 char *name;
397 366
398 name = (char *)ptr; 367 num = ((struct syscall_metadata *)call->data)->syscall_nr;
399 num = syscall_name_to_nr(name);
400 if (num < 0 || num >= NR_syscalls) 368 if (num < 0 || num >= NR_syscalls)
401 return -ENOSYS; 369 return -ENOSYS;
402 mutex_lock(&syscall_trace_lock); 370 mutex_lock(&syscall_trace_lock);
@@ -413,13 +381,11 @@ int reg_event_syscall_exit(void *ptr)
413 return ret; 381 return ret;
414} 382}
415 383
416void unreg_event_syscall_exit(void *ptr) 384void unreg_event_syscall_exit(struct ftrace_event_call *call)
417{ 385{
418 int num; 386 int num;
419 char *name;
420 387
421 name = (char *)ptr; 388 num = ((struct syscall_metadata *)call->data)->syscall_nr;
422 num = syscall_name_to_nr(name);
423 if (num < 0 || num >= NR_syscalls) 389 if (num < 0 || num >= NR_syscalls)
424 return; 390 return;
425 mutex_lock(&syscall_trace_lock); 391 mutex_lock(&syscall_trace_lock);
@@ -430,13 +396,17 @@ void unreg_event_syscall_exit(void *ptr)
430 mutex_unlock(&syscall_trace_lock); 396 mutex_unlock(&syscall_trace_lock);
431} 397}
432 398
433struct trace_event event_syscall_enter = { 399int init_syscall_trace(struct ftrace_event_call *call)
434 .trace = print_syscall_enter, 400{
435}; 401 int id;
436 402
437struct trace_event event_syscall_exit = { 403 id = register_ftrace_event(call->event);
438 .trace = print_syscall_exit, 404 if (!id)
439}; 405 return -ENODEV;
406 call->id = id;
407 INIT_LIST_HEAD(&call->fields);
408 return 0;
409}
440 410
441int __init init_ftrace_syscalls(void) 411int __init init_ftrace_syscalls(void)
442{ 412{
@@ -454,6 +424,10 @@ int __init init_ftrace_syscalls(void)
454 for (i = 0; i < NR_syscalls; i++) { 424 for (i = 0; i < NR_syscalls; i++) {
455 addr = arch_syscall_addr(i); 425 addr = arch_syscall_addr(i);
456 meta = find_syscall_meta(addr); 426 meta = find_syscall_meta(addr);
427 if (!meta)
428 continue;
429
430 meta->syscall_nr = i;
457 syscalls_metadata[i] = meta; 431 syscalls_metadata[i] = meta;
458 } 432 }
459 433
@@ -473,8 +447,10 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
473 struct syscall_metadata *sys_data; 447 struct syscall_metadata *sys_data;
474 struct syscall_trace_enter *rec; 448 struct syscall_trace_enter *rec;
475 unsigned long flags; 449 unsigned long flags;
450 char *trace_buf;
476 char *raw_data; 451 char *raw_data;
477 int syscall_nr; 452 int syscall_nr;
453 int rctx;
478 int size; 454 int size;
479 int cpu; 455 int cpu;
480 456
@@ -498,41 +474,42 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
498 /* Protect the per cpu buffer, begin the rcu read side */ 474 /* Protect the per cpu buffer, begin the rcu read side */
499 local_irq_save(flags); 475 local_irq_save(flags);
500 476
477 rctx = perf_swevent_get_recursion_context();
478 if (rctx < 0)
479 goto end_recursion;
480
501 cpu = smp_processor_id(); 481 cpu = smp_processor_id();
502 482
503 if (in_nmi()) 483 trace_buf = rcu_dereference(perf_trace_buf);
504 raw_data = rcu_dereference(trace_profile_buf_nmi);
505 else
506 raw_data = rcu_dereference(trace_profile_buf);
507 484
508 if (!raw_data) 485 if (!trace_buf)
509 goto end; 486 goto end;
510 487
511 raw_data = per_cpu_ptr(raw_data, cpu); 488 raw_data = per_cpu_ptr(trace_buf, cpu);
512 489
513 /* zero the dead bytes from align to not leak stack to user */ 490 /* zero the dead bytes from align to not leak stack to user */
514 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; 491 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
515 492
516 rec = (struct syscall_trace_enter *) raw_data; 493 rec = (struct syscall_trace_enter *) raw_data;
517 tracing_generic_entry_update(&rec->ent, 0, 0); 494 tracing_generic_entry_update(&rec->ent, 0, 0);
518 rec->ent.type = sys_data->enter_id; 495 rec->ent.type = sys_data->enter_event->id;
519 rec->nr = syscall_nr; 496 rec->nr = syscall_nr;
520 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 497 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
521 (unsigned long *)&rec->args); 498 (unsigned long *)&rec->args);
522 perf_tp_event(sys_data->enter_id, 0, 1, rec, size); 499 perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size);
523 500
524end: 501end:
502 perf_swevent_put_recursion_context(rctx);
503end_recursion:
525 local_irq_restore(flags); 504 local_irq_restore(flags);
526} 505}
527 506
528int reg_prof_syscall_enter(char *name) 507int prof_sysenter_enable(struct ftrace_event_call *call)
529{ 508{
530 int ret = 0; 509 int ret = 0;
531 int num; 510 int num;
532 511
533 num = syscall_name_to_nr(name); 512 num = ((struct syscall_metadata *)call->data)->syscall_nr;
534 if (num < 0 || num >= NR_syscalls)
535 return -ENOSYS;
536 513
537 mutex_lock(&syscall_trace_lock); 514 mutex_lock(&syscall_trace_lock);
538 if (!sys_prof_refcount_enter) 515 if (!sys_prof_refcount_enter)
@@ -548,13 +525,11 @@ int reg_prof_syscall_enter(char *name)
548 return ret; 525 return ret;
549} 526}
550 527
551void unreg_prof_syscall_enter(char *name) 528void prof_sysenter_disable(struct ftrace_event_call *call)
552{ 529{
553 int num; 530 int num;
554 531
555 num = syscall_name_to_nr(name); 532 num = ((struct syscall_metadata *)call->data)->syscall_nr;
556 if (num < 0 || num >= NR_syscalls)
557 return;
558 533
559 mutex_lock(&syscall_trace_lock); 534 mutex_lock(&syscall_trace_lock);
560 sys_prof_refcount_enter--; 535 sys_prof_refcount_enter--;
@@ -570,7 +545,9 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
570 struct syscall_trace_exit *rec; 545 struct syscall_trace_exit *rec;
571 unsigned long flags; 546 unsigned long flags;
572 int syscall_nr; 547 int syscall_nr;
548 char *trace_buf;
573 char *raw_data; 549 char *raw_data;
550 int rctx;
574 int size; 551 int size;
575 int cpu; 552 int cpu;
576 553
@@ -596,17 +573,19 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
596 573
597 /* Protect the per cpu buffer, begin the rcu read side */ 574 /* Protect the per cpu buffer, begin the rcu read side */
598 local_irq_save(flags); 575 local_irq_save(flags);
576
577 rctx = perf_swevent_get_recursion_context();
578 if (rctx < 0)
579 goto end_recursion;
580
599 cpu = smp_processor_id(); 581 cpu = smp_processor_id();
600 582
601 if (in_nmi()) 583 trace_buf = rcu_dereference(perf_trace_buf);
602 raw_data = rcu_dereference(trace_profile_buf_nmi);
603 else
604 raw_data = rcu_dereference(trace_profile_buf);
605 584
606 if (!raw_data) 585 if (!trace_buf)
607 goto end; 586 goto end;
608 587
609 raw_data = per_cpu_ptr(raw_data, cpu); 588 raw_data = per_cpu_ptr(trace_buf, cpu);
610 589
611 /* zero the dead bytes from align to not leak stack to user */ 590 /* zero the dead bytes from align to not leak stack to user */
612 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; 591 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -614,24 +593,24 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
614 rec = (struct syscall_trace_exit *)raw_data; 593 rec = (struct syscall_trace_exit *)raw_data;
615 594
616 tracing_generic_entry_update(&rec->ent, 0, 0); 595 tracing_generic_entry_update(&rec->ent, 0, 0);
617 rec->ent.type = sys_data->exit_id; 596 rec->ent.type = sys_data->exit_event->id;
618 rec->nr = syscall_nr; 597 rec->nr = syscall_nr;
619 rec->ret = syscall_get_return_value(current, regs); 598 rec->ret = syscall_get_return_value(current, regs);
620 599
621 perf_tp_event(sys_data->exit_id, 0, 1, rec, size); 600 perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size);
622 601
623end: 602end:
603 perf_swevent_put_recursion_context(rctx);
604end_recursion:
624 local_irq_restore(flags); 605 local_irq_restore(flags);
625} 606}
626 607
627int reg_prof_syscall_exit(char *name) 608int prof_sysexit_enable(struct ftrace_event_call *call)
628{ 609{
629 int ret = 0; 610 int ret = 0;
630 int num; 611 int num;
631 612
632 num = syscall_name_to_nr(name); 613 num = ((struct syscall_metadata *)call->data)->syscall_nr;
633 if (num < 0 || num >= NR_syscalls)
634 return -ENOSYS;
635 614
636 mutex_lock(&syscall_trace_lock); 615 mutex_lock(&syscall_trace_lock);
637 if (!sys_prof_refcount_exit) 616 if (!sys_prof_refcount_exit)
@@ -647,13 +626,11 @@ int reg_prof_syscall_exit(char *name)
647 return ret; 626 return ret;
648} 627}
649 628
650void unreg_prof_syscall_exit(char *name) 629void prof_sysexit_disable(struct ftrace_event_call *call)
651{ 630{
652 int num; 631 int num;
653 632
654 num = syscall_name_to_nr(name); 633 num = ((struct syscall_metadata *)call->data)->syscall_nr;
655 if (num < 0 || num >= NR_syscalls)
656 return;
657 634
658 mutex_lock(&syscall_trace_lock); 635 mutex_lock(&syscall_trace_lock);
659 sys_prof_refcount_exit--; 636 sys_prof_refcount_exit--;
diff --git a/samples/Kconfig b/samples/Kconfig
index b92bde3c6a89..e4be84ac3d38 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -40,5 +40,11 @@ config SAMPLE_KRETPROBES
40 default m 40 default m
41 depends on SAMPLE_KPROBES && KRETPROBES 41 depends on SAMPLE_KPROBES && KRETPROBES
42 42
43config SAMPLE_HW_BREAKPOINT
44 tristate "Build kernel hardware breakpoint examples -- loadable module only"
45 depends on HAVE_HW_BREAKPOINT && m
46 help
47 This builds kernel hardware breakpoint example modules.
48
43endif # SAMPLES 49endif # SAMPLES
44 50
diff --git a/samples/Makefile b/samples/Makefile
index 43343a03b1f4..0f15e6d77fd6 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,3 +1,4 @@
1# Makefile for Linux samples code 1# Makefile for Linux samples code
2 2
3obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ 3obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ \
4 hw_breakpoint/
diff --git a/samples/hw_breakpoint/Makefile b/samples/hw_breakpoint/Makefile
new file mode 100644
index 000000000000..0f5c31c2fc47
--- /dev/null
+++ b/samples/hw_breakpoint/Makefile
@@ -0,0 +1 @@
obj-$(CONFIG_SAMPLE_HW_BREAKPOINT) += data_breakpoint.o
diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c
new file mode 100644
index 000000000000..29525500df00
--- /dev/null
+++ b/samples/hw_breakpoint/data_breakpoint.c
@@ -0,0 +1,87 @@
1/*
2 * data_breakpoint.c - Sample HW Breakpoint file to watch kernel data address
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * usage: insmod data_breakpoint.ko ksym=<ksym_name>
19 *
20 * This file is a kernel module that places a breakpoint on the ksym_name
21 * kernel variable using a hardware breakpoint register. The corresponding
22 * handler, which prints a backtrace, is invoked every time a write operation
23 * is performed on that variable.
24 *
25 * Copyright (C) IBM Corporation, 2009
26 *
27 * Author: K.Prasad <prasad@linux.vnet.ibm.com>
28 */
29#include <linux/module.h> /* Needed by all modules */
30#include <linux/kernel.h> /* Needed for KERN_INFO */
31#include <linux/init.h> /* Needed for the macros */
32#include <linux/kallsyms.h>
33
34#include <linux/perf_event.h>
35#include <linux/hw_breakpoint.h>
36
37struct perf_event **sample_hbp;
38
39static char ksym_name[KSYM_NAME_LEN] = "pid_max";
40module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO);
41MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any"
42 " write operations on the kernel symbol");
43
44static void sample_hbp_handler(struct perf_event *temp, void *data)
45{
46	printk(KERN_INFO "%s value has changed\n", ksym_name);
47 dump_stack();
48 printk(KERN_INFO "Dump stack from sample_hbp_handler\n");
49}
50
51static int __init hw_break_module_init(void)
52{
53 int ret;
54 DEFINE_BREAKPOINT_ATTR(attr);
55
56 attr.bp_addr = kallsyms_lookup_name(ksym_name);
57 attr.bp_len = HW_BREAKPOINT_LEN_4;
58 attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
59
60 sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler);
61 if (IS_ERR(sample_hbp)) {
62 ret = PTR_ERR(sample_hbp);
63 goto fail;
64 }
65
66 printk(KERN_INFO "HW Breakpoint for %s write installed\n", ksym_name);
67
68 return 0;
69
70fail:
71 printk(KERN_INFO "Breakpoint registration failed\n");
72
73 return ret;
74}
75
76static void __exit hw_break_module_exit(void)
77{
78 unregister_wide_hw_breakpoint(sample_hbp);
79 printk(KERN_INFO "HW Breakpoint for %s write uninstalled\n", ksym_name);
80}
81
82module_init(hw_break_module_init);
83module_exit(hw_break_module_exit);
84
85MODULE_LICENSE("GPL");
86MODULE_AUTHOR("K.Prasad");
87MODULE_DESCRIPTION("ksym breakpoint");
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index ea9f8a58678f..241310e59cd6 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -1852,10 +1852,17 @@ sub tracepoint_munge($) {
1852 my $tracepointname = 0; 1852 my $tracepointname = 0;
1853 my $tracepointargs = 0; 1853 my $tracepointargs = 0;
1854 1854
1855 if($prototype =~ m/TRACE_EVENT\((.*?),/) { 1855 if ($prototype =~ m/TRACE_EVENT\((.*?),/) {
1856 $tracepointname = $1; 1856 $tracepointname = $1;
1857 } 1857 }
1858 if($prototype =~ m/TP_PROTO\((.*?)\)/) { 1858 if ($prototype =~ m/DEFINE_SINGLE_EVENT\((.*?),/) {
1859 $tracepointname = $1;
1860 }
1861 if ($prototype =~ m/DEFINE_EVENT\((.*?),(.*?),/) {
1862 $tracepointname = $2;
1863 }
1864 $tracepointname =~ s/^\s+//; #strip leading whitespace
1865 if ($prototype =~ m/TP_PROTO\((.*?)\)/) {
1859 $tracepointargs = $1; 1866 $tracepointargs = $1;
1860 } 1867 }
1861 if (($tracepointname eq 0) || ($tracepointargs eq 0)) { 1868 if (($tracepointname eq 0) || ($tracepointargs eq 0)) {
@@ -1920,7 +1927,9 @@ sub process_state3_function($$) {
1920 if ($prototype =~ /SYSCALL_DEFINE/) { 1927 if ($prototype =~ /SYSCALL_DEFINE/) {
1921 syscall_munge(); 1928 syscall_munge();
1922 } 1929 }
1923 if ($prototype =~ /TRACE_EVENT/) { 1930 if ($prototype =~ /TRACE_EVENT/ || $prototype =~ /DEFINE_EVENT/ ||
1931 $prototype =~ /DEFINE_SINGLE_EVENT/)
1932 {
1924 tracepoint_munge($file); 1933 tracepoint_munge($file);
1925 } 1934 }
1926 dump_function($prototype, $file); 1935 dump_function($prototype, $file);
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index 0854f110bf7f..fe08660ce0bd 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -12,6 +12,7 @@ perf*.1
12perf*.xml 12perf*.xml
13perf*.html 13perf*.html
14common-cmds.h 14common-cmds.h
15perf.data
15tags 16tags
16TAGS 17TAGS
17cscope* 18cscope*
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
new file mode 100644
index 000000000000..ae525ac5a2ce
--- /dev/null
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -0,0 +1,120 @@
1perf-bench(1)
2============
3
4NAME
5----
6perf-bench - General framework for benchmark suites
7
8SYNOPSIS
9--------
10[verse]
11'perf bench' [<common options>] <subsystem> <suite> [<options>]
12
13DESCRIPTION
14-----------
15The 'perf bench' command is a general framework for benchmark suites.
16
17COMMON OPTIONS
18--------------
19-f::
20--format=::
21Specify format style.
22Currently available format styles are:
23
24'default'::
25Default style. This is mainly for human reading.
26---------------------
27% perf bench sched pipe                 # with no style specified
28(executing 1000000 pipe operations between two tasks)
29 Total time:5.855 sec
30 5.855061 usecs/op
31 170792 ops/sec
32---------------------
33
34'simple'::
35This simple style is friendly for automated
36processing by scripts.
37---------------------
38% perf bench --format=simple sched pipe # simple style specified
395.988
40---------------------
41
42SUBSYSTEM
43---------
44
45'sched'::
46 Scheduler and IPC mechanisms.
47
48SUITES FOR 'sched'
49~~~~~~~~~~~~~~~~~~
50*messaging*::
51Suite for evaluating performance of scheduler and IPC mechanisms.
52Based on hackbench by Rusty Russell.
53
54Options of *messaging*
55^^^^^^^^^^^^^^^^^^^^^^
56-p::
57--pipe::
58Use pipe() instead of socketpair()
59
60-t::
61--thread::
62Be multi-threaded instead of multi-process
63
64-g::
65--group=::
66Specify number of groups
67
68-l::
69--loop=::
70Specify number of loops
71
72Example of *messaging*
73^^^^^^^^^^^^^^^^^^^^^^
74
75---------------------
76% perf bench sched messaging # run with default
77options (20 sender and receiver processes per group)
78(10 groups == 400 processes run)
79
80 Total time:0.308 sec
81
82% perf bench sched messaging -t -g 20 # be multi-threaded, with 20 groups
83(20 sender and receiver threads per group)
84(20 groups == 800 threads run)
85
86 Total time:0.582 sec
87---------------------
88
89*pipe*::
90Suite for pipe() system call.
91Based on pipe-test-1m.c by Ingo Molnar.
92
93Options of *pipe*
94^^^^^^^^^^^^^^^^^
95-l::
96--loop=::
97Specify number of loops.
98
99Example of *pipe*
100^^^^^^^^^^^^^^^^^
101
102---------------------
103% perf bench sched pipe
104(executing 1000000 pipe operations between two tasks)
105
106 Total time:8.091 sec
107 8.091833 usecs/op
108 123581 ops/sec
109
110% perf bench sched pipe -l 1000 # loop 1000
111(executing 1000 pipe operations between two tasks)
112
113 Total time:0.016 sec
114 16.948000 usecs/op
115 59004 ops/sec
116---------------------
117
118SEE ALSO
119--------
120linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-buildid-list.txt b/tools/perf/Documentation/perf-buildid-list.txt
new file mode 100644
index 000000000000..01b642c0bf8f
--- /dev/null
+++ b/tools/perf/Documentation/perf-buildid-list.txt
@@ -0,0 +1,34 @@
1perf-buildid-list(1)
2====================
3
4NAME
5----
6perf-buildid-list - List the buildids in a perf.data file
7
8SYNOPSIS
9--------
10[verse]
11'perf buildid-list <options>'
12
13DESCRIPTION
14-----------
15This command displays the buildids found in a perf.data file, so that other
16tools can be used to fetch packages with matching symbol tables for use by
17perf report.
18
19OPTIONS
20-------
21-i::
22--input=::
23 Input file name. (default: perf.data)
24-f::
25--force::
26 Don't do ownership validation.
27-v::
28--verbose::
29 Be more verbose.
30
31SEE ALSO
32--------
33linkperf:perf-record[1], linkperf:perf-top[1],
34linkperf:perf-report[1]
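
An illustrative session for the new subcommand (the file names are examples,
not part of the patch):

----
% perf record -a sleep 5        # produce a perf.data to inspect
% perf buildid-list             # reads ./perf.data by default
% perf buildid-list -v -i other.data
----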
diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt
new file mode 100644
index 000000000000..44b0ce35c28a
--- /dev/null
+++ b/tools/perf/Documentation/perf-kmem.txt
@@ -0,0 +1,44 @@
1perf-kmem(1)
2==============
3
4NAME
5----
6perf-kmem - Tool to trace/measure kernel memory (slab) properties
7
8SYNOPSIS
9--------
10[verse]
11'perf kmem' {record} [<options>]
12
13DESCRIPTION
14-----------
15There are two variants of perf kmem:
16
17 'perf kmem record <command>' to record the kmem events
18 of an arbitrary workload.
19
20 'perf kmem' to report kernel memory statistics.
21
22OPTIONS
23-------
24-i <file>::
25--input=<file>::
26 Select the input file (default: perf.data)
27
28--stat=<caller|alloc>::
29 Select per callsite or per allocation statistics
30
31-s <key[,key2...]>::
32--sort=<key[,key2...]>::
33 Sort the output (default: frag,hit,bytes)
34
35-l <num>::
36--line=<num>::
37 Print n lines only
38
39--raw-ip::
40 Print raw ip instead of symbol
41
42SEE ALSO
43--------
44linkperf:perf-record[1]
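
An illustrative round trip through the two variants described above (the
workload and the sort key are arbitrary examples):

----
% perf kmem record sleep 10               # record kmem events of a workload
% perf kmem --stat=caller -s bytes -l 20  # top 20 callsites, sorted by bytes
----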
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
new file mode 100644
index 000000000000..9270594e6dfd
--- /dev/null
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -0,0 +1,49 @@
1perf-probe(1)
2=============
3
4NAME
5----
6perf-probe - Define new dynamic tracepoints
7
8SYNOPSIS
9--------
10[verse]
11'perf probe' [options] --add 'PROBE' [--add 'PROBE' ...]
12or
13'perf probe' [options] 'PROBE' ['PROBE' ...]
14
15
16DESCRIPTION
17-----------
18This command defines dynamic tracepoint events, either by symbol and
19registers (without debuginfo) or by C expressions (C line numbers, C
20function names, and C local variables) where debuginfo is available.
21
22
23OPTIONS
24-------
25-k::
26--vmlinux=PATH::
27 Specify vmlinux path which has debuginfo (Dwarf binary).
28
29-v::
30--verbose::
31 Be more verbose (show parsed arguments, etc).
32
33-a::
34--add::
35 Define a probe point (see PROBE SYNTAX for detail)
36
37PROBE SYNTAX
38------------
39Probe points are defined by the following syntax.
40
41 "FUNC[+OFFS|:RLN|%return][@SRC]|SRC:ALN [ARG ...]"
42
43'FUNC' specifies a probed function name, and it may have one of the following options: '+OFFS' is the offset from the function entry address in bytes, 'RLN' is the relative line number from the function entry line, and '%return' means that it probes the function return. In addition, 'SRC' specifies a source file which contains that function.
44It is also possible to specify a probe point by source line number, using the 'SRC:ALN' syntax, where 'SRC' is the source file path and 'ALN' is the line number.
45'ARG' specifies the arguments of this probe point. You can use the name of a local variable, or the kprobe-tracer argument format (e.g. $retval, %ax, etc).
46
47SEE ALSO
48--------
49linkperf:perf-trace[1], linkperf:perf-record[1]
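
A few sample probe definitions in the syntax above (the function names, the
offset, and the '$retval' usage are illustrative; '$retval' follows the
kprobe-tracer argument format mentioned earlier):

----
% perf probe --add 'schedule'                    # probe at function entry
% perf probe --add 'schedule+16'                 # 16 bytes past the entry
% perf probe --add 'do_sys_open%return $retval'  # probe the return value
----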
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 0ff23de9e453..fc46c0b40f6e 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -26,11 +26,19 @@ OPTIONS
 
 -e::
 --event=::
-	Select the PMU event. Selection can be a symbolic event name
-	(use 'perf list' to list all events) or a raw PMU
-	event (eventsel+umask) in the form of rNNN where NNN is a
-	hexadecimal event descriptor.
+	Select the PMU event. Selection can be:
 
+	- a symbolic event name (use 'perf list' to list all events)
+
+	- a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
+	  hexadecimal event descriptor.
+
+	- a hardware breakpoint event in the form of '\mem:addr[:access]'
+	  where addr is the address in memory you want to break in;
+	  access is the memory access type (read, write, execute) and can
+	  be passed as follows: '\mem:addr[:[r][w][x]]'.
+	  To profile read-write accesses at 0x1000, use
+	  'mem:0x1000:rw'.
 -a::
 	System-wide collection.
 
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 59f0b846cd71..9dccb180b7af 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -24,11 +24,11 @@ OPTIONS
 --dsos=::
 	Only consider symbols in these dsos. CSV that understands
 	file://filename entries.
--n
---show-nr-samples
+-n::
+--show-nr-samples::
 	Show the number of samples for each symbol
--T
---threads
+-T::
+--threads::
 	Show per-thread event counters
 -C::
 --comms=::
diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt
index a7910099d6fd..4b1788355eca 100644
--- a/tools/perf/Documentation/perf-timechart.txt
+++ b/tools/perf/Documentation/perf-timechart.txt
@@ -31,9 +31,12 @@ OPTIONS
 -w::
 --width=::
 	Select the width of the SVG file (default: 1000)
--p::
+-P::
 --power-only::
 	Only output the CPU power section of the diagram
+-p::
+--process::
+	Select the processes to display, by name or PID
 
 
 SEE ALSO
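
A sketch of how the re-lettered options would be used (the process name and
PID are examples):

----
% perf timechart -P                    # CPU power section only (formerly -p)
% perf timechart -p firefox -p 2045    # filter output by process name or PID
----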
diff --git a/tools/perf/Documentation/perf-trace-perl.txt b/tools/perf/Documentation/perf-trace-perl.txt
new file mode 100644
index 000000000000..c5f55f439091
--- /dev/null
+++ b/tools/perf/Documentation/perf-trace-perl.txt
@@ -0,0 +1,219 @@
1perf-trace-perl(1)
2==================
3
4NAME
5----
6perf-trace-perl - Process trace data with a Perl script
7
8SYNOPSIS
9--------
10[verse]
11'perf trace' [-s [lang]:script[.ext] ]
12
13DESCRIPTION
14-----------
15
16This perf trace option is used to process perf trace data using perf's
17built-in Perl interpreter. It reads and processes the input file and
18displays the results of the trace analysis implemented in the given
19Perl script, if any.
20
21STARTER SCRIPTS
22---------------
23
24You can avoid reading the rest of this document by running 'perf trace
25-g perl' in the same directory as an existing perf.data trace file.
26That will generate a starter script containing a handler for each of
27the event types in the trace file; it simply prints every available
28field for each event in the trace file.
29
30You can also look at the existing scripts in
31~/libexec/perf-core/scripts/perl for typical examples showing how to
32do basic things like aggregate event data, print results, etc. Also,
33the check-perf-trace.pl script, while not interesting for its results,
34attempts to exercise all of the main scripting features.
35
36EVENT HANDLERS
37--------------
38
39When perf trace is invoked using a trace script, a user-defined
40'handler function' is called for each event in the trace. If there's
41no handler function defined for a given event type, the event is
42ignored (or passed to a 'trace_unhandled' function, see below) and the
43next event is processed.
44
45Most of the event's field values are passed as arguments to the
46handler function; some of the less common ones aren't - those are
47available as calls back into the perf executable (see below).
48
49As an example, the following perf record command can be used to record
50all sched_wakeup events in the system:
51
52 # perf record -c 1 -f -a -M -R -e sched:sched_wakeup
53
54Traces meant to be processed using a script should be recorded with
55the above options: -c 1 says to sample every event, -a to enable
56system-wide collection, -M to multiplex the output, and -R to collect
57raw samples.
58
59The format file for the sched_wakeup event defines the following fields
60(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
61
62----
63 format:
64 field:unsigned short common_type;
65 field:unsigned char common_flags;
66 field:unsigned char common_preempt_count;
67 field:int common_pid;
68 field:int common_lock_depth;
69
70 field:char comm[TASK_COMM_LEN];
71 field:pid_t pid;
72 field:int prio;
73 field:int success;
74 field:int target_cpu;
75----
76
77The handler function for this event would be defined as:
78
79----
80sub sched::sched_wakeup
81{
82 my ($event_name, $context, $common_cpu, $common_secs,
83 $common_nsecs, $common_pid, $common_comm,
84 $comm, $pid, $prio, $success, $target_cpu) = @_;
85}
86----
87
88The handler function takes the form subsystem::event_name.
89
90The $common_* arguments in the handler's argument list are the set of
91arguments passed to all event handlers; some of the fields correspond
92to the common_* fields in the format file, but some are synthesized,
93and some of the common_* fields aren't common enough to be passed
94to every event as arguments but are available as library functions.
95
96Here's a brief description of each of the invariant event args:
97
98 $event_name the name of the event as text
99 $context an opaque 'cookie' used in calls back into perf
100 $common_cpu the cpu the event occurred on
101 $common_secs the secs portion of the event timestamp
102 $common_nsecs the nsecs portion of the event timestamp
103 $common_pid the pid of the current task
104 $common_comm the name of the current process
105
106All of the remaining fields in the event's format file have
107counterparts as handler function arguments of the same name, as can be
108seen in the example above.
109
110The above provides the basics needed to directly access every field of
111every event in a trace, which covers 90% of what you need to know to
112write a useful trace script. The sections below cover the rest.
113
114SCRIPT LAYOUT
115-------------
116
117Every perf trace Perl script should start by setting up a Perl module
118search path and 'use'ing a few support modules (see module
119descriptions below):
120
121----
122 use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
123 use lib "./Perf-Trace-Util/lib";
124 use Perf::Trace::Core;
125 use Perf::Trace::Context;
126 use Perf::Trace::Util;
127----
128
129The rest of the script can contain handler functions and support
130functions in any order.
131
132Aside from the event handler functions discussed above, every script
133can implement a set of optional functions:
134
135*trace_begin*, if defined, is called before any event is processed and
136gives scripts a chance to do setup tasks:
137
138----
139 sub trace_begin
140 {
141 }
142----
143
144*trace_end*, if defined, is called after all events have been
145 processed and gives scripts a chance to do end-of-script tasks, such
146 as display results:
147
148----
149sub trace_end
150{
151}
152----
153
154*trace_unhandled*, if defined, is called for any event that doesn't
155 have a handler explicitly defined for it. The standard set of common
156 arguments is passed into it:
157
158----
159sub trace_unhandled
160{
161 my ($event_name, $context, $common_cpu, $common_secs,
162 $common_nsecs, $common_pid, $common_comm) = @_;
163}
164----
165
166The remaining sections provide descriptions of each of the available
167built-in perf trace Perl modules and their associated functions.
168
169AVAILABLE MODULES AND FUNCTIONS
170-------------------------------
171
172The following sections describe the functions and variables available
173via the various Perf::Trace::* Perl modules. To use the functions and
174variables from the given module, add the corresponding 'use
175Perf::Trace::XXX' line to your perf trace script.
176
177Perf::Trace::Core Module
178~~~~~~~~~~~~~~~~~~~~~~~~
179
180This module provides some essential functions to user scripts.
181
182The *flag_str* and *symbol_str* functions provide human-readable
183strings for flag and symbolic fields. These correspond to the strings
184and values parsed from the 'print fmt' fields of the event format
185files:
186
187 flag_str($event_name, $field_name, $field_value) - returns the string representation corresponding to $field_value for the flag field $field_name of event $event_name
188 symbol_str($event_name, $field_name, $field_value) - returns the string representation corresponding to $field_value for the symbolic field $field_name of event $event_name
189
190Perf::Trace::Context Module
191~~~~~~~~~~~~~~~~~~~~~~~~~~~
192
193Some of the 'common' fields in the event format file aren't all that
194common, but need to be made accessible to user scripts nonetheless.
195
196Perf::Trace::Context defines a set of functions that can be used to
197access this data in the context of the current event. Each of these
198functions expects a $context variable, which is the same as the
199$context variable passed into every event handler as the second
200argument.
201
202 common_pc($context) - returns common_preempt_count for the current event
203 common_flags($context) - returns common_flags for the current event
204 common_lock_depth($context) - returns common_lock_depth for the current event
205
206Perf::Trace::Util Module
207~~~~~~~~~~~~~~~~~~~~~~~~
208
209Various utility functions for use with perf trace:
210
211 nsecs($secs, $nsecs) - returns total nsecs given secs/nsecs pair
212 nsecs_secs($nsecs) - returns whole secs portion given nsecs
213 nsecs_nsecs($nsecs) - returns nsecs remainder given nsecs
214 nsecs_str($nsecs) - returns printable string in the form secs.nsecs
215 avg($total, $n) - returns average given a sum and a total number of values
216
217SEE ALSO
218--------
219linkperf:perf-trace[1]
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 41ed75398ca9..07065efa60e0 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -20,6 +20,15 @@ OPTIONS
 --dump-raw-trace=::
 	Display verbose dump of the trace data.
 
+-s::
+--script=::
+	Process trace data with the given script ([lang]:script[.ext]).
+
+-g::
+--gen-script=::
+	Generate perf-trace.[ext] starter script for given language,
+	using current perf.data.
+
 SEE ALSO
 --------
-linkperf:perf-record[1]
+linkperf:perf-record[1], linkperf:perf-trace-perl[1]
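
A round trip through the two new options might look like this (a sketch; the
record flags follow the perf-trace-perl documentation above):

----
% perf record -c 1 -f -a -M -R -e sched:sched_wakeup sleep 3
% perf trace -g perl              # writes a perf-trace.pl starter script
% perf trace -s perf-trace.pl     # process perf.data with that script
----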
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 7e190d522cd5..23ec66098bdc 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -2,6 +2,7 @@
 all::
 
 # Define V=1 to have a more verbose compile.
+# Define V=2 to have an even more verbose compile.
 #
 # Define SNPRINTF_RETURNS_BOGUS if you are on a system which snprintf()
 # or vsnprintf() return -1 instead of number of characters which would
@@ -145,6 +146,10 @@ all::
 # Define NO_EXTERNAL_GREP if you don't want "perf grep" to ever call
 # your external grep (e.g., if your system lacks grep, if its grep is
 # broken, or spawning external process is slower than built-in grep perf has).
+#
+# Define LDFLAGS=-static to build a static binary.
+#
+# Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds.
 
 PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
 	@$(SHELL_PATH) util/PERF-VERSION-GEN
@@ -157,20 +162,6 @@ uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not')
 uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not')
 uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not')
 
-#
-# Add -m32 for cross-builds:
-#
-ifdef NO_64BIT
-  MBITS := -m32
-else
-  #
-  # If we're on a 64-bit kernel, use -m64:
-  #
-  ifneq ($(patsubst %64,%,$(uname_M)),$(uname_M))
-    MBITS := -m64
-  endif
-endif
-
 # CFLAGS and LDFLAGS are for the users to override from the command line.
 
 #
@@ -200,8 +191,15 @@ EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wold-style-definition
 EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wstrict-prototypes
 EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wdeclaration-after-statement
 
-CFLAGS = $(MBITS) -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -fstack-protector-all -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS)
-LDFLAGS = -lpthread -lrt -lelf -lm
+ifeq ("$(origin DEBUG)", "command line")
+  PERF_DEBUG = $(DEBUG)
+endif
+ifndef PERF_DEBUG
+  CFLAGS_OPTIMIZE = -O6
+endif
+
+CFLAGS = -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
+EXTLIBS = -lpthread -lrt -lelf -lm
 ALL_CFLAGS = $(CFLAGS)
 ALL_LDFLAGS = $(LDFLAGS)
 STRIP ?= strip
@@ -252,6 +250,9 @@ PTHREAD_LIBS = -lpthread
 # explicitly what architecture to check for. Fix this up for yours..
 SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
 
+ifeq ($(shell sh -c "echo 'int foo(void) {char X[2]; return 3;}' | $(CC) -x c -c -Werror -fstack-protector-all - -o /dev/null "$(QUIET_STDERR)" && echo y"), y)
+  CFLAGS := $(CFLAGS) -fstack-protector-all
+endif
 
 
 ### --- END CONFIGURATION SECTION ---
@@ -327,8 +328,28 @@ LIB_FILE=libperf.a
 LIB_H += ../../include/linux/perf_event.h
 LIB_H += ../../include/linux/rbtree.h
 LIB_H += ../../include/linux/list.h
+LIB_H += ../../include/linux/stringify.h
+LIB_H += util/include/linux/bitmap.h
+LIB_H += util/include/linux/bitops.h
+LIB_H += util/include/linux/compiler.h
+LIB_H += util/include/linux/ctype.h
+LIB_H += util/include/linux/kernel.h
 LIB_H += util/include/linux/list.h
+LIB_H += util/include/linux/module.h
+LIB_H += util/include/linux/poison.h
+LIB_H += util/include/linux/prefetch.h
+LIB_H += util/include/linux/rbtree.h
+LIB_H += util/include/linux/string.h
+LIB_H += util/include/linux/types.h
+LIB_H += util/include/asm/asm-offsets.h
+LIB_H += util/include/asm/bitops.h
+LIB_H += util/include/asm/byteorder.h
+LIB_H += util/include/asm/swab.h
+LIB_H += util/include/asm/system.h
+LIB_H += util/include/asm/uaccess.h
 LIB_H += perf.h
+LIB_H += util/debugfs.h
+LIB_H += util/event.h
 LIB_H += util/types.h
 LIB_H += util/levenshtein.h
 LIB_H += util/parse-options.h
@@ -342,15 +363,22 @@ LIB_H += util/strlist.h
 LIB_H += util/run-command.h
 LIB_H += util/sigchain.h
 LIB_H += util/symbol.h
-LIB_H += util/module.h
 LIB_H += util/color.h
 LIB_H += util/values.h
+LIB_H += util/sort.h
+LIB_H += util/hist.h
+LIB_H += util/thread.h
+LIB_H += util/data_map.h
+LIB_H += util/probe-finder.h
+LIB_H += util/probe-event.h
 
 LIB_OBJS += util/abspath.o
 LIB_OBJS += util/alias.o
 LIB_OBJS += util/config.o
 LIB_OBJS += util/ctype.o
+LIB_OBJS += util/debugfs.o
 LIB_OBJS += util/environment.o
+LIB_OBJS += util/event.o
 LIB_OBJS += util/exec_cmd.o
 LIB_OBJS += util/help.o
 LIB_OBJS += util/levenshtein.o
@@ -358,6 +386,9 @@ LIB_OBJS += util/parse-options.o
 LIB_OBJS += util/parse-events.o
 LIB_OBJS += util/path.o
 LIB_OBJS += util/rbtree.o
+LIB_OBJS += util/bitmap.o
+LIB_OBJS += util/hweight.o
+LIB_OBJS += util/find_next_bit.o
 LIB_OBJS += util/run-command.o
 LIB_OBJS += util/quote.o
 LIB_OBJS += util/strbuf.o
@@ -367,7 +398,6 @@ LIB_OBJS += util/usage.o
 LIB_OBJS += util/wrapper.o
 LIB_OBJS += util/sigchain.o
 LIB_OBJS += util/symbol.o
-LIB_OBJS += util/module.o
 LIB_OBJS += util/color.o
 LIB_OBJS += util/pager.o
 LIB_OBJS += util/header.o
@@ -379,11 +409,25 @@ LIB_OBJS += util/thread.o
 LIB_OBJS += util/trace-event-parse.o
 LIB_OBJS += util/trace-event-read.o
 LIB_OBJS += util/trace-event-info.o
+LIB_OBJS += util/trace-event-perl.o
 LIB_OBJS += util/svghelper.o
+LIB_OBJS += util/sort.o
+LIB_OBJS += util/hist.o
+LIB_OBJS += util/data_map.o
+LIB_OBJS += util/probe-event.o
 
 BUILTIN_OBJS += builtin-annotate.o
+
+BUILTIN_OBJS += builtin-bench.o
+
+# Benchmark modules
+BUILTIN_OBJS += bench/sched-messaging.o
+BUILTIN_OBJS += bench/sched-pipe.o
+BUILTIN_OBJS += bench/mem-memcpy.o
+
 BUILTIN_OBJS += builtin-help.o
 BUILTIN_OBJS += builtin-sched.o
+BUILTIN_OBJS += builtin-buildid-list.o
 BUILTIN_OBJS += builtin-list.o
 BUILTIN_OBJS += builtin-record.o
 BUILTIN_OBJS += builtin-report.o
@@ -391,9 +435,16 @@ BUILTIN_OBJS += builtin-stat.o
 BUILTIN_OBJS += builtin-timechart.o
 BUILTIN_OBJS += builtin-top.o
 BUILTIN_OBJS += builtin-trace.o
+BUILTIN_OBJS += builtin-probe.o
+BUILTIN_OBJS += builtin-kmem.o
 
 PERFLIBS = $(LIB_FILE)
 
+ifeq ($(V), 2)
+	QUIET_STDERR = ">/dev/null"
+else
+	QUIET_STDERR = ">/dev/null 2>&1"
+endif
 #
 # Platform specific tweaks
 #
@@ -421,36 +472,58 @@ ifeq ($(uname_S),Darwin)
 	PTHREAD_LIBS =
 endif
 
-ifeq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y)
-	ifneq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y)
+ifeq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y)
+ifneq ($(shell sh -c "(echo '\#include <gnu/libc-version.h>'; echo 'int main(void) { const char * version = gnu_get_libc_version(); return (long)version; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y)
+	msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static);
+endif
+
+	ifneq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y)
 		BASIC_CFLAGS += -DLIBELF_NO_MMAP
 	endif
 else
 	msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel and glibc-dev[el]);
 endif
 
+ifneq ($(shell sh -c "(echo '\#include <libdwarf/dwarf.h>'; echo '\#include <libdwarf/libdwarf.h>'; echo 'int main(void) { Dwarf_Debug dbg; Dwarf_Error err; Dwarf_Ranges *rng; dwarf_init(0, DW_DLC_READ, 0, 0, &dbg, &err); dwarf_get_ranges(dbg, 0, &rng, 0, 0, &err); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ldwarf -lelf -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y)
+	msg := $(warning No libdwarf.h found or old libdwarf.h found, disables dwarf support. Please install libdwarf-dev/libdwarf-devel >= 20081231);
+	BASIC_CFLAGS += -DNO_LIBDWARF
+else
+	EXTLIBS += -lelf -ldwarf
+	LIB_OBJS += util/probe-finder.o
+endif
+
+PERL_EMBED_LDOPTS = `perl -MExtUtils::Embed -e ldopts 2>/dev/null`
+PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
+
+ifneq ($(shell sh -c "(echo '\#include <EXTERN.h>'; echo '\#include <perl.h>'; echo 'int main(void) { perl_alloc(); return 0; }') | $(CC) -x c - $(PERL_EMBED_CCOPTS) -o /dev/null $(PERL_EMBED_LDOPTS) > /dev/null 2>&1 && echo y"), y)
+	BASIC_CFLAGS += -DNO_LIBPERL
+else
+	ALL_LDFLAGS += $(PERL_EMBED_LDOPTS)
+	LIB_OBJS += scripts/perl/Perf-Trace-Util/Context.o
+endif
+
 ifdef NO_DEMANGLE
 	BASIC_CFLAGS += -DNO_DEMANGLE
 else
-	has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd > /dev/null 2>&1 && echo y")
+	has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd "$(QUIET_STDERR)" && echo y")
 
 	ifeq ($(has_bfd),y)
 		EXTLIBS += -lbfd
 	else
-		has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y")
+		has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty "$(QUIET_STDERR)" && echo y")
 		ifeq ($(has_bfd_iberty),y)
 			EXTLIBS += -lbfd -liberty
 		else
-			has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y")
+			has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty -lz "$(QUIET_STDERR)" && echo y")
 			ifeq ($(has_bfd_iberty_z),y)
 				EXTLIBS += -lbfd -liberty -lz
 			else
-				has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -liberty > /dev/null 2>&1 && echo y")
+				has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -liberty "$(QUIET_STDERR)" && echo y")
				ifeq ($(has_cplus_demangle),y)
					EXTLIBS += -liberty
					BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE
				else
-					msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling)
+					msg := $(warning No bfd.h/libbfd found, install binutils-dev[el]/zlib-static to gain symbol demangling)
					BASIC_CFLAGS += -DNO_DEMANGLE
				endif
			endif
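
All of the feature tests above share one idiom: pipe a tiny C program into
$(CC) and branch on whether it compiles and links. A standalone sketch of the
pattern (illustrative shell, not part of the patch):

----
# Probe for libelf the same way the Makefile's $(shell ...) tests do:
if echo 'int main(void) { return 0; }' | \
   cc -x c - -lelf -o /dev/null >/dev/null 2>&1; then
	echo "libelf found"
else
	echo "libelf missing; the build would take the \$(error ...) path"
fi
----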
@@ -787,6 +860,25 @@ util/config.o: util/config.c PERF-CFLAGS
 util/rbtree.o: ../../lib/rbtree.c PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o util/rbtree.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
 
+# some perf warning policies can't fit to lib/bitmap.c, eg: it warns about variable shadowing
+# from <string.h> that comes from kernel headers wrapping.
+KBITMAP_FLAGS=`echo $(ALL_CFLAGS) | sed s/-Wshadow// | sed s/-Wswitch-default// | sed s/-Wextra//`
+
+util/bitmap.o: ../../lib/bitmap.c PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o util/bitmap.o -c $(KBITMAP_FLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
+
+util/hweight.o: ../../lib/hweight.c PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o util/hweight.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
+
+util/find_next_bit.o: ../../lib/find_next_bit.c PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o util/find_next_bit.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
+
+util/trace-event-perl.o: util/trace-event-perl.c PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o util/trace-event-perl.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
+
+scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o scripts/perl/Perf-Trace-Util/Context.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
+
 perf-%$X: %.o $(PERFLIBS)
 	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
 
@@ -894,6 +986,13 @@ export perfexec_instdir
 install: all
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
 	$(INSTALL) perf$X '$(DESTDIR_SQ)$(bindir_SQ)'
+	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
+	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
+	$(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
+	$(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'
+	$(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
+	$(INSTALL) scripts/perl/Perf-Trace-Util/Makefile.PL -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util'
+	$(INSTALL) scripts/perl/Perf-Trace-Util/README -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util'
 ifdef BUILT_INS
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
 	$(INSTALL) $(BUILT_INS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
@@ -979,7 +1078,7 @@ distclean: clean
 #	$(RM) configure
 
 clean:
-	$(RM) *.o */*.o $(LIB_FILE)
+	$(RM) *.o */*.o */*/*.o */*/*/*.o $(LIB_FILE)
 	$(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf$X
 	$(RM) $(TEST_PROGRAMS)
 	$(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo common-cmds.h TAGS tags cscope*
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
new file mode 100644
index 000000000000..f7781c6267c0
--- /dev/null
+++ b/tools/perf/bench/bench.h
@@ -0,0 +1,17 @@
1#ifndef BENCH_H
2#define BENCH_H
3
4extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
5extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
6extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used);
7
8#define BENCH_FORMAT_DEFAULT_STR "default"
9#define BENCH_FORMAT_DEFAULT 0
10#define BENCH_FORMAT_SIMPLE_STR "simple"
11#define BENCH_FORMAT_SIMPLE 1
12
13#define BENCH_FORMAT_UNKNOWN -1
14
15extern int bench_format;
16
17#endif
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
new file mode 100644
index 000000000000..89773178e894
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy.c
@@ -0,0 +1,193 @@
1/*
2 * mem-memcpy.c
3 *
4 * memcpy: Simple memory copy in various ways
5 *
6 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
7 */
8#include <ctype.h>
9
10#include "../perf.h"
11#include "../util/util.h"
12#include "../util/parse-options.h"
13#include "../util/string.h"
14#include "../util/header.h"
15#include "bench.h"
16
17#include <stdio.h>
18#include <stdlib.h>
19#include <string.h>
20#include <sys/time.h>
21#include <errno.h>
22
23#define K 1024
24
25static const char *length_str = "1MB";
26static const char *routine = "default";
27static int use_clock = 0;
28static int clock_fd;
29
30static const struct option options[] = {
31 OPT_STRING('l', "length", &length_str, "1MB",
32 "Specify length of memory to copy. "
33 "available unit: B, MB, GB (upper and lower)"),
34 OPT_STRING('r', "routine", &routine, "default",
35 "Specify routine to copy"),
36 OPT_BOOLEAN('c', "clock", &use_clock,
37 "Use CPU clock for measuring"),
38 OPT_END()
39};
40
41struct routine {
42 const char *name;
43 const char *desc;
44 void * (*fn)(void *dst, const void *src, size_t len);
45};
46
47struct routine routines[] = {
48 { "default",
49 "Default memcpy() provided by glibc",
50 memcpy },
51 { NULL,
52 NULL,
53 NULL }
54};
55
56static const char * const bench_mem_memcpy_usage[] = {
57 "perf bench mem memcpy <options>",
58 NULL
59};
60
61static struct perf_event_attr clock_attr = {
62 .type = PERF_TYPE_HARDWARE,
63 .config = PERF_COUNT_HW_CPU_CYCLES
64};
65
66static void init_clock(void)
67{
68 clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
69
70 if (clock_fd < 0 && errno == ENOSYS)
71 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
72 else
73 BUG_ON(clock_fd < 0);
74}
75
76static u64 get_clock(void)
77{
78 int ret;
79 u64 clk;
80
81 ret = read(clock_fd, &clk, sizeof(u64));
82 BUG_ON(ret != sizeof(u64));
83
84 return clk;
85}
86
87static double timeval2double(struct timeval *ts)
88{
89 return (double)ts->tv_sec +
90 (double)ts->tv_usec / (double)1000000;
91}
92
93int bench_mem_memcpy(int argc, const char **argv,
94 const char *prefix __used)
95{
96 int i;
97 void *dst, *src;
98 size_t length;
99 double bps = 0.0;
100 struct timeval tv_start, tv_end, tv_diff;
101 u64 clock_start, clock_end, clock_diff;
102
103 clock_start = clock_end = clock_diff = 0ULL;
104 argc = parse_options(argc, argv, options,
105 bench_mem_memcpy_usage, 0);
106
107 tv_diff.tv_sec = 0;
108 tv_diff.tv_usec = 0;
109 length = (size_t)perf_atoll((char *)length_str);
110
111 if ((s64)length <= 0) {
112 fprintf(stderr, "Invalid length:%s\n", length_str);
113 return 1;
114 }
115
116 for (i = 0; routines[i].name; i++) {
117 if (!strcmp(routines[i].name, routine))
118 break;
119 }
120 if (!routines[i].name) {
121 printf("Unknown routine:%s\n", routine);
122 printf("Available routines...\n");
123 for (i = 0; routines[i].name; i++) {
124 printf("\t%s ... %s\n",
125 routines[i].name, routines[i].desc);
126 }
127 return 1;
128 }
129
130 dst = zalloc(length);
131 if (!dst)
132 die("memory allocation failed - maybe length is too large?\n");
133
134 src = zalloc(length);
135 if (!src)
136 die("memory allocation failed - maybe length is too large?\n");
137
138 if (bench_format == BENCH_FORMAT_DEFAULT) {
139 printf("# Copying %s Bytes from %p to %p ...\n\n",
140 length_str, src, dst);
141 }
142
143 if (use_clock) {
144 init_clock();
145 clock_start = get_clock();
146 } else {
147 BUG_ON(gettimeofday(&tv_start, NULL));
148 }
149
150 routines[i].fn(dst, src, length);
151
152 if (use_clock) {
153 clock_end = get_clock();
154 clock_diff = clock_end - clock_start;
155 } else {
156 BUG_ON(gettimeofday(&tv_end, NULL));
157 timersub(&tv_end, &tv_start, &tv_diff);
158 bps = (double)((double)length / timeval2double(&tv_diff));
159 }
160
161 switch (bench_format) {
162 case BENCH_FORMAT_DEFAULT:
163 if (use_clock) {
164 printf(" %14lf Clock/Byte\n",
165 (double)clock_diff / (double)length);
166 } else {
167 if (bps < K)
168 printf(" %14lf B/Sec\n", bps);
169 else if (bps < K * K)
170 printf(" %14lfd KB/Sec\n", bps / 1024);
171 else if (bps < K * K * K)
172 printf(" %14lf MB/Sec\n", bps / 1024 / 1024);
173 else {
174 printf(" %14lf GB/Sec\n",
175 bps / 1024 / 1024 / 1024);
176 }
177 }
178 break;
179 case BENCH_FORMAT_SIMPLE:
180 if (use_clock) {
181 printf("%14lf\n",
182 (double)clock_diff / (double)length);
183 } else
184 printf("%lf\n", bps);
185 break;
186 default:
187 /* reaching this means there's some disaster: */
188 die("unknown format: %d\n", bench_format);
189 break;
190 }
191
192 return 0;
193}
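
Once wired into the Makefile above, the benchmark would be driven like the
other suites (a sketch; '-c' selects the perf-counter clock path set up by
init_clock()):

----
% perf bench mem memcpy              # copy the default 1MB, gettimeofday timing
% perf bench mem memcpy -l 1GB -c    # copy 1GB, timed in CPU cycles
----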
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
new file mode 100644
index 000000000000..605a2a959aa8
--- /dev/null
+++ b/tools/perf/bench/sched-messaging.c
@@ -0,0 +1,336 @@
1/*
2 *
3 * sched-messaging.c
4 *
5 * messaging: Benchmark for scheduler and IPC mechanisms
6 *
7 * Based on hackbench by Rusty Russell <rusty@rustcorp.com.au>
8 * Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
9 *
10 */
11
12#include "../perf.h"
13#include "../util/util.h"
14#include "../util/parse-options.h"
15#include "../builtin.h"
16#include "bench.h"
17
18/* Test groups of 20 processes spraying to 20 receivers */
19#include <pthread.h>
20#include <stdio.h>
21#include <stdlib.h>
22#include <string.h>
23#include <errno.h>
24#include <unistd.h>
25#include <sys/types.h>
26#include <sys/socket.h>
27#include <sys/wait.h>
28#include <sys/time.h>
29#include <sys/poll.h>
30#include <limits.h>
31
32#define DATASIZE 100
33
34static int use_pipes = 0;
35static unsigned int loops = 100;
36static unsigned int thread_mode = 0;
37static unsigned int num_groups = 10;
38
39struct sender_context {
40 unsigned int num_fds;
41 int ready_out;
42 int wakefd;
43 int out_fds[0];
44};
45
46struct receiver_context {
47 unsigned int num_packets;
48 int in_fds[2];
49 int ready_out;
50 int wakefd;
51};
52
53static void barf(const char *msg)
54{
55 fprintf(stderr, "%s (error: %s)\n", msg, strerror(errno));
56 exit(1);
57}
58
59static void fdpair(int fds[2])
60{
61 if (use_pipes) {
62 if (pipe(fds) == 0)
63 return;
64 } else {
65 if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == 0)
66 return;
67 }
68
69 barf(use_pipes ? "pipe()" : "socketpair()");
70}
71
72/* Block until we're ready to go */
73static void ready(int ready_out, int wakefd)
74{
75 char dummy;
76 struct pollfd pollfd = { .fd = wakefd, .events = POLLIN };
77
78 /* Tell them we're ready. */
79 if (write(ready_out, &dummy, 1) != 1)
80 barf("CLIENT: ready write");
81
82 /* Wait for "GO" signal */
83 if (poll(&pollfd, 1, -1) != 1)
84 barf("poll");
85}
86
87/* Sender sprays loops messages down each file descriptor */
88static void *sender(struct sender_context *ctx)
89{
90 char data[DATASIZE];
91 unsigned int i, j;
92
93 ready(ctx->ready_out, ctx->wakefd);
94
95 /* Now pump to every receiver. */
96 for (i = 0; i < loops; i++) {
97 for (j = 0; j < ctx->num_fds; j++) {
98 int ret, done = 0;
99
100again:
101 ret = write(ctx->out_fds[j], data + done,
102 sizeof(data)-done);
103 if (ret < 0)
104 barf("SENDER: write");
105 done += ret;
106 if (done < DATASIZE)
107 goto again;
108 }
109 }
110
111 return NULL;
112}
113
114
115/* One receiver per fd */
116static void *receiver(struct receiver_context* ctx)
117{
118 unsigned int i;
119
120 if (!thread_mode)
121 close(ctx->in_fds[1]);
122
123 /* Wait for start... */
124 ready(ctx->ready_out, ctx->wakefd);
125
126 /* Receive them all */
127 for (i = 0; i < ctx->num_packets; i++) {
128 char data[DATASIZE];
129 int ret, done = 0;
130
131again:
132 ret = read(ctx->in_fds[0], data + done, DATASIZE - done);
133 if (ret < 0)
134 barf("SERVER: read");
135 done += ret;
136 if (done < DATASIZE)
137 goto again;
138 }
139
140 return NULL;
141}
142
143static pthread_t create_worker(void *ctx, void *(*func)(void *))
144{
145 pthread_attr_t attr;
146 pthread_t childid;
147 int err;
148
149 if (!thread_mode) {
150 /* process mode */
151 /* Fork the receiver. */
152 switch (fork()) {
153 case -1:
154 barf("fork()");
155 break;
156 case 0:
157 (*func) (ctx);
158 exit(0);
159 break;
160 default:
161 break;
162 }
163
164 return (pthread_t)0;
165 }
166
167 if (pthread_attr_init(&attr) != 0)
168 barf("pthread_attr_init:");
169
170#ifndef __ia64__
171 if (pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0)
172 barf("pthread_attr_setstacksize");
173#endif
174
175 err = pthread_create(&childid, &attr, func, ctx);
176 if (err != 0) {
177 fprintf(stderr, "pthread_create failed: %s (%d)\n",
178 strerror(err), err);
179 exit(-1);
180 }
181 return childid;
182}
183
184static void reap_worker(pthread_t id)
185{
186 int proc_status;
187 void *thread_status;
188
189 if (!thread_mode) {
190 /* process mode */
191 wait(&proc_status);
192 if (!WIFEXITED(proc_status))
193 exit(1);
194 } else {
195 pthread_join(id, &thread_status);
196 }
197}
198
199/* One group of senders and receivers */
200static unsigned int group(pthread_t *pth,
201 unsigned int num_fds,
202 int ready_out,
203 int wakefd)
204{
205 unsigned int i;
206 struct sender_context *snd_ctx = malloc(sizeof(struct sender_context)
207 + num_fds * sizeof(int));
208
209 if (!snd_ctx)
210 barf("malloc()");
211
212 for (i = 0; i < num_fds; i++) {
213 int fds[2];
214 struct receiver_context *ctx = malloc(sizeof(*ctx));
215
216 if (!ctx)
217 barf("malloc()");
218
219
220 /* Create the pipe between client and server */
221 fdpair(fds);
222
223 ctx->num_packets = num_fds * loops;
224 ctx->in_fds[0] = fds[0];
225 ctx->in_fds[1] = fds[1];
226 ctx->ready_out = ready_out;
227 ctx->wakefd = wakefd;
228
229 pth[i] = create_worker(ctx, (void *)receiver);
230
231 snd_ctx->out_fds[i] = fds[1];
232 if (!thread_mode)
233 close(fds[0]);
234 }
235
236 /* Now we have all the fds, fork the senders */
237 for (i = 0; i < num_fds; i++) {
238 snd_ctx->ready_out = ready_out;
239 snd_ctx->wakefd = wakefd;
240 snd_ctx->num_fds = num_fds;
241
242 pth[num_fds+i] = create_worker(snd_ctx, (void *)sender);
243 }
244
245 /* Close the fds we have left */
246 if (!thread_mode)
247 for (i = 0; i < num_fds; i++)
248 close(snd_ctx->out_fds[i]);
249
250 /* Return number of children to reap */
251 return num_fds * 2;
252}
253
254static const struct option options[] = {
255 OPT_BOOLEAN('p', "pipe", &use_pipes,
256 "Use pipe() instead of socketpair()"),
257 OPT_BOOLEAN('t', "thread", &thread_mode,
258 "Be multi thread instead of multi process"),
259 OPT_INTEGER('g', "group", &num_groups,
260 "Specify number of groups"),
261 OPT_INTEGER('l', "loop", &loops,
262 "Specify number of loops"),
263 OPT_END()
264};
265
266static const char * const bench_sched_message_usage[] = {
267 "perf bench sched messaging <options>",
268 NULL
269};
270
271int bench_sched_messaging(int argc, const char **argv,
272 const char *prefix __used)
273{
274 unsigned int i, total_children;
275 struct timeval start, stop, diff;
276 unsigned int num_fds = 20;
277 int readyfds[2], wakefds[2];
278 char dummy;
279 pthread_t *pth_tab;
280
281 argc = parse_options(argc, argv, options,
282 bench_sched_message_usage, 0);
283
284 pth_tab = malloc(num_fds * 2 * num_groups * sizeof(pthread_t));
285 if (!pth_tab)
286 barf("main:malloc()");
287
288 fdpair(readyfds);
289 fdpair(wakefds);
290
291 total_children = 0;
292 for (i = 0; i < num_groups; i++)
293 total_children += group(pth_tab+total_children, num_fds,
294 readyfds[1], wakefds[0]);
295
296 /* Wait for everyone to be ready */
297 for (i = 0; i < total_children; i++)
298 if (read(readyfds[0], &dummy, 1) != 1)
299 barf("Reading for readyfds");
300
301 gettimeofday(&start, NULL);
302
303 /* Kick them off */
304 if (write(wakefds[1], &dummy, 1) != 1)
305 barf("Writing to start them");
306
307 /* Reap them all */
308 for (i = 0; i < total_children; i++)
309 reap_worker(pth_tab[i]);
310
311 gettimeofday(&stop, NULL);
312
313 timersub(&stop, &start, &diff);
314
315 switch (bench_format) {
316 case BENCH_FORMAT_DEFAULT:
317 printf("# %d sender and receiver %s per group\n",
318 num_fds, thread_mode ? "threads" : "processes");
319 printf("# %d groups == %d %s run\n\n",
320 num_groups, num_groups * 2 * num_fds,
321 thread_mode ? "threads" : "processes");
322 printf(" %14s: %lu.%03lu [sec]\n", "Total time",
323 diff.tv_sec, diff.tv_usec/1000);
324 break;
325 case BENCH_FORMAT_SIMPLE:
326 printf("%lu.%03lu\n", diff.tv_sec, diff.tv_usec/1000);
327 break;
328 default:
329 /* reaching here means disaster */
330 fprintf(stderr, "Unknown format:%d\n", bench_format);
331 exit(1);
332 break;
333 }
334
335 return 0;
336}
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
new file mode 100644
index 000000000000..238185f97977
--- /dev/null
+++ b/tools/perf/bench/sched-pipe.c
@@ -0,0 +1,124 @@
1/*
2 *
3 * sched-pipe.c
4 *
5 * pipe: Benchmark for pipe()
6 *
7 * Based on pipe-test-1m.c by Ingo Molnar <mingo@redhat.com>
8 * http://people.redhat.com/mingo/cfs-scheduler/tools/pipe-test-1m.c
9 * Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
10 *
11 */
12
13#include "../perf.h"
14#include "../util/util.h"
15#include "../util/parse-options.h"
16#include "../builtin.h"
17#include "bench.h"
18
19#include <unistd.h>
20#include <stdio.h>
21#include <stdlib.h>
22#include <signal.h>
23#include <sys/wait.h>
24#include <linux/unistd.h>
25#include <string.h>
26#include <errno.h>
27#include <assert.h>
28#include <sys/time.h>
29#include <sys/types.h>
30
31#define LOOPS_DEFAULT 1000000
32static int loops = LOOPS_DEFAULT;
33
34static const struct option options[] = {
35 OPT_INTEGER('l', "loop", &loops,
36 "Specify number of loops"),
37 OPT_END()
38};
39
40static const char * const bench_sched_pipe_usage[] = {
41 "perf bench sched pipe <options>",
42 NULL
43};
44
45int bench_sched_pipe(int argc, const char **argv,
46 const char *prefix __used)
47{
48 int pipe_1[2], pipe_2[2];
49 int m = 0, i;
50 struct timeval start, stop, diff;
51 unsigned long long result_usec = 0;
52
53 /*
54 * "ret" exists only because discarding the return value of
55 * read()/write() triggers a warn_unused_result warning, which
56 * perf's -Werror build environment turns into an error
57 */
58 int ret, wait_stat;
59 pid_t pid, retpid;
60
61 argc = parse_options(argc, argv, options,
62 bench_sched_pipe_usage, 0);
63
64 assert(!pipe(pipe_1));
65 assert(!pipe(pipe_2));
66
67 pid = fork();
68 assert(pid >= 0);
69
70 gettimeofday(&start, NULL);
71
72 if (!pid) {
73 for (i = 0; i < loops; i++) {
74 ret = read(pipe_1[0], &m, sizeof(int));
75 ret = write(pipe_2[1], &m, sizeof(int));
76 }
77 } else {
78 for (i = 0; i < loops; i++) {
79 ret = write(pipe_1[1], &m, sizeof(int));
80 ret = read(pipe_2[0], &m, sizeof(int));
81 }
82 }
83
84 gettimeofday(&stop, NULL);
85 timersub(&stop, &start, &diff);
86
87 if (pid) {
88 retpid = waitpid(pid, &wait_stat, 0);
89 assert((retpid == pid) && WIFEXITED(wait_stat));
90 return 0;
91 }
92
93 switch (bench_format) {
94 case BENCH_FORMAT_DEFAULT:
95 printf("# Extecuted %d pipe operations between two tasks\n\n",
96 loops);
97
98 result_usec = diff.tv_sec * 1000000;
99 result_usec += diff.tv_usec;
100
101 printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
102 diff.tv_sec, diff.tv_usec/1000);
103
104 printf(" %14lf usecs/op\n",
105 (double)result_usec / (double)loops);
106 printf(" %14d ops/sec\n",
107 (int)((double)loops /
108 ((double)result_usec / (double)1000000)));
109 break;
110
111 case BENCH_FORMAT_SIMPLE:
112 printf("%lu.%03lu\n",
113 diff.tv_sec, diff.tv_usec / 1000);
114 break;
115
116 default:
117 /* reaching here means disaster */
118 fprintf(stderr, "Unknown format:%d\n", bench_format);
119 exit(1);
120 break;
121 }
122
123 return 0;
124}
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 1ec741615814..0bf2e8f9af57 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -19,29 +19,26 @@
19#include "perf.h" 19#include "perf.h"
20#include "util/debug.h" 20#include "util/debug.h"
21 21
22#include "util/event.h"
22#include "util/parse-options.h" 23#include "util/parse-options.h"
23#include "util/parse-events.h" 24#include "util/parse-events.h"
24#include "util/thread.h" 25#include "util/thread.h"
26#include "util/sort.h"
27#include "util/hist.h"
28#include "util/data_map.h"
25 29
26static char const *input_name = "perf.data"; 30static char const *input_name = "perf.data";
27 31
28static char default_sort_order[] = "comm,symbol";
29static char *sort_order = default_sort_order;
30
31static int force; 32static int force;
32static int input;
33static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
34 33
35static int full_paths; 34static int full_paths;
36 35
37static int print_line; 36static int print_line;
38 37
39static unsigned long page_size; 38struct sym_hist {
40static unsigned long mmap_window = 32; 39 u64 sum;
41 40 u64 ip[0];
42static struct rb_root threads; 41};
43static struct thread *last_match;
44
45 42
46struct sym_ext { 43struct sym_ext {
47 struct rb_node node; 44 struct rb_node node;
@@ -49,247 +46,38 @@ struct sym_ext {
49 char *path; 46 char *path;
50}; 47};
51 48
52/* 49struct sym_priv {
53 * histogram, sorted on item, collects counts 50 struct sym_hist *hist;
54 */ 51 struct sym_ext *ext;
55
56static struct rb_root hist;
57
58struct hist_entry {
59 struct rb_node rb_node;
60
61 struct thread *thread;
62 struct map *map;
63 struct dso *dso;
64 struct symbol *sym;
65 u64 ip;
66 char level;
67
68 uint32_t count;
69};
70
71/*
72 * configurable sorting bits
73 */
74
75struct sort_entry {
76 struct list_head list;
77
78 const char *header;
79
80 int64_t (*cmp)(struct hist_entry *, struct hist_entry *);
81 int64_t (*collapse)(struct hist_entry *, struct hist_entry *);
82 size_t (*print)(FILE *fp, struct hist_entry *);
83};
84
85/* --sort pid */
86
87static int64_t
88sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
89{
90 return right->thread->pid - left->thread->pid;
91}
92
93static size_t
94sort__thread_print(FILE *fp, struct hist_entry *self)
95{
96 return fprintf(fp, "%16s:%5d", self->thread->comm ?: "", self->thread->pid);
97}
98
99static struct sort_entry sort_thread = {
100 .header = " Command: Pid",
101 .cmp = sort__thread_cmp,
102 .print = sort__thread_print,
103};
104
105/* --sort comm */
106
107static int64_t
108sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
109{
110 return right->thread->pid - left->thread->pid;
111}
112
113static int64_t
114sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
115{
116 char *comm_l = left->thread->comm;
117 char *comm_r = right->thread->comm;
118
119 if (!comm_l || !comm_r) {
120 if (!comm_l && !comm_r)
121 return 0;
122 else if (!comm_l)
123 return -1;
124 else
125 return 1;
126 }
127
128 return strcmp(comm_l, comm_r);
129}
130
131static size_t
132sort__comm_print(FILE *fp, struct hist_entry *self)
133{
134 return fprintf(fp, "%16s", self->thread->comm);
135}
136
137static struct sort_entry sort_comm = {
138 .header = " Command",
139 .cmp = sort__comm_cmp,
140 .collapse = sort__comm_collapse,
141 .print = sort__comm_print,
142};
143
144/* --sort dso */
145
146static int64_t
147sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
148{
149 struct dso *dso_l = left->dso;
150 struct dso *dso_r = right->dso;
151
152 if (!dso_l || !dso_r) {
153 if (!dso_l && !dso_r)
154 return 0;
155 else if (!dso_l)
156 return -1;
157 else
158 return 1;
159 }
160
161 return strcmp(dso_l->name, dso_r->name);
162}
163
164static size_t
165sort__dso_print(FILE *fp, struct hist_entry *self)
166{
167 if (self->dso)
168 return fprintf(fp, "%-25s", self->dso->name);
169
170 return fprintf(fp, "%016llx ", (u64)self->ip);
171}
172
173static struct sort_entry sort_dso = {
174 .header = "Shared Object ",
175 .cmp = sort__dso_cmp,
176 .print = sort__dso_print,
177};
178
179/* --sort symbol */
180
181static int64_t
182sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
183{
184 u64 ip_l, ip_r;
185
186 if (left->sym == right->sym)
187 return 0;
188
189 ip_l = left->sym ? left->sym->start : left->ip;
190 ip_r = right->sym ? right->sym->start : right->ip;
191
192 return (int64_t)(ip_r - ip_l);
193}
194
195static size_t
196sort__sym_print(FILE *fp, struct hist_entry *self)
197{
198 size_t ret = 0;
199
200 if (verbose)
201 ret += fprintf(fp, "%#018llx ", (u64)self->ip);
202
203 if (self->sym) {
204 ret += fprintf(fp, "[%c] %s",
205 self->dso == kernel_dso ? 'k' : '.', self->sym->name);
206 } else {
207 ret += fprintf(fp, "%#016llx", (u64)self->ip);
208 }
209
210 return ret;
211}
212
213static struct sort_entry sort_sym = {
214 .header = "Symbol",
215 .cmp = sort__sym_cmp,
216 .print = sort__sym_print,
217};
218
219static int sort__need_collapse = 0;
220
221struct sort_dimension {
222 const char *name;
223 struct sort_entry *entry;
224 int taken;
225}; 52};
226 53
227static struct sort_dimension sort_dimensions[] = { 54static struct symbol_conf symbol_conf = {
228 { .name = "pid", .entry = &sort_thread, }, 55 .priv_size = sizeof(struct sym_priv),
229 { .name = "comm", .entry = &sort_comm, }, 56 .try_vmlinux_path = true,
230 { .name = "dso", .entry = &sort_dso, },
231 { .name = "symbol", .entry = &sort_sym, },
232}; 57};
233 58
234static LIST_HEAD(hist_entry__sort_list); 59static const char *sym_hist_filter;
235 60
236static int sort_dimension__add(char *tok) 61static int symbol_filter(struct map *map __used, struct symbol *sym)
237{ 62{
238 unsigned int i; 63 if (sym_hist_filter == NULL ||
239 64 strcmp(sym->name, sym_hist_filter) == 0) {
240 for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) { 65 struct sym_priv *priv = symbol__priv(sym);
241 struct sort_dimension *sd = &sort_dimensions[i]; 66 const int size = (sizeof(*priv->hist) +
242 67 (sym->end - sym->start) * sizeof(u64));
243 if (sd->taken)
244 continue;
245
246 if (strncasecmp(tok, sd->name, strlen(tok)))
247 continue;
248
249 if (sd->entry->collapse)
250 sort__need_collapse = 1;
251
252 list_add_tail(&sd->entry->list, &hist_entry__sort_list);
253 sd->taken = 1;
254 68
69 priv->hist = malloc(size);
70 if (priv->hist)
71 memset(priv->hist, 0, size);
255 return 0; 72 return 0;
256 } 73 }
257 74 /*
258 	return -ESRCH; 75 * FIXME: We should really filter it out, as we don't want to go through symbols
259} 76 * we're not interested in, and if a DSO ends up with no symbols, delete it too,
260 77 * but right now the kernel loading routines in symbol.c bail out if no symbols
261static int 78 * are found; fix it later.
262hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) 79 */
263{ 80 return 0;
264 struct sort_entry *se;
265 int64_t cmp = 0;
266
267 list_for_each_entry(se, &hist_entry__sort_list, list) {
268 cmp = se->cmp(left, right);
269 if (cmp)
270 break;
271 }
272
273 return cmp;
274}
275
276static int64_t
277hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
278{
279 struct sort_entry *se;
280 int64_t cmp = 0;
281
282 list_for_each_entry(se, &hist_entry__sort_list, list) {
283 int64_t (*f)(struct hist_entry *, struct hist_entry *);
284
285 f = se->collapse ?: se->cmp;
286
287 cmp = f(left, right);
288 if (cmp)
289 break;
290 }
291
292 return cmp;
293} 81}
294 82
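A side note on hist_entry__collapse() above: the line f = se->collapse ?: se->cmp uses the GNU C conditional-with-omitted-operand extension, so a sort entry without a dedicated collapse comparator silently falls back to its ordinary cmp. A minimal sketch of the same fallback pattern; struct entry and struct sorter are illustrative stand-ins, not names from this patch:

#include <stdint.h>
#include <stddef.h>

struct entry;				/* stand-in for struct hist_entry */

struct sorter {				/* stand-in for struct sort_entry */
	int64_t (*cmp)(struct entry *, struct entry *);
	int64_t (*collapse)(struct entry *, struct entry *);
};

/* Prefer the collapse comparator when one exists, else fall back to cmp.
 * GNU's "a ?: b" evaluates a once and reuses it when it is non-NULL. */
static int64_t compare(struct sorter *se, struct entry *l, struct entry *r)
{
	int64_t (*f)(struct entry *, struct entry *) = se->collapse ?: se->cmp;

	return f(l, r);
}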
295/* 83/*
@@ -299,380 +87,81 @@ static void hist_hit(struct hist_entry *he, u64 ip)
299{ 87{
300 unsigned int sym_size, offset; 88 unsigned int sym_size, offset;
301 struct symbol *sym = he->sym; 89 struct symbol *sym = he->sym;
90 struct sym_priv *priv;
91 struct sym_hist *h;
302 92
303 he->count++; 93 he->count++;
304 94
305 if (!sym || !sym->hist) 95 if (!sym || !he->map)
96 return;
97
98 priv = symbol__priv(sym);
99 if (!priv->hist)
306 return; 100 return;
307 101
308 sym_size = sym->end - sym->start; 102 sym_size = sym->end - sym->start;
309 offset = ip - sym->start; 103 offset = ip - sym->start;
310 104
105 if (verbose)
106 fprintf(stderr, "%s: ip=%Lx\n", __func__,
107 he->map->unmap_ip(he->map, ip));
108
311 if (offset >= sym_size) 109 if (offset >= sym_size)
312 return; 110 return;
313 111
314 sym->hist_sum++; 112 h = priv->hist;
315 sym->hist[offset]++; 113 h->sum++;
114 h->ip[offset]++;
316 115
317 if (verbose >= 3) 116 if (verbose >= 3)
318 printf("%p %s: count++ [ip: %p, %08Lx] => %Ld\n", 117 printf("%p %s: count++ [ip: %p, %08Lx] => %Ld\n",
319 (void *)(unsigned long)he->sym->start, 118 (void *)(unsigned long)he->sym->start,
320 he->sym->name, 119 he->sym->name,
321 (void *)(unsigned long)ip, ip - he->sym->start, 120 (void *)(unsigned long)ip, ip - he->sym->start,
322 sym->hist[offset]); 121 h->ip[offset]);
323} 122}
324 123
325static int 124static int hist_entry__add(struct addr_location *al, u64 count)
326hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
327 struct symbol *sym, u64 ip, char level)
328{ 125{
329 struct rb_node **p = &hist.rb_node; 126 bool hit;
330 struct rb_node *parent = NULL; 127 struct hist_entry *he = __hist_entry__add(al, NULL, count, &hit);
331 struct hist_entry *he; 128 if (he == NULL)
332 struct hist_entry entry = {
333 .thread = thread,
334 .map = map,
335 .dso = dso,
336 .sym = sym,
337 .ip = ip,
338 .level = level,
339 .count = 1,
340 };
341 int cmp;
342
343 while (*p != NULL) {
344 parent = *p;
345 he = rb_entry(parent, struct hist_entry, rb_node);
346
347 cmp = hist_entry__cmp(&entry, he);
348
349 if (!cmp) {
350 hist_hit(he, ip);
351
352 return 0;
353 }
354
355 if (cmp < 0)
356 p = &(*p)->rb_left;
357 else
358 p = &(*p)->rb_right;
359 }
360
361 he = malloc(sizeof(*he));
362 if (!he)
363 return -ENOMEM; 129 return -ENOMEM;
364 *he = entry; 130 hist_hit(he, al->addr);
365 rb_link_node(&he->rb_node, parent, p);
366 rb_insert_color(&he->rb_node, &hist);
367
368 return 0; 131 return 0;
369} 132}
370 133
371static void hist_entry__free(struct hist_entry *he) 134static int process_sample_event(event_t *event)
372{
373 free(he);
374}
375
376/*
377 * collapse the histogram
378 */
379
380static struct rb_root collapse_hists;
381
382static void collapse__insert_entry(struct hist_entry *he)
383{
384 struct rb_node **p = &collapse_hists.rb_node;
385 struct rb_node *parent = NULL;
386 struct hist_entry *iter;
387 int64_t cmp;
388
389 while (*p != NULL) {
390 parent = *p;
391 iter = rb_entry(parent, struct hist_entry, rb_node);
392
393 cmp = hist_entry__collapse(iter, he);
394
395 if (!cmp) {
396 iter->count += he->count;
397 hist_entry__free(he);
398 return;
399 }
400
401 if (cmp < 0)
402 p = &(*p)->rb_left;
403 else
404 p = &(*p)->rb_right;
405 }
406
407 rb_link_node(&he->rb_node, parent, p);
408 rb_insert_color(&he->rb_node, &collapse_hists);
409}
410
411static void collapse__resort(void)
412{
413 struct rb_node *next;
414 struct hist_entry *n;
415
416 if (!sort__need_collapse)
417 return;
418
419 next = rb_first(&hist);
420 while (next) {
421 n = rb_entry(next, struct hist_entry, rb_node);
422 next = rb_next(&n->rb_node);
423
424 rb_erase(&n->rb_node, &hist);
425 collapse__insert_entry(n);
426 }
427}
428
429/*
430 * reverse the map, sort on count.
431 */
432
433static struct rb_root output_hists;
434
435static void output__insert_entry(struct hist_entry *he)
436{ 135{
437 struct rb_node **p = &output_hists.rb_node; 136 struct addr_location al;
438 struct rb_node *parent = NULL;
439 struct hist_entry *iter;
440 137
441 while (*p != NULL) { 138 dump_printf("(IP, %d): %d: %p\n", event->header.misc,
442 parent = *p; 139 event->ip.pid, (void *)(long)event->ip.ip);
443 iter = rb_entry(parent, struct hist_entry, rb_node);
444 140
445 if (he->count > iter->count) 141 if (event__preprocess_sample(event, &al, symbol_filter) < 0) {
446 p = &(*p)->rb_left;
447 else
448 p = &(*p)->rb_right;
449 }
450
451 rb_link_node(&he->rb_node, parent, p);
452 rb_insert_color(&he->rb_node, &output_hists);
453}
454
455static void output__resort(void)
456{
457 struct rb_node *next;
458 struct hist_entry *n;
459 struct rb_root *tree = &hist;
460
461 if (sort__need_collapse)
462 tree = &collapse_hists;
463
464 next = rb_first(tree);
465
466 while (next) {
467 n = rb_entry(next, struct hist_entry, rb_node);
468 next = rb_next(&n->rb_node);
469
470 rb_erase(&n->rb_node, tree);
471 output__insert_entry(n);
472 }
473}
474
475static unsigned long total = 0,
476 total_mmap = 0,
477 total_comm = 0,
478 total_fork = 0,
479 total_unknown = 0;
480
481static int
482process_sample_event(event_t *event, unsigned long offset, unsigned long head)
483{
484 char level;
485 int show = 0;
486 struct dso *dso = NULL;
487 struct thread *thread;
488 u64 ip = event->ip.ip;
489 struct map *map = NULL;
490
491 thread = threads__findnew(event->ip.pid, &threads, &last_match);
492
493 dump_printf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n",
494 (void *)(offset + head),
495 (void *)(long)(event->header.size),
496 event->header.misc,
497 event->ip.pid,
498 (void *)(long)ip);
499
500 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
501
502 if (thread == NULL) {
503 fprintf(stderr, "problem processing %d event, skipping it.\n", 142 fprintf(stderr, "problem processing %d event, skipping it.\n",
504 event->header.type); 143 event->header.type);
505 return -1; 144 return -1;
506 } 145 }
507 146
508 if (event->header.misc & PERF_RECORD_MISC_KERNEL) { 147 if (hist_entry__add(&al, 1)) {
509 show = SHOW_KERNEL; 148 fprintf(stderr, "problem incrementing symbol count, "
510 level = 'k'; 149 "skipping event\n");
511
512 dso = kernel_dso;
513
514 dump_printf(" ...... dso: %s\n", dso->name);
515
516 } else if (event->header.misc & PERF_RECORD_MISC_USER) {
517
518 show = SHOW_USER;
519 level = '.';
520
521 map = thread__find_map(thread, ip);
522 if (map != NULL) {
523 ip = map->map_ip(map, ip);
524 dso = map->dso;
525 } else {
526 /*
527 * If this is outside of all known maps,
528 * and is a negative address, try to look it
529 * up in the kernel dso, as it might be a
530 * vsyscall (which executes in user-mode):
531 */
532 if ((long long)ip < 0)
533 dso = kernel_dso;
534 }
535 dump_printf(" ...... dso: %s\n", dso ? dso->name : "<not found>");
536
537 } else {
538 show = SHOW_HV;
539 level = 'H';
540 dump_printf(" ...... dso: [hypervisor]\n");
541 }
542
543 if (show & show_mask) {
544 struct symbol *sym = NULL;
545
546 if (dso)
547 sym = dso->find_symbol(dso, ip);
548
549 if (hist_entry__add(thread, map, dso, sym, ip, level)) {
550 fprintf(stderr,
551 "problem incrementing symbol count, skipping event\n");
552 return -1;
553 }
554 }
555 total++;
556
557 return 0;
558}
559
560static int
561process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
562{
563 struct thread *thread;
564 struct map *map = map__new(&event->mmap, NULL, 0);
565
566 thread = threads__findnew(event->mmap.pid, &threads, &last_match);
567
568 dump_printf("%p [%p]: PERF_RECORD_MMAP %d: [%p(%p) @ %p]: %s\n",
569 (void *)(offset + head),
570 (void *)(long)(event->header.size),
571 event->mmap.pid,
572 (void *)(long)event->mmap.start,
573 (void *)(long)event->mmap.len,
574 (void *)(long)event->mmap.pgoff,
575 event->mmap.filename);
576
577 if (thread == NULL || map == NULL) {
578 dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
579 return 0;
580 }
581
582 thread__insert_map(thread, map);
583 total_mmap++;
584
585 return 0;
586}
587
588static int
589process_comm_event(event_t *event, unsigned long offset, unsigned long head)
590{
591 struct thread *thread;
592
593 thread = threads__findnew(event->comm.pid, &threads, &last_match);
594 dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n",
595 (void *)(offset + head),
596 (void *)(long)(event->header.size),
597 event->comm.comm, event->comm.pid);
598
599 if (thread == NULL ||
600 thread__set_comm(thread, event->comm.comm)) {
601 dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
602 return -1;
603 }
604 total_comm++;
605
606 return 0;
607}
608
609static int
610process_fork_event(event_t *event, unsigned long offset, unsigned long head)
611{
612 struct thread *thread;
613 struct thread *parent;
614
615 thread = threads__findnew(event->fork.pid, &threads, &last_match);
616 parent = threads__findnew(event->fork.ppid, &threads, &last_match);
617 dump_printf("%p [%p]: PERF_RECORD_FORK: %d:%d\n",
618 (void *)(offset + head),
619 (void *)(long)(event->header.size),
620 event->fork.pid, event->fork.ppid);
621
622 /*
623 * A thread clone will have the same PID for both
624 * parent and child.
625 */
626 if (thread == parent)
627 return 0;
628
629 if (!thread || !parent || thread__fork(thread, parent)) {
630 dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
631 return -1;
632 }
633 total_fork++;
634
635 return 0;
636}
637
638static int
639process_event(event_t *event, unsigned long offset, unsigned long head)
640{
641 switch (event->header.type) {
642 case PERF_RECORD_SAMPLE:
643 return process_sample_event(event, offset, head);
644
645 case PERF_RECORD_MMAP:
646 return process_mmap_event(event, offset, head);
647
648 case PERF_RECORD_COMM:
649 return process_comm_event(event, offset, head);
650
651 case PERF_RECORD_FORK:
652 return process_fork_event(event, offset, head);
653 /*
654	 * We don't process them right now, but they are fine:
655 */
656
657 case PERF_RECORD_THROTTLE:
658 case PERF_RECORD_UNTHROTTLE:
659 return 0;
660
661 default:
662 return -1; 150 return -1;
663 } 151 }
664 152
665 return 0; 153 return 0;
666} 154}
667 155
668static int 156static int parse_line(FILE *file, struct hist_entry *he, u64 len)
669parse_line(FILE *file, struct symbol *sym, u64 start, u64 len)
670{ 157{
158 struct symbol *sym = he->sym;
671 char *line = NULL, *tmp, *tmp2; 159 char *line = NULL, *tmp, *tmp2;
672 static const char *prev_line; 160 static const char *prev_line;
673 static const char *prev_color; 161 static const char *prev_color;
674 unsigned int offset; 162 unsigned int offset;
675 size_t line_len; 163 size_t line_len;
164 u64 start;
676 s64 line_ip; 165 s64 line_ip;
677 int ret; 166 int ret;
678 char *c; 167 char *c;
@@ -709,22 +198,26 @@ parse_line(FILE *file, struct symbol *sym, u64 start, u64 len)
709 line_ip = -1; 198 line_ip = -1;
710 } 199 }
711 200
201 start = he->map->unmap_ip(he->map, sym->start);
202
712 if (line_ip != -1) { 203 if (line_ip != -1) {
713 const char *path = NULL; 204 const char *path = NULL;
714 unsigned int hits = 0; 205 unsigned int hits = 0;
715 double percent = 0.0; 206 double percent = 0.0;
716 const char *color; 207 const char *color;
717 struct sym_ext *sym_ext = sym->priv; 208 struct sym_priv *priv = symbol__priv(sym);
209 struct sym_ext *sym_ext = priv->ext;
210 struct sym_hist *h = priv->hist;
718 211
719 offset = line_ip - start; 212 offset = line_ip - start;
720 if (offset < len) 213 if (offset < len)
721 hits = sym->hist[offset]; 214 hits = h->ip[offset];
722 215
723 if (offset < len && sym_ext) { 216 if (offset < len && sym_ext) {
724 path = sym_ext[offset].path; 217 path = sym_ext[offset].path;
725 percent = sym_ext[offset].percent; 218 percent = sym_ext[offset].percent;
726 } else if (sym->hist_sum) 219 } else if (h->sum)
727 percent = 100.0 * hits / sym->hist_sum; 220 percent = 100.0 * hits / h->sum;
728 221
729 color = get_percent_color(percent); 222 color = get_percent_color(percent);
730 223
@@ -777,9 +270,10 @@ static void insert_source_line(struct sym_ext *sym_ext)
777 rb_insert_color(&sym_ext->node, &root_sym_ext); 270 rb_insert_color(&sym_ext->node, &root_sym_ext);
778} 271}
779 272
780static void free_source_line(struct symbol *sym, int len) 273static void free_source_line(struct hist_entry *he, int len)
781{ 274{
782 struct sym_ext *sym_ext = sym->priv; 275 struct sym_priv *priv = symbol__priv(he->sym);
276 struct sym_ext *sym_ext = priv->ext;
783 int i; 277 int i;
784 278
785 if (!sym_ext) 279 if (!sym_ext)
@@ -789,26 +283,30 @@ static void free_source_line(struct symbol *sym, int len)
789 free(sym_ext[i].path); 283 free(sym_ext[i].path);
790 free(sym_ext); 284 free(sym_ext);
791 285
792 sym->priv = NULL; 286 priv->ext = NULL;
793 root_sym_ext = RB_ROOT; 287 root_sym_ext = RB_ROOT;
794} 288}
795 289
796/* Get the filename:line for the colored entries */ 290/* Get the filename:line for the colored entries */
797static void 291static void
798get_source_line(struct symbol *sym, u64 start, int len, const char *filename) 292get_source_line(struct hist_entry *he, int len, const char *filename)
799{ 293{
294 struct symbol *sym = he->sym;
295 u64 start;
800 int i; 296 int i;
801 char cmd[PATH_MAX * 2]; 297 char cmd[PATH_MAX * 2];
802 struct sym_ext *sym_ext; 298 struct sym_ext *sym_ext;
299 struct sym_priv *priv = symbol__priv(sym);
300 struct sym_hist *h = priv->hist;
803 301
804 if (!sym->hist_sum) 302 if (!h->sum)
805 return; 303 return;
806 304
807 sym->priv = calloc(len, sizeof(struct sym_ext)); 305 sym_ext = priv->ext = calloc(len, sizeof(struct sym_ext));
808 if (!sym->priv) 306 if (!priv->ext)
809 return; 307 return;
810 308
811 sym_ext = sym->priv; 309 start = he->map->unmap_ip(he->map, sym->start);
812 310
813 for (i = 0; i < len; i++) { 311 for (i = 0; i < len; i++) {
814 char *path = NULL; 312 char *path = NULL;
@@ -816,7 +314,7 @@ get_source_line(struct symbol *sym, u64 start, int len, const char *filename)
816 u64 offset; 314 u64 offset;
817 FILE *fp; 315 FILE *fp;
818 316
819 sym_ext[i].percent = 100.0 * sym->hist[i] / sym->hist_sum; 317 sym_ext[i].percent = 100.0 * h->ip[i] / h->sum;
820 if (sym_ext[i].percent <= 0.5) 318 if (sym_ext[i].percent <= 0.5)
821 continue; 319 continue;
822 320
@@ -870,33 +368,34 @@ static void print_summary(const char *filename)
870 } 368 }
871} 369}
872 370
873static void annotate_sym(struct dso *dso, struct symbol *sym) 371static void annotate_sym(struct hist_entry *he)
874{ 372{
875 const char *filename = dso->name, *d_filename; 373 struct map *map = he->map;
876 u64 start, end, len; 374 struct dso *dso = map->dso;
375 struct symbol *sym = he->sym;
376 const char *filename = dso->long_name, *d_filename;
377 u64 len;
877 char command[PATH_MAX*2]; 378 char command[PATH_MAX*2];
878 FILE *file; 379 FILE *file;
879 380
880 if (!filename) 381 if (!filename)
881 return; 382 return;
882 if (sym->module) 383
883 filename = sym->module->path; 384 if (verbose)
884 else if (dso == kernel_dso) 385 fprintf(stderr, "%s: filename=%s, sym=%s, start=%Lx, end=%Lx\n",
885 filename = vmlinux_name; 386 __func__, filename, sym->name,
886 387 map->unmap_ip(map, sym->start),
887 start = sym->obj_start; 388 map->unmap_ip(map, sym->end));
888 if (!start) 389
889 start = sym->start;
890 if (full_paths) 390 if (full_paths)
891 d_filename = filename; 391 d_filename = filename;
892 else 392 else
893 d_filename = basename(filename); 393 d_filename = basename(filename);
894 394
895 end = start + sym->end - sym->start + 1;
896 len = sym->end - sym->start; 395 len = sym->end - sym->start;
897 396
898 if (print_line) { 397 if (print_line) {
899 get_source_line(sym, start, len, filename); 398 get_source_line(he, len, filename);
900 print_summary(filename); 399 print_summary(filename);
901 } 400 }
902 401
@@ -905,10 +404,12 @@ static void annotate_sym(struct dso *dso, struct symbol *sym)
905 printf("------------------------------------------------\n"); 404 printf("------------------------------------------------\n");
906 405
907 if (verbose >= 2) 406 if (verbose >= 2)
908 printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name); 407 printf("annotating [%p] %30s : [%p] %30s\n",
408 dso, dso->long_name, sym, sym->name);
909 409
910 sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s|grep -v %s", 410 sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s|grep -v %s",
911 (u64)start, (u64)end, filename, filename); 411 map->unmap_ip(map, sym->start), map->unmap_ip(map, sym->end),
412 filename, filename);
912 413
913 if (verbose >= 3) 414 if (verbose >= 3)
914 printf("doing: %s\n", command); 415 printf("doing: %s\n", command);
@@ -918,159 +419,78 @@ static void annotate_sym(struct dso *dso, struct symbol *sym)
918 return; 419 return;
919 420
920 while (!feof(file)) { 421 while (!feof(file)) {
921 if (parse_line(file, sym, start, len) < 0) 422 if (parse_line(file, he, len) < 0)
922 break; 423 break;
923 } 424 }
924 425
925 pclose(file); 426 pclose(file);
926 if (print_line) 427 if (print_line)
927 free_source_line(sym, len); 428 free_source_line(he, len);
928} 429}
929 430
930static void find_annotations(void) 431static void find_annotations(void)
931{ 432{
932 struct rb_node *nd; 433 struct rb_node *nd;
933 struct dso *dso;
934 int count = 0;
935
936 list_for_each_entry(dso, &dsos, node) {
937
938 for (nd = rb_first(&dso->syms); nd; nd = rb_next(nd)) {
939 struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
940
941 if (sym->hist) {
942 annotate_sym(dso, sym);
943 count++;
944 }
945 }
946 }
947
948 if (!count)
949 printf(" Error: symbol '%s' not present amongst the samples.\n", sym_hist_filter);
950}
951
952static int __cmd_annotate(void)
953{
954 int ret, rc = EXIT_FAILURE;
955 unsigned long offset = 0;
956 unsigned long head = 0;
957 struct stat input_stat;
958 event_t *event;
959 uint32_t size;
960 char *buf;
961
962 register_idle_thread(&threads, &last_match);
963
964 input = open(input_name, O_RDONLY);
965 if (input < 0) {
966 perror("failed to open file");
967 exit(-1);
968 }
969
970 ret = fstat(input, &input_stat);
971 if (ret < 0) {
972 perror("failed to stat file");
973 exit(-1);
974 }
975
976 if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
977 fprintf(stderr, "file: %s not owned by current user or root\n", input_name);
978 exit(-1);
979 }
980
981 if (!input_stat.st_size) {
982 fprintf(stderr, "zero-sized file, nothing to do!\n");
983 exit(0);
984 }
985
986 if (load_kernel() < 0) {
987 perror("failed to load kernel symbols");
988 return EXIT_FAILURE;
989 }
990
991remap:
992 buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
993 MAP_SHARED, input, offset);
994 if (buf == MAP_FAILED) {
995 perror("failed to mmap file");
996 exit(-1);
997 }
998
999more:
1000 event = (event_t *)(buf + head);
1001 434
1002 size = event->header.size; 435 for (nd = rb_first(&output_hists); nd; nd = rb_next(nd)) {
1003 if (!size) 436 struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
1004 size = 8; 437 struct sym_priv *priv;
1005 438
1006 if (head + event->header.size >= page_size * mmap_window) { 439 if (he->sym == NULL)
1007 unsigned long shift = page_size * (head / page_size); 440 continue;
1008 int munmap_ret;
1009
1010 munmap_ret = munmap(buf, page_size * mmap_window);
1011 assert(munmap_ret == 0);
1012
1013 offset += shift;
1014 head -= shift;
1015 goto remap;
1016 }
1017
1018 size = event->header.size;
1019
1020 dump_printf("%p [%p]: event: %d\n",
1021 (void *)(offset + head),
1022 (void *)(long)event->header.size,
1023 event->header.type);
1024
1025 if (!size || process_event(event, offset, head) < 0) {
1026
1027 dump_printf("%p [%p]: skipping unknown header type: %d\n",
1028 (void *)(offset + head),
1029 (void *)(long)(event->header.size),
1030 event->header.type);
1031 441
1032 total_unknown++; 442 priv = symbol__priv(he->sym);
443 if (priv->hist == NULL)
444 continue;
1033 445
446 annotate_sym(he);
1034 /* 447 /*
1035 * assume we lost track of the stream, check alignment, and 448 * Since we have a hist_entry per IP for the same symbol, free
1036 * increment a single u64 in the hope to catch on again 'soon'. 449 * he->sym->hist to signal we already processed this symbol.
1037 */ 450 */
1038 451 free(priv->hist);
1039 if (unlikely(head & 7)) 452 priv->hist = NULL;
1040 head &= ~7ULL;
1041
1042 size = 8;
1043 } 453 }
454}
1044 455
1045 head += size; 456static struct perf_file_handler file_handler = {
457 .process_sample_event = process_sample_event,
458 .process_mmap_event = event__process_mmap,
459 .process_comm_event = event__process_comm,
460 .process_fork_event = event__process_task,
461};
1046 462
1047 if (offset + head < (unsigned long)input_stat.st_size) 463static int __cmd_annotate(void)
1048 goto more; 464{
465 struct perf_header *header;
466 struct thread *idle;
467 int ret;
1049 468
1050 rc = EXIT_SUCCESS; 469 idle = register_idle_thread();
1051 close(input); 470 register_perf_file_handler(&file_handler);
1052 471
1053 dump_printf(" IP events: %10ld\n", total); 472 ret = mmap_dispatch_perf_file(&header, input_name, 0, 0,
1054 dump_printf(" mmap events: %10ld\n", total_mmap); 473 &event__cwdlen, &event__cwd);
1055 dump_printf(" comm events: %10ld\n", total_comm); 474 if (ret)
1056 dump_printf(" fork events: %10ld\n", total_fork); 475 return ret;
1057 dump_printf(" unknown events: %10ld\n", total_unknown);
1058 476
1059 if (dump_trace) 477 if (dump_trace) {
478 event__print_totals();
1060 return 0; 479 return 0;
480 }
1061 481
1062 if (verbose >= 3) 482 if (verbose > 3)
1063 threads__fprintf(stdout, &threads); 483 threads__fprintf(stdout);
1064 484
1065 if (verbose >= 2) 485 if (verbose > 2)
1066 dsos__fprintf(stdout); 486 dsos__fprintf(stdout);
1067 487
1068 collapse__resort(); 488 collapse__resort();
1069 output__resort(); 489 output__resort(event__total[0]);
1070 490
1071 find_annotations(); 491 find_annotations();
1072 492
1073 return rc; 493 return ret;
1074} 494}
1075 495
1076static const char * const annotate_usage[] = { 496static const char * const annotate_usage[] = {
@@ -1088,8 +508,9 @@ static const struct option options[] = {
1088 "be more verbose (show symbol address, etc)"), 508 "be more verbose (show symbol address, etc)"),
1089 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 509 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
1090 "dump raw trace in ASCII"), 510 "dump raw trace in ASCII"),
1091 OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"), 511 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
1092 OPT_BOOLEAN('m', "modules", &modules, 512 "file", "vmlinux pathname"),
513 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
1093 "load module symbols - WARNING: use only with -k and LIVE kernel"), 514 "load module symbols - WARNING: use only with -k and LIVE kernel"),
1094 OPT_BOOLEAN('l', "print-line", &print_line, 515 OPT_BOOLEAN('l', "print-line", &print_line,
1095 "print matching source lines (may be slow)"), 516 "print matching source lines (may be slow)"),
@@ -1115,9 +536,8 @@ static void setup_sorting(void)
1115 536
1116int cmd_annotate(int argc, const char **argv, const char *prefix __used) 537int cmd_annotate(int argc, const char **argv, const char *prefix __used)
1117{ 538{
1118 symbol__init(); 539 if (symbol__init(&symbol_conf) < 0)
1119 540 return -1;
1120 page_size = getpagesize();
1121 541
1122 argc = parse_options(argc, argv, options, annotate_usage, 0); 542 argc = parse_options(argc, argv, options, annotate_usage, 0);
1123 543
@@ -1134,10 +554,13 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used)
1134 sym_hist_filter = argv[0]; 554 sym_hist_filter = argv[0];
1135 } 555 }
1136 556
1137 if (!sym_hist_filter)
1138 usage_with_options(annotate_usage, options);
1139
1140 setup_pager(); 557 setup_pager();
1141 558
559 if (field_sep && *field_sep == '.') {
560 		fputs("'.' is the only invalid --field-separator argument\n",
561 stderr);
562 exit(129);
563 }
564
1142 return __cmd_annotate(); 565 return __cmd_annotate();
1143} 566}
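With the hand-rolled mmap/remap loop gone, builtin-annotate now follows the shared reader pattern: fill in a perf_file_handler with per-event callbacks, register it, and let mmap_dispatch_perf_file() page through perf.data. A condensed sketch of that consumer skeleton, using only entry points visible in this diff; my_sample and run are illustrative names and error handling is trimmed:

static int my_sample(event_t *event)
{
	/* per-sample work goes here */
	return 0;
}

static struct perf_file_handler handler = {
	.process_sample_event	= my_sample,
	.process_mmap_event	= event__process_mmap,
	.process_comm_event	= event__process_comm,
};

static int run(const char *input_name)
{
	struct perf_header *header;

	register_idle_thread();
	register_perf_file_handler(&handler);

	return mmap_dispatch_perf_file(&header, input_name, 0, 0,
				       &event__cwdlen, &event__cwd);
}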
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
new file mode 100644
index 000000000000..e043eb83092a
--- /dev/null
+++ b/tools/perf/builtin-bench.c
@@ -0,0 +1,196 @@
1/*
2 *
3 * builtin-bench.c
4 *
5 * General benchmarking subsystem provided by perf
6 *
7 * Copyright (C) 2009, Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
8 *
9 */
10
11/*
12 *
13 * Available subsystem list:
14 * sched ... scheduler and IPC mechanism
15 * mem ... memory access performance
16 *
17 */
18
19#include "perf.h"
20#include "util/util.h"
21#include "util/parse-options.h"
22#include "builtin.h"
23#include "bench/bench.h"
24
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28
29struct bench_suite {
30 const char *name;
31 const char *summary;
32 int (*fn)(int, const char **, const char *);
33};
34
35static struct bench_suite sched_suites[] = {
36 { "messaging",
37 "Benchmark for scheduler and IPC mechanisms",
38 bench_sched_messaging },
39 { "pipe",
40 "Flood of communication over pipe() between two processes",
41 bench_sched_pipe },
42 { NULL,
43 NULL,
44 NULL }
45};
46
47static struct bench_suite mem_suites[] = {
48 { "memcpy",
49 "Simple memory copy in various ways",
50 bench_mem_memcpy },
51 { NULL,
52 NULL,
53 NULL }
54};
55
56struct bench_subsys {
57 const char *name;
58 const char *summary;
59 struct bench_suite *suites;
60};
61
62static struct bench_subsys subsystems[] = {
63 { "sched",
64 "scheduler and IPC mechanism",
65 sched_suites },
66 { "mem",
67 "memory access performance",
68 mem_suites },
69 { NULL,
70 NULL,
71 NULL }
72};
73
74static void dump_suites(int subsys_index)
75{
76 int i;
77
78 printf("List of available suites for %s...\n\n",
79 subsystems[subsys_index].name);
80
81 for (i = 0; subsystems[subsys_index].suites[i].name; i++)
82 printf("\t%s: %s\n",
83 subsystems[subsys_index].suites[i].name,
84 subsystems[subsys_index].suites[i].summary);
85
86 printf("\n");
87 return;
88}
89
90static char *bench_format_str;
91int bench_format = BENCH_FORMAT_DEFAULT;
92
93static const struct option bench_options[] = {
94 OPT_STRING('f', "format", &bench_format_str, "default",
95 "Specify format style"),
96 OPT_END()
97};
98
99static const char * const bench_usage[] = {
100 "perf bench [<common options>] <subsystem> <suite> [<options>]",
101 NULL
102};
103
104static void print_usage(void)
105{
106 int i;
107
108 	printf("Usage:\n");
109 for (i = 0; bench_usage[i]; i++)
110 printf("\t%s\n", bench_usage[i]);
111 printf("\n");
112
113 printf("List of available subsystems...\n\n");
114
115 for (i = 0; subsystems[i].name; i++)
116 printf("\t%s: %s\n",
117 subsystems[i].name, subsystems[i].summary);
118 printf("\n");
119}
120
121static int bench_str2int(char *str)
122{
123 if (!str)
124 return BENCH_FORMAT_DEFAULT;
125
126 if (!strcmp(str, BENCH_FORMAT_DEFAULT_STR))
127 return BENCH_FORMAT_DEFAULT;
128 else if (!strcmp(str, BENCH_FORMAT_SIMPLE_STR))
129 return BENCH_FORMAT_SIMPLE;
130
131 return BENCH_FORMAT_UNKNOWN;
132}
133
134int cmd_bench(int argc, const char **argv, const char *prefix __used)
135{
136 int i, j, status = 0;
137
138 if (argc < 2) {
139 /* No subsystem specified. */
140 print_usage();
141 goto end;
142 }
143
144 argc = parse_options(argc, argv, bench_options, bench_usage,
145 PARSE_OPT_STOP_AT_NON_OPTION);
146
147 bench_format = bench_str2int(bench_format_str);
148 if (bench_format == BENCH_FORMAT_UNKNOWN) {
149 		printf("Unknown format descriptor: %s\n", bench_format_str);
150 goto end;
151 }
152
153 if (argc < 1) {
154 print_usage();
155 goto end;
156 }
157
158 for (i = 0; subsystems[i].name; i++) {
159 if (strcmp(subsystems[i].name, argv[0]))
160 continue;
161
162 if (argc < 2) {
163 /* No suite specified. */
164 dump_suites(i);
165 goto end;
166 }
167
168 for (j = 0; subsystems[i].suites[j].name; j++) {
169 if (strcmp(subsystems[i].suites[j].name, argv[1]))
170 continue;
171
172 if (bench_format == BENCH_FORMAT_DEFAULT)
173 printf("# Running %s/%s benchmark...\n",
174 subsystems[i].name,
175 subsystems[i].suites[j].name);
176 status = subsystems[i].suites[j].fn(argc - 1,
177 argv + 1, prefix);
178 goto end;
179 }
180
181 if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
182 dump_suites(i);
183 goto end;
184 }
185
186 		printf("Unknown suite: %s for %s\n", argv[1], argv[0]);
187 status = 1;
188 goto end;
189 }
190
191 	printf("Unknown subsystem: %s\n", argv[0]);
192 status = 1;
193
194end:
195 return status;
196}
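Because the subsystem and suite tables are plain NULL-terminated arrays, wiring a new benchmark into perf bench is just another initializer. A hypothetical memset suite might look like this; bench_mem_memset is not part of this patch and only illustrates the shape of an entry:

/* Hypothetical extension; bench_mem_memset does not exist in this patch. */
extern int bench_mem_memset(int argc, const char **argv, const char *prefix);

static struct bench_suite mem_suites[] = {
	{ "memcpy",
	  "Simple memory copy in various ways",
	  bench_mem_memcpy },
	{ "memset",
	  "Fill memory in various ways",
	  bench_mem_memset },
	{ NULL, NULL, NULL }
};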
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
new file mode 100644
index 000000000000..7dee9d19ab7a
--- /dev/null
+++ b/tools/perf/builtin-buildid-list.c
@@ -0,0 +1,116 @@
1/*
2 * builtin-buildid-list.c
3 *
4 * Builtin buildid-list command: list buildids in perf.data
5 *
6 * Copyright (C) 2009, Red Hat Inc.
7 * Copyright (C) 2009, Arnaldo Carvalho de Melo <acme@redhat.com>
8 */
9#include "builtin.h"
10#include "perf.h"
11#include "util/cache.h"
12#include "util/data_map.h"
13#include "util/debug.h"
14#include "util/header.h"
15#include "util/parse-options.h"
16#include "util/symbol.h"
17
18static char const *input_name = "perf.data";
19static int force;
20
21static const char *const buildid_list_usage[] = {
22 "perf report [<options>]",
23 NULL
24};
25
26static const struct option options[] = {
27 OPT_STRING('i', "input", &input_name, "file",
28 "input file name"),
29 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
30 OPT_BOOLEAN('v', "verbose", &verbose,
31 "be more verbose"),
32 OPT_END()
33};
34
35static int perf_file_section__process_buildids(struct perf_file_section *self,
36 int feat, int fd)
37{
38 if (feat != HEADER_BUILD_ID)
39 return 0;
40
41 if (lseek(fd, self->offset, SEEK_SET) < 0) {
42 		pr_warning("Failed to lseek to offset %Ld for buildids!\n",
43 self->offset);
44 return -1;
45 }
46
47 if (perf_header__read_build_ids(fd, self->offset, self->size)) {
48 pr_warning("Failed to read buildids!\n");
49 return -1;
50 }
51
52 return 0;
53}
54
55static int __cmd_buildid_list(void)
56{
57 int err = -1;
58 struct perf_header *header;
59 struct perf_file_header f_header;
60 struct stat input_stat;
61 int input = open(input_name, O_RDONLY);
62
63 if (input < 0) {
64 pr_err("failed to open file: %s", input_name);
65 if (!strcmp(input_name, "perf.data"))
66 pr_err(" (try 'perf record' first)");
67 pr_err("\n");
68 goto out;
69 }
70
71 err = fstat(input, &input_stat);
72 if (err < 0) {
73 perror("failed to stat file");
74 goto out_close;
75 }
76
77 if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
78 pr_err("file %s not owned by current user or root\n",
79 input_name);
80 goto out_close;
81 }
82
83 if (!input_stat.st_size) {
84 pr_info("zero-sized file, nothing to do!\n");
85 goto out_close;
86 }
87
88 err = -1;
89 header = perf_header__new();
90 if (header == NULL)
91 goto out_close;
92
93 if (perf_file_header__read(&f_header, header, input) < 0) {
94 pr_warning("incompatible file format");
95 goto out_close;
96 }
97
98 err = perf_header__process_sections(header, input,
99 perf_file_section__process_buildids);
100
101 if (err < 0)
102 goto out_close;
103
104 dsos__fprintf_buildid(stdout);
105out_close:
106 close(input);
107out:
108 return err;
109}
110
111int cmd_buildid_list(int argc, const char **argv, const char *prefix __used)
112{
113 argc = parse_options(argc, argv, options, buildid_list_usage, 0);
114 setup_pager();
115 return __cmd_buildid_list();
116}
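perf_header__process_sections() drives one callback per optional feature section, so a consumer reacts only to the features it understands, HEADER_BUILD_ID in this file. Another walker in the same style would simply test a different feat value; an illustrative one, not part of the patch, that just reports where the build-id section lives:

/* Illustrative section walker; not part of the patch. */
static int show_buildid_section(struct perf_file_section *self,
				int feat, int fd __used)
{
	if (feat == HEADER_BUILD_ID)
		pr_info("build-id section at offset %Ld, %Ld bytes\n",
			self->offset, self->size);

	return 0;
}

It would be wired up the same way, via perf_header__process_sections(header, input, show_buildid_section).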
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index 4fb8734a796e..9f810b17c25c 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -61,8 +61,7 @@ static const char *get_man_viewer_info(const char *name)
61{ 61{
62 struct man_viewer_info_list *viewer; 62 struct man_viewer_info_list *viewer;
63 63
64 for (viewer = man_viewer_info_list; viewer; viewer = viewer->next) 64 for (viewer = man_viewer_info_list; viewer; viewer = viewer->next) {
65 {
66 if (!strcasecmp(name, viewer->name)) 65 if (!strcasecmp(name, viewer->name))
67 return viewer->info; 66 return viewer->info;
68 } 67 }
@@ -115,7 +114,7 @@ static int check_emacsclient_version(void)
115 return 0; 114 return 0;
116} 115}
117 116
118static void exec_woman_emacs(const char* path, const char *page) 117static void exec_woman_emacs(const char *path, const char *page)
119{ 118{
120 if (!check_emacsclient_version()) { 119 if (!check_emacsclient_version()) {
121 /* This works only with emacsclient version >= 22. */ 120 /* This works only with emacsclient version >= 22. */
@@ -129,7 +128,7 @@ static void exec_woman_emacs(const char* path, const char *page)
129 } 128 }
130} 129}
131 130
132static void exec_man_konqueror(const char* path, const char *page) 131static void exec_man_konqueror(const char *path, const char *page)
133{ 132{
134 const char *display = getenv("DISPLAY"); 133 const char *display = getenv("DISPLAY");
135 if (display && *display) { 134 if (display && *display) {
@@ -157,7 +156,7 @@ static void exec_man_konqueror(const char* path, const char *page)
157 } 156 }
158} 157}
159 158
160static void exec_man_man(const char* path, const char *page) 159static void exec_man_man(const char *path, const char *page)
161{ 160{
162 if (!path) 161 if (!path)
163 path = "man"; 162 path = "man";
@@ -180,7 +179,7 @@ static void add_man_viewer(const char *name)
180 179
181 while (*p) 180 while (*p)
182 p = &((*p)->next); 181 p = &((*p)->next);
183 *p = calloc(1, (sizeof(**p) + len + 1)); 182 *p = zalloc(sizeof(**p) + len + 1);
184 strncpy((*p)->name, name, len); 183 strncpy((*p)->name, name, len);
185} 184}
186 185
@@ -195,7 +194,7 @@ static void do_add_man_viewer_info(const char *name,
195 size_t len, 194 size_t len,
196 const char *value) 195 const char *value)
197{ 196{
198 struct man_viewer_info_list *new = calloc(1, sizeof(*new) + len + 1); 197 struct man_viewer_info_list *new = zalloc(sizeof(*new) + len + 1);
199 198
200 strncpy(new->name, name, len); 199 strncpy(new->name, name, len);
201 new->info = strdup(value); 200 new->info = strdup(value);
@@ -364,9 +363,8 @@ static void show_man_page(const char *perf_cmd)
364 363
365 setup_man_path(); 364 setup_man_path();
366 for (viewer = man_viewer_list; viewer; viewer = viewer->next) 365 for (viewer = man_viewer_list; viewer; viewer = viewer->next)
367 {
368 exec_viewer(viewer->name, page); /* will return when unable */ 366 exec_viewer(viewer->name, page); /* will return when unable */
369 } 367
370 if (fallback) 368 if (fallback)
371 exec_viewer(fallback, page); 369 exec_viewer(fallback, page);
372 exec_viewer("man", page); 370 exec_viewer("man", page);
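The calloc(1, ...) to zalloc() conversions above lean on a small helper in perf's util headers; presumably the equivalent of:

#include <stdlib.h>

/* Likely shape of the helper behind the calloc -> zalloc change:
 * allocate one zero-filled object of the given size. */
static inline void *zalloc(size_t size)
{
	return calloc(1, size);
}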
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
new file mode 100644
index 000000000000..047fef74bd52
--- /dev/null
+++ b/tools/perf/builtin-kmem.c
@@ -0,0 +1,807 @@
1#include "builtin.h"
2#include "perf.h"
3
4#include "util/util.h"
5#include "util/cache.h"
6#include "util/symbol.h"
7#include "util/thread.h"
8#include "util/header.h"
9
10#include "util/parse-options.h"
11#include "util/trace-event.h"
12
13#include "util/debug.h"
14#include "util/data_map.h"
15
16#include <linux/rbtree.h>
17
18struct alloc_stat;
19typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);
20
21static char const *input_name = "perf.data";
22
23static struct perf_header *header;
24static u64 sample_type;
25
26static int alloc_flag;
27static int caller_flag;
28
29static int alloc_lines = -1;
30static int caller_lines = -1;
31
32static bool raw_ip;
33
34static char default_sort_order[] = "frag,hit,bytes";
35
36static int *cpunode_map;
37static int max_cpu_num;
38
39struct alloc_stat {
40 u64 call_site;
41 u64 ptr;
42 u64 bytes_req;
43 u64 bytes_alloc;
44 u32 hit;
45 u32 pingpong;
46
47 short alloc_cpu;
48
49 struct rb_node node;
50};
51
52static struct rb_root root_alloc_stat;
53static struct rb_root root_alloc_sorted;
54static struct rb_root root_caller_stat;
55static struct rb_root root_caller_sorted;
56
57static unsigned long total_requested, total_allocated;
58static unsigned long nr_allocs, nr_cross_allocs;
59
60struct raw_event_sample {
61 u32 size;
62 char data[0];
63};
64
65#define PATH_SYS_NODE "/sys/devices/system/node"
66
67static void init_cpunode_map(void)
68{
69 FILE *fp;
70 int i;
71
72 fp = fopen("/sys/devices/system/cpu/kernel_max", "r");
73 if (!fp) {
74 max_cpu_num = 4096;
75 return;
76 }
77
78 if (fscanf(fp, "%d", &max_cpu_num) < 1)
79 die("Failed to read 'kernel_max' from sysfs");
80 max_cpu_num++;
81
82 cpunode_map = calloc(max_cpu_num, sizeof(int));
83 if (!cpunode_map)
84 die("calloc");
85 for (i = 0; i < max_cpu_num; i++)
86 cpunode_map[i] = -1;
87 fclose(fp);
88}
89
90static void setup_cpunode_map(void)
91{
92 struct dirent *dent1, *dent2;
93 DIR *dir1, *dir2;
94 unsigned int cpu, mem;
95 char buf[PATH_MAX];
96
97 init_cpunode_map();
98
99 dir1 = opendir(PATH_SYS_NODE);
100 if (!dir1)
101 return;
102
103 while (true) {
104 dent1 = readdir(dir1);
105 if (!dent1)
106 break;
107
108 if (sscanf(dent1->d_name, "node%u", &mem) < 1)
109 continue;
110
111 snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
112 dir2 = opendir(buf);
113 if (!dir2)
114 continue;
115 while (true) {
116 dent2 = readdir(dir2);
117 if (!dent2)
118 break;
119 if (sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
120 continue;
121 cpunode_map[cpu] = mem;
122 }
123 }
124}
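Once setup_cpunode_map() has walked /sys/devices/system/node, translating the CPU a sample fired on into its NUMA node is a single array lookup; that is what process_alloc_event() below relies on to spot cross-node allocations. An illustrative helper, not part of the patch:

/* Illustrative lookup; cpu comes straight from the sample. */
static int cpu_to_node(int cpu)
{
	if (cpu < 0 || cpu >= max_cpu_num)
		return -1;

	return cpunode_map[cpu];	/* -1 when sysfs listed no node */
}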
125
126static void insert_alloc_stat(unsigned long call_site, unsigned long ptr,
127 int bytes_req, int bytes_alloc, int cpu)
128{
129 struct rb_node **node = &root_alloc_stat.rb_node;
130 struct rb_node *parent = NULL;
131 struct alloc_stat *data = NULL;
132
133 while (*node) {
134 parent = *node;
135 data = rb_entry(*node, struct alloc_stat, node);
136
137 if (ptr > data->ptr)
138 node = &(*node)->rb_right;
139 else if (ptr < data->ptr)
140 node = &(*node)->rb_left;
141 else
142 break;
143 }
144
145 if (data && data->ptr == ptr) {
146 data->hit++;
147 data->bytes_req += bytes_req;
148 		data->bytes_alloc += bytes_alloc;
149 } else {
150 data = malloc(sizeof(*data));
151 if (!data)
152 die("malloc");
153 data->ptr = ptr;
154 data->pingpong = 0;
155 data->hit = 1;
156 data->bytes_req = bytes_req;
157 data->bytes_alloc = bytes_alloc;
158
159 rb_link_node(&data->node, parent, node);
160 rb_insert_color(&data->node, &root_alloc_stat);
161 }
162 data->call_site = call_site;
163 data->alloc_cpu = cpu;
164}
165
166static void insert_caller_stat(unsigned long call_site,
167 int bytes_req, int bytes_alloc)
168{
169 struct rb_node **node = &root_caller_stat.rb_node;
170 struct rb_node *parent = NULL;
171 struct alloc_stat *data = NULL;
172
173 while (*node) {
174 parent = *node;
175 data = rb_entry(*node, struct alloc_stat, node);
176
177 if (call_site > data->call_site)
178 node = &(*node)->rb_right;
179 else if (call_site < data->call_site)
180 node = &(*node)->rb_left;
181 else
182 break;
183 }
184
185 if (data && data->call_site == call_site) {
186 data->hit++;
187 data->bytes_req += bytes_req;
188 		data->bytes_alloc += bytes_alloc;
189 } else {
190 data = malloc(sizeof(*data));
191 if (!data)
192 die("malloc");
193 data->call_site = call_site;
194 data->pingpong = 0;
195 data->hit = 1;
196 data->bytes_req = bytes_req;
197 data->bytes_alloc = bytes_alloc;
198
199 rb_link_node(&data->node, parent, node);
200 rb_insert_color(&data->node, &root_caller_stat);
201 }
202}
203
204static void process_alloc_event(struct raw_event_sample *raw,
205 struct event *event,
206 int cpu,
207 u64 timestamp __used,
208 struct thread *thread __used,
209 int node)
210{
211 unsigned long call_site;
212 unsigned long ptr;
213 int bytes_req;
214 int bytes_alloc;
215 int node1, node2;
216
217 ptr = raw_field_value(event, "ptr", raw->data);
218 call_site = raw_field_value(event, "call_site", raw->data);
219 bytes_req = raw_field_value(event, "bytes_req", raw->data);
220 bytes_alloc = raw_field_value(event, "bytes_alloc", raw->data);
221
222 insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu);
223 insert_caller_stat(call_site, bytes_req, bytes_alloc);
224
225 total_requested += bytes_req;
226 total_allocated += bytes_alloc;
227
228 if (node) {
229 node1 = cpunode_map[cpu];
230 node2 = raw_field_value(event, "node", raw->data);
231 if (node1 != node2)
232 nr_cross_allocs++;
233 }
234 nr_allocs++;
235}
236
237static int ptr_cmp(struct alloc_stat *, struct alloc_stat *);
238static int callsite_cmp(struct alloc_stat *, struct alloc_stat *);
239
240static struct alloc_stat *search_alloc_stat(unsigned long ptr,
241 unsigned long call_site,
242 struct rb_root *root,
243 sort_fn_t sort_fn)
244{
245 struct rb_node *node = root->rb_node;
246 struct alloc_stat key = { .ptr = ptr, .call_site = call_site };
247
248 while (node) {
249 struct alloc_stat *data;
250 int cmp;
251
252 data = rb_entry(node, struct alloc_stat, node);
253
254 cmp = sort_fn(&key, data);
255 if (cmp < 0)
256 node = node->rb_left;
257 else if (cmp > 0)
258 node = node->rb_right;
259 else
260 return data;
261 }
262 return NULL;
263}
264
265static void process_free_event(struct raw_event_sample *raw,
266 struct event *event,
267 int cpu,
268 u64 timestamp __used,
269 struct thread *thread __used)
270{
271 unsigned long ptr;
272 struct alloc_stat *s_alloc, *s_caller;
273
274 ptr = raw_field_value(event, "ptr", raw->data);
275
276 s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
277 if (!s_alloc)
278 return;
279
280 if (cpu != s_alloc->alloc_cpu) {
281 s_alloc->pingpong++;
282
283 s_caller = search_alloc_stat(0, s_alloc->call_site,
284 &root_caller_stat, callsite_cmp);
285 assert(s_caller);
286 s_caller->pingpong++;
287 }
288 s_alloc->alloc_cpu = -1;
289}
290
291static void
292process_raw_event(event_t *raw_event __used, void *more_data,
293 int cpu, u64 timestamp, struct thread *thread)
294{
295 struct raw_event_sample *raw = more_data;
296 struct event *event;
297 int type;
298
299 type = trace_parse_common_type(raw->data);
300 event = trace_find_event(type);
301
302 if (!strcmp(event->name, "kmalloc") ||
303 !strcmp(event->name, "kmem_cache_alloc")) {
304 process_alloc_event(raw, event, cpu, timestamp, thread, 0);
305 return;
306 }
307
308 if (!strcmp(event->name, "kmalloc_node") ||
309 !strcmp(event->name, "kmem_cache_alloc_node")) {
310 process_alloc_event(raw, event, cpu, timestamp, thread, 1);
311 return;
312 }
313
314 if (!strcmp(event->name, "kfree") ||
315 !strcmp(event->name, "kmem_cache_free")) {
316 process_free_event(raw, event, cpu, timestamp, thread);
317 return;
318 }
319}
320
321static int process_sample_event(event_t *event)
322{
323 u64 ip = event->ip.ip;
324 u64 timestamp = -1;
325 u32 cpu = -1;
326 u64 period = 1;
327 void *more_data = event->ip.__more_data;
328 struct thread *thread = threads__findnew(event->ip.pid);
329
330 if (sample_type & PERF_SAMPLE_TIME) {
331 timestamp = *(u64 *)more_data;
332 more_data += sizeof(u64);
333 }
334
335 if (sample_type & PERF_SAMPLE_CPU) {
336 cpu = *(u32 *)more_data;
337 more_data += sizeof(u32);
338 more_data += sizeof(u32); /* reserved */
339 }
340
341 if (sample_type & PERF_SAMPLE_PERIOD) {
342 period = *(u64 *)more_data;
343 more_data += sizeof(u64);
344 }
345
346 dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
347 event->header.misc,
348 event->ip.pid, event->ip.tid,
349 (void *)(long)ip,
350 (long long)period);
351
352 if (thread == NULL) {
353 pr_debug("problem processing %d event, skipping it.\n",
354 event->header.type);
355 return -1;
356 }
357
358 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
359
360 process_raw_event(event, more_data, cpu, timestamp, thread);
361
362 return 0;
363}
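The payload walk above works because the kernel packs the optional sample fields back-to-back in a fixed order, so the reader must consume them in exactly the order the PERF_SAMPLE_* bits were requested. A stand-alone sketch of the same cursor walk over a fake little-endian buffer; the SAMPLE_* constants are stand-ins for the real PERF_SAMPLE_* bits:

#include <stdio.h>
#include <stdint.h>

#define SAMPLE_TIME	(1 << 0)	/* stand-in for PERF_SAMPLE_TIME */
#define SAMPLE_CPU	(1 << 1)	/* stand-in for PERF_SAMPLE_CPU */
#define SAMPLE_PERIOD	(1 << 2)	/* stand-in for PERF_SAMPLE_PERIOD */

static void walk(const void *more_data, uint64_t sample_type)
{
	const char *p = more_data;

	if (sample_type & SAMPLE_TIME) {
		printf("time   %llu\n", (unsigned long long)*(const uint64_t *)p);
		p += sizeof(uint64_t);
	}
	if (sample_type & SAMPLE_CPU) {
		printf("cpu    %u\n", *(const uint32_t *)p);
		p += 2 * sizeof(uint32_t);	/* cpu + reserved pad */
	}
	if (sample_type & SAMPLE_PERIOD)
		printf("period %llu\n", (unsigned long long)*(const uint64_t *)p);
}

int main(void)
{
	uint64_t buf[3] = { 1234, 7, 1 };	/* time, cpu (+pad), period */

	walk(buf, SAMPLE_TIME | SAMPLE_CPU | SAMPLE_PERIOD);
	return 0;
}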
364
365static int sample_type_check(u64 type)
366{
367 sample_type = type;
368
369 if (!(sample_type & PERF_SAMPLE_RAW)) {
370 fprintf(stderr,
371 "No trace sample to read. Did you call perf record "
372 			"without -R?\n");
373 return -1;
374 }
375
376 return 0;
377}
378
379static struct perf_file_handler file_handler = {
380 .process_sample_event = process_sample_event,
381 .process_comm_event = event__process_comm,
382 .sample_type_check = sample_type_check,
383};
384
385static int read_events(void)
386{
387 register_idle_thread();
388 register_perf_file_handler(&file_handler);
389
390 return mmap_dispatch_perf_file(&header, input_name, 0, 0,
391 &event__cwdlen, &event__cwd);
392}
393
394static double fragmentation(unsigned long n_req, unsigned long n_alloc)
395{
396 if (n_alloc == 0)
397 return 0.0;
398 else
399 return 100.0 - (100.0 * n_req / n_alloc);
400}
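To make the formula concrete: a call site that requested 100 bytes per allocation but was handed 128 wastes 100 - 100*100/128 = 21.875% of what it received. A throwaway check; the values are illustrative:

#include <stdio.h>

int main(void)
{
	unsigned long n_req = 100, n_alloc = 128;
	double frag = 100.0 - (100.0 * n_req / n_alloc);

	printf("%.3f%%\n", frag);	/* prints 21.875% */
	return 0;
}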
401
402static void __print_result(struct rb_root *root, int n_lines, int is_caller)
403{
404 struct rb_node *next;
405
406 printf("%.102s\n", graph_dotted_line);
407 	printf(" %-34s |", is_caller ? "Callsite" : "Alloc Ptr");
408 printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n");
409 printf("%.102s\n", graph_dotted_line);
410
411 next = rb_first(root);
412
413 while (next && n_lines--) {
414 struct alloc_stat *data = rb_entry(next, struct alloc_stat,
415 node);
416 struct symbol *sym = NULL;
417 char buf[BUFSIZ];
418 u64 addr;
419
420 if (is_caller) {
421 addr = data->call_site;
422 if (!raw_ip)
423 sym = thread__find_function(kthread, addr, NULL);
424 } else
425 addr = data->ptr;
426
427 if (sym != NULL)
428 snprintf(buf, sizeof(buf), "%s+%Lx", sym->name,
429 addr - sym->start);
430 else
431 snprintf(buf, sizeof(buf), "%#Lx", addr);
432 printf(" %-34s |", buf);
433
434 printf(" %9llu/%-5lu | %9llu/%-5lu | %6lu | %8lu | %6.3f%%\n",
435 (unsigned long long)data->bytes_alloc,
436 (unsigned long)data->bytes_alloc / data->hit,
437 (unsigned long long)data->bytes_req,
438 (unsigned long)data->bytes_req / data->hit,
439 (unsigned long)data->hit,
440 (unsigned long)data->pingpong,
441 fragmentation(data->bytes_req, data->bytes_alloc));
442
443 next = rb_next(next);
444 }
445
446 if (n_lines == -1)
447 printf(" ... | ... | ... | ... | ... | ... \n");
448
449 printf("%.102s\n", graph_dotted_line);
450}
451
452static void print_summary(void)
453{
454 printf("\nSUMMARY\n=======\n");
455 printf("Total bytes requested: %lu\n", total_requested);
456 printf("Total bytes allocated: %lu\n", total_allocated);
457 printf("Total bytes wasted on internal fragmentation: %lu\n",
458 total_allocated - total_requested);
459 printf("Internal fragmentation: %f%%\n",
460 fragmentation(total_requested, total_allocated));
461 printf("Cross CPU allocations: %lu/%lu\n", nr_cross_allocs, nr_allocs);
462}
463
464static void print_result(void)
465{
466 if (caller_flag)
467 __print_result(&root_caller_sorted, caller_lines, 1);
468 if (alloc_flag)
469 __print_result(&root_alloc_sorted, alloc_lines, 0);
470 print_summary();
471}
472
473struct sort_dimension {
474 const char name[20];
475 sort_fn_t cmp;
476 struct list_head list;
477};
478
479static LIST_HEAD(caller_sort);
480static LIST_HEAD(alloc_sort);
481
482static void sort_insert(struct rb_root *root, struct alloc_stat *data,
483 struct list_head *sort_list)
484{
485 struct rb_node **new = &(root->rb_node);
486 struct rb_node *parent = NULL;
487 struct sort_dimension *sort;
488
489 while (*new) {
490 struct alloc_stat *this;
491 int cmp = 0;
492
493 this = rb_entry(*new, struct alloc_stat, node);
494 parent = *new;
495
496 list_for_each_entry(sort, sort_list, list) {
497 cmp = sort->cmp(data, this);
498 if (cmp)
499 break;
500 }
501
502 if (cmp > 0)
503 new = &((*new)->rb_left);
504 else
505 new = &((*new)->rb_right);
506 }
507
508 rb_link_node(&data->node, parent, new);
509 rb_insert_color(&data->node, root);
510}
511
512static void __sort_result(struct rb_root *root, struct rb_root *root_sorted,
513 struct list_head *sort_list)
514{
515 struct rb_node *node;
516 struct alloc_stat *data;
517
518 for (;;) {
519 node = rb_first(root);
520 if (!node)
521 break;
522
523 rb_erase(node, root);
524 data = rb_entry(node, struct alloc_stat, node);
525 sort_insert(root_sorted, data, sort_list);
526 }
527}
528
529static void sort_result(void)
530{
531 __sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort);
532 __sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort);
533}
534
535static int __cmd_kmem(void)
536{
537 setup_pager();
538 read_events();
539 sort_result();
540 print_result();
541
542 return 0;
543}
544
545static const char * const kmem_usage[] = {
546 "perf kmem [<options>] {record}",
547 NULL
548};
549
550static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r)
551{
552 if (l->ptr < r->ptr)
553 return -1;
554 else if (l->ptr > r->ptr)
555 return 1;
556 return 0;
557}
558
559static struct sort_dimension ptr_sort_dimension = {
560 .name = "ptr",
561 .cmp = ptr_cmp,
562};
563
564static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r)
565{
566 if (l->call_site < r->call_site)
567 return -1;
568 else if (l->call_site > r->call_site)
569 return 1;
570 return 0;
571}
572
573static struct sort_dimension callsite_sort_dimension = {
574 .name = "callsite",
575 .cmp = callsite_cmp,
576};
577
578static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r)
579{
580 if (l->hit < r->hit)
581 return -1;
582 else if (l->hit > r->hit)
583 return 1;
584 return 0;
585}
586
587static struct sort_dimension hit_sort_dimension = {
588 .name = "hit",
589 .cmp = hit_cmp,
590};
591
592static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r)
593{
594 if (l->bytes_alloc < r->bytes_alloc)
595 return -1;
596 else if (l->bytes_alloc > r->bytes_alloc)
597 return 1;
598 return 0;
599}
600
601static struct sort_dimension bytes_sort_dimension = {
602 .name = "bytes",
603 .cmp = bytes_cmp,
604};
605
606static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r)
607{
608 double x, y;
609
610 x = fragmentation(l->bytes_req, l->bytes_alloc);
611 y = fragmentation(r->bytes_req, r->bytes_alloc);
612
613 if (x < y)
614 return -1;
615 else if (x > y)
616 return 1;
617 return 0;
618}
619
620static struct sort_dimension frag_sort_dimension = {
621 .name = "frag",
622 .cmp = frag_cmp,
623};
624
625static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r)
626{
627 if (l->pingpong < r->pingpong)
628 return -1;
629 else if (l->pingpong > r->pingpong)
630 return 1;
631 return 0;
632}
633
634static struct sort_dimension pingpong_sort_dimension = {
635 .name = "pingpong",
636 .cmp = pingpong_cmp,
637};
638
639static struct sort_dimension *avail_sorts[] = {
640 &ptr_sort_dimension,
641 &callsite_sort_dimension,
642 &hit_sort_dimension,
643 &bytes_sort_dimension,
644 &frag_sort_dimension,
645 &pingpong_sort_dimension,
646};
647
648#define NUM_AVAIL_SORTS \
649 (int)(sizeof(avail_sorts) / sizeof(struct sort_dimension *))
650
651static int sort_dimension__add(const char *tok, struct list_head *list)
652{
653 struct sort_dimension *sort;
654 int i;
655
656 for (i = 0; i < NUM_AVAIL_SORTS; i++) {
657 if (!strcmp(avail_sorts[i]->name, tok)) {
658 sort = malloc(sizeof(*sort));
659 if (!sort)
660 die("malloc");
661 memcpy(sort, avail_sorts[i], sizeof(*sort));
662 list_add_tail(&sort->list, list);
663 return 0;
664 }
665 }
666
667 return -1;
668}
669
670static int setup_sorting(struct list_head *sort_list, const char *arg)
671{
672 char *tok;
673 char *str = strdup(arg);
674
675 if (!str)
676 die("strdup");
677
678 while (true) {
679 tok = strsep(&str, ",");
680 if (!tok)
681 break;
682 if (sort_dimension__add(tok, sort_list) < 0) {
683 error("Unknown --sort key: '%s'", tok);
684 return -1;
685 }
686 }
687
688 free(str);
689 return 0;
690}
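setup_sorting() is what turns the default "frag,hit,bytes" string into the comparator lists consumed by sort_insert(): strsep() peels off one comma-separated key at a time and sort_dimension__add() resolves it against avail_sorts[]. A stand-alone illustration of that tokenization; note that freeing through the original pointer matters, since strsep() advances its cursor:

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	char *str = strdup("frag,hit,bytes");
	char *pos = str, *tok;

	while ((tok = strsep(&pos, ",")) != NULL)
		printf("sort key: %s\n", tok);	/* frag, hit, bytes */

	free(str);	/* free the original pointer, not the cursor */
	return 0;
}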
691
692static int parse_sort_opt(const struct option *opt __used,
693 const char *arg, int unset __used)
694{
695 if (!arg)
696 return -1;
697
698 if (caller_flag > alloc_flag)
699 return setup_sorting(&caller_sort, arg);
700 else
701 return setup_sorting(&alloc_sort, arg);
702
703 return 0;
704}
705
706static int parse_stat_opt(const struct option *opt __used,
707 const char *arg, int unset __used)
708{
709 if (!arg)
710 return -1;
711
712 if (strcmp(arg, "alloc") == 0)
713 alloc_flag = (caller_flag + 1);
714 else if (strcmp(arg, "caller") == 0)
715 caller_flag = (alloc_flag + 1);
716 else
717 return -1;
718 return 0;
719}
720
721static int parse_line_opt(const struct option *opt __used,
722 const char *arg, int unset __used)
723{
724 int lines;
725
726 if (!arg)
727 return -1;
728
729 lines = strtoul(arg, NULL, 10);
730
731 if (caller_flag > alloc_flag)
732 caller_lines = lines;
733 else
734 alloc_lines = lines;
735
736 return 0;
737}
738
739static const struct option kmem_options[] = {
740 OPT_STRING('i', "input", &input_name, "file",
741 "input file name"),
742 OPT_CALLBACK(0, "stat", NULL, "<alloc>|<caller>",
743 		     "stat selector: pass 'alloc' or 'caller'",
744 parse_stat_opt),
745 OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
746 		     "sort by keys: ptr, callsite, bytes, hit, pingpong, frag",
747 parse_sort_opt),
748 OPT_CALLBACK('l', "line", NULL, "num",
749 		     "show n lines",
750 parse_line_opt),
751 OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
752 OPT_END()
753};
754
755static const char *record_args[] = {
756 "record",
757 "-a",
758 "-R",
759 "-M",
760 "-f",
761 "-c", "1",
762 "-e", "kmem:kmalloc",
763 "-e", "kmem:kmalloc_node",
764 "-e", "kmem:kfree",
765 "-e", "kmem:kmem_cache_alloc",
766 "-e", "kmem:kmem_cache_alloc_node",
767 "-e", "kmem:kmem_cache_free",
768};
769
770static int __cmd_record(int argc, const char **argv)
771{
772 unsigned int rec_argc, i, j;
773 const char **rec_argv;
774
775 rec_argc = ARRAY_SIZE(record_args) + argc - 1;
776 rec_argv = calloc(rec_argc + 1, sizeof(char *));
777
778 for (i = 0; i < ARRAY_SIZE(record_args); i++)
779 rec_argv[i] = strdup(record_args[i]);
780
781 for (j = 1; j < (unsigned int)argc; j++, i++)
782 rec_argv[i] = argv[j];
783
784 return cmd_record(i, rec_argv, NULL);
785}
786
787int cmd_kmem(int argc, const char **argv, const char *prefix __used)
788{
789 symbol__init(0);
790
791 argc = parse_options(argc, argv, kmem_options, kmem_usage, 0);
792
793 if (argc && !strncmp(argv[0], "rec", 3))
794 return __cmd_record(argc, argv);
795 else if (argc)
796 usage_with_options(kmem_usage, kmem_options);
797
798 if (list_empty(&caller_sort))
799 setup_sorting(&caller_sort, default_sort_order);
800 if (list_empty(&alloc_sort))
801 setup_sorting(&alloc_sort, default_sort_order);
802
803 setup_cpunode_map();
804
805 return __cmd_kmem();
806}
807
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
new file mode 100644
index 000000000000..a58e11b7ea80
--- /dev/null
+++ b/tools/perf/builtin-probe.c
@@ -0,0 +1,242 @@
1/*
2 * builtin-probe.c
3 *
4 * Builtin probe command: Set up probe events by C expression
5 *
6 * Written by Masami Hiramatsu <mhiramat@redhat.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 *
22 */
23#define _GNU_SOURCE
24#include <sys/utsname.h>
25#include <sys/types.h>
26#include <sys/stat.h>
27#include <fcntl.h>
28#include <errno.h>
29#include <stdio.h>
30#include <unistd.h>
31#include <stdlib.h>
32#include <string.h>
33
34#undef _GNU_SOURCE
35#include "perf.h"
36#include "builtin.h"
37#include "util/util.h"
38#include "util/event.h"
39#include "util/debug.h"
40#include "util/parse-options.h"
41#include "util/parse-events.h" /* For debugfs_path */
42#include "util/probe-finder.h"
43#include "util/probe-event.h"
44
45/* Default vmlinux search paths */
46#define NR_SEARCH_PATH 3
47const char *default_search_path[NR_SEARCH_PATH] = {
48"/lib/modules/%s/build/vmlinux", /* Custom build kernel */
49"/usr/lib/debug/lib/modules/%s/vmlinux", /* Red Hat debuginfo */
50"/boot/vmlinux-debug-%s", /* Ubuntu */
51};
52
53#define MAX_PATH_LEN 256
54#define MAX_PROBES 128
55
56/* Session management structure */
57static struct {
58 char *vmlinux;
59 char *release;
60 int need_dwarf;
61 int nr_probe;
62 struct probe_point probes[MAX_PROBES];
63} session;
64
65static bool listing;
66
 67/* Parse an event definition. Note that any parse error here is fatal (die()). */
68static void parse_probe_event(const char *str)
69{
70 struct probe_point *pp = &session.probes[session.nr_probe];
71
72 pr_debug("probe-definition(%d): %s\n", session.nr_probe, str);
73 if (++session.nr_probe == MAX_PROBES)
74 die("Too many probes (> %d) are specified.", MAX_PROBES);
75
76 /* Parse perf-probe event into probe_point */
77 session.need_dwarf = parse_perf_probe_event(str, pp);
78
79 pr_debug("%d arguments\n", pp->nr_args);
80}
81
82static int opt_add_probe_event(const struct option *opt __used,
83 const char *str, int unset __used)
84{
85 if (str)
86 parse_probe_event(str);
87 return 0;
88}
89
90#ifndef NO_LIBDWARF
91static int open_default_vmlinux(void)
92{
93 struct utsname uts;
94 char fname[MAX_PATH_LEN];
95 int fd, ret, i;
96
97 ret = uname(&uts);
98 if (ret) {
99 pr_debug("uname() failed.\n");
100 return -errno;
101 }
102 session.release = uts.release;
103 for (i = 0; i < NR_SEARCH_PATH; i++) {
104 ret = snprintf(fname, MAX_PATH_LEN,
105 default_search_path[i], session.release);
106 if (ret >= MAX_PATH_LEN || ret < 0) {
107 pr_debug("Filename(%d,%s) is too long.\n", i,
108 uts.release);
109 errno = E2BIG;
110 return -E2BIG;
111 }
112 pr_debug("try to open %s\n", fname);
113 fd = open(fname, O_RDONLY);
114 if (fd >= 0)
115 break;
116 }
117 return fd;
118}
119#endif
120
121static const char * const probe_usage[] = {
122 "perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]",
123 "perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
124 "perf probe --list",
125 NULL
126};
127
128static const struct option options[] = {
129 OPT_BOOLEAN('v', "verbose", &verbose,
130 "be more verbose (show parsed arguments, etc)"),
131#ifndef NO_LIBDWARF
132 OPT_STRING('k', "vmlinux", &session.vmlinux, "file",
133 "vmlinux/module pathname"),
134#endif
135 OPT_BOOLEAN('l', "list", &listing, "list up current probes"),
136 OPT_CALLBACK('a', "add", NULL,
137#ifdef NO_LIBDWARF
138 "FUNC[+OFFS|%return] [ARG ...]",
139#else
140 "FUNC[+OFFS|%return|:RLN][@SRC]|SRC:ALN [ARG ...]",
141#endif
142 "probe point definition, where\n"
143 "\t\tGRP:\tGroup name (optional)\n"
144 "\t\tNAME:\tEvent name\n"
145 "\t\tFUNC:\tFunction name\n"
146 "\t\tOFFS:\tOffset from function entry (in byte)\n"
147 "\t\t%return:\tPut the probe at function return\n"
148#ifdef NO_LIBDWARF
149 "\t\tARG:\tProbe argument (only \n"
150#else
151 "\t\tSRC:\tSource code path\n"
152 "\t\tRLN:\tRelative line number from function entry.\n"
153 "\t\tALN:\tAbsolute line number in file.\n"
154 "\t\tARG:\tProbe argument (local variable name or\n"
155#endif
156 "\t\t\tkprobe-tracer argument format.)\n",
157 opt_add_probe_event),
158 OPT_END()
159};
160
161int cmd_probe(int argc, const char **argv, const char *prefix __used)
162{
163 int i, j, ret;
164#ifndef NO_LIBDWARF
165 int fd;
166#endif
167 struct probe_point *pp;
168
169 argc = parse_options(argc, argv, options, probe_usage,
170 PARSE_OPT_STOP_AT_NON_OPTION);
171 for (i = 0; i < argc; i++)
172 parse_probe_event(argv[i]);
173
174 if ((session.nr_probe == 0 && !listing) ||
175 (session.nr_probe != 0 && listing))
176 usage_with_options(probe_usage, options);
177
178 if (listing) {
179 show_perf_probe_events();
180 return 0;
181 }
182
183 if (session.need_dwarf)
184#ifdef NO_LIBDWARF
185 die("Debuginfo-analysis is not supported");
186#else /* !NO_LIBDWARF */
187 pr_debug("Some probes require debuginfo.\n");
188
189 if (session.vmlinux)
190 fd = open(session.vmlinux, O_RDONLY);
191 else
192 fd = open_default_vmlinux();
193 if (fd < 0) {
194 if (session.need_dwarf)
195 die("Could not open vmlinux/module file.");
196
197 pr_warning("Could not open vmlinux/module file."
198 " Try to use symbols.\n");
199 goto end_dwarf;
200 }
201
202 /* Searching probe points */
203 for (j = 0; j < session.nr_probe; j++) {
204 pp = &session.probes[j];
205 if (pp->found)
206 continue;
207
 208		lseek(fd, 0, SEEK_SET);
209 ret = find_probepoint(fd, pp);
210 if (ret < 0) {
211 if (session.need_dwarf)
212 die("Could not analyze debuginfo.");
213
214 pr_warning("An error occurred in debuginfo analysis. Try to use symbols.\n");
215 break;
216 }
217 if (ret == 0) /* No error but failed to find probe point. */
218 die("No probe point found.");
219 }
220 close(fd);
221
222end_dwarf:
223#endif /* !NO_LIBDWARF */
224
225 /* Synthesize probes without dwarf */
226 for (j = 0; j < session.nr_probe; j++) {
227 pp = &session.probes[j];
228 if (pp->found) /* This probe is already found. */
229 continue;
230
231 ret = synthesize_trace_kprobe_event(pp);
232 if (ret == -E2BIG)
233 die("probe point definition becomes too long.");
234 else if (ret < 0)
235 die("Failed to synthesize a probe point.");
236 }
237
 238	/* Setting up probe points */
239 add_trace_kprobe_events(session.probes, session.nr_probe);
240 return 0;
241}
242
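open_default_vmlinux() above walks a short list of printf-style path templates, substitutes the running kernel release reported by uname(), and keeps the first file that opens. A self-contained sketch of that probing loop; the templates mirror default_search_path from the patch, the rest is illustrative:

```c
#include <fcntl.h>
#include <stdio.h>
#include <sys/utsname.h>
#include <unistd.h>

static const char *search_path[] = {
	"/lib/modules/%s/build/vmlinux",
	"/usr/lib/debug/lib/modules/%s/vmlinux",
	"/boot/vmlinux-debug-%s",
};

int main(void)
{
	struct utsname uts;
	char fname[256];
	unsigned int i;

	if (uname(&uts) != 0)
		return 1;

	for (i = 0; i < sizeof(search_path) / sizeof(search_path[0]); i++) {
		int n = snprintf(fname, sizeof(fname), search_path[i],
				 uts.release);
		if (n < 0 || (size_t)n >= sizeof(fname))
			continue; /* template plus release did not fit */

		int fd = open(fname, O_RDONLY);
		if (fd >= 0) {
			printf("found vmlinux at %s\n", fname);
			close(fd);
			return 0;
		}
	}
	fprintf(stderr, "no vmlinux found for release %s\n", uts.release);
	return 1;
}
```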
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index a4be453fc8a9..0e519c667e3a 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -17,55 +17,52 @@
17#include "util/header.h" 17#include "util/header.h"
18#include "util/event.h" 18#include "util/event.h"
19#include "util/debug.h" 19#include "util/debug.h"
20#include "util/trace-event.h" 20#include "util/symbol.h"
21 21
22#include <unistd.h> 22#include <unistd.h>
23#include <sched.h> 23#include <sched.h>
24 24
25#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1)
26#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
27
28static int fd[MAX_NR_CPUS][MAX_COUNTERS]; 25static int fd[MAX_NR_CPUS][MAX_COUNTERS];
29 26
30static long default_interval = 100000; 27static long default_interval = 0;
31 28
32static int nr_cpus = 0; 29static int nr_cpus = 0;
33static unsigned int page_size; 30static unsigned int page_size;
34static unsigned int mmap_pages = 128; 31static unsigned int mmap_pages = 128;
35static int freq = 0; 32static int freq = 1000;
36static int output; 33static int output;
37static const char *output_name = "perf.data"; 34static const char *output_name = "perf.data";
38static int group = 0; 35static int group = 0;
39static unsigned int realtime_prio = 0; 36static unsigned int realtime_prio = 0;
40static int raw_samples = 0; 37static int raw_samples = 0;
41static int system_wide = 0; 38static int system_wide = 0;
42static int profile_cpu = -1; 39static int profile_cpu = -1;
43static pid_t target_pid = -1; 40static pid_t target_pid = -1;
44static pid_t child_pid = -1; 41static pid_t child_pid = -1;
45static int inherit = 1; 42static int inherit = 1;
46static int force = 0; 43static int force = 0;
47static int append_file = 0; 44static int append_file = 0;
48static int call_graph = 0; 45static int call_graph = 0;
49static int inherit_stat = 0; 46static int inherit_stat = 0;
50static int no_samples = 0; 47static int no_samples = 0;
51static int sample_address = 0; 48static int sample_address = 0;
52static int multiplex = 0; 49static int multiplex = 0;
53static int multiplex_fd = -1; 50static int multiplex_fd = -1;
54 51
55static long samples; 52static long samples = 0;
56static struct timeval last_read; 53static struct timeval last_read;
57static struct timeval this_read; 54static struct timeval this_read;
58 55
59static u64 bytes_written; 56static u64 bytes_written = 0;
60 57
61static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; 58static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
62 59
63static int nr_poll; 60static int nr_poll = 0;
64static int nr_cpu; 61static int nr_cpu = 0;
65 62
66static int file_new = 1; 63static int file_new = 1;
67 64
68struct perf_header *header; 65struct perf_header *header = NULL;
69 66
70struct mmap_data { 67struct mmap_data {
71 int counter; 68 int counter;
@@ -113,6 +110,24 @@ static void write_output(void *buf, size_t size)
113 } 110 }
114} 111}
115 112
113static void write_event(event_t *buf, size_t size)
114{
115 /*
116 * Add it to the list of DSOs, so that when we finish this
117 * record session we can pick the available build-ids.
118 */
119 if (buf->header.type == PERF_RECORD_MMAP)
120 dsos__findnew(buf->mmap.filename);
121
122 write_output(buf, size);
123}
124
125static int process_synthesized_event(event_t *event)
126{
127 write_event(event, event->header.size);
128 return 0;
129}
130
116static void mmap_read(struct mmap_data *md) 131static void mmap_read(struct mmap_data *md)
117{ 132{
118 unsigned int head = mmap_read_head(md); 133 unsigned int head = mmap_read_head(md);
@@ -161,14 +176,14 @@ static void mmap_read(struct mmap_data *md)
161 size = md->mask + 1 - (old & md->mask); 176 size = md->mask + 1 - (old & md->mask);
162 old += size; 177 old += size;
163 178
164 write_output(buf, size); 179 write_event(buf, size);
165 } 180 }
166 181
167 buf = &data[old & md->mask]; 182 buf = &data[old & md->mask];
168 size = head - old; 183 size = head - old;
169 old += size; 184 old += size;
170 185
171 write_output(buf, size); 186 write_event(buf, size);
172 187
173 md->prev = old; 188 md->prev = old;
174 mmap_write_tail(md, old); 189 mmap_write_tail(md, old);
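The new write_event() wrapper, which mmap_read() above now calls instead of write_output(), peeks at each event header on its way to disk so that PERF_RECORD_MMAP events can register their DSOs for later build-id collection. A toy version of that tap pattern, using simplified stand-in types rather than perf's real event layout:

```c
#include <stddef.h>
#include <stdio.h>
#include <string.h>

enum { RECORD_MMAP = 1 };

struct ev_header { unsigned int type, size; };
struct ev_mmap   { struct ev_header header; char filename[64]; };

static void register_dso(const char *name)
{
	/* stand-in for dsos__findnew(): remember the DSO for build-id lookup */
	printf("registering DSO: %s\n", name);
}

static void write_event(const void *buf, size_t size)
{
	const struct ev_header *h = buf;

	if (h->type == RECORD_MMAP)
		register_dso(((const struct ev_mmap *)buf)->filename);

	/* ...then the raw bytes would go to the output file, as
	 * write_output() does in the real code */
	(void)size;
}

int main(void)
{
	struct ev_mmap ev = { .header = { RECORD_MMAP, sizeof(ev) } };

	strcpy(ev.filename, "/bin/cat");
	write_event(&ev, ev.header.size);
	return 0;
}
```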
@@ -195,168 +210,6 @@ static void sig_atexit(void)
195 kill(getpid(), signr); 210 kill(getpid(), signr);
196} 211}
197 212
198static pid_t pid_synthesize_comm_event(pid_t pid, int full)
199{
200 struct comm_event comm_ev;
201 char filename[PATH_MAX];
202 char bf[BUFSIZ];
203 FILE *fp;
204 size_t size = 0;
205 DIR *tasks;
206 struct dirent dirent, *next;
207 pid_t tgid = 0;
208
209 snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
210
211 fp = fopen(filename, "r");
212 if (fp == NULL) {
213 /*
214 * We raced with a task exiting - just return:
215 */
216 if (verbose)
217 fprintf(stderr, "couldn't open %s\n", filename);
218 return 0;
219 }
220
221 memset(&comm_ev, 0, sizeof(comm_ev));
222 while (!comm_ev.comm[0] || !comm_ev.pid) {
223 if (fgets(bf, sizeof(bf), fp) == NULL)
224 goto out_failure;
225
226 if (memcmp(bf, "Name:", 5) == 0) {
227 char *name = bf + 5;
228 while (*name && isspace(*name))
229 ++name;
230 size = strlen(name) - 1;
231 memcpy(comm_ev.comm, name, size++);
232 } else if (memcmp(bf, "Tgid:", 5) == 0) {
233 char *tgids = bf + 5;
234 while (*tgids && isspace(*tgids))
235 ++tgids;
236 tgid = comm_ev.pid = atoi(tgids);
237 }
238 }
239
240 comm_ev.header.type = PERF_RECORD_COMM;
241 size = ALIGN(size, sizeof(u64));
242 comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
243
244 if (!full) {
245 comm_ev.tid = pid;
246
247 write_output(&comm_ev, comm_ev.header.size);
248 goto out_fclose;
249 }
250
251 snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
252
253 tasks = opendir(filename);
254 while (!readdir_r(tasks, &dirent, &next) && next) {
255 char *end;
256 pid = strtol(dirent.d_name, &end, 10);
257 if (*end)
258 continue;
259
260 comm_ev.tid = pid;
261
262 write_output(&comm_ev, comm_ev.header.size);
263 }
264 closedir(tasks);
265
266out_fclose:
267 fclose(fp);
268 return tgid;
269
270out_failure:
271 fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
272 filename);
273 exit(EXIT_FAILURE);
274}
275
276static void pid_synthesize_mmap_samples(pid_t pid, pid_t tgid)
277{
278 char filename[PATH_MAX];
279 FILE *fp;
280
281 snprintf(filename, sizeof(filename), "/proc/%d/maps", pid);
282
283 fp = fopen(filename, "r");
284 if (fp == NULL) {
285 /*
286 * We raced with a task exiting - just return:
287 */
288 if (verbose)
289 fprintf(stderr, "couldn't open %s\n", filename);
290 return;
291 }
292 while (1) {
293 char bf[BUFSIZ], *pbf = bf;
294 struct mmap_event mmap_ev = {
295 .header = { .type = PERF_RECORD_MMAP },
296 };
297 int n;
298 size_t size;
299 if (fgets(bf, sizeof(bf), fp) == NULL)
300 break;
301
302 /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
303 n = hex2u64(pbf, &mmap_ev.start);
304 if (n < 0)
305 continue;
306 pbf += n + 1;
307 n = hex2u64(pbf, &mmap_ev.len);
308 if (n < 0)
309 continue;
310 pbf += n + 3;
311 if (*pbf == 'x') { /* vm_exec */
312 char *execname = strchr(bf, '/');
313
314 /* Catch VDSO */
315 if (execname == NULL)
316 execname = strstr(bf, "[vdso]");
317
318 if (execname == NULL)
319 continue;
320
321 size = strlen(execname);
322 execname[size - 1] = '\0'; /* Remove \n */
323 memcpy(mmap_ev.filename, execname, size);
324 size = ALIGN(size, sizeof(u64));
325 mmap_ev.len -= mmap_ev.start;
326 mmap_ev.header.size = (sizeof(mmap_ev) -
327 (sizeof(mmap_ev.filename) - size));
328 mmap_ev.pid = tgid;
329 mmap_ev.tid = pid;
330
331 write_output(&mmap_ev, mmap_ev.header.size);
332 }
333 }
334
335 fclose(fp);
336}
337
338static void synthesize_all(void)
339{
340 DIR *proc;
341 struct dirent dirent, *next;
342
343 proc = opendir("/proc");
344
345 while (!readdir_r(proc, &dirent, &next) && next) {
346 char *end;
347 pid_t pid, tgid;
348
349 pid = strtol(dirent.d_name, &end, 10);
350 if (*end) /* only interested in proper numerical dirents */
351 continue;
352
353 tgid = pid_synthesize_comm_event(pid, 1);
354 pid_synthesize_mmap_samples(pid, tgid);
355 }
356
357 closedir(proc);
358}
359
360static int group_fd; 213static int group_fd;
361 214
362static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int nr) 215static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int nr)
@@ -367,7 +220,11 @@ static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int n
367 h_attr = header->attr[nr]; 220 h_attr = header->attr[nr];
368 } else { 221 } else {
369 h_attr = perf_header_attr__new(a); 222 h_attr = perf_header_attr__new(a);
370 perf_header__add_attr(header, h_attr); 223 if (h_attr != NULL)
224 if (perf_header__add_attr(header, h_attr) < 0) {
225 perf_header_attr__delete(h_attr);
226 h_attr = NULL;
227 }
371 } 228 }
372 229
373 return h_attr; 230 return h_attr;
@@ -375,9 +232,11 @@ static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int n
375 232
376static void create_counter(int counter, int cpu, pid_t pid) 233static void create_counter(int counter, int cpu, pid_t pid)
377{ 234{
235 char *filter = filters[counter];
378 struct perf_event_attr *attr = attrs + counter; 236 struct perf_event_attr *attr = attrs + counter;
379 struct perf_header_attr *h_attr; 237 struct perf_header_attr *h_attr;
380 int track = !counter; /* only the first counter needs these */ 238 int track = !counter; /* only the first counter needs these */
239 int ret;
381 struct { 240 struct {
382 u64 count; 241 u64 count;
383 u64 time_enabled; 242 u64 time_enabled;
@@ -448,11 +307,19 @@ try_again:
448 printf("\n"); 307 printf("\n");
449 error("perfcounter syscall returned with %d (%s)\n", 308 error("perfcounter syscall returned with %d (%s)\n",
450 fd[nr_cpu][counter], strerror(err)); 309 fd[nr_cpu][counter], strerror(err));
310
311#if defined(__i386__) || defined(__x86_64__)
312 if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
313 die("No hardware sampling interrupt available. No APIC? If so then you can boot the kernel with the \"lapic\" boot parameter to force-enable it.\n");
314#endif
315
451 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 316 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
452 exit(-1); 317 exit(-1);
453 } 318 }
454 319
455 h_attr = get_header_attr(attr, counter); 320 h_attr = get_header_attr(attr, counter);
321 if (h_attr == NULL)
322 die("nomem\n");
456 323
457 if (!file_new) { 324 if (!file_new) {
458 if (memcmp(&h_attr->attr, attr, sizeof(*attr))) { 325 if (memcmp(&h_attr->attr, attr, sizeof(*attr))) {
@@ -466,7 +333,10 @@ try_again:
466 exit(-1); 333 exit(-1);
467 } 334 }
468 335
469 perf_header_attr__add_id(h_attr, read_data.id); 336 if (perf_header_attr__add_id(h_attr, read_data.id) < 0) {
337 pr_warning("Not enough memory to add id\n");
338 exit(-1);
339 }
470 340
471 assert(fd[nr_cpu][counter] >= 0); 341 assert(fd[nr_cpu][counter] >= 0);
472 fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK); 342 fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
@@ -480,7 +350,6 @@ try_again:
480 multiplex_fd = fd[nr_cpu][counter]; 350 multiplex_fd = fd[nr_cpu][counter];
481 351
482 if (multiplex && fd[nr_cpu][counter] != multiplex_fd) { 352 if (multiplex && fd[nr_cpu][counter] != multiplex_fd) {
483 int ret;
484 353
485 ret = ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_SET_OUTPUT, multiplex_fd); 354 ret = ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_SET_OUTPUT, multiplex_fd);
486 assert(ret != -1); 355 assert(ret != -1);
@@ -500,6 +369,16 @@ try_again:
500 } 369 }
501 } 370 }
502 371
372 if (filter != NULL) {
373 ret = ioctl(fd[nr_cpu][counter],
374 PERF_EVENT_IOC_SET_FILTER, filter);
375 if (ret) {
376 error("failed to set filter with %d (%s)\n", errno,
377 strerror(errno));
378 exit(-1);
379 }
380 }
381
503 ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_ENABLE); 382 ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_ENABLE);
504} 383}
505 384
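The filter hunk above is essentially the whole mechanism behind the new --filter option: a single PERF_EVENT_IOC_SET_FILTER ioctl on the event fd before it is enabled. A hedged sketch, assuming fd was returned by sys_perf_event_open() for a tracepoint event (the kernel only accepts filters on tracepoints):

```c
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>

/* Attach a tracepoint filter expression, then enable the event. */
static int set_filter_and_enable(int fd, const char *filter)
{
	if (ioctl(fd, PERF_EVENT_IOC_SET_FILTER, filter) != 0) {
		fprintf(stderr, "failed to set filter '%s': %s\n",
			filter, strerror(errno));
		return -1;
	}
	return ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
}
```

For example, set_filter_and_enable(fd, "bytes_req > 256") on a kmem:kmalloc event fd would record only the larger allocations.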
@@ -518,7 +397,7 @@ static void atexit_header(void)
518{ 397{
519 header->data_size += bytes_written; 398 header->data_size += bytes_written;
520 399
521 perf_header__write(header, output); 400 perf_header__write(header, output, true);
522} 401}
523 402
524static int __cmd_record(int argc, const char **argv) 403static int __cmd_record(int argc, const char **argv)
@@ -527,7 +406,7 @@ static int __cmd_record(int argc, const char **argv)
527 struct stat st; 406 struct stat st;
528 pid_t pid = 0; 407 pid_t pid = 0;
529 int flags; 408 int flags;
530 int ret; 409 int err;
531 unsigned long waking = 0; 410 unsigned long waking = 0;
532 411
533 page_size = sysconf(_SC_PAGE_SIZE); 412 page_size = sysconf(_SC_PAGE_SIZE);
@@ -561,22 +440,29 @@ static int __cmd_record(int argc, const char **argv)
561 exit(-1); 440 exit(-1);
562 } 441 }
563 442
564 if (!file_new) 443 header = perf_header__new();
565 header = perf_header__read(output); 444 if (header == NULL) {
566 else 445 pr_err("Not enough memory for reading perf file header\n");
567 header = perf_header__new(); 446 return -1;
447 }
568 448
449 if (!file_new) {
450 err = perf_header__read(header, output);
451 if (err < 0)
452 return err;
453 }
569 454
570 if (raw_samples) { 455 if (raw_samples) {
571 read_tracing_data(attrs, nr_counters); 456 perf_header__set_feat(header, HEADER_TRACE_INFO);
572 } else { 457 } else {
573 for (i = 0; i < nr_counters; i++) { 458 for (i = 0; i < nr_counters; i++) {
574 if (attrs[i].sample_type & PERF_SAMPLE_RAW) { 459 if (attrs[i].sample_type & PERF_SAMPLE_RAW) {
575 read_tracing_data(attrs, nr_counters); 460 perf_header__set_feat(header, HEADER_TRACE_INFO);
576 break; 461 break;
577 } 462 }
578 } 463 }
579 } 464 }
465
580 atexit(atexit_header); 466 atexit(atexit_header);
581 467
582 if (!system_wide) { 468 if (!system_wide) {
@@ -594,25 +480,36 @@ static int __cmd_record(int argc, const char **argv)
594 } 480 }
595 } 481 }
596 482
597 if (file_new) 483 if (file_new) {
598 perf_header__write(header, output); 484 err = perf_header__write(header, output, false);
485 if (err < 0)
486 return err;
487 }
599 488
600 if (!system_wide) { 489 if (!system_wide)
601 pid_t tgid = pid_synthesize_comm_event(pid, 0); 490 event__synthesize_thread(pid, process_synthesized_event);
602 pid_synthesize_mmap_samples(pid, tgid); 491 else
603 } else 492 event__synthesize_threads(process_synthesized_event);
604 synthesize_all();
605 493
606 if (target_pid == -1 && argc) { 494 if (target_pid == -1 && argc) {
607 pid = fork(); 495 pid = fork();
608 if (pid < 0) 496 if (pid < 0)
609 perror("failed to fork"); 497 die("failed to fork");
610 498
611 if (!pid) { 499 if (!pid) {
612 if (execvp(argv[0], (char **)argv)) { 500 if (execvp(argv[0], (char **)argv)) {
613 perror(argv[0]); 501 perror(argv[0]);
614 exit(-1); 502 exit(-1);
615 } 503 }
504 } else {
505 /*
506 * Wait a bit for the execv'ed child to appear
507 * and be updated in /proc
 508		 * FIXME: Do you know a less heuristic solution?
509 */
510 usleep(1000);
511 event__synthesize_thread(pid,
512 process_synthesized_event);
616 } 513 }
617 514
618 child_pid = pid; 515 child_pid = pid;
@@ -623,7 +520,7 @@ static int __cmd_record(int argc, const char **argv)
623 520
624 param.sched_priority = realtime_prio; 521 param.sched_priority = realtime_prio;
625 if (sched_setscheduler(0, SCHED_FIFO, &param)) { 522 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
626 printf("Could not set realtime priority.\n"); 523 pr_err("Could not set realtime priority.\n");
627 exit(-1); 524 exit(-1);
628 } 525 }
629 } 526 }
@@ -641,7 +538,7 @@ static int __cmd_record(int argc, const char **argv)
641 if (hits == samples) { 538 if (hits == samples) {
642 if (done) 539 if (done)
643 break; 540 break;
644 ret = poll(event_array, nr_poll, -1); 541 err = poll(event_array, nr_poll, -1);
645 waking++; 542 waking++;
646 } 543 }
647 544
@@ -677,6 +574,8 @@ static const struct option options[] = {
677 OPT_CALLBACK('e', "event", NULL, "event", 574 OPT_CALLBACK('e', "event", NULL, "event",
678 "event selector. use 'perf list' to list available events", 575 "event selector. use 'perf list' to list available events",
679 parse_events), 576 parse_events),
577 OPT_CALLBACK(0, "filter", NULL, "filter",
578 "event filter", parse_filter),
680 OPT_INTEGER('p', "pid", &target_pid, 579 OPT_INTEGER('p', "pid", &target_pid,
681 "record events on existing pid"), 580 "record events on existing pid"),
682 OPT_INTEGER('r', "realtime", &realtime_prio, 581 OPT_INTEGER('r', "realtime", &realtime_prio,
@@ -720,6 +619,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
720{ 619{
721 int counter; 620 int counter;
722 621
622 symbol__init(0);
623
723 argc = parse_options(argc, argv, options, record_usage, 624 argc = parse_options(argc, argv, options, record_usage,
724 PARSE_OPT_STOP_AT_NON_OPTION); 625 PARSE_OPT_STOP_AT_NON_OPTION);
725 if (!argc && target_pid == -1 && !system_wide) 626 if (!argc && target_pid == -1 && !system_wide)
@@ -731,6 +632,18 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
731 attrs[0].config = PERF_COUNT_HW_CPU_CYCLES; 632 attrs[0].config = PERF_COUNT_HW_CPU_CYCLES;
732 } 633 }
733 634
635 /*
636 * User specified count overrides default frequency.
637 */
638 if (default_interval)
639 freq = 0;
640 else if (freq) {
641 default_interval = freq;
642 } else {
643 fprintf(stderr, "frequency and count are zero, aborting\n");
644 exit(EXIT_FAILURE);
645 }
646
734 for (counter = 0; counter < nr_counters; counter++) { 647 for (counter = 0; counter < nr_counters; counter++) {
735 if (attrs[counter].sample_period) 648 if (attrs[counter].sample_period)
736 continue; 649 continue;
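The final hunk above flips record's default from a fixed 100000-cycle sample period to 1000 Hz frequency mode, with an explicit -c count overriding and disabling frequency sampling. The decision logic, reduced to a runnable sketch (values illustrative):

```c
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
	long interval = argc > 1 ? atol(argv[1]) : 0; /* -c <count>, 0 = unset */
	int freq = 1000;                              /* new default: 1000 Hz */

	if (interval)
		freq = 0;		/* fixed-period sampling */
	else if (freq)
		interval = freq;	/* seed period; kernel adapts it in freq mode */
	else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		return EXIT_FAILURE;
	}

	printf("sample_period=%ld freq=%d\n", interval, freq);
	return 0;
}
```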
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 19669c20088e..383c4ab4f9af 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -26,20 +26,18 @@
26#include "util/parse-options.h" 26#include "util/parse-options.h"
27#include "util/parse-events.h" 27#include "util/parse-events.h"
28 28
29#include "util/data_map.h"
29#include "util/thread.h" 30#include "util/thread.h"
31#include "util/sort.h"
32#include "util/hist.h"
30 33
31static char const *input_name = "perf.data"; 34static char const *input_name = "perf.data";
32 35
33static char default_sort_order[] = "comm,dso,symbol";
34static char *sort_order = default_sort_order;
35static char *dso_list_str, *comm_list_str, *sym_list_str, 36static char *dso_list_str, *comm_list_str, *sym_list_str,
36 *col_width_list_str; 37 *col_width_list_str;
37static struct strlist *dso_list, *comm_list, *sym_list; 38static struct strlist *dso_list, *comm_list, *sym_list;
38static char *field_sep;
39 39
40static int force; 40static int force;
41static int input;
42static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
43 41
44static int full_paths; 42static int full_paths;
45static int show_nr_samples; 43static int show_nr_samples;
@@ -50,374 +48,38 @@ static struct perf_read_values show_threads_values;
50static char default_pretty_printing_style[] = "normal"; 48static char default_pretty_printing_style[] = "normal";
51static char *pretty_printing_style = default_pretty_printing_style; 49static char *pretty_printing_style = default_pretty_printing_style;
52 50
53static unsigned long page_size;
54static unsigned long mmap_window = 32;
55
56static char default_parent_pattern[] = "^sys_|^do_page_fault";
57static char *parent_pattern = default_parent_pattern;
58static regex_t parent_regex;
59
60static int exclude_other = 1; 51static int exclude_other = 1;
61 52
62static char callchain_default_opt[] = "fractal,0.5"; 53static char callchain_default_opt[] = "fractal,0.5";
63 54
64static int callchain;
65
66static char __cwd[PATH_MAX];
67static char *cwd = __cwd;
68static int cwdlen;
69
70static struct rb_root threads;
71static struct thread *last_match;
72
73static struct perf_header *header; 55static struct perf_header *header;
74 56
75static
76struct callchain_param callchain_param = {
77 .mode = CHAIN_GRAPH_REL,
78 .min_percent = 0.5
79};
80
81static u64 sample_type; 57static u64 sample_type;
82 58
83static int repsep_fprintf(FILE *fp, const char *fmt, ...) 59struct symbol_conf symbol_conf;
84{
85 int n;
86 va_list ap;
87
88 va_start(ap, fmt);
89 if (!field_sep)
90 n = vfprintf(fp, fmt, ap);
91 else {
92 char *bf = NULL;
93 n = vasprintf(&bf, fmt, ap);
94 if (n > 0) {
95 char *sep = bf;
96
97 while (1) {
98 sep = strchr(sep, *field_sep);
99 if (sep == NULL)
100 break;
101 *sep = '.';
102 }
103 }
104 fputs(bf, fp);
105 free(bf);
106 }
107 va_end(ap);
108 return n;
109}
110
111static unsigned int dsos__col_width,
112 comms__col_width,
113 threads__col_width;
114 60
115/*
116 * histogram, sorted on item, collects counts
117 */
118
119static struct rb_root hist;
120
121struct hist_entry {
122 struct rb_node rb_node;
123
124 struct thread *thread;
125 struct map *map;
126 struct dso *dso;
127 struct symbol *sym;
128 struct symbol *parent;
129 u64 ip;
130 char level;
131 struct callchain_node callchain;
132 struct rb_root sorted_chain;
133
134 u64 count;
135};
136
137/*
138 * configurable sorting bits
139 */
140
141struct sort_entry {
142 struct list_head list;
143
144 const char *header;
145
146 int64_t (*cmp)(struct hist_entry *, struct hist_entry *);
147 int64_t (*collapse)(struct hist_entry *, struct hist_entry *);
148 size_t (*print)(FILE *fp, struct hist_entry *, unsigned int width);
149 unsigned int *width;
150 bool elide;
151};
152
153static int64_t cmp_null(void *l, void *r)
154{
155 if (!l && !r)
156 return 0;
157 else if (!l)
158 return -1;
159 else
160 return 1;
161}
162
163/* --sort pid */
164
165static int64_t
166sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
167{
168 return right->thread->pid - left->thread->pid;
169}
170 61
171static size_t 62static size_t
172sort__thread_print(FILE *fp, struct hist_entry *self, unsigned int width) 63callchain__fprintf_left_margin(FILE *fp, int left_margin)
173{ 64{
174 return repsep_fprintf(fp, "%*s:%5d", width - 6, 65 int i;
175 self->thread->comm ?: "", self->thread->pid); 66 int ret;
176}
177
178static struct sort_entry sort_thread = {
179 .header = "Command: Pid",
180 .cmp = sort__thread_cmp,
181 .print = sort__thread_print,
182 .width = &threads__col_width,
183};
184
185/* --sort comm */
186
187static int64_t
188sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
189{
190 return right->thread->pid - left->thread->pid;
191}
192
193static int64_t
194sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
195{
196 char *comm_l = left->thread->comm;
197 char *comm_r = right->thread->comm;
198
199 if (!comm_l || !comm_r)
200 return cmp_null(comm_l, comm_r);
201
202 return strcmp(comm_l, comm_r);
203}
204
205static size_t
206sort__comm_print(FILE *fp, struct hist_entry *self, unsigned int width)
207{
208 return repsep_fprintf(fp, "%*s", width, self->thread->comm);
209}
210
211static struct sort_entry sort_comm = {
212 .header = "Command",
213 .cmp = sort__comm_cmp,
214 .collapse = sort__comm_collapse,
215 .print = sort__comm_print,
216 .width = &comms__col_width,
217};
218
219/* --sort dso */
220
221static int64_t
222sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
223{
224 struct dso *dso_l = left->dso;
225 struct dso *dso_r = right->dso;
226
227 if (!dso_l || !dso_r)
228 return cmp_null(dso_l, dso_r);
229
230 return strcmp(dso_l->name, dso_r->name);
231}
232
233static size_t
234sort__dso_print(FILE *fp, struct hist_entry *self, unsigned int width)
235{
236 if (self->dso)
237 return repsep_fprintf(fp, "%-*s", width, self->dso->name);
238
239 return repsep_fprintf(fp, "%*llx", width, (u64)self->ip);
240}
241
242static struct sort_entry sort_dso = {
243 .header = "Shared Object",
244 .cmp = sort__dso_cmp,
245 .print = sort__dso_print,
246 .width = &dsos__col_width,
247};
248
249/* --sort symbol */
250
251static int64_t
252sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
253{
254 u64 ip_l, ip_r;
255
256 if (left->sym == right->sym)
257 return 0;
258
259 ip_l = left->sym ? left->sym->start : left->ip;
260 ip_r = right->sym ? right->sym->start : right->ip;
261
262 return (int64_t)(ip_r - ip_l);
263}
264
265static size_t
266sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used)
267{
268 size_t ret = 0;
269 67
270 if (verbose) 68 ret = fprintf(fp, " ");
271 ret += repsep_fprintf(fp, "%#018llx %c ", (u64)self->ip,
272 dso__symtab_origin(self->dso));
273 69
274 ret += repsep_fprintf(fp, "[%c] ", self->level); 70 for (i = 0; i < left_margin; i++)
275 if (self->sym) { 71 ret += fprintf(fp, " ");
276 ret += repsep_fprintf(fp, "%s", self->sym->name);
277
278 if (self->sym->module)
279 ret += repsep_fprintf(fp, "\t[%s]",
280 self->sym->module->name);
281 } else {
282 ret += repsep_fprintf(fp, "%#016llx", (u64)self->ip);
283 }
284 72
285 return ret; 73 return ret;
286} 74}
287 75
288static struct sort_entry sort_sym = { 76static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask,
289 .header = "Symbol", 77 int left_margin)
290 .cmp = sort__sym_cmp,
291 .print = sort__sym_print,
292};
293
294/* --sort parent */
295
296static int64_t
297sort__parent_cmp(struct hist_entry *left, struct hist_entry *right)
298{
299 struct symbol *sym_l = left->parent;
300 struct symbol *sym_r = right->parent;
301
302 if (!sym_l || !sym_r)
303 return cmp_null(sym_l, sym_r);
304
305 return strcmp(sym_l->name, sym_r->name);
306}
307
308static size_t
309sort__parent_print(FILE *fp, struct hist_entry *self, unsigned int width)
310{
311 return repsep_fprintf(fp, "%-*s", width,
312 self->parent ? self->parent->name : "[other]");
313}
314
315static unsigned int parent_symbol__col_width;
316
317static struct sort_entry sort_parent = {
318 .header = "Parent symbol",
319 .cmp = sort__parent_cmp,
320 .print = sort__parent_print,
321 .width = &parent_symbol__col_width,
322};
323
324static int sort__need_collapse = 0;
325static int sort__has_parent = 0;
326
327struct sort_dimension {
328 const char *name;
329 struct sort_entry *entry;
330 int taken;
331};
332
333static struct sort_dimension sort_dimensions[] = {
334 { .name = "pid", .entry = &sort_thread, },
335 { .name = "comm", .entry = &sort_comm, },
336 { .name = "dso", .entry = &sort_dso, },
337 { .name = "symbol", .entry = &sort_sym, },
338 { .name = "parent", .entry = &sort_parent, },
339};
340
341static LIST_HEAD(hist_entry__sort_list);
342
343static int sort_dimension__add(const char *tok)
344{
345 unsigned int i;
346
347 for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) {
348 struct sort_dimension *sd = &sort_dimensions[i];
349
350 if (sd->taken)
351 continue;
352
353 if (strncasecmp(tok, sd->name, strlen(tok)))
354 continue;
355
356 if (sd->entry->collapse)
357 sort__need_collapse = 1;
358
359 if (sd->entry == &sort_parent) {
360 int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
361 if (ret) {
362 char err[BUFSIZ];
363
364 regerror(ret, &parent_regex, err, sizeof(err));
365 fprintf(stderr, "Invalid regex: %s\n%s",
366 parent_pattern, err);
367 exit(-1);
368 }
369 sort__has_parent = 1;
370 }
371
372 list_add_tail(&sd->entry->list, &hist_entry__sort_list);
373 sd->taken = 1;
374
375 return 0;
376 }
377
378 return -ESRCH;
379}
380
381static int64_t
382hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
383{
384 struct sort_entry *se;
385 int64_t cmp = 0;
386
387 list_for_each_entry(se, &hist_entry__sort_list, list) {
388 cmp = se->cmp(left, right);
389 if (cmp)
390 break;
391 }
392
393 return cmp;
394}
395
396static int64_t
397hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
398{
399 struct sort_entry *se;
400 int64_t cmp = 0;
401
402 list_for_each_entry(se, &hist_entry__sort_list, list) {
403 int64_t (*f)(struct hist_entry *, struct hist_entry *);
404
405 f = se->collapse ?: se->cmp;
406
407 cmp = f(left, right);
408 if (cmp)
409 break;
410 }
411
412 return cmp;
413}
414
415static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask)
416{ 78{
417 int i; 79 int i;
418 size_t ret = 0; 80 size_t ret = 0;
419 81
420 ret += fprintf(fp, "%s", " "); 82 ret += callchain__fprintf_left_margin(fp, left_margin);
421 83
422 for (i = 0; i < depth; i++) 84 for (i = 0; i < depth; i++)
423 if (depth_mask & (1 << i)) 85 if (depth_mask & (1 << i))
@@ -432,12 +94,12 @@ static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask)
432static size_t 94static size_t
433ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, int depth, 95ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, int depth,
434 int depth_mask, int count, u64 total_samples, 96 int depth_mask, int count, u64 total_samples,
435 int hits) 97 int hits, int left_margin)
436{ 98{
437 int i; 99 int i;
438 size_t ret = 0; 100 size_t ret = 0;
439 101
440 ret += fprintf(fp, "%s", " "); 102 ret += callchain__fprintf_left_margin(fp, left_margin);
441 for (i = 0; i < depth; i++) { 103 for (i = 0; i < depth; i++) {
442 if (depth_mask & (1 << i)) 104 if (depth_mask & (1 << i))
443 ret += fprintf(fp, "|"); 105 ret += fprintf(fp, "|");
@@ -475,8 +137,9 @@ static void init_rem_hits(void)
475} 137}
476 138
477static size_t 139static size_t
478callchain__fprintf_graph(FILE *fp, struct callchain_node *self, 140__callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
479 u64 total_samples, int depth, int depth_mask) 141 u64 total_samples, int depth, int depth_mask,
142 int left_margin)
480{ 143{
481 struct rb_node *node, *next; 144 struct rb_node *node, *next;
482 struct callchain_node *child; 145 struct callchain_node *child;
@@ -517,7 +180,8 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
 517		 * But we keep the older depth mask for the line separator 180		 * But we keep the older depth mask for the line separator
518 * to keep the level link until we reach the last child 181 * to keep the level link until we reach the last child
519 */ 182 */
520 ret += ipchain__fprintf_graph_line(fp, depth, depth_mask); 183 ret += ipchain__fprintf_graph_line(fp, depth, depth_mask,
184 left_margin);
521 i = 0; 185 i = 0;
522 list_for_each_entry(chain, &child->val, list) { 186 list_for_each_entry(chain, &child->val, list) {
523 if (chain->ip >= PERF_CONTEXT_MAX) 187 if (chain->ip >= PERF_CONTEXT_MAX)
@@ -525,11 +189,13 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
525 ret += ipchain__fprintf_graph(fp, chain, depth, 189 ret += ipchain__fprintf_graph(fp, chain, depth,
526 new_depth_mask, i++, 190 new_depth_mask, i++,
527 new_total, 191 new_total,
528 cumul); 192 cumul,
193 left_margin);
529 } 194 }
530 ret += callchain__fprintf_graph(fp, child, new_total, 195 ret += __callchain__fprintf_graph(fp, child, new_total,
531 depth + 1, 196 depth + 1,
532 new_depth_mask | (1 << depth)); 197 new_depth_mask | (1 << depth),
198 left_margin);
533 node = next; 199 node = next;
534 } 200 }
535 201
@@ -543,9 +209,48 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
543 209
544 ret += ipchain__fprintf_graph(fp, &rem_hits, depth, 210 ret += ipchain__fprintf_graph(fp, &rem_hits, depth,
545 new_depth_mask, 0, new_total, 211 new_depth_mask, 0, new_total,
546 remaining); 212 remaining, left_margin);
213 }
214
215 return ret;
216}
217
218
219static size_t
220callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
221 u64 total_samples, int left_margin)
222{
223 struct callchain_list *chain;
224 bool printed = false;
225 int i = 0;
226 int ret = 0;
227
228 list_for_each_entry(chain, &self->val, list) {
229 if (chain->ip >= PERF_CONTEXT_MAX)
230 continue;
231
232 if (!i++ && sort__first_dimension == SORT_SYM)
233 continue;
234
235 if (!printed) {
236 ret += callchain__fprintf_left_margin(fp, left_margin);
237 ret += fprintf(fp, "|\n");
238 ret += callchain__fprintf_left_margin(fp, left_margin);
239 ret += fprintf(fp, "---");
240
241 left_margin += 3;
242 printed = true;
243 } else
244 ret += callchain__fprintf_left_margin(fp, left_margin);
245
246 if (chain->sym)
247 ret += fprintf(fp, " %s\n", chain->sym->name);
248 else
249 ret += fprintf(fp, " %p\n", (void *)(long)chain->ip);
547 } 250 }
548 251
252 ret += __callchain__fprintf_graph(fp, self, total_samples, 1, 1, left_margin);
253
549 return ret; 254 return ret;
550} 255}
551 256
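In the reworked graph printers above, left_margin shifts the whole callchain under its comm column, and depth_mask is a plain bitmask: bit i set means an ancestor branch at depth i is still open, so its '|' link must keep being drawn. A toy rendering of that trick:

```c
#include <stdio.h>

static void print_graph_line(int depth, int depth_mask, int left_margin)
{
	int i;

	for (i = 0; i < left_margin; i++)
		putchar(' ');

	for (i = 0; i < depth; i++) {
		/* bit i set: a branch at depth i is still open, keep its link */
		putchar(depth_mask & (1 << i) ? '|' : ' ');
		printf("          ");
	}
	putchar('\n');
}

int main(void)
{
	/* open branches at depths 0 and 2, a finished one at depth 1 */
	print_graph_line(3, (1 << 0) | (1 << 2), 4);
	return 0;
}
```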
@@ -577,7 +282,7 @@ callchain__fprintf_flat(FILE *fp, struct callchain_node *self,
577 282
578static size_t 283static size_t
579hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, 284hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self,
580 u64 total_samples) 285 u64 total_samples, int left_margin)
581{ 286{
582 struct rb_node *rb_node; 287 struct rb_node *rb_node;
583 struct callchain_node *chain; 288 struct callchain_node *chain;
@@ -597,8 +302,8 @@ hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self,
597 break; 302 break;
598 case CHAIN_GRAPH_ABS: /* Falldown */ 303 case CHAIN_GRAPH_ABS: /* Falldown */
599 case CHAIN_GRAPH_REL: 304 case CHAIN_GRAPH_REL:
600 ret += callchain__fprintf_graph(fp, chain, 305 ret += callchain__fprintf_graph(fp, chain, total_samples,
601 total_samples, 1, 1); 306 left_margin);
602 case CHAIN_NONE: 307 case CHAIN_NONE:
603 default: 308 default:
604 break; 309 break;
@@ -610,7 +315,6 @@ hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self,
610 return ret; 315 return ret;
611} 316}
612 317
613
614static size_t 318static size_t
615hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) 319hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples)
616{ 320{
@@ -644,8 +348,19 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples)
644 348
645 ret += fprintf(fp, "\n"); 349 ret += fprintf(fp, "\n");
646 350
647 if (callchain) 351 if (callchain) {
648 hist_entry_callchain__fprintf(fp, self, total_samples); 352 int left_margin = 0;
353
354 if (sort__first_dimension == SORT_COMM) {
355 se = list_first_entry(&hist_entry__sort_list, typeof(*se),
356 list);
357 left_margin = se->width ? *se->width : 0;
358 left_margin -= thread__comm_len(self->thread);
359 }
360
361 hist_entry_callchain__fprintf(fp, self, total_samples,
362 left_margin);
363 }
649 364
650 return ret; 365 return ret;
651} 366}
@@ -693,63 +408,6 @@ static int thread__set_comm_adjust(struct thread *self, const char *comm)
693 return 0; 408 return 0;
694} 409}
695 410
696
697static struct symbol *
698resolve_symbol(struct thread *thread, struct map **mapp,
699 struct dso **dsop, u64 *ipp)
700{
701 struct dso *dso = dsop ? *dsop : NULL;
702 struct map *map = mapp ? *mapp : NULL;
703 u64 ip = *ipp;
704
705 if (!thread)
706 return NULL;
707
708 if (dso)
709 goto got_dso;
710
711 if (map)
712 goto got_map;
713
714 map = thread__find_map(thread, ip);
715 if (map != NULL) {
716 /*
717 * We have to do this here as we may have a dso
718 * with no symbol hit that has a name longer than
719 * the ones with symbols sampled.
720 */
721 if (!sort_dso.elide && !map->dso->slen_calculated)
722 dso__calc_col_width(map->dso);
723
724 if (mapp)
725 *mapp = map;
726got_map:
727 ip = map->map_ip(map, ip);
728
729 dso = map->dso;
730 } else {
731 /*
732 * If this is outside of all known maps,
733 * and is a negative address, try to look it
734 * up in the kernel dso, as it might be a
735 * vsyscall (which executes in user-mode):
736 */
737 if ((long long)ip < 0)
738 dso = kernel_dso;
739 }
740 dump_printf(" ...... dso: %s\n", dso ? dso->name : "<not found>");
741 dump_printf(" ...... map: %Lx -> %Lx\n", *ipp, ip);
742 *ipp = ip;
743
744 if (dsop)
745 *dsop = dso;
746
747 if (!dso)
748 return NULL;
749got_dso:
750 return dso->find_symbol(dso, ip);
751}
752
753static int call__match(struct symbol *sym) 411static int call__match(struct symbol *sym)
754{ 412{
755 if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0)) 413 if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
@@ -758,11 +416,11 @@ static int call__match(struct symbol *sym)
758 return 0; 416 return 0;
759} 417}
760 418
761static struct symbol ** 419static struct symbol **resolve_callchain(struct thread *thread,
762resolve_callchain(struct thread *thread, struct map *map __used, 420 struct ip_callchain *chain,
763 struct ip_callchain *chain, struct hist_entry *entry) 421 struct symbol **parent)
764{ 422{
765 u64 context = PERF_CONTEXT_MAX; 423 u8 cpumode = PERF_RECORD_MISC_USER;
766 struct symbol **syms = NULL; 424 struct symbol **syms = NULL;
767 unsigned int i; 425 unsigned int i;
768 426
@@ -776,34 +434,31 @@ resolve_callchain(struct thread *thread, struct map *map __used,
776 434
777 for (i = 0; i < chain->nr; i++) { 435 for (i = 0; i < chain->nr; i++) {
778 u64 ip = chain->ips[i]; 436 u64 ip = chain->ips[i];
779 struct dso *dso = NULL; 437 struct addr_location al;
780 struct symbol *sym;
781 438
782 if (ip >= PERF_CONTEXT_MAX) { 439 if (ip >= PERF_CONTEXT_MAX) {
783 context = ip; 440 switch (ip) {
441 case PERF_CONTEXT_HV:
442 cpumode = PERF_RECORD_MISC_HYPERVISOR; break;
443 case PERF_CONTEXT_KERNEL:
444 cpumode = PERF_RECORD_MISC_KERNEL; break;
445 case PERF_CONTEXT_USER:
446 cpumode = PERF_RECORD_MISC_USER; break;
447 default:
448 break;
449 }
784 continue; 450 continue;
785 } 451 }
786 452
787 switch (context) { 453 thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
788 case PERF_CONTEXT_HV: 454 ip, &al, NULL);
789 dso = hypervisor_dso; 455 if (al.sym != NULL) {
790 break; 456 if (sort__has_parent && !*parent &&
791 case PERF_CONTEXT_KERNEL: 457 call__match(al.sym))
792 dso = kernel_dso; 458 *parent = al.sym;
793 break;
794 default:
795 break;
796 }
797
798 sym = resolve_symbol(thread, NULL, &dso, &ip);
799
800 if (sym) {
801 if (sort__has_parent && call__match(sym) &&
802 !entry->parent)
803 entry->parent = sym;
804 if (!callchain) 459 if (!callchain)
805 break; 460 break;
806 syms[i] = sym; 461 syms[i] = al.sym;
807 } 462 }
808 } 463 }
809 464
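The reworked resolve_callchain() above drops the per-context DSO globals in favor of a running cpumode: ip values at or above PERF_CONTEXT_MAX are not addresses but markers that switch how the following real addresses are resolved. A standalone sketch using the same marker constants as linux/perf_event.h; the resolver itself is a stand-in print:

```c
#include <stdio.h>

typedef unsigned long long u64;

#define PERF_CONTEXT_HV     ((u64)-32)
#define PERF_CONTEXT_KERNEL ((u64)-128)
#define PERF_CONTEXT_USER   ((u64)-512)
#define PERF_CONTEXT_MAX    ((u64)-4095)

enum cpumode { MODE_HV, MODE_KERNEL, MODE_USER };

static const char *mode_name[] = { "hv", "kernel", "user" };

int main(void)
{
	u64 chain[] = { PERF_CONTEXT_KERNEL, 0xffffffff81000000ULL,
			PERF_CONTEXT_USER, 0x400123ULL };
	enum cpumode mode = MODE_USER;
	unsigned int i;

	for (i = 0; i < sizeof(chain) / sizeof(chain[0]); i++) {
		if (chain[i] >= PERF_CONTEXT_MAX) {
			/* a marker, not an address: switch resolution mode */
			switch (chain[i]) {
			case PERF_CONTEXT_HV:     mode = MODE_HV;     break;
			case PERF_CONTEXT_KERNEL: mode = MODE_KERNEL; break;
			case PERF_CONTEXT_USER:   mode = MODE_USER;   break;
			}
			continue;
		}
		printf("resolve %#llx in %s mode\n", chain[i], mode_name[mode]);
	}
	return 0;
}
```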
@@ -814,178 +469,33 @@ resolve_callchain(struct thread *thread, struct map *map __used,
814 * collect histogram counts 469 * collect histogram counts
815 */ 470 */
816 471
817static int 472static int hist_entry__add(struct addr_location *al,
818hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, 473 struct ip_callchain *chain, u64 count)
819 struct symbol *sym, u64 ip, struct ip_callchain *chain,
820 char level, u64 count)
821{ 474{
822 struct rb_node **p = &hist.rb_node; 475 struct symbol **syms = NULL, *parent = NULL;
823 struct rb_node *parent = NULL; 476 bool hit;
824 struct hist_entry *he; 477 struct hist_entry *he;
825 struct symbol **syms = NULL;
826 struct hist_entry entry = {
827 .thread = thread,
828 .map = map,
829 .dso = dso,
830 .sym = sym,
831 .ip = ip,
832 .level = level,
833 .count = count,
834 .parent = NULL,
835 .sorted_chain = RB_ROOT
836 };
837 int cmp;
838 478
839 if ((sort__has_parent || callchain) && chain) 479 if ((sort__has_parent || callchain) && chain)
840 syms = resolve_callchain(thread, map, chain, &entry); 480 syms = resolve_callchain(al->thread, chain, &parent);
841 481
842 while (*p != NULL) { 482 he = __hist_entry__add(al, parent, count, &hit);
843 parent = *p; 483 if (he == NULL)
844 he = rb_entry(parent, struct hist_entry, rb_node); 484 return -ENOMEM;
845 485
846 cmp = hist_entry__cmp(&entry, he); 486 if (hit)
487 he->count += count;
847 488
848 if (!cmp) {
849 he->count += count;
850 if (callchain) {
851 append_chain(&he->callchain, chain, syms);
852 free(syms);
853 }
854 return 0;
855 }
856
857 if (cmp < 0)
858 p = &(*p)->rb_left;
859 else
860 p = &(*p)->rb_right;
861 }
862
863 he = malloc(sizeof(*he));
864 if (!he)
865 return -ENOMEM;
866 *he = entry;
867 if (callchain) { 489 if (callchain) {
868 callchain_init(&he->callchain); 490 if (!hit)
491 callchain_init(&he->callchain);
869 append_chain(&he->callchain, chain, syms); 492 append_chain(&he->callchain, chain, syms);
870 free(syms); 493 free(syms);
871 } 494 }
872 rb_link_node(&he->rb_node, parent, p);
873 rb_insert_color(&he->rb_node, &hist);
874 495
875 return 0; 496 return 0;
876} 497}
877 498
878static void hist_entry__free(struct hist_entry *he)
879{
880 free(he);
881}
882
883/*
884 * collapse the histogram
885 */
886
887static struct rb_root collapse_hists;
888
889static void collapse__insert_entry(struct hist_entry *he)
890{
891 struct rb_node **p = &collapse_hists.rb_node;
892 struct rb_node *parent = NULL;
893 struct hist_entry *iter;
894 int64_t cmp;
895
896 while (*p != NULL) {
897 parent = *p;
898 iter = rb_entry(parent, struct hist_entry, rb_node);
899
900 cmp = hist_entry__collapse(iter, he);
901
902 if (!cmp) {
903 iter->count += he->count;
904 hist_entry__free(he);
905 return;
906 }
907
908 if (cmp < 0)
909 p = &(*p)->rb_left;
910 else
911 p = &(*p)->rb_right;
912 }
913
914 rb_link_node(&he->rb_node, parent, p);
915 rb_insert_color(&he->rb_node, &collapse_hists);
916}
917
918static void collapse__resort(void)
919{
920 struct rb_node *next;
921 struct hist_entry *n;
922
923 if (!sort__need_collapse)
924 return;
925
926 next = rb_first(&hist);
927 while (next) {
928 n = rb_entry(next, struct hist_entry, rb_node);
929 next = rb_next(&n->rb_node);
930
931 rb_erase(&n->rb_node, &hist);
932 collapse__insert_entry(n);
933 }
934}
935
936/*
937 * reverse the map, sort on count.
938 */
939
940static struct rb_root output_hists;
941
942static void output__insert_entry(struct hist_entry *he, u64 min_callchain_hits)
943{
944 struct rb_node **p = &output_hists.rb_node;
945 struct rb_node *parent = NULL;
946 struct hist_entry *iter;
947
948 if (callchain)
949 callchain_param.sort(&he->sorted_chain, &he->callchain,
950 min_callchain_hits, &callchain_param);
951
952 while (*p != NULL) {
953 parent = *p;
954 iter = rb_entry(parent, struct hist_entry, rb_node);
955
956 if (he->count > iter->count)
957 p = &(*p)->rb_left;
958 else
959 p = &(*p)->rb_right;
960 }
961
962 rb_link_node(&he->rb_node, parent, p);
963 rb_insert_color(&he->rb_node, &output_hists);
964}
965
966static void output__resort(u64 total_samples)
967{
968 struct rb_node *next;
969 struct hist_entry *n;
970 struct rb_root *tree = &hist;
971 u64 min_callchain_hits;
972
973 min_callchain_hits = total_samples * (callchain_param.min_percent / 100);
974
975 if (sort__need_collapse)
976 tree = &collapse_hists;
977
978 next = rb_first(tree);
979
980 while (next) {
981 n = rb_entry(next, struct hist_entry, rb_node);
982 next = rb_next(&n->rb_node);
983
984 rb_erase(&n->rb_node, tree);
985 output__insert_entry(n, min_callchain_hits);
986 }
987}
988
989static size_t output__fprintf(FILE *fp, u64 total_samples) 499static size_t output__fprintf(FILE *fp, u64 total_samples)
990{ 500{
991 struct hist_entry *pos; 501 struct hist_entry *pos;
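The long block removed above is the classic insert-or-accumulate histogram that this series moves behind __hist_entry__add(): walk the rbtree with the sort comparator, bump the count on an exact match, otherwise link and rebalance a fresh node. The same idea in miniature, with an unbalanced binary search tree keyed on ip standing in for the rbtree keyed on the sort list:

```c
#include <stdio.h>
#include <stdlib.h>

struct entry {
	unsigned long ip;
	unsigned long count;
	struct entry *left, *right;
};

static struct entry *hist_add(struct entry *root, unsigned long ip,
			      unsigned long count)
{
	if (root == NULL) {
		root = calloc(1, sizeof(*root));
		root->ip = ip;
		root->count = count;
	} else if (ip < root->ip)
		root->left = hist_add(root->left, ip, count);
	else if (ip > root->ip)
		root->right = hist_add(root->right, ip, count);
	else
		root->count += count;	/* existing entry: accumulate */
	return root;
}

static void hist_print(const struct entry *e)
{
	if (e == NULL)
		return;
	hist_print(e->left);
	printf("%#lx: %lu\n", e->ip, e->count);
	hist_print(e->right);
}

int main(void)
{
	struct entry *root = NULL;

	root = hist_add(root, 0x400100, 1);
	root = hist_add(root, 0x400200, 1);
	root = hist_add(root, 0x400100, 3);	/* merges with the first */
	hist_print(root);
	return 0;
}
```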
@@ -1080,13 +590,6 @@ print_entries:
1080 return ret; 590 return ret;
1081} 591}
1082 592
1083static unsigned long total = 0,
1084 total_mmap = 0,
1085 total_comm = 0,
1086 total_fork = 0,
1087 total_unknown = 0,
1088 total_lost = 0;
1089
1090static int validate_chain(struct ip_callchain *chain, event_t *event) 593static int validate_chain(struct ip_callchain *chain, event_t *event)
1091{ 594{
1092 unsigned int chain_size; 595 unsigned int chain_size;
@@ -1100,30 +603,22 @@ static int validate_chain(struct ip_callchain *chain, event_t *event)
1100 return 0; 603 return 0;
1101} 604}
1102 605
1103static int 606static int process_sample_event(event_t *event)
1104process_sample_event(event_t *event, unsigned long offset, unsigned long head)
1105{ 607{
1106 char level;
1107 int show = 0;
1108 struct dso *dso = NULL;
1109 struct thread *thread;
1110 u64 ip = event->ip.ip; 608 u64 ip = event->ip.ip;
1111 u64 period = 1; 609 u64 period = 1;
1112 struct map *map = NULL;
1113 void *more_data = event->ip.__more_data; 610 void *more_data = event->ip.__more_data;
1114 struct ip_callchain *chain = NULL; 611 struct ip_callchain *chain = NULL;
1115 int cpumode; 612 int cpumode;
1116 613 struct addr_location al;
1117 thread = threads__findnew(event->ip.pid, &threads, &last_match); 614 struct thread *thread = threads__findnew(event->ip.pid);
1118 615
1119 if (sample_type & PERF_SAMPLE_PERIOD) { 616 if (sample_type & PERF_SAMPLE_PERIOD) {
1120 period = *(u64 *)more_data; 617 period = *(u64 *)more_data;
1121 more_data += sizeof(u64); 618 more_data += sizeof(u64);
1122 } 619 }
1123 620
1124 dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", 621 dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
1125 (void *)(offset + head),
1126 (void *)(long)(event->header.size),
1127 event->header.misc, 622 event->header.misc,
1128 event->ip.pid, event->ip.tid, 623 event->ip.pid, event->ip.tid,
1129 (void *)(long)ip, 624 (void *)(long)ip,
@@ -1137,7 +632,8 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
1137 dump_printf("... chain: nr:%Lu\n", chain->nr); 632 dump_printf("... chain: nr:%Lu\n", chain->nr);
1138 633
1139 if (validate_chain(chain, event) < 0) { 634 if (validate_chain(chain, event) < 0) {
1140 eprintf("call-chain problem with event, skipping it.\n"); 635 pr_debug("call-chain problem with event, "
636 "skipping it.\n");
1141 return 0; 637 return 0;
1142 } 638 }
1143 639
@@ -1147,163 +643,64 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
1147 } 643 }
1148 } 644 }
1149 645
1150 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
1151
1152 if (thread == NULL) { 646 if (thread == NULL) {
1153 eprintf("problem processing %d event, skipping it.\n", 647 pr_debug("problem processing %d event, skipping it.\n",
1154 event->header.type); 648 event->header.type);
1155 return -1; 649 return -1;
1156 } 650 }
1157 651
652 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
653
1158 if (comm_list && !strlist__has_entry(comm_list, thread->comm)) 654 if (comm_list && !strlist__has_entry(comm_list, thread->comm))
1159 return 0; 655 return 0;
1160 656
1161 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 657 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1162 658
1163 if (cpumode == PERF_RECORD_MISC_KERNEL) { 659 thread__find_addr_location(thread, cpumode,
1164 show = SHOW_KERNEL; 660 MAP__FUNCTION, ip, &al, NULL);
1165 level = 'k'; 661 /*
1166 662 * We have to do this here as we may have a dso with no symbol hit that
1167 dso = kernel_dso; 663 * has a name longer than the ones with symbols sampled.
1168 664 */
1169 dump_printf(" ...... dso: %s\n", dso->name); 665 if (al.map && !sort_dso.elide && !al.map->dso->slen_calculated)
1170 666 dso__calc_col_width(al.map->dso);
1171 } else if (cpumode == PERF_RECORD_MISC_USER) { 667
1172 668 if (dso_list &&
1173 show = SHOW_USER; 669 (!al.map || !al.map->dso ||
1174 level = '.'; 670 !(strlist__has_entry(dso_list, al.map->dso->short_name) ||
1175 671 (al.map->dso->short_name != al.map->dso->long_name &&
1176 } else { 672 strlist__has_entry(dso_list, al.map->dso->long_name)))))
1177 show = SHOW_HV; 673 return 0;
1178 level = 'H';
1179
1180 dso = hypervisor_dso;
1181
1182 dump_printf(" ...... dso: [hypervisor]\n");
1183 }
1184
1185 if (show & show_mask) {
1186 struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip);
1187
1188 if (dso_list && (!dso || !dso->name ||
1189 !strlist__has_entry(dso_list, dso->name)))
1190 return 0;
1191
1192 if (sym_list && (!sym || !strlist__has_entry(sym_list, sym->name)))
1193 return 0;
1194
1195 if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) {
1196 eprintf("problem incrementing symbol count, skipping event\n");
1197 return -1;
1198 }
1199 }
1200 total += period;
1201
1202 return 0;
1203}
1204 674
1205static int 675 if (sym_list && al.sym && !strlist__has_entry(sym_list, al.sym->name))
1206process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
1207{
1208 struct thread *thread;
1209 struct map *map = map__new(&event->mmap, cwd, cwdlen);
1210
1211 thread = threads__findnew(event->mmap.pid, &threads, &last_match);
1212
1213 dump_printf("%p [%p]: PERF_RECORD_MMAP %d/%d: [%p(%p) @ %p]: %s\n",
1214 (void *)(offset + head),
1215 (void *)(long)(event->header.size),
1216 event->mmap.pid,
1217 event->mmap.tid,
1218 (void *)(long)event->mmap.start,
1219 (void *)(long)event->mmap.len,
1220 (void *)(long)event->mmap.pgoff,
1221 event->mmap.filename);
1222
1223 if (thread == NULL || map == NULL) {
1224 dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
1225 return 0; 676 return 0;
677
678 if (hist_entry__add(&al, chain, period)) {
679 pr_debug("problem incrementing symbol count, skipping event\n");
680 return -1;
1226 } 681 }
1227 682
1228 thread__insert_map(thread, map); 683 event__stats.total += period;
1229 total_mmap++;
1230 684
1231 return 0; 685 return 0;
1232} 686}
1233 687
1234static int 688static int process_comm_event(event_t *event)
1235process_comm_event(event_t *event, unsigned long offset, unsigned long head)
1236{ 689{
1237 struct thread *thread; 690 struct thread *thread = threads__findnew(event->comm.pid);
1238
1239 thread = threads__findnew(event->comm.pid, &threads, &last_match);
1240 691
1241 dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n", 692 dump_printf(": %s:%d\n", event->comm.comm, event->comm.pid);
1242 (void *)(offset + head),
1243 (void *)(long)(event->header.size),
1244 event->comm.comm, event->comm.pid);
1245 693
1246 if (thread == NULL || 694 if (thread == NULL ||
1247 thread__set_comm_adjust(thread, event->comm.comm)) { 695 thread__set_comm_adjust(thread, event->comm.comm)) {
1248 dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); 696 dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
1249 return -1; 697 return -1;
1250 } 698 }
1251 total_comm++;
1252
1253 return 0;
1254}
1255
1256static int
1257process_task_event(event_t *event, unsigned long offset, unsigned long head)
1258{
1259 struct thread *thread;
1260 struct thread *parent;
1261
1262 thread = threads__findnew(event->fork.pid, &threads, &last_match);
1263 parent = threads__findnew(event->fork.ppid, &threads, &last_match);
1264
1265 dump_printf("%p [%p]: PERF_RECORD_%s: (%d:%d):(%d:%d)\n",
1266 (void *)(offset + head),
1267 (void *)(long)(event->header.size),
1268 event->header.type == PERF_RECORD_FORK ? "FORK" : "EXIT",
1269 event->fork.pid, event->fork.tid,
1270 event->fork.ppid, event->fork.ptid);
1271
1272 /*
1273 * A thread clone will have the same PID for both
1274 * parent and child.
1275 */
1276 if (thread == parent)
1277 return 0;
1278
1279 if (event->header.type == PERF_RECORD_EXIT)
1280 return 0;
1281
1282 if (!thread || !parent || thread__fork(thread, parent)) {
1283 dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
1284 return -1;
1285 }
1286 total_fork++;
1287 699
1288 return 0; 700 return 0;
1289} 701}
1290 702
1291static int 703static int process_read_event(event_t *event)
1292process_lost_event(event_t *event, unsigned long offset, unsigned long head)
1293{
1294 dump_printf("%p [%p]: PERF_RECORD_LOST: id:%Ld: lost:%Ld\n",
1295 (void *)(offset + head),
1296 (void *)(long)(event->header.size),
1297 event->lost.id,
1298 event->lost.lost);
1299
1300 total_lost += event->lost.lost;
1301
1302 return 0;
1303}
1304
1305static int
1306process_read_event(event_t *event, unsigned long offset, unsigned long head)
1307{ 704{
1308 struct perf_event_attr *attr; 705 struct perf_event_attr *attr;
1309 706
@@ -1319,238 +716,91 @@ process_read_event(event_t *event, unsigned long offset, unsigned long head)
1319 event->read.value); 716 event->read.value);
1320 } 717 }
1321 718
1322 dump_printf("%p [%p]: PERF_RECORD_READ: %d %d %s %Lu\n", 719 dump_printf(": %d %d %s %Lu\n", event->read.pid, event->read.tid,
1323 (void *)(offset + head), 720 attr ? __event_name(attr->type, attr->config) : "FAIL",
1324 (void *)(long)(event->header.size), 721 event->read.value);
1325 event->read.pid,
1326 event->read.tid,
1327 attr ? __event_name(attr->type, attr->config)
1328 : "FAIL",
1329 event->read.value);
1330
1331 return 0;
1332}
1333
1334static int
1335process_event(event_t *event, unsigned long offset, unsigned long head)
1336{
1337 trace_event(event);
1338
1339 switch (event->header.type) {
1340 case PERF_RECORD_SAMPLE:
1341 return process_sample_event(event, offset, head);
1342
1343 case PERF_RECORD_MMAP:
1344 return process_mmap_event(event, offset, head);
1345
1346 case PERF_RECORD_COMM:
1347 return process_comm_event(event, offset, head);
1348
1349 case PERF_RECORD_FORK:
1350 case PERF_RECORD_EXIT:
1351 return process_task_event(event, offset, head);
1352
1353 case PERF_RECORD_LOST:
1354 return process_lost_event(event, offset, head);
1355
1356 case PERF_RECORD_READ:
1357 return process_read_event(event, offset, head);
1358
1359 /*
1360 * We dont process them right now but they are fine:
1361 */
1362
1363 case PERF_RECORD_THROTTLE:
1364 case PERF_RECORD_UNTHROTTLE:
1365 return 0;
1366
1367 default:
1368 return -1;
1369 }
1370 722
1371 return 0; 723 return 0;
1372} 724}
1373 725
1374static int __cmd_report(void) 726static int sample_type_check(u64 type)
1375{ 727{
1376 int ret, rc = EXIT_FAILURE; 728 sample_type = type;
1377 unsigned long offset = 0;
1378 unsigned long head, shift;
1379 struct stat input_stat;
1380 struct thread *idle;
1381 event_t *event;
1382 uint32_t size;
1383 char *buf;
1384
1385 idle = register_idle_thread(&threads, &last_match);
1386 thread__comm_adjust(idle);
1387
1388 if (show_threads)
1389 perf_read_values_init(&show_threads_values);
1390
1391 input = open(input_name, O_RDONLY);
1392 if (input < 0) {
1393 fprintf(stderr, " failed to open file: %s", input_name);
1394 if (!strcmp(input_name, "perf.data"))
1395 fprintf(stderr, " (try 'perf record' first)");
1396 fprintf(stderr, "\n");
1397 exit(-1);
1398 }
1399
1400 ret = fstat(input, &input_stat);
1401 if (ret < 0) {
1402 perror("failed to stat file");
1403 exit(-1);
1404 }
1405
1406 if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
1407 fprintf(stderr, "file: %s not owned by current user or root\n", input_name);
1408 exit(-1);
1409 }
1410
1411 if (!input_stat.st_size) {
1412 fprintf(stderr, "zero-sized file, nothing to do!\n");
1413 exit(0);
1414 }
1415
1416 header = perf_header__read(input);
1417 head = header->data_offset;
1418
1419 sample_type = perf_header__sample_type(header);
1420 729
1421 if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) { 730 if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
1422 if (sort__has_parent) { 731 if (sort__has_parent) {
1423 fprintf(stderr, "selected --sort parent, but no" 732 fprintf(stderr, "selected --sort parent, but no"
1424 " callchain data. Did you call" 733 " callchain data. Did you call"
1425 " perf record without -g?\n"); 734 " perf record without -g?\n");
1426 exit(-1); 735 return -1;
1427 } 736 }
1428 if (callchain) { 737 if (callchain) {
1429 fprintf(stderr, "selected -g but no callchain data." 738 fprintf(stderr, "selected -g but no callchain data."
1430 " Did you call perf record without" 739 " Did you call perf record without"
1431 " -g?\n"); 740 " -g?\n");
1432 exit(-1); 741 return -1;
1433 } 742 }
1434 } else if (callchain_param.mode != CHAIN_NONE && !callchain) { 743 } else if (callchain_param.mode != CHAIN_NONE && !callchain) {
1435 callchain = 1; 744 callchain = 1;
1436 if (register_callchain_param(&callchain_param) < 0) { 745 if (register_callchain_param(&callchain_param) < 0) {
1437 fprintf(stderr, "Can't register callchain" 746 fprintf(stderr, "Can't register callchain"
1438 " params\n"); 747 " params\n");
1439 exit(-1); 748 return -1;
1440 } 749 }
1441 } 750 }
1442 751
1443 if (load_kernel() < 0) { 752 return 0;
1444 perror("failed to load kernel symbols"); 753}
1445 return EXIT_FAILURE;
1446 }
1447
1448 if (!full_paths) {
1449 if (getcwd(__cwd, sizeof(__cwd)) == NULL) {
1450 perror("failed to get the current directory");
1451 return EXIT_FAILURE;
1452 }
1453 cwdlen = strlen(cwd);
1454 } else {
1455 cwd = NULL;
1456 cwdlen = 0;
1457 }
1458
1459 shift = page_size * (head / page_size);
1460 offset += shift;
1461 head -= shift;
1462
1463remap:
1464 buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
1465 MAP_SHARED, input, offset);
1466 if (buf == MAP_FAILED) {
1467 perror("failed to mmap file");
1468 exit(-1);
1469 }
1470
1471more:
1472 event = (event_t *)(buf + head);
1473
1474 size = event->header.size;
1475 if (!size)
1476 size = 8;
1477
1478 if (head + event->header.size >= page_size * mmap_window) {
1479 int munmap_ret;
1480
1481 shift = page_size * (head / page_size);
1482
1483 munmap_ret = munmap(buf, page_size * mmap_window);
1484 assert(munmap_ret == 0);
1485
1486 offset += shift;
1487 head -= shift;
1488 goto remap;
1489 }
1490
1491 size = event->header.size;
1492
1493 dump_printf("\n%p [%p]: event: %d\n",
1494 (void *)(offset + head),
1495 (void *)(long)event->header.size,
1496 event->header.type);
1497
1498 if (!size || process_event(event, offset, head) < 0) {
1499
1500 dump_printf("%p [%p]: skipping unknown header type: %d\n",
1501 (void *)(offset + head),
1502 (void *)(long)(event->header.size),
1503 event->header.type);
1504
1505 total_unknown++;
1506
1507 /*
1508 * assume we lost track of the stream, check alignment, and
1509 * increment a single u64 in the hope to catch on again 'soon'.
1510 */
1511 754
1512 if (unlikely(head & 7)) 755static struct perf_file_handler file_handler = {
1513 head &= ~7ULL; 756 .process_sample_event = process_sample_event,
757 .process_mmap_event = event__process_mmap,
758 .process_comm_event = process_comm_event,
759 .process_exit_event = event__process_task,
760 .process_fork_event = event__process_task,
761 .process_lost_event = event__process_lost,
762 .process_read_event = process_read_event,
763 .sample_type_check = sample_type_check,
764};
1514 765
1515 size = 8;
1516 }
1517 766
1518 head += size; 767static int __cmd_report(void)
768{
769 struct thread *idle;
770 int ret;
1519 771
1520 if (offset + head >= header->data_offset + header->data_size) 772 idle = register_idle_thread();
1521 goto done; 773 thread__comm_adjust(idle);
1522 774
1523 if (offset + head < (unsigned long)input_stat.st_size) 775 if (show_threads)
1524 goto more; 776 perf_read_values_init(&show_threads_values);
1525 777
1526done: 778 register_perf_file_handler(&file_handler);
1527 rc = EXIT_SUCCESS;
1528 close(input);
1529 779
1530 dump_printf(" IP events: %10ld\n", total); 780 ret = mmap_dispatch_perf_file(&header, input_name, force,
1531 dump_printf(" mmap events: %10ld\n", total_mmap); 781 full_paths, &event__cwdlen, &event__cwd);
1532 dump_printf(" comm events: %10ld\n", total_comm); 782 if (ret)
1533 dump_printf(" fork events: %10ld\n", total_fork); 783 return ret;
1534 dump_printf(" lost events: %10ld\n", total_lost);
1535 dump_printf(" unknown events: %10ld\n", total_unknown);
1536 784
1537 if (dump_trace) 785 if (dump_trace) {
786 event__print_totals();
1538 return 0; 787 return 0;
788 }
1539 789
1540 if (verbose >= 3) 790 if (verbose > 3)
1541 threads__fprintf(stdout, &threads); 791 threads__fprintf(stdout);
1542 792
1543 if (verbose >= 2) 793 if (verbose > 2)
1544 dsos__fprintf(stdout); 794 dsos__fprintf(stdout);
1545 795
1546 collapse__resort(); 796 collapse__resort();
1547 output__resort(total); 797 output__resort(event__stats.total);
1548 output__fprintf(stdout, total); 798 output__fprintf(stdout, event__stats.total);
1549 799
1550 if (show_threads) 800 if (show_threads)
1551 perf_read_values_destroy(&show_threads_values); 801 perf_read_values_destroy(&show_threads_values);
1552 802
1553 return rc; 803 return ret;
1554} 804}
1555 805
1556static int 806static int
@@ -1606,7 +856,8 @@ setup:
1606 return 0; 856 return 0;
1607} 857}
1608 858
1609static const char * const report_usage[] = { 859//static const char * const report_usage[] = {
860const char * const report_usage[] = {
1610 "perf report [<options>] <command>", 861 "perf report [<options>] <command>",
1611 NULL 862 NULL
1612}; 863};
@@ -1618,9 +869,10 @@ static const struct option options[] = {
1618 "be more verbose (show symbol address, etc)"), 869 "be more verbose (show symbol address, etc)"),
1619 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 870 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
1620 "dump raw trace in ASCII"), 871 "dump raw trace in ASCII"),
1621 OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"), 872 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
873 "file", "vmlinux pathname"),
1622 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), 874 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
1623 OPT_BOOLEAN('m', "modules", &modules, 875 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
1624 "load module symbols - WARNING: use only with -k and LIVE kernel"), 876 "load module symbols - WARNING: use only with -k and LIVE kernel"),
1625 OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples, 877 OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples,
1626 "Show a column with the number of samples"), 878 "Show a column with the number of samples"),
@@ -1690,9 +942,8 @@ static void setup_list(struct strlist **list, const char *list_str,
1690 942
1691int cmd_report(int argc, const char **argv, const char *prefix __used) 943int cmd_report(int argc, const char **argv, const char *prefix __used)
1692{ 944{
1693 symbol__init(); 945 if (symbol__init(&symbol_conf) < 0)
1694 946 return -1;
1695 page_size = getpagesize();
1696 947
1697 argc = parse_options(argc, argv, options, report_usage, 0); 948 argc = parse_options(argc, argv, options, report_usage, 0);
1698 949
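
The builtin-report conversion above swaps the hand-rolled open/fstat/mmap event loop for callbacks registered through struct perf_file_handler plus a single mmap_dispatch_perf_file() call (the loop itself moves into util/data_map.c, which is outside this hunk). Below is a minimal standalone sketch of that dispatch pattern; the record layout, handler names, and dispatch() are simplified stand-ins invented for illustration, not the real perf API.

#include <stdio.h>
#include <stdint.h>

/* Simplified record header, in the spirit of struct perf_event_header. */
struct rec_header {
	uint32_t type;
	uint16_t misc;
	uint16_t size;
};

enum { REC_LOST = 2, REC_COMM = 3, REC_SAMPLE = 9 };

/* Hypothetical handler table, mirroring struct perf_file_handler. */
struct file_handler {
	int (*process_sample_event)(const struct rec_header *);
	int (*process_comm_event)(const struct rec_header *);
	int (*process_lost_event)(const struct rec_header *);
};

static const struct file_handler *handler;

static void register_handler(const struct file_handler *h)
{
	handler = h;
}

/* Route one record to the registered callback, if any is set. */
static int dispatch(const struct rec_header *rec)
{
	switch (rec->type) {
	case REC_SAMPLE:
		return handler->process_sample_event ?
		       handler->process_sample_event(rec) : 0;
	case REC_COMM:
		return handler->process_comm_event ?
		       handler->process_comm_event(rec) : 0;
	case REC_LOST:
		return handler->process_lost_event ?
		       handler->process_lost_event(rec) : 0;
	default:
		return -1;	/* unknown type: caller resyncs the stream */
	}
}

static int count_sample(const struct rec_header *rec)
{
	printf("sample, %u bytes\n", (unsigned)rec->size);
	return 0;
}

int main(void)
{
	static const struct file_handler h = {
		.process_sample_event = count_sample,
	};
	struct rec_header rec = { .type = REC_SAMPLE, .size = 32 };

	register_handler(&h);
	return dispatch(&rec) < 0;
}

Each tool now supplies only the callbacks it cares about, and the shared loop owns the mmap window, alignment resync, and totals that every builtin used to duplicate.
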
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index ce2d5be4f30e..26b782f26ee1 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -11,6 +11,7 @@
11#include "util/trace-event.h" 11#include "util/trace-event.h"
12 12
13#include "util/debug.h" 13#include "util/debug.h"
14#include "util/data_map.h"
14 15
15#include <sys/types.h> 16#include <sys/types.h>
16#include <sys/prctl.h> 17#include <sys/prctl.h>
@@ -20,14 +21,6 @@
20#include <math.h> 21#include <math.h>
21 22
22static char const *input_name = "perf.data"; 23static char const *input_name = "perf.data";
23static int input;
24static unsigned long page_size;
25static unsigned long mmap_window = 32;
26
27static unsigned long total_comm = 0;
28
29static struct rb_root threads;
30static struct thread *last_match;
31 24
32static struct perf_header *header; 25static struct perf_header *header;
33static u64 sample_type; 26static u64 sample_type;
@@ -35,11 +28,11 @@ static u64 sample_type;
35static char default_sort_order[] = "avg, max, switch, runtime"; 28static char default_sort_order[] = "avg, max, switch, runtime";
36static char *sort_order = default_sort_order; 29static char *sort_order = default_sort_order;
37 30
31static int profile_cpu = -1;
32
38#define PR_SET_NAME 15 /* Set process name */ 33#define PR_SET_NAME 15 /* Set process name */
39#define MAX_CPUS 4096 34#define MAX_CPUS 4096
40 35
41#define BUG_ON(x) assert(!(x))
42
43static u64 run_measurement_overhead; 36static u64 run_measurement_overhead;
44static u64 sleep_measurement_overhead; 37static u64 sleep_measurement_overhead;
45 38
@@ -74,6 +67,7 @@ enum sched_event_type {
74 SCHED_EVENT_RUN, 67 SCHED_EVENT_RUN,
75 SCHED_EVENT_SLEEP, 68 SCHED_EVENT_SLEEP,
76 SCHED_EVENT_WAKEUP, 69 SCHED_EVENT_WAKEUP,
70 SCHED_EVENT_MIGRATION,
77}; 71};
78 72
79struct sched_atom { 73struct sched_atom {
@@ -226,7 +220,7 @@ static void calibrate_sleep_measurement_overhead(void)
226static struct sched_atom * 220static struct sched_atom *
227get_new_event(struct task_desc *task, u64 timestamp) 221get_new_event(struct task_desc *task, u64 timestamp)
228{ 222{
229 struct sched_atom *event = calloc(1, sizeof(*event)); 223 struct sched_atom *event = zalloc(sizeof(*event));
230 unsigned long idx = task->nr_events; 224 unsigned long idx = task->nr_events;
231 size_t size; 225 size_t size;
232 226
@@ -294,7 +288,7 @@ add_sched_event_wakeup(struct task_desc *task, u64 timestamp,
294 return; 288 return;
295 } 289 }
296 290
297 wakee_event->wait_sem = calloc(1, sizeof(*wakee_event->wait_sem)); 291 wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem));
298 sem_init(wakee_event->wait_sem, 0, 0); 292 sem_init(wakee_event->wait_sem, 0, 0);
299 wakee_event->specific_wait = 1; 293 wakee_event->specific_wait = 1;
300 event->wait_sem = wakee_event->wait_sem; 294 event->wait_sem = wakee_event->wait_sem;
@@ -324,7 +318,7 @@ static struct task_desc *register_pid(unsigned long pid, const char *comm)
324 if (task) 318 if (task)
325 return task; 319 return task;
326 320
327 task = calloc(1, sizeof(*task)); 321 task = zalloc(sizeof(*task));
328 task->pid = pid; 322 task->pid = pid;
329 task->nr = nr_tasks; 323 task->nr = nr_tasks;
330 strcpy(task->comm, comm); 324 strcpy(task->comm, comm);
@@ -398,6 +392,8 @@ process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom)
398 ret = sem_post(atom->wait_sem); 392 ret = sem_post(atom->wait_sem);
399 BUG_ON(ret); 393 BUG_ON(ret);
400 break; 394 break;
395 case SCHED_EVENT_MIGRATION:
396 break;
401 default: 397 default:
402 BUG_ON(1); 398 BUG_ON(1);
403 } 399 }
@@ -632,29 +628,6 @@ static void test_calibrations(void)
632 printf("the sleep test took %Ld nsecs\n", T1-T0); 628 printf("the sleep test took %Ld nsecs\n", T1-T0);
633} 629}
634 630
635static int
636process_comm_event(event_t *event, unsigned long offset, unsigned long head)
637{
638 struct thread *thread;
639
640 thread = threads__findnew(event->comm.pid, &threads, &last_match);
641
642 dump_printf("%p [%p]: perf_event_comm: %s:%d\n",
643 (void *)(offset + head),
644 (void *)(long)(event->header.size),
645 event->comm.comm, event->comm.pid);
646
647 if (thread == NULL ||
648 thread__set_comm(thread, event->comm.comm)) {
649 dump_printf("problem processing perf_event_comm, skipping event.\n");
650 return -1;
651 }
652 total_comm++;
653
654 return 0;
655}
656
657
658struct raw_event_sample { 631struct raw_event_sample {
659 u32 size; 632 u32 size;
660 char data[0]; 633 char data[0];
@@ -745,6 +718,22 @@ struct trace_fork_event {
745 u32 child_pid; 718 u32 child_pid;
746}; 719};
747 720
721struct trace_migrate_task_event {
722 u32 size;
723
724 u16 common_type;
725 u8 common_flags;
726 u8 common_preempt_count;
727 u32 common_pid;
728 u32 common_tgid;
729
730 char comm[16];
731 u32 pid;
732
733 u32 prio;
734 u32 cpu;
735};
736
748struct trace_sched_handler { 737struct trace_sched_handler {
749 void (*switch_event)(struct trace_switch_event *, 738 void (*switch_event)(struct trace_switch_event *,
750 struct event *, 739 struct event *,
@@ -769,6 +758,12 @@ struct trace_sched_handler {
769 int cpu, 758 int cpu,
770 u64 timestamp, 759 u64 timestamp,
771 struct thread *thread); 760 struct thread *thread);
761
762 void (*migrate_task_event)(struct trace_migrate_task_event *,
763 struct event *,
764 int cpu,
765 u64 timestamp,
766 struct thread *thread);
772}; 767};
773 768
774 769
@@ -941,9 +936,7 @@ __thread_latency_insert(struct rb_root *root, struct work_atoms *data,
941 936
942static void thread_atoms_insert(struct thread *thread) 937static void thread_atoms_insert(struct thread *thread)
943{ 938{
944 struct work_atoms *atoms; 939 struct work_atoms *atoms = zalloc(sizeof(*atoms));
945
946 atoms = calloc(sizeof(*atoms), 1);
947 if (!atoms) 940 if (!atoms)
948 die("No memory"); 941 die("No memory");
949 942
@@ -975,9 +968,7 @@ add_sched_out_event(struct work_atoms *atoms,
975 char run_state, 968 char run_state,
976 u64 timestamp) 969 u64 timestamp)
977{ 970{
978 struct work_atom *atom; 971 struct work_atom *atom = zalloc(sizeof(*atom));
979
980 atom = calloc(sizeof(*atom), 1);
981 if (!atom) 972 if (!atom)
 982 		die("No memory"); 973 		die("No memory");
983 974
@@ -1058,8 +1049,8 @@ latency_switch_event(struct trace_switch_event *switch_event,
1058 die("hm, delta: %Ld < 0 ?\n", delta); 1049 die("hm, delta: %Ld < 0 ?\n", delta);
1059 1050
1060 1051
1061 sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match); 1052 sched_out = threads__findnew(switch_event->prev_pid);
1062 sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match); 1053 sched_in = threads__findnew(switch_event->next_pid);
1063 1054
1064 out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); 1055 out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
1065 if (!out_events) { 1056 if (!out_events) {
@@ -1092,13 +1083,10 @@ latency_runtime_event(struct trace_runtime_event *runtime_event,
1092 u64 timestamp, 1083 u64 timestamp,
1093 struct thread *this_thread __used) 1084 struct thread *this_thread __used)
1094{ 1085{
1095 struct work_atoms *atoms; 1086 struct thread *thread = threads__findnew(runtime_event->pid);
1096 struct thread *thread; 1087 struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
1097 1088
1098 BUG_ON(cpu >= MAX_CPUS || cpu < 0); 1089 BUG_ON(cpu >= MAX_CPUS || cpu < 0);
1099
1100 thread = threads__findnew(runtime_event->pid, &threads, &last_match);
1101 atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
1102 if (!atoms) { 1090 if (!atoms) {
1103 thread_atoms_insert(thread); 1091 thread_atoms_insert(thread);
1104 atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); 1092 atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
@@ -1125,7 +1113,7 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
1125 if (!wakeup_event->success) 1113 if (!wakeup_event->success)
1126 return; 1114 return;
1127 1115
1128 wakee = threads__findnew(wakeup_event->pid, &threads, &last_match); 1116 wakee = threads__findnew(wakeup_event->pid);
1129 atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); 1117 atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
1130 if (!atoms) { 1118 if (!atoms) {
1131 thread_atoms_insert(wakee); 1119 thread_atoms_insert(wakee);
@@ -1139,7 +1127,12 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
1139 1127
1140 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1128 atom = list_entry(atoms->work_list.prev, struct work_atom, list);
1141 1129
1142 if (atom->state != THREAD_SLEEPING) 1130 /*
1131 * You WILL be missing events if you've recorded only
1132	 * one CPU, or are only looking at one, so don't
1133 * make useless noise.
1134 */
1135 if (profile_cpu == -1 && atom->state != THREAD_SLEEPING)
1143 nr_state_machine_bugs++; 1136 nr_state_machine_bugs++;
1144 1137
1145 nr_timestamps++; 1138 nr_timestamps++;
@@ -1152,11 +1145,51 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
1152 atom->wake_up_time = timestamp; 1145 atom->wake_up_time = timestamp;
1153} 1146}
1154 1147
1148static void
1149latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event,
1150 struct event *__event __used,
1151 int cpu __used,
1152 u64 timestamp,
1153 struct thread *thread __used)
1154{
1155 struct work_atoms *atoms;
1156 struct work_atom *atom;
1157 struct thread *migrant;
1158
1159 /*
1160 * Only need to worry about migration when profiling one CPU.
1161 */
1162 if (profile_cpu == -1)
1163 return;
1164
1165 migrant = threads__findnew(migrate_task_event->pid);
1166 atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
1167 if (!atoms) {
1168 thread_atoms_insert(migrant);
1169 register_pid(migrant->pid, migrant->comm);
1170 atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
1171 if (!atoms)
1172 die("migration-event: Internal tree error");
1173 add_sched_out_event(atoms, 'R', timestamp);
1174 }
1175
1176 BUG_ON(list_empty(&atoms->work_list));
1177
1178 atom = list_entry(atoms->work_list.prev, struct work_atom, list);
1179 atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp;
1180
1181 nr_timestamps++;
1182
1183 if (atom->sched_out_time > timestamp)
1184 nr_unordered_timestamps++;
1185}
1186
1155static struct trace_sched_handler lat_ops = { 1187static struct trace_sched_handler lat_ops = {
1156 .wakeup_event = latency_wakeup_event, 1188 .wakeup_event = latency_wakeup_event,
1157 .switch_event = latency_switch_event, 1189 .switch_event = latency_switch_event,
1158 .runtime_event = latency_runtime_event, 1190 .runtime_event = latency_runtime_event,
1159 .fork_event = latency_fork_event, 1191 .fork_event = latency_fork_event,
1192 .migrate_task_event = latency_migrate_task_event,
1160}; 1193};
1161 1194
1162static void output_lat_thread(struct work_atoms *work_list) 1195static void output_lat_thread(struct work_atoms *work_list)
@@ -1385,8 +1418,8 @@ map_switch_event(struct trace_switch_event *switch_event,
1385 die("hm, delta: %Ld < 0 ?\n", delta); 1418 die("hm, delta: %Ld < 0 ?\n", delta);
1386 1419
1387 1420
1388 sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match); 1421 sched_out = threads__findnew(switch_event->prev_pid);
1389 sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match); 1422 sched_in = threads__findnew(switch_event->next_pid);
1390 1423
1391 curr_thread[this_cpu] = sched_in; 1424 curr_thread[this_cpu] = sched_in;
1392 1425
@@ -1517,6 +1550,26 @@ process_sched_exit_event(struct event *event,
1517} 1550}
1518 1551
1519static void 1552static void
1553process_sched_migrate_task_event(struct raw_event_sample *raw,
1554 struct event *event,
1555 int cpu __used,
1556 u64 timestamp __used,
1557 struct thread *thread __used)
1558{
1559 struct trace_migrate_task_event migrate_task_event;
1560
1561 FILL_COMMON_FIELDS(migrate_task_event, event, raw->data);
1562
1563 FILL_ARRAY(migrate_task_event, comm, event, raw->data);
1564 FILL_FIELD(migrate_task_event, pid, event, raw->data);
1565 FILL_FIELD(migrate_task_event, prio, event, raw->data);
1566 FILL_FIELD(migrate_task_event, cpu, event, raw->data);
1567
1568 if (trace_handler->migrate_task_event)
1569 trace_handler->migrate_task_event(&migrate_task_event, event, cpu, timestamp, thread);
1570}
1571
1572static void
1520process_raw_event(event_t *raw_event __used, void *more_data, 1573process_raw_event(event_t *raw_event __used, void *more_data,
1521 int cpu, u64 timestamp, struct thread *thread) 1574 int cpu, u64 timestamp, struct thread *thread)
1522{ 1575{
@@ -1539,23 +1592,23 @@ process_raw_event(event_t *raw_event __used, void *more_data,
1539 process_sched_fork_event(raw, event, cpu, timestamp, thread); 1592 process_sched_fork_event(raw, event, cpu, timestamp, thread);
1540 if (!strcmp(event->name, "sched_process_exit")) 1593 if (!strcmp(event->name, "sched_process_exit"))
1541 process_sched_exit_event(event, cpu, timestamp, thread); 1594 process_sched_exit_event(event, cpu, timestamp, thread);
1595 if (!strcmp(event->name, "sched_migrate_task"))
1596 process_sched_migrate_task_event(raw, event, cpu, timestamp, thread);
1542} 1597}
1543 1598
1544static int 1599static int process_sample_event(event_t *event)
1545process_sample_event(event_t *event, unsigned long offset, unsigned long head)
1546{ 1600{
1547 char level;
1548 int show = 0;
1549 struct dso *dso = NULL;
1550 struct thread *thread; 1601 struct thread *thread;
1551 u64 ip = event->ip.ip; 1602 u64 ip = event->ip.ip;
1552 u64 timestamp = -1; 1603 u64 timestamp = -1;
1553 u32 cpu = -1; 1604 u32 cpu = -1;
1554 u64 period = 1; 1605 u64 period = 1;
1555 void *more_data = event->ip.__more_data; 1606 void *more_data = event->ip.__more_data;
1556 int cpumode;
1557 1607
1558 thread = threads__findnew(event->ip.pid, &threads, &last_match); 1608 if (!(sample_type & PERF_SAMPLE_RAW))
1609 return 0;
1610
1611 thread = threads__findnew(event->ip.pid);
1559 1612
1560 if (sample_type & PERF_SAMPLE_TIME) { 1613 if (sample_type & PERF_SAMPLE_TIME) {
1561 timestamp = *(u64 *)more_data; 1614 timestamp = *(u64 *)more_data;
@@ -1573,177 +1626,64 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
1573 more_data += sizeof(u64); 1626 more_data += sizeof(u64);
1574 } 1627 }
1575 1628
1576 dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", 1629 dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
1577 (void *)(offset + head),
1578 (void *)(long)(event->header.size),
1579 event->header.misc, 1630 event->header.misc,
1580 event->ip.pid, event->ip.tid, 1631 event->ip.pid, event->ip.tid,
1581 (void *)(long)ip, 1632 (void *)(long)ip,
1582 (long long)period); 1633 (long long)period);
1583 1634
1584 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
1585
1586 if (thread == NULL) { 1635 if (thread == NULL) {
1587 eprintf("problem processing %d event, skipping it.\n", 1636 pr_debug("problem processing %d event, skipping it.\n",
1588 event->header.type); 1637 event->header.type);
1589 return -1; 1638 return -1;
1590 } 1639 }
1591 1640
1592 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 1641 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
1593
1594 if (cpumode == PERF_RECORD_MISC_KERNEL) {
1595 show = SHOW_KERNEL;
1596 level = 'k';
1597
1598 dso = kernel_dso;
1599
1600 dump_printf(" ...... dso: %s\n", dso->name);
1601
1602 } else if (cpumode == PERF_RECORD_MISC_USER) {
1603
1604 show = SHOW_USER;
1605 level = '.';
1606
1607 } else {
1608 show = SHOW_HV;
1609 level = 'H';
1610
1611 dso = hypervisor_dso;
1612 1642
1613 dump_printf(" ...... dso: [hypervisor]\n"); 1643 if (profile_cpu != -1 && profile_cpu != (int) cpu)
1614 } 1644 return 0;
1615 1645
1616 if (sample_type & PERF_SAMPLE_RAW) 1646 process_raw_event(event, more_data, cpu, timestamp, thread);
1617 process_raw_event(event, more_data, cpu, timestamp, thread);
1618 1647
1619 return 0; 1648 return 0;
1620} 1649}
1621 1650
1622static int 1651static int process_lost_event(event_t *event __used)
1623process_event(event_t *event, unsigned long offset, unsigned long head)
1624{ 1652{
1625 trace_event(event); 1653 nr_lost_chunks++;
1626 1654 nr_lost_events += event->lost.lost;
1627 nr_events++;
1628 switch (event->header.type) {
1629 case PERF_RECORD_MMAP:
1630 return 0;
1631 case PERF_RECORD_LOST:
1632 nr_lost_chunks++;
1633 nr_lost_events += event->lost.lost;
1634 return 0;
1635
1636 case PERF_RECORD_COMM:
1637 return process_comm_event(event, offset, head);
1638 1655
1639 case PERF_RECORD_EXIT ... PERF_RECORD_READ: 1656 return 0;
1640 return 0; 1657}
1641 1658
1642 case PERF_RECORD_SAMPLE: 1659static int sample_type_check(u64 type)
1643 return process_sample_event(event, offset, head); 1660{
1661 sample_type = type;
1644 1662
1645 case PERF_RECORD_MAX: 1663 if (!(sample_type & PERF_SAMPLE_RAW)) {
1646 default: 1664 fprintf(stderr,
1665 "No trace sample to read. Did you call perf record "
1666 "without -R?");
1647 return -1; 1667 return -1;
1648 } 1668 }
1649 1669
1650 return 0; 1670 return 0;
1651} 1671}
1652 1672
1673static struct perf_file_handler file_handler = {
1674 .process_sample_event = process_sample_event,
1675 .process_comm_event = event__process_comm,
1676 .process_lost_event = process_lost_event,
1677 .sample_type_check = sample_type_check,
1678};
1679
1653static int read_events(void) 1680static int read_events(void)
1654{ 1681{
1655 int ret, rc = EXIT_FAILURE; 1682 register_idle_thread();
1656 unsigned long offset = 0; 1683 register_perf_file_handler(&file_handler);
1657 unsigned long head = 0;
1658 struct stat perf_stat;
1659 event_t *event;
1660 uint32_t size;
1661 char *buf;
1662
1663 trace_report();
1664 register_idle_thread(&threads, &last_match);
1665
1666 input = open(input_name, O_RDONLY);
1667 if (input < 0) {
1668 perror("failed to open file");
1669 exit(-1);
1670 }
1671
1672 ret = fstat(input, &perf_stat);
1673 if (ret < 0) {
1674 perror("failed to stat file");
1675 exit(-1);
1676 }
1677
1678 if (!perf_stat.st_size) {
1679 fprintf(stderr, "zero-sized file, nothing to do!\n");
1680 exit(0);
1681 }
1682 header = perf_header__read(input);
1683 head = header->data_offset;
1684 sample_type = perf_header__sample_type(header);
1685
1686 if (!(sample_type & PERF_SAMPLE_RAW))
1687 die("No trace sample to read. Did you call perf record "
1688 "without -R?");
1689
1690 if (load_kernel() < 0) {
1691 perror("failed to load kernel symbols");
1692 return EXIT_FAILURE;
1693 }
1694
1695remap:
1696 buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
1697 MAP_SHARED, input, offset);
1698 if (buf == MAP_FAILED) {
1699 perror("failed to mmap file");
1700 exit(-1);
1701 }
1702
1703more:
1704 event = (event_t *)(buf + head);
1705
1706 size = event->header.size;
1707 if (!size)
1708 size = 8;
1709
1710 if (head + event->header.size >= page_size * mmap_window) {
1711 unsigned long shift = page_size * (head / page_size);
1712 int res;
1713
1714 res = munmap(buf, page_size * mmap_window);
1715 assert(res == 0);
1716
1717 offset += shift;
1718 head -= shift;
1719 goto remap;
1720 }
1721
1722 size = event->header.size;
1723
1724
1725 if (!size || process_event(event, offset, head) < 0) {
1726
1727 /*
1728 * assume we lost track of the stream, check alignment, and
1729 * increment a single u64 in the hope to catch on again 'soon'.
1730 */
1731
1732 if (unlikely(head & 7))
1733 head &= ~7ULL;
1734
1735 size = 8;
1736 }
1737
1738 head += size;
1739
1740 if (offset + head < (unsigned long)perf_stat.st_size)
1741 goto more;
1742
1743 rc = EXIT_SUCCESS;
1744 close(input);
1745 1684
1746 return rc; 1685 return mmap_dispatch_perf_file(&header, input_name, 0, 0,
1686 &event__cwdlen, &event__cwd);
1747} 1687}
1748 1688
1749static void print_bad_events(void) 1689static void print_bad_events(void)
@@ -1883,6 +1823,8 @@ static const struct option latency_options[] = {
1883 "sort by key(s): runtime, switch, avg, max"), 1823 "sort by key(s): runtime, switch, avg, max"),
1884 OPT_BOOLEAN('v', "verbose", &verbose, 1824 OPT_BOOLEAN('v', "verbose", &verbose,
1885 "be more verbose (show symbol address, etc)"), 1825 "be more verbose (show symbol address, etc)"),
1826 OPT_INTEGER('C', "CPU", &profile_cpu,
1827 "CPU to profile on"),
1886 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 1828 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
1887 "dump raw trace in ASCII"), 1829 "dump raw trace in ASCII"),
1888 OPT_END() 1830 OPT_END()
@@ -1960,8 +1902,7 @@ static int __cmd_record(int argc, const char **argv)
1960 1902
1961int cmd_sched(int argc, const char **argv, const char *prefix __used) 1903int cmd_sched(int argc, const char **argv, const char *prefix __used)
1962{ 1904{
1963 symbol__init(); 1905 symbol__init(0);
1964 page_size = getpagesize();
1965 1906
1966 argc = parse_options(argc, argv, sched_options, sched_usage, 1907 argc = parse_options(argc, argv, sched_options, sched_usage,
1967 PARSE_OPT_STOP_AT_NON_OPTION); 1908 PARSE_OPT_STOP_AT_NON_OPTION);
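
Several calloc(1, sizeof(*p)) and calloc(sizeof(*p), 1) call sites in the sched hunks above become zalloc(sizeof(*p)). The perf helper is presumably just a thin zero-filling wrapper along these lines; the definition below is an assumption for the sketch, not the actual util header.

#include <stdio.h>
#include <stdlib.h>

/* Assumed definition: zero-filled allocation, normalising the
 * inconsistent argument order (calloc(1, sz) vs calloc(sz, 1))
 * seen in the replaced code. */
static inline void *zalloc(size_t size)
{
	return calloc(1, size);
}

struct task_desc {
	unsigned long pid;
	char comm[16];
};

int main(void)
{
	struct task_desc *task = zalloc(sizeof(*task));

	if (!task)
		return 1;
	printf("pid=%lu comm='%s'\n", task->pid, task->comm); /* all zero */
	free(task);
	return 0;
}
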
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 3db31e7bf173..c70d72003557 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -50,15 +50,17 @@
50 50
51static struct perf_event_attr default_attrs[] = { 51static struct perf_event_attr default_attrs[] = {
52 52
53 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 53 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
54 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, 54 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
55 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, 55 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
56 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, 56 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
57 57
58 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, 58 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
59 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, 59 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
60 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES}, 60 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
61 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, 61 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
62 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES },
63 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES },
62 64
63}; 65};
64 66
@@ -125,6 +127,7 @@ struct stats event_res_stats[MAX_COUNTERS][3];
125struct stats runtime_nsecs_stats; 127struct stats runtime_nsecs_stats;
126struct stats walltime_nsecs_stats; 128struct stats walltime_nsecs_stats;
127struct stats runtime_cycles_stats; 129struct stats runtime_cycles_stats;
130struct stats runtime_branches_stats;
128 131
129#define MATCH_EVENT(t, c, counter) \ 132#define MATCH_EVENT(t, c, counter) \
130 (attrs[counter].type == PERF_TYPE_##t && \ 133 (attrs[counter].type == PERF_TYPE_##t && \
@@ -235,6 +238,8 @@ static void read_counter(int counter)
235 update_stats(&runtime_nsecs_stats, count[0]); 238 update_stats(&runtime_nsecs_stats, count[0]);
236 if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) 239 if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
237 update_stats(&runtime_cycles_stats, count[0]); 240 update_stats(&runtime_cycles_stats, count[0]);
241 if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter))
242 update_stats(&runtime_branches_stats, count[0]);
238} 243}
239 244
240static int run_perf_stat(int argc __used, const char **argv) 245static int run_perf_stat(int argc __used, const char **argv)
@@ -352,7 +357,16 @@ static void abs_printout(int counter, double avg)
352 ratio = avg / total; 357 ratio = avg / total;
353 358
354 fprintf(stderr, " # %10.3f IPC ", ratio); 359 fprintf(stderr, " # %10.3f IPC ", ratio);
355 } else { 360 } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) &&
361 runtime_branches_stats.n != 0) {
362 total = avg_stats(&runtime_branches_stats);
363
364 if (total)
365 ratio = avg * 100 / total;
366
367 fprintf(stderr, " # %10.3f %% ", ratio);
368
369 } else if (runtime_nsecs_stats.n != 0) {
356 total = avg_stats(&runtime_nsecs_stats); 370 total = avg_stats(&runtime_nsecs_stats);
357 371
358 if (total) 372 if (total)
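
The builtin-stat hunks add branch instructions and branch misses to the default counter set and teach abs_printout() to report misses as a percentage of branches (ratio = avg * 100 / total, guarded against a zero total). A toy reproduction of that arithmetic, with made-up sample counts:

#include <stdio.h>

/* Branch-miss rate as printed by the new abs_printout() branch:
 * misses scaled to a percentage of all branch instructions. */
static double branch_miss_pct(double misses, double branches)
{
	return branches ? misses * 100.0 / branches : 0.0;
}

int main(void)
{
	/* illustrative numbers only */
	printf(" # %10.3f %% \n", branch_miss_pct(1.2e6, 9.7e7));
	return 0;
}
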
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index e8a510d935e5..cb58b6605fcc 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -29,14 +29,14 @@
29#include "util/header.h" 29#include "util/header.h"
30#include "util/parse-options.h" 30#include "util/parse-options.h"
31#include "util/parse-events.h" 31#include "util/parse-events.h"
32#include "util/event.h"
33#include "util/data_map.h"
32#include "util/svghelper.h" 34#include "util/svghelper.h"
33 35
34static char const *input_name = "perf.data"; 36static char const *input_name = "perf.data";
35static char const *output_name = "output.svg"; 37static char const *output_name = "output.svg";
36 38
37 39
38static unsigned long page_size;
39static unsigned long mmap_window = 32;
40static u64 sample_type; 40static u64 sample_type;
41 41
42static unsigned int numcpus; 42static unsigned int numcpus;
@@ -49,8 +49,6 @@ static u64 first_time, last_time;
49static int power_only; 49static int power_only;
50 50
51 51
52static struct perf_header *header;
53
54struct per_pid; 52struct per_pid;
55struct per_pidcomm; 53struct per_pidcomm;
56 54
@@ -153,6 +151,17 @@ static struct wake_event *wake_events;
153 151
154struct sample_wrapper *all_samples; 152struct sample_wrapper *all_samples;
155 153
154
155struct process_filter;
156struct process_filter {
157 char *name;
158 int pid;
159 struct process_filter *next;
160};
161
162static struct process_filter *process_filter;
163
164
156static struct per_pid *find_create_pid(int pid) 165static struct per_pid *find_create_pid(int pid)
157{ 166{
158 struct per_pid *cursor = all_data; 167 struct per_pid *cursor = all_data;
@@ -763,11 +772,11 @@ static void draw_wakeups(void)
763 c = p->all; 772 c = p->all;
764 while (c) { 773 while (c) {
765 if (c->Y && c->start_time <= we->time && c->end_time >= we->time) { 774 if (c->Y && c->start_time <= we->time && c->end_time >= we->time) {
766 if (p->pid == we->waker) { 775 if (p->pid == we->waker && !from) {
767 from = c->Y; 776 from = c->Y;
768 task_from = strdup(c->comm); 777 task_from = strdup(c->comm);
769 } 778 }
770 if (p->pid == we->wakee) { 779 if (p->pid == we->wakee && !to) {
771 to = c->Y; 780 to = c->Y;
772 task_to = strdup(c->comm); 781 task_to = strdup(c->comm);
773 } 782 }
@@ -882,12 +891,89 @@ static void draw_process_bars(void)
882 } 891 }
883} 892}
884 893
894static void add_process_filter(const char *string)
895{
896 struct process_filter *filt;
897 int pid;
898
899 pid = strtoull(string, NULL, 10);
900 filt = malloc(sizeof(struct process_filter));
901 if (!filt)
902 return;
903
904 filt->name = strdup(string);
905 filt->pid = pid;
906 filt->next = process_filter;
907
908 process_filter = filt;
909}
910
911static int passes_filter(struct per_pid *p, struct per_pidcomm *c)
912{
913 struct process_filter *filt;
914 if (!process_filter)
915 return 1;
916
917 filt = process_filter;
918 while (filt) {
919 if (filt->pid && p->pid == filt->pid)
920 return 1;
921 if (strcmp(filt->name, c->comm) == 0)
922 return 1;
923 filt = filt->next;
924 }
925 return 0;
926}
927
928static int determine_display_tasks_filtered(void)
929{
930 struct per_pid *p;
931 struct per_pidcomm *c;
932 int count = 0;
933
934 p = all_data;
935 while (p) {
936 p->display = 0;
937 if (p->start_time == 1)
938 p->start_time = first_time;
939
940 /* no exit marker, task kept running to the end */
941 if (p->end_time == 0)
942 p->end_time = last_time;
943
944 c = p->all;
945
946 while (c) {
947 c->display = 0;
948
949 if (c->start_time == 1)
950 c->start_time = first_time;
951
952 if (passes_filter(p, c)) {
953 c->display = 1;
954 p->display = 1;
955 count++;
956 }
957
958 if (c->end_time == 0)
959 c->end_time = last_time;
960
961 c = c->next;
962 }
963 p = p->next;
964 }
965 return count;
966}
967
885static int determine_display_tasks(u64 threshold) 968static int determine_display_tasks(u64 threshold)
886{ 969{
887 struct per_pid *p; 970 struct per_pid *p;
888 struct per_pidcomm *c; 971 struct per_pidcomm *c;
889 int count = 0; 972 int count = 0;
890 973
974 if (process_filter)
975 return determine_display_tasks_filtered();
976
891 p = all_data; 977 p = all_data;
892 while (p) { 978 while (p) {
893 p->display = 0; 979 p->display = 0;
@@ -957,36 +1043,6 @@ static void write_svg_file(const char *filename)
957 svg_close(); 1043 svg_close();
958} 1044}
959 1045
960static int
961process_event(event_t *event)
962{
963
964 switch (event->header.type) {
965
966 case PERF_RECORD_COMM:
967 return process_comm_event(event);
968 case PERF_RECORD_FORK:
969 return process_fork_event(event);
970 case PERF_RECORD_EXIT:
971 return process_exit_event(event);
972 case PERF_RECORD_SAMPLE:
973 return queue_sample_event(event);
974
975 /*
 976 	 * We don't process them right now but they are fine:
977 */
978 case PERF_RECORD_MMAP:
979 case PERF_RECORD_THROTTLE:
980 case PERF_RECORD_UNTHROTTLE:
981 return 0;
982
983 default:
984 return -1;
985 }
986
987 return 0;
988}
989
990static void process_samples(void) 1046static void process_samples(void)
991{ 1047{
992 struct sample_wrapper *cursor; 1048 struct sample_wrapper *cursor;
@@ -1002,107 +1058,38 @@ static void process_samples(void)
1002 } 1058 }
1003} 1059}
1004 1060
1005 1061static int sample_type_check(u64 type)
1006static int __cmd_timechart(void)
1007{ 1062{
1008 int ret, rc = EXIT_FAILURE; 1063 sample_type = type;
1009 unsigned long offset = 0;
1010 unsigned long head, shift;
1011 struct stat statbuf;
1012 event_t *event;
1013 uint32_t size;
1014 char *buf;
1015 int input;
1016
1017 input = open(input_name, O_RDONLY);
1018 if (input < 0) {
1019 fprintf(stderr, " failed to open file: %s", input_name);
1020 if (!strcmp(input_name, "perf.data"))
1021 fprintf(stderr, " (try 'perf record' first)");
1022 fprintf(stderr, "\n");
1023 exit(-1);
1024 }
1025
1026 ret = fstat(input, &statbuf);
1027 if (ret < 0) {
1028 perror("failed to stat file");
1029 exit(-1);
1030 }
1031
1032 if (!statbuf.st_size) {
1033 fprintf(stderr, "zero-sized file, nothing to do!\n");
1034 exit(0);
1035 }
1036
1037 header = perf_header__read(input);
1038 head = header->data_offset;
1039
1040 sample_type = perf_header__sample_type(header);
1041 1064
1042 shift = page_size * (head / page_size); 1065 if (!(sample_type & PERF_SAMPLE_RAW)) {
1043 offset += shift; 1066 fprintf(stderr, "No trace samples found in the file.\n"
1044 head -= shift; 1067 "Have you used 'perf timechart record' to record it?\n");
1045 1068 return -1;
1046remap:
1047 buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
1048 MAP_SHARED, input, offset);
1049 if (buf == MAP_FAILED) {
1050 perror("failed to mmap file");
1051 exit(-1);
1052 }
1053
1054more:
1055 event = (event_t *)(buf + head);
1056
1057 size = event->header.size;
1058 if (!size)
1059 size = 8;
1060
1061 if (head + event->header.size >= page_size * mmap_window) {
1062 int ret2;
1063
1064 shift = page_size * (head / page_size);
1065
1066 ret2 = munmap(buf, page_size * mmap_window);
1067 assert(ret2 == 0);
1068
1069 offset += shift;
1070 head -= shift;
1071 goto remap;
1072 }
1073
1074 size = event->header.size;
1075
1076 if (!size || process_event(event) < 0) {
1077
1078 printf("%p [%p]: skipping unknown header type: %d\n",
1079 (void *)(offset + head),
1080 (void *)(long)(event->header.size),
1081 event->header.type);
1082
1083 /*
1084 * assume we lost track of the stream, check alignment, and
1085 * increment a single u64 in the hope to catch on again 'soon'.
1086 */
1087
1088 if (unlikely(head & 7))
1089 head &= ~7ULL;
1090
1091 size = 8;
1092 } 1069 }
1093 1070
1094 head += size; 1071 return 0;
1072}
1095 1073
1096 if (offset + head >= header->data_offset + header->data_size) 1074static struct perf_file_handler file_handler = {
1097 goto done; 1075 .process_comm_event = process_comm_event,
1076 .process_fork_event = process_fork_event,
1077 .process_exit_event = process_exit_event,
1078 .process_sample_event = queue_sample_event,
1079 .sample_type_check = sample_type_check,
1080};
1098 1081
1099 if (offset + head < (unsigned long)statbuf.st_size) 1082static int __cmd_timechart(void)
1100 goto more; 1083{
1084 struct perf_header *header;
1085 int ret;
1101 1086
1102done: 1087 register_perf_file_handler(&file_handler);
1103 rc = EXIT_SUCCESS;
1104 close(input);
1105 1088
1089 ret = mmap_dispatch_perf_file(&header, input_name, 0, 0,
1090 &event__cwdlen, &event__cwd);
1091 if (ret)
1092 return EXIT_FAILURE;
1106 1093
1107 process_samples(); 1094 process_samples();
1108 1095
@@ -1112,9 +1099,10 @@ done:
1112 1099
1113 write_svg_file(output_name); 1100 write_svg_file(output_name);
1114 1101
1115 printf("Written %2.1f seconds of trace to %s.\n", (last_time - first_time) / 1000000000.0, output_name); 1102 pr_info("Written %2.1f seconds of trace to %s.\n",
1103 (last_time - first_time) / 1000000000.0, output_name);
1116 1104
1117 return rc; 1105 return EXIT_SUCCESS;
1118} 1106}
1119 1107
1120static const char * const timechart_usage[] = { 1108static const char * const timechart_usage[] = {
@@ -1153,6 +1141,14 @@ static int __cmd_record(int argc, const char **argv)
1153 return cmd_record(i, rec_argv, NULL); 1141 return cmd_record(i, rec_argv, NULL);
1154} 1142}
1155 1143
1144static int
1145parse_process(const struct option *opt __used, const char *arg, int __used unset)
1146{
1147 if (arg)
1148 add_process_filter(arg);
1149 return 0;
1150}
1151
1156static const struct option options[] = { 1152static const struct option options[] = {
1157 OPT_STRING('i', "input", &input_name, "file", 1153 OPT_STRING('i', "input", &input_name, "file",
1158 "input file name"), 1154 "input file name"),
@@ -1160,17 +1156,18 @@ static const struct option options[] = {
1160 "output file name"), 1156 "output file name"),
1161 OPT_INTEGER('w', "width", &svg_page_width, 1157 OPT_INTEGER('w', "width", &svg_page_width,
1162 "page width"), 1158 "page width"),
1163 OPT_BOOLEAN('p', "power-only", &power_only, 1159 OPT_BOOLEAN('P', "power-only", &power_only,
1164 "output power data only"), 1160 "output power data only"),
1161 OPT_CALLBACK('p', "process", NULL, "process",
1162 "process selector. Pass a pid or process name.",
1163 parse_process),
1165 OPT_END() 1164 OPT_END()
1166}; 1165};
1167 1166
1168 1167
1169int cmd_timechart(int argc, const char **argv, const char *prefix __used) 1168int cmd_timechart(int argc, const char **argv, const char *prefix __used)
1170{ 1169{
1171 symbol__init(); 1170 symbol__init(0);
1172
1173 page_size = getpagesize();
1174 1171
1175 argc = parse_options(argc, argv, options, timechart_usage, 1172 argc = parse_options(argc, argv, options, timechart_usage,
1176 PARSE_OPT_STOP_AT_NON_OPTION); 1173 PARSE_OPT_STOP_AT_NON_OPTION);
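
The timechart changes grow a -p/--process selector: each argument is pushed onto a singly linked process_filter list, and determine_display_tasks_filtered() keeps a task when any entry matches its pid or its comm string. A self-contained sketch of the same push-front list and match rule (names abbreviated here; this is not the real perf code):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct filter {
	char *name;
	int pid;
	struct filter *next;
};

static struct filter *filters;

/* Push one -p argument; pid stays 0 when the argument is a name. */
static void add_filter(const char *arg)
{
	struct filter *f = malloc(sizeof(*f));

	if (!f)
		return;
	f->pid = atoi(arg);
	f->name = strdup(arg);
	f->next = filters;
	filters = f;
}

/* A task is shown if no filter exists, or any entry matches. */
static int passes(int pid, const char *comm)
{
	struct filter *f;

	if (!filters)
		return 1;
	for (f = filters; f; f = f->next)
		if ((f->pid && f->pid == pid) || !strcmp(f->name, comm))
			return 1;
	return 0;
}

int main(void)
{
	add_filter("firefox");
	add_filter("1234");
	printf("%d %d %d\n", passes(1234, "bash"), passes(42, "firefox"),
	       passes(42, "bash"));	/* prints: 1 1 0 */
	return 0;
}
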
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index e23bc74e734f..e0a374d0e43a 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -22,6 +22,7 @@
22 22
23#include "util/symbol.h" 23#include "util/symbol.h"
24#include "util/color.h" 24#include "util/color.h"
25#include "util/thread.h"
25#include "util/util.h" 26#include "util/util.h"
26#include <linux/rbtree.h> 27#include <linux/rbtree.h>
27#include "util/parse-options.h" 28#include "util/parse-options.h"
@@ -54,26 +55,31 @@
54 55
55static int fd[MAX_NR_CPUS][MAX_COUNTERS]; 56static int fd[MAX_NR_CPUS][MAX_COUNTERS];
56 57
57static int system_wide = 0; 58static int system_wide = 0;
58 59
59static int default_interval = 100000; 60static int default_interval = 0;
60 61
61static int count_filter = 5; 62static int count_filter = 5;
62static int print_entries = 15; 63static int print_entries;
63 64
64static int target_pid = -1; 65static int target_pid = -1;
65static int inherit = 0; 66static int inherit = 0;
66static int profile_cpu = -1; 67static int profile_cpu = -1;
67static int nr_cpus = 0; 68static int nr_cpus = 0;
68static unsigned int realtime_prio = 0; 69static unsigned int realtime_prio = 0;
69static int group = 0; 70static int group = 0;
70static unsigned int page_size; 71static unsigned int page_size;
71static unsigned int mmap_pages = 16; 72static unsigned int mmap_pages = 16;
 72 static int freq = 0; 73 static int freq = 1000; /* 1 kHz */
73 74
74static int delay_secs = 2; 75static int delay_secs = 2;
75static int zero; 76static int zero = 0;
76static int dump_symtab; 77static int dump_symtab = 0;
78
79static bool hide_kernel_symbols = false;
80static bool hide_user_symbols = false;
81static struct winsize winsize;
82struct symbol_conf symbol_conf;
77 83
78/* 84/*
79 * Source 85 * Source
@@ -86,83 +92,126 @@ struct source_line {
86 struct source_line *next; 92 struct source_line *next;
87}; 93};
88 94
89static char *sym_filter = NULL; 95static char *sym_filter = NULL;
90struct sym_entry *sym_filter_entry = NULL; 96struct sym_entry *sym_filter_entry = NULL;
91static int sym_pcnt_filter = 5; 97static int sym_pcnt_filter = 5;
92static int sym_counter = 0; 98static int sym_counter = 0;
93static int display_weighted = -1; 99static int display_weighted = -1;
94 100
95/* 101/*
96 * Symbols 102 * Symbols
97 */ 103 */
98 104
99static u64 min_ip; 105struct sym_entry_source {
100static u64 max_ip = -1ll; 106 struct source_line *source;
107 struct source_line *lines;
108 struct source_line **lines_tail;
109 pthread_mutex_t lock;
110};
101 111
102struct sym_entry { 112struct sym_entry {
103 struct rb_node rb_node; 113 struct rb_node rb_node;
104 struct list_head node; 114 struct list_head node;
105 unsigned long count[MAX_COUNTERS];
106 unsigned long snap_count; 115 unsigned long snap_count;
107 double weight; 116 double weight;
108 int skip; 117 int skip;
109 struct source_line *source; 118 u16 name_len;
110 struct source_line *lines; 119 u8 origin;
111 struct source_line **lines_tail; 120 struct map *map;
112 pthread_mutex_t source_lock; 121 struct sym_entry_source *src;
122 unsigned long count[0];
113}; 123};
114 124
115/* 125/*
116 * Source functions 126 * Source functions
117 */ 127 */
118 128
129static inline struct symbol *sym_entry__symbol(struct sym_entry *self)
130{
131 return ((void *)self) + symbol_conf.priv_size;
132}
133
134static void get_term_dimensions(struct winsize *ws)
135{
136 char *s = getenv("LINES");
137
138 if (s != NULL) {
139 ws->ws_row = atoi(s);
140 s = getenv("COLUMNS");
141 if (s != NULL) {
142 ws->ws_col = atoi(s);
143 if (ws->ws_row && ws->ws_col)
144 return;
145 }
146 }
147#ifdef TIOCGWINSZ
148 if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
149 ws->ws_row && ws->ws_col)
150 return;
151#endif
152 ws->ws_row = 25;
153 ws->ws_col = 80;
154}
155
156static void update_print_entries(struct winsize *ws)
157{
158 print_entries = ws->ws_row;
159
160 if (print_entries > 9)
161 print_entries -= 9;
162}
163
164static void sig_winch_handler(int sig __used)
165{
166 get_term_dimensions(&winsize);
167 update_print_entries(&winsize);
168}
169
119static void parse_source(struct sym_entry *syme) 170static void parse_source(struct sym_entry *syme)
120{ 171{
121 struct symbol *sym; 172 struct symbol *sym;
122 struct module *module; 173 struct sym_entry_source *source;
123 struct section *section = NULL; 174 struct map *map;
124 FILE *file; 175 FILE *file;
125 char command[PATH_MAX*2]; 176 char command[PATH_MAX*2];
126 const char *path = vmlinux_name; 177 const char *path;
127 u64 start, end, len; 178 u64 len;
128 179
129 if (!syme) 180 if (!syme)
130 return; 181 return;
131 182
132 if (syme->lines) { 183 if (syme->src == NULL) {
133 pthread_mutex_lock(&syme->source_lock); 184 syme->src = zalloc(sizeof(*source));
134 goto out_assign; 185 if (syme->src == NULL)
186 return;
187 pthread_mutex_init(&syme->src->lock, NULL);
135 } 188 }
136 189
137 sym = (struct symbol *)(syme + 1); 190 source = syme->src;
138 module = sym->module;
139
140 if (module)
141 path = module->path;
142 if (!path)
143 return;
144
145 start = sym->obj_start;
146 if (!start)
147 start = sym->start;
148 191
149 if (module) { 192 if (source->lines) {
150 section = module->sections->find_section(module->sections, ".text"); 193 pthread_mutex_lock(&source->lock);
151 if (section) 194 goto out_assign;
152 start -= section->vma;
153 } 195 }
154 196
155 end = start + sym->end - sym->start + 1; 197 sym = sym_entry__symbol(syme);
198 map = syme->map;
199 path = map->dso->long_name;
200
156 len = sym->end - sym->start; 201 len = sym->end - sym->start;
157 202
158 sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", start, end, path); 203 sprintf(command,
204 "objdump --start-address=0x%016Lx "
205 "--stop-address=0x%016Lx -dS %s",
206 map->unmap_ip(map, sym->start),
207 map->unmap_ip(map, sym->end), path);
159 208
160 file = popen(command, "r"); 209 file = popen(command, "r");
161 if (!file) 210 if (!file)
162 return; 211 return;
163 212
164 pthread_mutex_lock(&syme->source_lock); 213 pthread_mutex_lock(&source->lock);
165 syme->lines_tail = &syme->lines; 214 source->lines_tail = &source->lines;
166 while (!feof(file)) { 215 while (!feof(file)) {
167 struct source_line *src; 216 struct source_line *src;
168 size_t dummy = 0; 217 size_t dummy = 0;
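
The sym_entry rework in the hunk above drops the embedded source-line fields in favour of a separately allocated sym_entry_source, and sym_entry__symbol() now locates the symbol symbol_conf.priv_size bytes past the entry: per-tool private data is laid out immediately before each struct symbol in a single allocation. A hedged sketch of that layout trick follows; the types and helper names are illustrative stand-ins, not the real perf definitions.

#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-ins for the real perf types. */
struct symbol {
	unsigned long start, end;
	char name[32];
};

struct sym_entry {			/* the tool's private per-symbol data */
	unsigned long snap_count;
};

/* Plays the role of symbol_conf.priv_size; fixed for this sketch. */
static const size_t priv_size = sizeof(struct sym_entry);

/* One allocation, laid out as [ sym_entry | struct symbol ]. */
static struct symbol *symbol__new(const char *name)
{
	char *mem = calloc(1, priv_size + sizeof(struct symbol));
	struct symbol *sym;

	if (!mem)
		return NULL;
	sym = (struct symbol *)(mem + priv_size);
	snprintf(sym->name, sizeof(sym->name), "%s", name);
	return sym;
}

/* Inverse of sym_entry__symbol(): step back to the private data. */
static struct sym_entry *symbol__priv(struct symbol *sym)
{
	return (struct sym_entry *)((char *)sym - priv_size);
}

int main(void)
{
	struct symbol *sym = symbol__new("schedule");

	if (!sym)
		return 1;
	symbol__priv(sym)->snap_count = 42;
	printf("%s: %lu samples\n", sym->name, symbol__priv(sym)->snap_count);
	free(symbol__priv(sym));
	return 0;
}

Both sides reach the other with plain pointer arithmetic, so no back-pointer is stored and the symbol code stays ignorant of each tool's bookkeeping.
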
@@ -182,24 +231,22 @@ static void parse_source(struct sym_entry *syme)
182 *c = 0; 231 *c = 0;
183 232
184 src->next = NULL; 233 src->next = NULL;
185 *syme->lines_tail = src; 234 *source->lines_tail = src;
186 syme->lines_tail = &src->next; 235 source->lines_tail = &src->next;
187 236
188 if (strlen(src->line)>8 && src->line[8] == ':') { 237 if (strlen(src->line)>8 && src->line[8] == ':') {
189 src->eip = strtoull(src->line, NULL, 16); 238 src->eip = strtoull(src->line, NULL, 16);
190 if (section) 239 src->eip = map->unmap_ip(map, src->eip);
191 src->eip += section->vma;
192 } 240 }
193 if (strlen(src->line)>8 && src->line[16] == ':') { 241 if (strlen(src->line)>8 && src->line[16] == ':') {
194 src->eip = strtoull(src->line, NULL, 16); 242 src->eip = strtoull(src->line, NULL, 16);
195 if (section) 243 src->eip = map->unmap_ip(map, src->eip);
196 src->eip += section->vma;
197 } 244 }
198 } 245 }
199 pclose(file); 246 pclose(file);
200out_assign: 247out_assign:
201 sym_filter_entry = syme; 248 sym_filter_entry = syme;
202 pthread_mutex_unlock(&syme->source_lock); 249 pthread_mutex_unlock(&source->lock);
203} 250}
204 251
205static void __zero_source_counters(struct sym_entry *syme) 252static void __zero_source_counters(struct sym_entry *syme)
@@ -207,7 +254,7 @@ static void __zero_source_counters(struct sym_entry *syme)
207 int i; 254 int i;
208 struct source_line *line; 255 struct source_line *line;
209 256
210 line = syme->lines; 257 line = syme->src->lines;
211 while (line) { 258 while (line) {
212 for (i = 0; i < nr_counters; i++) 259 for (i = 0; i < nr_counters; i++)
213 line->count[i] = 0; 260 line->count[i] = 0;
@@ -222,13 +269,13 @@ static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
222 if (syme != sym_filter_entry) 269 if (syme != sym_filter_entry)
223 return; 270 return;
224 271
225 if (pthread_mutex_trylock(&syme->source_lock)) 272 if (pthread_mutex_trylock(&syme->src->lock))
226 return; 273 return;
227 274
228 if (!syme->source) 275 if (syme->src == NULL || syme->src->source == NULL)
229 goto out_unlock; 276 goto out_unlock;
230 277
231 for (line = syme->lines; line; line = line->next) { 278 for (line = syme->src->lines; line; line = line->next) {
232 if (line->eip == ip) { 279 if (line->eip == ip) {
233 line->count[counter]++; 280 line->count[counter]++;
234 break; 281 break;
@@ -237,32 +284,25 @@ static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
237 break; 284 break;
238 } 285 }
239out_unlock: 286out_unlock:
240 pthread_mutex_unlock(&syme->source_lock); 287 pthread_mutex_unlock(&syme->src->lock);
241} 288}
242 289
243static void lookup_sym_source(struct sym_entry *syme) 290static void lookup_sym_source(struct sym_entry *syme)
244{ 291{
245 struct symbol *symbol = (struct symbol *)(syme + 1); 292 struct symbol *symbol = sym_entry__symbol(syme);
246 struct source_line *line; 293 struct source_line *line;
247 char pattern[PATH_MAX]; 294 char pattern[PATH_MAX];
248 char *idx;
249 295
250 sprintf(pattern, "<%s>:", symbol->name); 296 sprintf(pattern, "<%s>:", symbol->name);
251 297
252 if (symbol->module) { 298 pthread_mutex_lock(&syme->src->lock);
253 idx = strstr(pattern, "\t"); 299 for (line = syme->src->lines; line; line = line->next) {
254 if (idx)
255 *idx = 0;
256 }
257
258 pthread_mutex_lock(&syme->source_lock);
259 for (line = syme->lines; line; line = line->next) {
260 if (strstr(line->line, pattern)) { 300 if (strstr(line->line, pattern)) {
261 syme->source = line; 301 syme->src->source = line;
262 break; 302 break;
263 } 303 }
264 } 304 }
265 pthread_mutex_unlock(&syme->source_lock); 305 pthread_mutex_unlock(&syme->src->lock);
266} 306}
267 307
268static void show_lines(struct source_line *queue, int count, int total) 308static void show_lines(struct source_line *queue, int count, int total)
@@ -292,24 +332,24 @@ static void show_details(struct sym_entry *syme)
292 if (!syme) 332 if (!syme)
293 return; 333 return;
294 334
295 if (!syme->source) 335 if (!syme->src->source)
296 lookup_sym_source(syme); 336 lookup_sym_source(syme);
297 337
298 if (!syme->source) 338 if (!syme->src->source)
299 return; 339 return;
300 340
301 symbol = (struct symbol *)(syme + 1); 341 symbol = sym_entry__symbol(syme);
302 printf("Showing %s for %s\n", event_name(sym_counter), symbol->name); 342 printf("Showing %s for %s\n", event_name(sym_counter), symbol->name);
303 printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter); 343 printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter);
304 344
305 pthread_mutex_lock(&syme->source_lock); 345 pthread_mutex_lock(&syme->src->lock);
306 line = syme->source; 346 line = syme->src->source;
307 while (line) { 347 while (line) {
308 total += line->count[sym_counter]; 348 total += line->count[sym_counter];
309 line = line->next; 349 line = line->next;
310 } 350 }
311 351
312 line = syme->source; 352 line = syme->src->source;
313 while (line) { 353 while (line) {
314 float pcnt = 0.0; 354 float pcnt = 0.0;
315 355
@@ -334,13 +374,13 @@ static void show_details(struct sym_entry *syme)
334 line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8; 374 line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8;
335 line = line->next; 375 line = line->next;
336 } 376 }
337 pthread_mutex_unlock(&syme->source_lock); 377 pthread_mutex_unlock(&syme->src->lock);
338 if (more) 378 if (more)
339 printf("%d lines not displayed, maybe increase display entries [e]\n", more); 379 printf("%d lines not displayed, maybe increase display entries [e]\n", more);
340} 380}
341 381
342/* 382/*
 343 * Symbols will be added here in record_ip and will be removed 383 * Symbols will be added here in event__process_sample and will be removed
 344 * once they have decayed. 384 * once they have decayed.
345 */ 385 */
346static LIST_HEAD(active_symbols); 386static LIST_HEAD(active_symbols);
@@ -411,6 +451,8 @@ static void print_sym_table(void)
411 struct sym_entry *syme, *n; 451 struct sym_entry *syme, *n;
412 struct rb_root tmp = RB_ROOT; 452 struct rb_root tmp = RB_ROOT;
413 struct rb_node *nd; 453 struct rb_node *nd;
454 int sym_width = 0, dso_width = 0, max_dso_width;
455 const int win_width = winsize.ws_col - 1;
414 456
415 samples = userspace_samples = 0; 457 samples = userspace_samples = 0;
416 458
@@ -422,6 +464,14 @@ static void print_sym_table(void)
422 list_for_each_entry_safe_from(syme, n, &active_symbols, node) { 464 list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
423 syme->snap_count = syme->count[snap]; 465 syme->snap_count = syme->count[snap];
424 if (syme->snap_count != 0) { 466 if (syme->snap_count != 0) {
467
468 if ((hide_user_symbols &&
469 syme->origin == PERF_RECORD_MISC_USER) ||
470 (hide_kernel_symbols &&
471 syme->origin == PERF_RECORD_MISC_KERNEL)) {
472 list_remove_active_sym(syme);
473 continue;
474 }
425 syme->weight = sym_weight(syme); 475 syme->weight = sym_weight(syme);
426 rb_insert_active_sym(&tmp, syme); 476 rb_insert_active_sym(&tmp, syme);
427 sum_ksamples += syme->snap_count; 477 sum_ksamples += syme->snap_count;
@@ -434,8 +484,7 @@ static void print_sym_table(void)
434 484
435 puts(CONSOLE_CLEAR); 485 puts(CONSOLE_CLEAR);
436 486
437 printf( 487 printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
438"------------------------------------------------------------------------------\n");
439 printf( " PerfTop:%8.0f irqs/sec kernel:%4.1f%% [", 488 printf( " PerfTop:%8.0f irqs/sec kernel:%4.1f%% [",
440 samples_per_sec, 489 samples_per_sec,
441 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); 490 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)));
@@ -473,33 +522,57 @@ static void print_sym_table(void)
473 printf(", %d CPUs)\n", nr_cpus); 522 printf(", %d CPUs)\n", nr_cpus);
474 } 523 }
475 524
476 printf("------------------------------------------------------------------------------\n\n"); 525 printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
477 526
478 if (sym_filter_entry) { 527 if (sym_filter_entry) {
479 show_details(sym_filter_entry); 528 show_details(sym_filter_entry);
480 return; 529 return;
481 } 530 }
482 531
532 /*
533 * Find the longest symbol name that will be displayed
534 */
535 for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
536 syme = rb_entry(nd, struct sym_entry, rb_node);
537 if (++printed > print_entries ||
538 (int)syme->snap_count < count_filter)
539 continue;
540
541 if (syme->map->dso->long_name_len > dso_width)
542 dso_width = syme->map->dso->long_name_len;
543
544 if (syme->name_len > sym_width)
545 sym_width = syme->name_len;
546 }
547
548 printed = 0;
549
550 max_dso_width = winsize.ws_col - sym_width - 29;
551 if (dso_width > max_dso_width)
552 dso_width = max_dso_width;
553 putchar('\n');
483 if (nr_counters == 1) 554 if (nr_counters == 1)
484 printf(" samples pcnt"); 555 printf(" samples pcnt");
485 else 556 else
486 printf(" weight samples pcnt"); 557 printf(" weight samples pcnt");
487 558
488 if (verbose) 559 if (verbose)
489 printf(" RIP "); 560 printf(" RIP ");
490 printf(" kernel function\n"); 561 printf(" %-*.*s DSO\n", sym_width, sym_width, "function");
491 printf(" %s _______ _____", 562 printf(" %s _______ _____",
492 nr_counters == 1 ? " " : "______"); 563 nr_counters == 1 ? " " : "______");
493 if (verbose) 564 if (verbose)
494 printf(" ________________"); 565 printf(" ________________");
495 printf(" _______________\n\n"); 566 printf(" %-*.*s", sym_width, sym_width, graph_line);
567 printf(" %-*.*s", dso_width, dso_width, graph_line);
568 puts("\n");
496 569
497 for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { 570 for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
498 struct symbol *sym; 571 struct symbol *sym;
499 double pcnt; 572 double pcnt;
500 573
501 syme = rb_entry(nd, struct sym_entry, rb_node); 574 syme = rb_entry(nd, struct sym_entry, rb_node);
502 sym = (struct symbol *)(syme + 1); 575 sym = sym_entry__symbol(syme);
503 576
504 if (++printed > print_entries || (int)syme->snap_count < count_filter) 577 if (++printed > print_entries || (int)syme->snap_count < count_filter)
505 continue; 578 continue;
@@ -508,17 +581,18 @@ static void print_sym_table(void)
508 sum_ksamples)); 581 sum_ksamples));
509 582
510 if (nr_counters == 1 || !display_weighted) 583 if (nr_counters == 1 || !display_weighted)
511 printf("%20.2f - ", syme->weight); 584 printf("%20.2f ", syme->weight);
512 else 585 else
513 printf("%9.1f %10ld - ", syme->weight, syme->snap_count); 586 printf("%9.1f %10ld ", syme->weight, syme->snap_count);
514 587
515 percent_color_fprintf(stdout, "%4.1f%%", pcnt); 588 percent_color_fprintf(stdout, "%4.1f%%", pcnt);
516 if (verbose) 589 if (verbose)
517 printf(" - %016llx", sym->start); 590 printf(" %016llx", sym->start);
518 printf(" : %s", sym->name); 591 printf(" %-*.*s", sym_width, sym_width, sym->name);
519 if (sym->module) 592 printf(" %-*.*s\n", dso_width, dso_width,
520 printf("\t[%s]", sym->module->name); 593 dso_width >= syme->map->dso->long_name_len ?
521 printf("\n"); 594 syme->map->dso->long_name :
595 syme->map->dso->short_name);
522 } 596 }
523} 597}
524 598
@@ -565,10 +639,10 @@ static void prompt_symbol(struct sym_entry **target, const char *msg)
565 639
566 /* zero counters of active symbol */ 640 /* zero counters of active symbol */
567 if (syme) { 641 if (syme) {
568 pthread_mutex_lock(&syme->source_lock); 642 pthread_mutex_lock(&syme->src->lock);
569 __zero_source_counters(syme); 643 __zero_source_counters(syme);
570 *target = NULL; 644 *target = NULL;
571 pthread_mutex_unlock(&syme->source_lock); 645 pthread_mutex_unlock(&syme->src->lock);
572 } 646 }
573 647
574 fprintf(stdout, "\n%s: ", msg); 648 fprintf(stdout, "\n%s: ", msg);
@@ -584,7 +658,7 @@ static void prompt_symbol(struct sym_entry **target, const char *msg)
584 pthread_mutex_unlock(&active_symbols_lock); 658 pthread_mutex_unlock(&active_symbols_lock);
585 659
586 list_for_each_entry_safe_from(syme, n, &active_symbols, node) { 660 list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
587 struct symbol *sym = (struct symbol *)(syme + 1); 661 struct symbol *sym = sym_entry__symbol(syme);
588 662
589 if (!strcmp(buf, sym->name)) { 663 if (!strcmp(buf, sym->name)) {
590 found = syme; 664 found = syme;
@@ -608,7 +682,7 @@ static void print_mapped_keys(void)
608 char *name = NULL; 682 char *name = NULL;
609 683
610 if (sym_filter_entry) { 684 if (sym_filter_entry) {
611 struct symbol *sym = (struct symbol *)(sym_filter_entry+1); 685 struct symbol *sym = sym_entry__symbol(sym_filter_entry);
612 name = sym->name; 686 name = sym->name;
613 } 687 }
614 688
@@ -621,7 +695,7 @@ static void print_mapped_keys(void)
621 695
622 fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter); 696 fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter);
623 697
624 if (vmlinux_name) { 698 if (symbol_conf.vmlinux_name) {
625 fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter); 699 fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
626 fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); 700 fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL");
627 fprintf(stdout, "\t[S] stop annotation.\n"); 701 fprintf(stdout, "\t[S] stop annotation.\n");
@@ -630,6 +704,12 @@ static void print_mapped_keys(void)
630 if (nr_counters > 1) 704 if (nr_counters > 1)
631 fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0); 705 fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0);
632 706
707 fprintf(stdout,
708 "\t[K] hide kernel_symbols symbols. \t(%s)\n",
709 hide_kernel_symbols ? "yes" : "no");
710 fprintf(stdout,
711 "\t[U] hide user symbols. \t(%s)\n",
712 hide_user_symbols ? "yes" : "no");
633 fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0); 713 fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0);
634 fprintf(stdout, "\t[qQ] quit.\n"); 714 fprintf(stdout, "\t[qQ] quit.\n");
635} 715}
@@ -643,6 +723,8 @@ static int key_mapped(int c)
643 case 'z': 723 case 'z':
644 case 'q': 724 case 'q':
645 case 'Q': 725 case 'Q':
726 case 'K':
727 case 'U':
646 return 1; 728 return 1;
647 case 'E': 729 case 'E':
648 case 'w': 730 case 'w':
@@ -650,7 +732,7 @@ static int key_mapped(int c)
650 case 'F': 732 case 'F':
651 case 's': 733 case 's':
652 case 'S': 734 case 'S':
653 return vmlinux_name ? 1 : 0; 735 return symbol_conf.vmlinux_name ? 1 : 0;
654 default: 736 default:
655 break; 737 break;
656 } 738 }
@@ -691,6 +773,11 @@ static void handle_keypress(int c)
691 break; 773 break;
692 case 'e': 774 case 'e':
693 prompt_integer(&print_entries, "Enter display entries (lines)"); 775 prompt_integer(&print_entries, "Enter display entries (lines)");
776 if (print_entries == 0) {
777 sig_winch_handler(SIGWINCH);
778 signal(SIGWINCH, sig_winch_handler);
779 } else
780 signal(SIGWINCH, SIG_DFL);
694 break; 781 break;
695 case 'E': 782 case 'E':
696 if (nr_counters > 1) { 783 if (nr_counters > 1) {
@@ -715,9 +802,14 @@ static void handle_keypress(int c)
715 case 'F': 802 case 'F':
716 prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)"); 803 prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)");
717 break; 804 break;
805 case 'K':
806 hide_kernel_symbols = !hide_kernel_symbols;
807 break;
718 case 'q': 808 case 'q':
719 case 'Q': 809 case 'Q':
720 printf("exiting.\n"); 810 printf("exiting.\n");
811 if (dump_symtab)
812 dsos__fprintf(stderr);
721 exit(0); 813 exit(0);
722 case 's': 814 case 's':
723 prompt_symbol(&sym_filter_entry, "Enter details symbol"); 815 prompt_symbol(&sym_filter_entry, "Enter details symbol");
@@ -728,12 +820,15 @@ static void handle_keypress(int c)
728 else { 820 else {
729 struct sym_entry *syme = sym_filter_entry; 821 struct sym_entry *syme = sym_filter_entry;
730 822
731 pthread_mutex_lock(&syme->source_lock); 823 pthread_mutex_lock(&syme->src->lock);
732 sym_filter_entry = NULL; 824 sym_filter_entry = NULL;
733 __zero_source_counters(syme); 825 __zero_source_counters(syme);
734 pthread_mutex_unlock(&syme->source_lock); 826 pthread_mutex_unlock(&syme->src->lock);
735 } 827 }
736 break; 828 break;
829 case 'U':
830 hide_user_symbols = !hide_user_symbols;
831 break;
737 case 'w': 832 case 'w':
738 display_weighted = ~display_weighted; 833 display_weighted = ~display_weighted;
739 break; 834 break;
@@ -790,7 +885,7 @@ static const char *skip_symbols[] = {
790 NULL 885 NULL
791}; 886};
792 887
793static int symbol_filter(struct dso *self, struct symbol *sym) 888static int symbol_filter(struct map *map, struct symbol *sym)
794{ 889{
795 struct sym_entry *syme; 890 struct sym_entry *syme;
796 const char *name = sym->name; 891 const char *name = sym->name;
@@ -812,8 +907,9 @@ static int symbol_filter(struct dso *self, struct symbol *sym)
812 strstr(name, "_text_end")) 907 strstr(name, "_text_end"))
813 return 1; 908 return 1;
814 909
815 syme = dso__sym_priv(self, sym); 910 syme = symbol__priv(sym);
816 pthread_mutex_init(&syme->source_lock, NULL); 911 syme->map = map;
912 syme->src = NULL;
817 if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) 913 if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter))
818 sym_filter_entry = syme; 914 sym_filter_entry = syme;
819 915
@@ -824,75 +920,65 @@ static int symbol_filter(struct dso *self, struct symbol *sym)
824 } 920 }
825 } 921 }
826 922
827 return 0; 923 if (!syme->skip)
828} 924 syme->name_len = strlen(sym->name);
829
830static int parse_symbols(void)
831{
832 struct rb_node *node;
833 struct symbol *sym;
834 int use_modules = vmlinux_name ? 1 : 0;
835
836 kernel_dso = dso__new("[kernel]", sizeof(struct sym_entry));
837 if (kernel_dso == NULL)
838 return -1;
839
840 if (dso__load_kernel(kernel_dso, vmlinux_name, symbol_filter, verbose, use_modules) <= 0)
841 goto out_delete_dso;
842
843 node = rb_first(&kernel_dso->syms);
844 sym = rb_entry(node, struct symbol, rb_node);
845 min_ip = sym->start;
846
847 node = rb_last(&kernel_dso->syms);
848 sym = rb_entry(node, struct symbol, rb_node);
849 max_ip = sym->end;
850
851 if (dump_symtab)
852 dso__fprintf(kernel_dso, stderr);
853 925
854 return 0; 926 return 0;
855
856out_delete_dso:
857 dso__delete(kernel_dso);
858 kernel_dso = NULL;
859 return -1;
860} 927}
861 928
862/* 929static void event__process_sample(const event_t *self, int counter)
863 * Binary search in the histogram table and record the hit:
864 */
865static void record_ip(u64 ip, int counter)
866{ 930{
867 struct symbol *sym = dso__find_symbol(kernel_dso, ip); 931 u64 ip = self->ip.ip;
868 932 struct sym_entry *syme;
869 if (sym != NULL) { 933 struct addr_location al;
870 struct sym_entry *syme = dso__sym_priv(kernel_dso, sym); 934 u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
871 935
872 if (!syme->skip) { 936 switch (origin) {
873 syme->count[counter]++; 937 case PERF_RECORD_MISC_USER:
874 record_precise_ip(syme, counter, ip); 938 if (hide_user_symbols)
875 pthread_mutex_lock(&active_symbols_lock);
876 if (list_empty(&syme->node) || !syme->node.next)
877 __list_insert_active_sym(syme);
878 pthread_mutex_unlock(&active_symbols_lock);
879 return; 939 return;
880 } 940 break;
941 case PERF_RECORD_MISC_KERNEL:
942 if (hide_kernel_symbols)
943 return;
944 break;
945 default:
946 return;
881 } 947 }
882 948
883 samples--; 949 if (event__preprocess_sample(self, &al, symbol_filter) < 0 ||
950 al.sym == NULL)
951 return;
952
953 syme = symbol__priv(al.sym);
954 if (!syme->skip) {
955 syme->count[counter]++;
956 syme->origin = origin;
957 record_precise_ip(syme, counter, ip);
958 pthread_mutex_lock(&active_symbols_lock);
959 if (list_empty(&syme->node) || !syme->node.next)
960 __list_insert_active_sym(syme);
961 pthread_mutex_unlock(&active_symbols_lock);
962 if (origin == PERF_RECORD_MISC_USER)
963 ++userspace_samples;
964 ++samples;
965 }
884} 966}
885 967
886static void process_event(u64 ip, int counter, int user) 968static int event__process(event_t *event)
887{ 969{
888 samples++; 970 switch (event->header.type) {
889 971 case PERF_RECORD_COMM:
890 if (user) { 972 event__process_comm(event);
891 userspace_samples++; 973 break;
892 return; 974 case PERF_RECORD_MMAP:
975 event__process_mmap(event);
976 break;
977 default:
978 break;
893 } 979 }
894 980
895 record_ip(ip, counter); 981 return 0;
896} 982}
897 983
898struct mmap_data { 984struct mmap_data {
@@ -913,8 +999,6 @@ static unsigned int mmap_read_head(struct mmap_data *md)
913 return head; 999 return head;
914} 1000}
915 1001
916struct timeval last_read, this_read;
917
918static void mmap_read_counter(struct mmap_data *md) 1002static void mmap_read_counter(struct mmap_data *md)
919{ 1003{
920 unsigned int head = mmap_read_head(md); 1004 unsigned int head = mmap_read_head(md);
@@ -922,8 +1006,6 @@ static void mmap_read_counter(struct mmap_data *md)
922 unsigned char *data = md->base + page_size; 1006 unsigned char *data = md->base + page_size;
923 int diff; 1007 int diff;
924 1008
925 gettimeofday(&this_read, NULL);
926
927 /* 1009 /*
928 * If we're further behind than half the buffer, there's a chance 1010 * If we're further behind than half the buffer, there's a chance
929 * the writer will bite our tail and mess up the samples under us. 1011 * the writer will bite our tail and mess up the samples under us.
@@ -934,14 +1016,7 @@ static void mmap_read_counter(struct mmap_data *md)
934 */ 1016 */
935 diff = head - old; 1017 diff = head - old;
936 if (diff > md->mask / 2 || diff < 0) { 1018 if (diff > md->mask / 2 || diff < 0) {
937 struct timeval iv; 1019 fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
938 unsigned long msecs;
939
940 timersub(&this_read, &last_read, &iv);
941 msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
942
943 fprintf(stderr, "WARNING: failed to keep up with mmap data."
944 " Last read %lu msecs ago.\n", msecs);
945 1020
946 /* 1021 /*
947 * head points to a known good entry, start there. 1022 * head points to a known good entry, start there.
@@ -949,8 +1024,6 @@ static void mmap_read_counter(struct mmap_data *md)
949 old = head; 1024 old = head;
950 } 1025 }
951 1026
952 last_read = this_read;
953
954 for (; old != head;) { 1027 for (; old != head;) {
955 event_t *event = (event_t *)&data[old & md->mask]; 1028 event_t *event = (event_t *)&data[old & md->mask];
956 1029
@@ -978,13 +1051,11 @@ static void mmap_read_counter(struct mmap_data *md)
978 event = &event_copy; 1051 event = &event_copy;
979 } 1052 }
980 1053
1054 if (event->header.type == PERF_RECORD_SAMPLE)
1055 event__process_sample(event, md->counter);
1056 else
1057 event__process(event);
981 old += size; 1058 old += size;
982
983 if (event->header.type == PERF_RECORD_SAMPLE) {
984 int user =
985 (event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK) == PERF_RECORD_MISC_USER;
986 process_event(event->ip.ip, md->counter, user);
987 }
988 } 1059 }
989 1060
990 md->prev = old; 1061 md->prev = old;
@@ -1018,8 +1089,15 @@ static void start_counter(int i, int counter)
1018 attr = attrs + counter; 1089 attr = attrs + counter;
1019 1090
1020 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; 1091 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
1021 attr->freq = freq; 1092
1093 if (freq) {
1094 attr->sample_type |= PERF_SAMPLE_PERIOD;
1095 attr->freq = 1;
1096 attr->sample_freq = freq;
1097 }
1098
1022 attr->inherit = (cpu < 0) && inherit; 1099 attr->inherit = (cpu < 0) && inherit;
1100 attr->mmap = 1;
1023 1101
1024try_again: 1102try_again:
1025 fd[i][counter] = sys_perf_event_open(attr, target_pid, cpu, group_fd, 0); 1103 fd[i][counter] = sys_perf_event_open(attr, target_pid, cpu, group_fd, 0);
@@ -1078,6 +1156,11 @@ static int __cmd_top(void)
1078 int i, counter; 1156 int i, counter;
1079 int ret; 1157 int ret;
1080 1158
1159 if (target_pid != -1)
1160 event__synthesize_thread(target_pid, event__process);
1161 else
1162 event__synthesize_threads(event__process);
1163
1081 for (i = 0; i < nr_cpus; i++) { 1164 for (i = 0; i < nr_cpus; i++) {
1082 group_fd = -1; 1165 group_fd = -1;
1083 for (counter = 0; counter < nr_counters; counter++) 1166 for (counter = 0; counter < nr_counters; counter++)
@@ -1133,7 +1216,10 @@ static const struct option options[] = {
1133 "system-wide collection from all CPUs"), 1216 "system-wide collection from all CPUs"),
1134 OPT_INTEGER('C', "CPU", &profile_cpu, 1217 OPT_INTEGER('C', "CPU", &profile_cpu,
1135 "CPU to profile on"), 1218 "CPU to profile on"),
1136 OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"), 1219 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
1220 "file", "vmlinux pathname"),
1221 OPT_BOOLEAN('K', "hide_kernel_symbols", &hide_kernel_symbols,
1222 "hide kernel symbols"),
1137 OPT_INTEGER('m', "mmap-pages", &mmap_pages, 1223 OPT_INTEGER('m', "mmap-pages", &mmap_pages,
1138 "number of mmap data pages"), 1224 "number of mmap data pages"),
1139 OPT_INTEGER('r', "realtime", &realtime_prio, 1225 OPT_INTEGER('r', "realtime", &realtime_prio,
@@ -1156,6 +1242,8 @@ static const struct option options[] = {
1156 "profile at this frequency"), 1242 "profile at this frequency"),
1157 OPT_INTEGER('E', "entries", &print_entries, 1243 OPT_INTEGER('E', "entries", &print_entries,
1158 "display this many functions"), 1244 "display this many functions"),
1245 OPT_BOOLEAN('U', "hide_user_symbols", &hide_user_symbols,
1246 "hide user symbols"),
1159 OPT_BOOLEAN('v', "verbose", &verbose, 1247 OPT_BOOLEAN('v', "verbose", &verbose,
1160 "be more verbose (show counter open errors, etc)"), 1248 "be more verbose (show counter open errors, etc)"),
1161 OPT_END() 1249 OPT_END()
@@ -1165,19 +1253,12 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1165{ 1253{
1166 int counter; 1254 int counter;
1167 1255
1168 symbol__init();
1169
1170 page_size = sysconf(_SC_PAGE_SIZE); 1256 page_size = sysconf(_SC_PAGE_SIZE);
1171 1257
1172 argc = parse_options(argc, argv, options, top_usage, 0); 1258 argc = parse_options(argc, argv, options, top_usage, 0);
1173 if (argc) 1259 if (argc)
1174 usage_with_options(top_usage, options); 1260 usage_with_options(top_usage, options);
1175 1261
1176 if (freq) {
1177 default_interval = freq;
1178 freq = 1;
1179 }
1180
1181 /* CPU and PID are mutually exclusive */ 1262 /* CPU and PID are mutually exclusive */
1182 if (target_pid != -1 && profile_cpu != -1) { 1263 if (target_pid != -1 && profile_cpu != -1) {
1183 printf("WARNING: PID switch overriding CPU\n"); 1264 printf("WARNING: PID switch overriding CPU\n");
@@ -1188,13 +1269,31 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1188 if (!nr_counters) 1269 if (!nr_counters)
1189 nr_counters = 1; 1270 nr_counters = 1;
1190 1271
1272 symbol_conf.priv_size = (sizeof(struct sym_entry) +
1273 (nr_counters + 1) * sizeof(unsigned long));
1274 if (symbol_conf.vmlinux_name == NULL)
1275 symbol_conf.try_vmlinux_path = true;
1276 if (symbol__init(&symbol_conf) < 0)
1277 return -1;
1278
1191 if (delay_secs < 1) 1279 if (delay_secs < 1)
1192 delay_secs = 1; 1280 delay_secs = 1;
1193 1281
1194 parse_symbols();
1195 parse_source(sym_filter_entry); 1282 parse_source(sym_filter_entry);
1196 1283
1197 /* 1284 /*
1285 * User specified count overrides default frequency.
1286 */
1287 if (default_interval)
1288 freq = 0;
1289 else if (freq) {
1290 default_interval = freq;
1291 } else {
1292 fprintf(stderr, "frequency and count are zero, aborting\n");
1293 exit(EXIT_FAILURE);
1294 }
1295
1296 /*
1198 * Fill in the ones not specifically initialized via -c: 1297 * Fill in the ones not specifically initialized via -c:
1199 */ 1298 */
1200 for (counter = 0; counter < nr_counters; counter++) { 1299 for (counter = 0; counter < nr_counters; counter++) {
@@ -1211,5 +1310,11 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1211 if (target_pid != -1 || profile_cpu != -1) 1310 if (target_pid != -1 || profile_cpu != -1)
1212 nr_cpus = 1; 1311 nr_cpus = 1;
1213 1312
1313 get_term_dimensions(&winsize);
1314 if (print_entries == 0) {
1315 update_print_entries(&winsize);
1316 signal(SIGWINCH, sig_winch_handler);
1317 }
1318
1214 return __cmd_top(); 1319 return __cmd_top();
1215} 1320}
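
A note on the builtin-top.c changes above: start_counter() now pairs attr->freq with PERF_SAMPLE_PERIOD, so every sample carries the period the kernel actually chose, and cmd_top() makes a user-specified count (-c) override the default frequency (-F). A minimal standalone sketch of that attr setup, assuming a Linux host with linux/perf_event.h; the CPU-clock event, the 1000 Hz rate, and the local syscall wrapper are illustrative, not perf's own defaults:

	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <string.h>
	#include <stdio.h>
	#include <unistd.h>

	static long sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
					int cpu, int group_fd, unsigned long flags)
	{
		return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
	}

	int main(void)
	{
		struct perf_event_attr attr;
		int fd;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_SOFTWARE;
		attr.config = PERF_COUNT_SW_CPU_CLOCK;
		attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;

		/*
		 * Frequency mode, as builtin-top now sets it up: request a
		 * sample rate and record the kernel-chosen period with each
		 * sample.  Period mode would set attr.sample_period instead.
		 */
		attr.sample_type |= PERF_SAMPLE_PERIOD;
		attr.freq = 1;
		attr.sample_freq = 1000;
		attr.mmap = 1;	/* track mmaps so user-space samples resolve */

		fd = sys_perf_event_open(&attr, 0, -1, -1, 0);	/* self, any CPU */
		if (fd < 0) {
			perror("perf_event_open");
			return 1;
		}
		close(fd);
		return 0;
	}
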
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 0c5e4f72f2ba..abb914aa7be6 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -5,66 +5,73 @@
5#include "util/symbol.h" 5#include "util/symbol.h"
6#include "util/thread.h" 6#include "util/thread.h"
7#include "util/header.h" 7#include "util/header.h"
8#include "util/exec_cmd.h"
9#include "util/trace-event.h"
8 10
9#include "util/parse-options.h" 11static char const *script_name;
12static char const *generate_script_lang;
10 13
11#include "perf.h" 14static int default_start_script(const char *script __attribute((unused)))
12#include "util/debug.h" 15{
16 return 0;
17}
13 18
14#include "util/trace-event.h" 19static int default_stop_script(void)
20{
21 return 0;
22}
15 23
16static char const *input_name = "perf.data"; 24static int default_generate_script(const char *outfile __attribute ((unused)))
17static int input; 25{
18static unsigned long page_size; 26 return 0;
19static unsigned long mmap_window = 32; 27}
20 28
21static unsigned long total = 0; 29static struct scripting_ops default_scripting_ops = {
22static unsigned long total_comm = 0; 30 .start_script = default_start_script,
31 .stop_script = default_stop_script,
32 .process_event = print_event,
33 .generate_script = default_generate_script,
34};
35
36static struct scripting_ops *scripting_ops;
23 37
24static struct rb_root threads; 38static void setup_scripting(void)
25static struct thread *last_match; 39{
40 /* make sure PERF_EXEC_PATH is set for scripts */
41 perf_set_argv_exec_path(perf_exec_path());
26 42
27static struct perf_header *header; 43 setup_perl_scripting();
28static u64 sample_type;
29 44
45 scripting_ops = &default_scripting_ops;
46}
30 47
31static int 48static int cleanup_scripting(void)
32process_comm_event(event_t *event, unsigned long offset, unsigned long head)
33{ 49{
34 struct thread *thread; 50 return scripting_ops->stop_script();
51}
35 52
36 thread = threads__findnew(event->comm.pid, &threads, &last_match); 53#include "util/parse-options.h"
37 54
38 dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n", 55#include "perf.h"
39 (void *)(offset + head), 56#include "util/debug.h"
40 (void *)(long)(event->header.size),
41 event->comm.comm, event->comm.pid);
42 57
43 if (thread == NULL || 58#include "util/trace-event.h"
44 thread__set_comm(thread, event->comm.comm)) { 59#include "util/data_map.h"
45 dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); 60#include "util/exec_cmd.h"
46 return -1;
47 }
48 total_comm++;
49 61
50 return 0; 62static char const *input_name = "perf.data";
51}
52 63
53static int 64static struct perf_header *header;
54process_sample_event(event_t *event, unsigned long offset, unsigned long head) 65static u64 sample_type;
66
67static int process_sample_event(event_t *event)
55{ 68{
56 char level;
57 int show = 0;
58 struct dso *dso = NULL;
59 struct thread *thread;
60 u64 ip = event->ip.ip; 69 u64 ip = event->ip.ip;
61 u64 timestamp = -1; 70 u64 timestamp = -1;
62 u32 cpu = -1; 71 u32 cpu = -1;
63 u64 period = 1; 72 u64 period = 1;
64 void *more_data = event->ip.__more_data; 73 void *more_data = event->ip.__more_data;
65 int cpumode; 74 struct thread *thread = threads__findnew(event->ip.pid);
66
67 thread = threads__findnew(event->ip.pid, &threads, &last_match);
68 75
69 if (sample_type & PERF_SAMPLE_TIME) { 76 if (sample_type & PERF_SAMPLE_TIME) {
70 timestamp = *(u64 *)more_data; 77 timestamp = *(u64 *)more_data;
@@ -82,45 +89,19 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
82 more_data += sizeof(u64); 89 more_data += sizeof(u64);
83 } 90 }
84 91
85 dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", 92 dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
86 (void *)(offset + head),
87 (void *)(long)(event->header.size),
88 event->header.misc, 93 event->header.misc,
89 event->ip.pid, event->ip.tid, 94 event->ip.pid, event->ip.tid,
90 (void *)(long)ip, 95 (void *)(long)ip,
91 (long long)period); 96 (long long)period);
92 97
93 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
94
95 if (thread == NULL) { 98 if (thread == NULL) {
96 eprintf("problem processing %d event, skipping it.\n", 99 pr_debug("problem processing %d event, skipping it.\n",
97 event->header.type); 100 event->header.type);
98 return -1; 101 return -1;
99 } 102 }
100 103
101 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 104 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
102
103 if (cpumode == PERF_RECORD_MISC_KERNEL) {
104 show = SHOW_KERNEL;
105 level = 'k';
106
107 dso = kernel_dso;
108
109 dump_printf(" ...... dso: %s\n", dso->name);
110
111 } else if (cpumode == PERF_RECORD_MISC_USER) {
112
113 show = SHOW_USER;
114 level = '.';
115
116 } else {
117 show = SHOW_HV;
118 level = 'H';
119
120 dso = hypervisor_dso;
121
122 dump_printf(" ...... dso: [hypervisor]\n");
123 }
124 105
125 if (sample_type & PERF_SAMPLE_RAW) { 106 if (sample_type & PERF_SAMPLE_RAW) {
126 struct { 107 struct {
@@ -133,128 +114,189 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
133 * field, although it should be the same than this perf 114 * field, although it should be the same than this perf
134 * event pid 115 * event pid
135 */ 116 */
136 print_event(cpu, raw->data, raw->size, timestamp, thread->comm); 117 scripting_ops->process_event(cpu, raw->data, raw->size,
118 timestamp, thread->comm);
137 } 119 }
138 total += period; 120 event__stats.total += period;
139 121
140 return 0; 122 return 0;
141} 123}
142 124
143static int 125static int sample_type_check(u64 type)
144process_event(event_t *event, unsigned long offset, unsigned long head)
145{ 126{
146 trace_event(event); 127 sample_type = type;
147
148 switch (event->header.type) {
149 case PERF_RECORD_MMAP ... PERF_RECORD_LOST:
150 return 0;
151
152 case PERF_RECORD_COMM:
153 return process_comm_event(event, offset, head);
154
155 case PERF_RECORD_EXIT ... PERF_RECORD_READ:
156 return 0;
157
158 case PERF_RECORD_SAMPLE:
159 return process_sample_event(event, offset, head);
160 128
161 case PERF_RECORD_MAX: 129 if (!(sample_type & PERF_SAMPLE_RAW)) {
162 default: 130 fprintf(stderr,
131 "No trace sample to read. Did you call perf record "
132 "without -R?");
 163 return -1; 132 "without -R?\n");
164 } 134 }
165 135
166 return 0; 136 return 0;
167} 137}
168 138
139static struct perf_file_handler file_handler = {
140 .process_sample_event = process_sample_event,
141 .process_comm_event = event__process_comm,
142 .sample_type_check = sample_type_check,
143};
144
169static int __cmd_trace(void) 145static int __cmd_trace(void)
170{ 146{
171 int ret, rc = EXIT_FAILURE; 147 register_idle_thread();
172 unsigned long offset = 0; 148 register_perf_file_handler(&file_handler);
173 unsigned long head = 0;
174 struct stat perf_stat;
175 event_t *event;
176 uint32_t size;
177 char *buf;
178
179 trace_report();
180 register_idle_thread(&threads, &last_match);
181
182 input = open(input_name, O_RDONLY);
183 if (input < 0) {
184 perror("failed to open file");
185 exit(-1);
186 }
187 149
188 ret = fstat(input, &perf_stat); 150 return mmap_dispatch_perf_file(&header, input_name,
189 if (ret < 0) { 151 0, 0, &event__cwdlen, &event__cwd);
190 perror("failed to stat file"); 152}
191 exit(-1);
192 }
193 153
194 if (!perf_stat.st_size) { 154struct script_spec {
195 fprintf(stderr, "zero-sized file, nothing to do!\n"); 155 struct list_head node;
196 exit(0); 156 struct scripting_ops *ops;
197 } 157 char spec[0];
198 header = perf_header__read(input); 158};
199 head = header->data_offset;
200 sample_type = perf_header__sample_type(header);
201 159
202 if (!(sample_type & PERF_SAMPLE_RAW)) 160LIST_HEAD(script_specs);
203 die("No trace sample to read. Did you call perf record "
204 "without -R?");
205 161
206 if (load_kernel() < 0) { 162static struct script_spec *script_spec__new(const char *spec,
207 perror("failed to load kernel symbols"); 163 struct scripting_ops *ops)
208 return EXIT_FAILURE; 164{
209 } 165 struct script_spec *s = malloc(sizeof(*s) + strlen(spec) + 1);
210 166
211remap: 167 if (s != NULL) {
212 buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, 168 strcpy(s->spec, spec);
213 MAP_SHARED, input, offset); 169 s->ops = ops;
214 if (buf == MAP_FAILED) {
215 perror("failed to mmap file");
216 exit(-1);
217 } 170 }
218 171
219more: 172 return s;
220 event = (event_t *)(buf + head); 173}
221 174
222 if (head + event->header.size >= page_size * mmap_window) { 175static void script_spec__delete(struct script_spec *s)
223 unsigned long shift = page_size * (head / page_size); 176{
 224 int res; 177 /* spec is a flexible array member; it is freed with s */
178 free(s);
179}
225 180
226 res = munmap(buf, page_size * mmap_window); 181static void script_spec__add(struct script_spec *s)
227 assert(res == 0); 182{
183 list_add_tail(&s->node, &script_specs);
184}
228 185
229 offset += shift; 186static struct script_spec *script_spec__find(const char *spec)
230 head -= shift; 187{
231 goto remap; 188 struct script_spec *s;
232 }
233 189
234 size = event->header.size; 190 list_for_each_entry(s, &script_specs, node)
191 if (strcasecmp(s->spec, spec) == 0)
192 return s;
193 return NULL;
194}
235 195
236 if (!size || process_event(event, offset, head) < 0) { 196static struct script_spec *script_spec__findnew(const char *spec,
197 struct scripting_ops *ops)
198{
199 struct script_spec *s = script_spec__find(spec);
237 200
238 /* 201 if (s)
239 * assume we lost track of the stream, check alignment, and 202 return s;
240 * increment a single u64 in the hope to catch on again 'soon'.
241 */
242 203
243 if (unlikely(head & 7)) 204 s = script_spec__new(spec, ops);
244 head &= ~7ULL; 205 if (!s)
206 goto out_delete_spec;
245 207
246 size = 8; 208 script_spec__add(s);
247 } 209
210 return s;
248 211
249 head += size; 212out_delete_spec:
213 script_spec__delete(s);
214
215 return NULL;
216}
250 217
251 if (offset + head < (unsigned long)perf_stat.st_size) 218int script_spec_register(const char *spec, struct scripting_ops *ops)
252 goto more; 219{
220 struct script_spec *s;
221
222 s = script_spec__find(spec);
223 if (s)
224 return -1;
253 225
254 rc = EXIT_SUCCESS; 226 s = script_spec__findnew(spec, ops);
255 close(input); 227 if (!s)
228 return -1;
229
230 return 0;
231}
232
233static struct scripting_ops *script_spec__lookup(const char *spec)
234{
235 struct script_spec *s = script_spec__find(spec);
236 if (!s)
237 return NULL;
256 238
257 return rc; 239 return s->ops;
240}
241
242static void list_available_languages(void)
243{
244 struct script_spec *s;
245
246 fprintf(stderr, "\n");
247 fprintf(stderr, "Scripting language extensions (used in "
248 "perf trace -s [spec:]script.[spec]):\n\n");
249
250 list_for_each_entry(s, &script_specs, node)
251 fprintf(stderr, " %-42s [%s]\n", s->spec, s->ops->name);
252
253 fprintf(stderr, "\n");
254}
255
256static int parse_scriptname(const struct option *opt __used,
257 const char *str, int unset __used)
258{
259 char spec[PATH_MAX];
260 const char *script, *ext;
261 int len;
262
263 if (strcmp(str, "list") == 0) {
264 list_available_languages();
265 return 0;
266 }
267
268 script = strchr(str, ':');
269 if (script) {
270 len = script - str;
271 if (len >= PATH_MAX) {
272 fprintf(stderr, "invalid language specifier");
273 return -1;
274 }
275 strncpy(spec, str, len);
276 spec[len] = '\0';
277 scripting_ops = script_spec__lookup(spec);
278 if (!scripting_ops) {
279 fprintf(stderr, "invalid language specifier");
280 return -1;
281 }
282 script++;
283 } else {
284 script = str;
285 ext = strchr(script, '.');
286 if (!ext) {
287 fprintf(stderr, "invalid script extension");
288 return -1;
289 }
290 scripting_ops = script_spec__lookup(++ext);
291 if (!scripting_ops) {
292 fprintf(stderr, "invalid script extension");
293 return -1;
294 }
295 }
296
297 script_name = strdup(script);
298
299 return 0;
258} 300}
259 301
260static const char * const annotate_usage[] = { 302static const char * const annotate_usage[] = {
@@ -267,13 +309,24 @@ static const struct option options[] = {
267 "dump raw trace in ASCII"), 309 "dump raw trace in ASCII"),
268 OPT_BOOLEAN('v', "verbose", &verbose, 310 OPT_BOOLEAN('v', "verbose", &verbose,
269 "be more verbose (show symbol address, etc)"), 311 "be more verbose (show symbol address, etc)"),
312 OPT_BOOLEAN('l', "latency", &latency_format,
313 "show latency attributes (irqs/preemption disabled, etc)"),
314 OPT_CALLBACK('s', "script", NULL, "name",
315 "script file name (lang:script name, script name, or *)",
316 parse_scriptname),
317 OPT_STRING('g', "gen-script", &generate_script_lang, "lang",
318 "generate perf-trace.xx script in specified language"),
319
270 OPT_END() 320 OPT_END()
271}; 321};
272 322
273int cmd_trace(int argc, const char **argv, const char *prefix __used) 323int cmd_trace(int argc, const char **argv, const char *prefix __used)
274{ 324{
275 symbol__init(); 325 int err;
276 page_size = getpagesize(); 326
327 symbol__init(0);
328
329 setup_scripting();
277 330
278 argc = parse_options(argc, argv, options, annotate_usage, 0); 331 argc = parse_options(argc, argv, options, annotate_usage, 0);
279 if (argc) { 332 if (argc) {
@@ -287,5 +340,50 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
287 340
288 setup_pager(); 341 setup_pager();
289 342
290 return __cmd_trace(); 343 if (generate_script_lang) {
344 struct stat perf_stat;
345
346 int input = open(input_name, O_RDONLY);
347 if (input < 0) {
348 perror("failed to open file");
349 exit(-1);
350 }
351
352 err = fstat(input, &perf_stat);
353 if (err < 0) {
354 perror("failed to stat file");
355 exit(-1);
356 }
357
358 if (!perf_stat.st_size) {
359 fprintf(stderr, "zero-sized file, nothing to do!\n");
360 exit(0);
361 }
362
363 scripting_ops = script_spec__lookup(generate_script_lang);
364 if (!scripting_ops) {
365 fprintf(stderr, "invalid language specifier");
366 return -1;
367 }
368
369 header = perf_header__new();
370 if (header == NULL)
371 return -1;
372
373 perf_header__read(header, input);
374 err = scripting_ops->generate_script("perf-trace");
375 goto out;
376 }
377
378 if (script_name) {
379 err = scripting_ops->start_script(script_name);
380 if (err)
381 goto out;
382 }
383
384 err = __cmd_trace();
385
386 cleanup_scripting();
387out:
388 return err;
291} 389}
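
One detail worth noting in the script_spec code above: the lookup key is stored in a flexible array member (char spec[0]), so a single malloc() covers both the struct and its string, and a single free() releases both. A self-contained sketch of the same pattern, using the C99 spelling and illustrative names rather than perf's:

	#include <stdlib.h>
	#include <string.h>

	struct entry {
		struct entry *next;
		char key[];	/* storage allocated together with the struct */
	};

	static struct entry *entry__new(const char *key)
	{
		struct entry *e = malloc(sizeof(*e) + strlen(key) + 1);

		if (e != NULL) {
			e->next = NULL;
			strcpy(e->key, key);
		}
		return e;
	}

	static void entry__delete(struct entry *e)
	{
		free(e);	/* key lives in the same block: no separate free */
	}
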
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index e11d8d231c3b..a3d8bf65f26c 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -15,6 +15,8 @@ extern int read_line_with_nul(char *buf, int size, FILE *file);
15extern int check_pager_config(const char *cmd); 15extern int check_pager_config(const char *cmd);
16 16
17extern int cmd_annotate(int argc, const char **argv, const char *prefix); 17extern int cmd_annotate(int argc, const char **argv, const char *prefix);
18extern int cmd_bench(int argc, const char **argv, const char *prefix);
19extern int cmd_buildid_list(int argc, const char **argv, const char *prefix);
18extern int cmd_help(int argc, const char **argv, const char *prefix); 20extern int cmd_help(int argc, const char **argv, const char *prefix);
19extern int cmd_sched(int argc, const char **argv, const char *prefix); 21extern int cmd_sched(int argc, const char **argv, const char *prefix);
20extern int cmd_list(int argc, const char **argv, const char *prefix); 22extern int cmd_list(int argc, const char **argv, const char *prefix);
@@ -25,5 +27,7 @@ extern int cmd_timechart(int argc, const char **argv, const char *prefix);
25extern int cmd_top(int argc, const char **argv, const char *prefix); 27extern int cmd_top(int argc, const char **argv, const char *prefix);
26extern int cmd_trace(int argc, const char **argv, const char *prefix); 28extern int cmd_trace(int argc, const char **argv, const char *prefix);
27extern int cmd_version(int argc, const char **argv, const char *prefix); 29extern int cmd_version(int argc, const char **argv, const char *prefix);
30extern int cmd_probe(int argc, const char **argv, const char *prefix);
31extern int cmd_kmem(int argc, const char **argv, const char *prefix);
28 32
29#endif 33#endif
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index 00326e230d87..02b09ea17a3e 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -3,6 +3,8 @@
3# command name category [deprecated] [common] 3# command name category [deprecated] [common]
4# 4#
5perf-annotate mainporcelain common 5perf-annotate mainporcelain common
6perf-bench mainporcelain common
7perf-buildid-list mainporcelain common
6perf-list mainporcelain common 8perf-list mainporcelain common
7perf-sched mainporcelain common 9perf-sched mainporcelain common
8perf-record mainporcelain common 10perf-record mainporcelain common
@@ -11,3 +13,5 @@ perf-stat mainporcelain common
11perf-timechart mainporcelain common 13perf-timechart mainporcelain common
12perf-top mainporcelain common 14perf-top mainporcelain common
13perf-trace mainporcelain common 15perf-trace mainporcelain common
16perf-probe mainporcelain common
17perf-kmem mainporcelain common
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
index fdd42a824c98..f000c30877ac 100644
--- a/tools/perf/design.txt
+++ b/tools/perf/design.txt
@@ -137,6 +137,8 @@ enum sw_event_ids {
137 PERF_COUNT_SW_CPU_MIGRATIONS = 4, 137 PERF_COUNT_SW_CPU_MIGRATIONS = 4,
138 PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, 138 PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
139 PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, 139 PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
140 PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
141 PERF_COUNT_SW_EMULATION_FAULTS = 8,
140}; 142};
141 143
142Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event 144Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event
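
The two new software event ids are used like any other PERF_TYPE_SOFTWARE event. A hedged sketch of configuring one as a plain counter; the id only exists on kernels that carry this change, and the helper name is illustrative:

	#include <linux/perf_event.h>
	#include <string.h>

	/*
	 * Fill an attr for a counting (non-sampling) emulation-faults event.
	 * Open it with perf_event_open(), enable it with the
	 * PERF_EVENT_IOC_ENABLE ioctl, then read(fd, &count, sizeof(count)).
	 */
	static void emulation_faults_attr(struct perf_event_attr *attr)
	{
		memset(attr, 0, sizeof(*attr));
		attr->size = sizeof(*attr);
		attr->type = PERF_TYPE_SOFTWARE;
		attr->config = PERF_COUNT_SW_EMULATION_FAULTS;
		attr->disabled = 1;	/* start disabled, enable explicitly */
	}
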
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 19fc7feb9d59..cf64049bc9bd 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -14,6 +14,7 @@
14#include "util/run-command.h" 14#include "util/run-command.h"
15#include "util/parse-events.h" 15#include "util/parse-events.h"
16#include "util/string.h" 16#include "util/string.h"
17#include "util/debugfs.h"
17 18
18const char perf_usage_string[] = 19const char perf_usage_string[] =
19 "perf [--version] [--help] COMMAND [ARGS]"; 20 "perf [--version] [--help] COMMAND [ARGS]";
@@ -89,8 +90,8 @@ static int handle_options(const char*** argv, int* argc, int* envchanged)
89 /* 90 /*
90 * Check remaining flags. 91 * Check remaining flags.
91 */ 92 */
92 if (!prefixcmp(cmd, "--exec-path")) { 93 if (!prefixcmp(cmd, CMD_EXEC_PATH)) {
93 cmd += 11; 94 cmd += strlen(CMD_EXEC_PATH);
94 if (*cmd == '=') 95 if (*cmd == '=')
95 perf_set_argv_exec_path(cmd + 1); 96 perf_set_argv_exec_path(cmd + 1);
96 else { 97 else {
@@ -117,8 +118,8 @@ static int handle_options(const char*** argv, int* argc, int* envchanged)
117 (*argv)++; 118 (*argv)++;
118 (*argc)--; 119 (*argc)--;
119 handled++; 120 handled++;
120 } else if (!prefixcmp(cmd, "--perf-dir=")) { 121 } else if (!prefixcmp(cmd, CMD_PERF_DIR)) {
121 setenv(PERF_DIR_ENVIRONMENT, cmd + 10, 1); 122 setenv(PERF_DIR_ENVIRONMENT, cmd + strlen(CMD_PERF_DIR), 1);
122 if (envchanged) 123 if (envchanged)
123 *envchanged = 1; 124 *envchanged = 1;
124 } else if (!strcmp(cmd, "--work-tree")) { 125 } else if (!strcmp(cmd, "--work-tree")) {
@@ -131,8 +132,8 @@ static int handle_options(const char*** argv, int* argc, int* envchanged)
131 *envchanged = 1; 132 *envchanged = 1;
132 (*argv)++; 133 (*argv)++;
133 (*argc)--; 134 (*argc)--;
134 } else if (!prefixcmp(cmd, "--work-tree=")) { 135 } else if (!prefixcmp(cmd, CMD_WORK_TREE)) {
135 setenv(PERF_WORK_TREE_ENVIRONMENT, cmd + 12, 1); 136 setenv(PERF_WORK_TREE_ENVIRONMENT, cmd + strlen(CMD_WORK_TREE), 1);
136 if (envchanged) 137 if (envchanged)
137 *envchanged = 1; 138 *envchanged = 1;
138 } else if (!strcmp(cmd, "--debugfs-dir")) { 139 } else if (!strcmp(cmd, "--debugfs-dir")) {
@@ -146,8 +147,8 @@ static int handle_options(const char*** argv, int* argc, int* envchanged)
146 *envchanged = 1; 147 *envchanged = 1;
147 (*argv)++; 148 (*argv)++;
148 (*argc)--; 149 (*argc)--;
149 } else if (!prefixcmp(cmd, "--debugfs-dir=")) { 150 } else if (!prefixcmp(cmd, CMD_DEBUGFS_DIR)) {
150 strncpy(debugfs_mntpt, cmd + 14, MAXPATHLEN); 151 strncpy(debugfs_mntpt, cmd + strlen(CMD_DEBUGFS_DIR), MAXPATHLEN);
151 debugfs_mntpt[MAXPATHLEN - 1] = '\0'; 152 debugfs_mntpt[MAXPATHLEN - 1] = '\0';
152 if (envchanged) 153 if (envchanged)
153 *envchanged = 1; 154 *envchanged = 1;
@@ -284,17 +285,21 @@ static void handle_internal_command(int argc, const char **argv)
284{ 285{
285 const char *cmd = argv[0]; 286 const char *cmd = argv[0];
286 static struct cmd_struct commands[] = { 287 static struct cmd_struct commands[] = {
287 { "help", cmd_help, 0 }, 288 { "buildid-list", cmd_buildid_list, 0 },
288 { "list", cmd_list, 0 }, 289 { "help", cmd_help, 0 },
289 { "record", cmd_record, 0 }, 290 { "list", cmd_list, 0 },
290 { "report", cmd_report, 0 }, 291 { "record", cmd_record, 0 },
291 { "stat", cmd_stat, 0 }, 292 { "report", cmd_report, 0 },
292 { "timechart", cmd_timechart, 0 }, 293 { "bench", cmd_bench, 0 },
293 { "top", cmd_top, 0 }, 294 { "stat", cmd_stat, 0 },
294 { "annotate", cmd_annotate, 0 }, 295 { "timechart", cmd_timechart, 0 },
295 { "version", cmd_version, 0 }, 296 { "top", cmd_top, 0 },
296 { "trace", cmd_trace, 0 }, 297 { "annotate", cmd_annotate, 0 },
297 { "sched", cmd_sched, 0 }, 298 { "version", cmd_version, 0 },
299 { "trace", cmd_trace, 0 },
300 { "sched", cmd_sched, 0 },
301 { "probe", cmd_probe, 0 },
302 { "kmem", cmd_kmem, 0 },
298 }; 303 };
299 unsigned int i; 304 unsigned int i;
300 static const char ext[] = STRIP_EXTENSION; 305 static const char ext[] = STRIP_EXTENSION;
@@ -382,45 +387,12 @@ static int run_argv(int *argcp, const char ***argv)
382/* mini /proc/mounts parser: searching for "^blah /mount/point debugfs" */ 387/* mini /proc/mounts parser: searching for "^blah /mount/point debugfs" */
383static void get_debugfs_mntpt(void) 388static void get_debugfs_mntpt(void)
384{ 389{
385 FILE *file; 390 const char *path = debugfs_find_mountpoint();
386 char fs_type[100];
387 char debugfs[MAXPATHLEN];
388 391
389 /* 392 if (path)
390 * try the standard location 393 strncpy(debugfs_mntpt, path, sizeof(debugfs_mntpt));
391 */ 394 else
392 if (valid_debugfs_mount("/sys/kernel/debug/") == 0) { 395 debugfs_mntpt[0] = '\0';
393 strcpy(debugfs_mntpt, "/sys/kernel/debug/");
394 return;
395 }
396
397 /*
398 * try the sane location
399 */
400 if (valid_debugfs_mount("/debug/") == 0) {
401 strcpy(debugfs_mntpt, "/debug/");
402 return;
403 }
404
405 /*
406 * give up and parse /proc/mounts
407 */
408 file = fopen("/proc/mounts", "r");
409 if (file == NULL)
410 return;
411
412 while (fscanf(file, "%*s %"
413 STR(MAXPATHLEN)
414 "s %99s %*s %*d %*d\n",
415 debugfs, fs_type) == 2) {
416 if (strcmp(fs_type, "debugfs") == 0)
417 break;
418 }
419 fclose(file);
420 if (strcmp(fs_type, "debugfs") == 0) {
421 strncpy(debugfs_mntpt, debugfs, MAXPATHLEN);
422 debugfs_mntpt[MAXPATHLEN - 1] = '\0';
423 }
424} 396}
425 397
426int main(int argc, const char **argv) 398int main(int argc, const char **argv)
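
One caveat in the rewritten get_debugfs_mntpt(): strncpy() does not NUL-terminate when the source is as long as the buffer, and unlike the removed /proc/mounts parser the new code does not force termination. A small sketch of a copy helper that always terminates; the name is illustrative, not a perf API:

	#include <string.h>

	/* Copy src into a fixed-size buffer, always NUL-terminating it. */
	static void copy_path(char *dst, size_t dst_size, const char *src)
	{
		strncpy(dst, src, dst_size - 1);
		dst[dst_size - 1] = '\0';
	}
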
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 8cc4623afd6f..454d5d55f32d 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -47,6 +47,18 @@
47#define cpu_relax() asm volatile("":::"memory") 47#define cpu_relax() asm volatile("":::"memory")
48#endif 48#endif
49 49
50#ifdef __alpha__
51#include "../../arch/alpha/include/asm/unistd.h"
52#define rmb() asm volatile("mb" ::: "memory")
53#define cpu_relax() asm volatile("" ::: "memory")
54#endif
55
56#ifdef __ia64__
57#include "../../arch/ia64/include/asm/unistd.h"
58#define rmb() asm volatile ("mf" ::: "memory")
59#define cpu_relax() asm volatile ("hint @pause" ::: "memory")
60#endif
61
50#include <time.h> 62#include <time.h>
51#include <unistd.h> 63#include <unistd.h>
52#include <sys/types.h> 64#include <sys/types.h>
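
The new alpha and ia64 blocks supply the same two primitives every other architecture in this header defines: rmb() orders the load of the ring-buffer head against loads of the sample payload, and cpu_relax() is a spin-wait hint. A sketch of the consumer-side pattern they exist for, mirroring mmap_read_head() in builtin-top.c; the __sync_synchronize() fallback is only there to keep the sketch self-contained:

	#include <linux/perf_event.h>

	#ifndef rmb
	#define rmb() __sync_synchronize()	/* stand-in for the per-arch macro */
	#endif

	/* Load the producer position, then fence before reading the payload. */
	static __u64 read_data_head(struct perf_event_mmap_page *pc)
	{
		__u64 head = pc->data_head;

		rmb();
		return head;
	}
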
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c
new file mode 100644
index 000000000000..af78d9a52a7d
--- /dev/null
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c
@@ -0,0 +1,134 @@
1/*
2 * This file was generated automatically by ExtUtils::ParseXS version 2.18_02 from the
3 * contents of Context.xs. Do not edit this file, edit Context.xs instead.
4 *
5 * ANY CHANGES MADE HERE WILL BE LOST!
6 *
7 */
8
9#line 1 "Context.xs"
10/*
11 * Context.xs. XS interfaces for perf trace.
12 *
13 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
14 *
15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation; either version 2 of the License, or
18 * (at your option) any later version.
19 *
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
24 *
25 * You should have received a copy of the GNU General Public License
26 * along with this program; if not, write to the Free Software
27 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28 *
29 */
30
31#include "EXTERN.h"
32#include "perl.h"
33#include "XSUB.h"
34#include "../../../util/trace-event-perl.h"
35
36#ifndef PERL_UNUSED_VAR
37# define PERL_UNUSED_VAR(var) if (0) var = var
38#endif
39
40#line 41 "Context.c"
41
42XS(XS_Perf__Trace__Context_common_pc); /* prototype to pass -Wmissing-prototypes */
43XS(XS_Perf__Trace__Context_common_pc)
44{
45#ifdef dVAR
46 dVAR; dXSARGS;
47#else
48 dXSARGS;
49#endif
50 if (items != 1)
51 Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::common_pc", "context");
52 PERL_UNUSED_VAR(cv); /* -W */
53 {
54 struct scripting_context * context = INT2PTR(struct scripting_context *,SvIV(ST(0)));
55 int RETVAL;
56 dXSTARG;
57
58 RETVAL = common_pc(context);
59 XSprePUSH; PUSHi((IV)RETVAL);
60 }
61 XSRETURN(1);
62}
63
64
65XS(XS_Perf__Trace__Context_common_flags); /* prototype to pass -Wmissing-prototypes */
66XS(XS_Perf__Trace__Context_common_flags)
67{
68#ifdef dVAR
69 dVAR; dXSARGS;
70#else
71 dXSARGS;
72#endif
73 if (items != 1)
74 Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::common_flags", "context");
75 PERL_UNUSED_VAR(cv); /* -W */
76 {
77 struct scripting_context * context = INT2PTR(struct scripting_context *,SvIV(ST(0)));
78 int RETVAL;
79 dXSTARG;
80
81 RETVAL = common_flags(context);
82 XSprePUSH; PUSHi((IV)RETVAL);
83 }
84 XSRETURN(1);
85}
86
87
88XS(XS_Perf__Trace__Context_common_lock_depth); /* prototype to pass -Wmissing-prototypes */
89XS(XS_Perf__Trace__Context_common_lock_depth)
90{
91#ifdef dVAR
92 dVAR; dXSARGS;
93#else
94 dXSARGS;
95#endif
96 if (items != 1)
97 Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::common_lock_depth", "context");
98 PERL_UNUSED_VAR(cv); /* -W */
99 {
100 struct scripting_context * context = INT2PTR(struct scripting_context *,SvIV(ST(0)));
101 int RETVAL;
102 dXSTARG;
103
104 RETVAL = common_lock_depth(context);
105 XSprePUSH; PUSHi((IV)RETVAL);
106 }
107 XSRETURN(1);
108}
109
110#ifdef __cplusplus
111extern "C"
112#endif
113XS(boot_Perf__Trace__Context); /* prototype to pass -Wmissing-prototypes */
114XS(boot_Perf__Trace__Context)
115{
116#ifdef dVAR
117 dVAR; dXSARGS;
118#else
119 dXSARGS;
120#endif
121 const char* file = __FILE__;
122
123 PERL_UNUSED_VAR(cv); /* -W */
124 PERL_UNUSED_VAR(items); /* -W */
125 XS_VERSION_BOOTCHECK ;
126
127 newXSproto("Perf::Trace::Context::common_pc", XS_Perf__Trace__Context_common_pc, file, "$");
128 newXSproto("Perf::Trace::Context::common_flags", XS_Perf__Trace__Context_common_flags, file, "$");
129 newXSproto("Perf::Trace::Context::common_lock_depth", XS_Perf__Trace__Context_common_lock_depth, file, "$");
130 if (PL_unitcheckav)
131 call_list(PL_scopestack_ix, PL_unitcheckav);
132 XSRETURN_YES;
133}
134
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs
new file mode 100644
index 000000000000..fb78006c165e
--- /dev/null
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs
@@ -0,0 +1,41 @@
1/*
2 * Context.xs. XS interfaces for perf trace.
3 *
4 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
21
22#include "EXTERN.h"
23#include "perl.h"
24#include "XSUB.h"
25#include "../../../util/trace-event-perl.h"
26
27MODULE = Perf::Trace::Context PACKAGE = Perf::Trace::Context
28PROTOTYPES: ENABLE
29
30int
31common_pc(context)
32 struct scripting_context * context
33
34int
35common_flags(context)
36 struct scripting_context * context
37
38int
39common_lock_depth(context)
40 struct scripting_context * context
41
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Makefile.PL b/tools/perf/scripts/perl/Perf-Trace-Util/Makefile.PL
new file mode 100644
index 000000000000..decdeb0f6789
--- /dev/null
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/Makefile.PL
@@ -0,0 +1,17 @@
1use 5.010000;
2use ExtUtils::MakeMaker;
3# See lib/ExtUtils/MakeMaker.pm for details of how to influence
4# the contents of the Makefile that is written.
5WriteMakefile(
6 NAME => 'Perf::Trace::Context',
7 VERSION_FROM => 'lib/Perf/Trace/Context.pm', # finds $VERSION
8 PREREQ_PM => {}, # e.g., Module::Name => 1.1
9 ($] >= 5.005 ? ## Add these new keywords supported since 5.005
10 (ABSTRACT_FROM => 'lib/Perf/Trace/Context.pm', # retrieve abstract from module
11 AUTHOR => 'Tom Zanussi <tzanussi@gmail.com>') : ()),
12 LIBS => [''], # e.g., '-lm'
13 DEFINE => '-I ../..', # e.g., '-DHAVE_SOMETHING'
14 INC => '-I.', # e.g., '-I. -I/usr/include/other'
15 # Un-comment this if you add C files to link with later:
16 OBJECT => 'Context.o', # link all the C files too
17);
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/README b/tools/perf/scripts/perl/Perf-Trace-Util/README
new file mode 100644
index 000000000000..9a9707630791
--- /dev/null
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/README
@@ -0,0 +1,59 @@
1Perf-Trace-Util version 0.01
2============================
3
4This module contains utility functions for use with perf trace.
5
6Core.pm and Util.pm are pure Perl modules; Core.pm contains routines
7that the core perf support for Perl calls on and should always be
8'used', while Util.pm contains useful but optional utility functions
9that scripts may want to use. Context.pm contains the Perl->C
10interface that allows scripts to access data in the embedding perf
11executable; scripts wishing to do that should 'use Context.pm'.
12
13The Perl->C perf interface is completely driven by Context.xs. If you
14want to add new Perl functions that end up accessing C data in the
15perf executable, you add descriptions of the new functions here.
16scripting_context is a pointer to the perf data in the perf executable
17that you want to access - it's passed as the second parameter,
18$context, to all handler functions.
19
20After you do that:
21
22 perl Makefile.PL # to create a Makefile for the next step
23 make # to create Context.c
24
25 edit Context.c to add const to the char* file = __FILE__ line in
26 XS(boot_Perf__Trace__Context) to silence a warning/error.
27
28 You can delete the Makefile, the object files and anything else
29 that was generated, e.g. blib and the shared library, except of
30 course for Context.c.
31
32 You should then be able to run the normal perf make as usual.
33
34INSTALLATION
35
36Building perf with perf trace Perl scripting should install this
37module in the right place.
38
39You should make sure libperl and ExtUtils/Embed.pm are installed first
40e.g. apt-get install libperl-dev or yum install perl-ExtUtils-Embed.
41
42DEPENDENCIES
43
44This module requires these other modules and libraries:
45
46 None
47
48COPYRIGHT AND LICENCE
49
50Copyright (C) 2009 by Tom Zanussi <tzanussi@gmail.com>
51
52This library is free software; you can redistribute it and/or modify
53it under the same terms as Perl itself, either Perl version 5.10.0 or,
54at your option, any later version of Perl 5 you may have available.
55
56Alternatively, this software may be distributed under the terms of the
57GNU General Public License ("GPL") version 2 as published by the Free
58Software Foundation.
59
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm
new file mode 100644
index 000000000000..6c7f3659cb17
--- /dev/null
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm
@@ -0,0 +1,55 @@
1package Perf::Trace::Context;
2
3use 5.010000;
4use strict;
5use warnings;
6
7require Exporter;
8
9our @ISA = qw(Exporter);
10
11our %EXPORT_TAGS = ( 'all' => [ qw(
12) ] );
13
14our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
15
16our @EXPORT = qw(
17 common_pc common_flags common_lock_depth
18);
19
20our $VERSION = '0.01';
21
22require XSLoader;
23XSLoader::load('Perf::Trace::Context', $VERSION);
24
251;
26__END__
27=head1 NAME
28
29Perf::Trace::Context - Perl extension for accessing functions in perf.
30
31=head1 SYNOPSIS
32
33 use Perf::Trace::Context;
34
35=head1 SEE ALSO
36
37Perf (trace) documentation
38
39=head1 AUTHOR
40
41Tom Zanussi, E<lt>tzanussi@gmail.comE<gt>
42
43=head1 COPYRIGHT AND LICENSE
44
45Copyright (C) 2009 by Tom Zanussi
46
47This library is free software; you can redistribute it and/or modify
48it under the same terms as Perl itself, either Perl version 5.10.0 or,
49at your option, any later version of Perl 5 you may have available.
50
51Alternatively, this software may be distributed under the terms of the
52GNU General Public License ("GPL") version 2 as published by the Free
53Software Foundation.
54
55=cut
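
The three exported functions each take the $context passed as the second
handler parameter. A minimal sketch of how they combine with Core.pm's
trace_flag_str() (a fuller version appears in check-perf-trace.pl later
in this patch):

    use Perf::Trace::Context;
    use Perf::Trace::Core;

    # print the common fields that aren't passed as handler params
    sub print_uncommon
    {
        my ($context) = @_;

        printf("preempt_count=%d, flags=%s, lock_depth=%d\n",
               common_pc($context),
               trace_flag_str(common_flags($context)),
               common_lock_depth($context));
    }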
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm
new file mode 100644
index 000000000000..9df376a9f629
--- /dev/null
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm
@@ -0,0 +1,192 @@
1package Perf::Trace::Core;
2
3use 5.010000;
4use strict;
5use warnings;
6
7require Exporter;
8
9our @ISA = qw(Exporter);
10
11our %EXPORT_TAGS = ( 'all' => [ qw(
12) ] );
13
14our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
15
16our @EXPORT = qw(
17define_flag_field define_flag_value flag_str dump_flag_fields
18define_symbolic_field define_symbolic_value symbol_str dump_symbolic_fields
19trace_flag_str
20);
21
22our $VERSION = '0.01';
23
24my %trace_flags = (0x00 => "NONE",
25 0x01 => "IRQS_OFF",
26 0x02 => "IRQS_NOSUPPORT",
27 0x04 => "NEED_RESCHED",
28 0x08 => "HARDIRQ",
29 0x10 => "SOFTIRQ");
30
31sub trace_flag_str
32{
33 my ($value) = @_;
34
35 my $string = "";
36
37 my $print_delim = 0;
38
39 foreach my $idx (sort {$a <=> $b} keys %trace_flags) {
40 if (!$value && !$idx) {
41 $string .= "NONE";
42 last;
43 }
44
45 if ($idx && ($value & $idx) == $idx) {
46 if ($print_delim) {
47 $string .= " | ";
48 }
49 $string .= "$trace_flags{$idx}";
50 $print_delim = 1;
51 $value &= ~$idx;
52 }
53 }
54
55 return $string;
56}
57
58my %flag_fields;
59my %symbolic_fields;
60
61sub flag_str
62{
63 my ($event_name, $field_name, $value) = @_;
64
65 my $string = "";
66
67 if ($flag_fields{$event_name}{$field_name}) {
68 my $print_delim = 0;
69 foreach my $idx (sort {$a <=> $b} keys %{$flag_fields{$event_name}{$field_name}{"values"}}) {
70 if (!$value && !$idx) {
71 $string .= "$flag_fields{$event_name}{$field_name}{'values'}{$idx}";
72 last;
73 }
74 if ($idx && ($value & $idx) == $idx) {
75 if ($print_delim && $flag_fields{$event_name}{$field_name}{'delim'}) {
76 $string .= " $flag_fields{$event_name}{$field_name}{'delim'} ";
77 }
78 $string .= "$flag_fields{$event_name}{$field_name}{'values'}{$idx}";
79 $print_delim = 1;
80 $value &= ~$idx;
81 }
82 }
83 }
84
85 return $string;
86}
87
88sub define_flag_field
89{
90 my ($event_name, $field_name, $delim) = @_;
91
92 $flag_fields{$event_name}{$field_name}{"delim"} = $delim;
93}
94
95sub define_flag_value
96{
97 my ($event_name, $field_name, $value, $field_str) = @_;
98
99 $flag_fields{$event_name}{$field_name}{"values"}{$value} = $field_str;
100}
101
102sub dump_flag_fields
103{
104 for my $event (keys %flag_fields) {
105 print "event $event:\n";
106 for my $field (keys %{$flag_fields{$event}}) {
107 print " field: $field:\n";
108 print " delim: $flag_fields{$event}{$field}{'delim'}\n";
109 foreach my $idx (sort {$a <=> $b} keys %{$flag_fields{$event}{$field}{"values"}}) {
110 print " value $idx: $flag_fields{$event}{$field}{'values'}{$idx}\n";
111 }
112 }
113 }
114}
115
116sub symbol_str
117{
118 my ($event_name, $field_name, $value) = @_;
119
120 if ($symbolic_fields{$event_name}{$field_name}) {
121 foreach my $idx (sort {$a <=> $b} keys %{$symbolic_fields{$event_name}{$field_name}{"values"}}) {
122 if (!$value && !$idx) {
123 return "$symbolic_fields{$event_name}{$field_name}{'values'}{$idx}";
125 }
126 if ($value == $idx) {
127 return "$symbolic_fields{$event_name}{$field_name}{'values'}{$idx}";
128 }
129 }
130 }
131
132 return undef;
133}
134
135sub define_symbolic_field
136{
137 my ($event_name, $field_name) = @_;
138
139 # nothing to do, really
140}
141
142sub define_symbolic_value
143{
144 my ($event_name, $field_name, $value, $field_str) = @_;
145
146 $symbolic_fields{$event_name}{$field_name}{"values"}{$value} = $field_str;
147}
148
149sub dump_symbolic_fields
150{
151 for my $event (keys %symbolic_fields) {
152 print "event $event:\n";
153 for my $field (keys %{$symbolic_fields{$event}}) {
154 print " field: $field:\n";
155 foreach my $idx (sort {$a <=> $b} keys %{$symbolic_fields{$event}{$field}{"values"}}) {
156 print " value $idx: $symbolic_fields{$event}{$field}{'values'}{$idx}\n";
157 }
158 }
159 }
160}
161
1621;
163__END__
164=head1 NAME
165
166Perf::Trace::Core - Perl extension for perf trace
167
168=head1 SYNOPSIS
169
170 use Perf::Trace::Core;
171
172=head1 SEE ALSO
173
174Perf (trace) documentation
175
176=head1 AUTHOR
177
178Tom Zanussi, E<lt>tzanussi@gmail.comE<gt>
179
180=head1 COPYRIGHT AND LICENSE
181
182Copyright (C) 2009 by Tom Zanussi
183
184This library is free software; you can redistribute it and/or modify
185it under the same terms as Perl itself, either Perl version 5.10.0 or,
186at your option, any later version of Perl 5 you may have available.
187
188Alternatively, this software may be distributed under the terms of the
189GNU General Public License ("GPL") version 2 as published by the Free
190Software Foundation.
191
192=cut
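
Since Core.pm is pure Perl, the flag machinery can be exercised outside
perf as well. A small self-contained sketch; the event name (demo::open),
the field name and the bit values are made up for illustration:

    use lib "./Perf-Trace-Util/lib";    # adjust to where the module lives
    use Perf::Trace::Core;

    # describe the 'flags' field of a hypothetical demo::open event
    define_flag_field("demo::open", "flags", "|");
    define_flag_value("demo::open", "flags", 0x1, "READ");
    define_flag_value("demo::open", "flags", 0x2, "WRITE");
    define_flag_value("demo::open", "flags", 0x4, "APPEND");

    print flag_str("demo::open", "flags", 0x3), "\n";  # READ | WRITE
    dump_flag_fields();                 # dump the tables for debugging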
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
new file mode 100644
index 000000000000..052f132ced24
--- /dev/null
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
@@ -0,0 +1,88 @@
1package Perf::Trace::Util;
2
3use 5.010000;
4use strict;
5use warnings;
6
7require Exporter;
8
9our @ISA = qw(Exporter);
10
11our %EXPORT_TAGS = ( 'all' => [ qw(
12) ] );
13
14our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
15
16our @EXPORT = qw(
17avg nsecs nsecs_secs nsecs_nsecs nsecs_str
18);
19
20our $VERSION = '0.01';
21
22sub avg
23{
24 my ($total, $n) = @_;
25
26 return $total / $n;
27}
28
29my $NSECS_PER_SEC = 1000000000;
30
31sub nsecs
32{
33 my ($secs, $nsecs) = @_;
34
35 return $secs * $NSECS_PER_SEC + $nsecs;
36}
37
38sub nsecs_secs {
39 my ($nsecs) = @_;
40
41 return $nsecs / $NSECS_PER_SEC;
42}
43
44sub nsecs_nsecs {
45 my ($nsecs) = @_;
46
47 return $nsecs % $NSECS_PER_SEC;
48}
49
50sub nsecs_str {
51 my ($nsecs) = @_;
52
53 my $str = sprintf("%5u.%09u", nsecs_secs($nsecs), nsecs_nsecs($nsecs));
54
55 return $str;
56}
57
581;
59__END__
60=head1 NAME
61
62Perf::Trace::Util - Perl extension for perf trace
63
64=head1 SYNOPSIS
65
66 use Perf::Trace::Util;
67
68=head1 SEE ALSO
69
70Perf (trace) documentation
71
72=head1 AUTHOR
73
74Tom Zanussi, E<lt>tzanussi@gmail.comE<gt>
75
76=head1 COPYRIGHT AND LICENSE
77
78Copyright (C) 2009 by Tom Zanussi
79
80This library is free software; you can redistribute it and/or modify
81it under the same terms as Perl itself, either Perl version 5.10.0 or,
82at your option, any later version of Perl 5 you may have available.
83
84Alternatively, this software may be distributed under the terms of the
85GNU General Public License ("GPL") version 2 as published by the Free
86Software Foundation.
87
88=cut
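
Util.pm is likewise pure Perl, so a quick sanity check of the time
helpers can be run standalone; the values below are arbitrary:

    use lib "./Perf-Trace-Util/lib";    # adjust to where the module lives
    use Perf::Trace::Util;

    my $t0 = nsecs(5, 500_000_000);     # 5.5s expressed in nanoseconds
    my $t1 = nsecs(7, 250_000_000);     # 7.25s

    printf("delta: %u ns\n", $t1 - $t0);              # 1750000000
    printf("avg of two: %u ns\n", avg($t0 + $t1, 2));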
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/typemap b/tools/perf/scripts/perl/Perf-Trace-Util/typemap
new file mode 100644
index 000000000000..840836804aa7
--- /dev/null
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/typemap
@@ -0,0 +1 @@
struct scripting_context * T_PTR
diff --git a/tools/perf/scripts/perl/bin/check-perf-trace-record b/tools/perf/scripts/perl/bin/check-perf-trace-record
new file mode 100644
index 000000000000..c7ec5de2f535
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/check-perf-trace-record
@@ -0,0 +1,7 @@
1#!/bin/bash
2perf record -c 1 -f -a -M -R -e kmem:kmalloc -e irq:softirq_entry
3
4
5
6
7
diff --git a/tools/perf/scripts/perl/bin/check-perf-trace-report b/tools/perf/scripts/perl/bin/check-perf-trace-report
new file mode 100644
index 000000000000..89948b015020
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/check-perf-trace-report
@@ -0,0 +1,5 @@
1#!/bin/bash
2perf trace -s ~/libexec/perf-core/scripts/perl/check-perf-trace.pl
3
4
5
diff --git a/tools/perf/scripts/perl/bin/rw-by-file-record b/tools/perf/scripts/perl/bin/rw-by-file-record
new file mode 100644
index 000000000000..b25056ebf963
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/rw-by-file-record
@@ -0,0 +1,2 @@
1#!/bin/bash
2perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_enter_write
diff --git a/tools/perf/scripts/perl/bin/rw-by-file-report b/tools/perf/scripts/perl/bin/rw-by-file-report
new file mode 100644
index 000000000000..f5dcf9cb5bd2
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/rw-by-file-report
@@ -0,0 +1,5 @@
1#!/bin/bash
2perf trace -s ~/libexec/perf-core/scripts/perl/rw-by-file.pl
3
4
5
diff --git a/tools/perf/scripts/perl/bin/rw-by-pid-record b/tools/perf/scripts/perl/bin/rw-by-pid-record
new file mode 100644
index 000000000000..8903979c5b6c
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/rw-by-pid-record
@@ -0,0 +1,2 @@
1#!/bin/bash
2perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write
diff --git a/tools/perf/scripts/perl/bin/rw-by-pid-report b/tools/perf/scripts/perl/bin/rw-by-pid-report
new file mode 100644
index 000000000000..cea16f78a3a2
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/rw-by-pid-report
@@ -0,0 +1,5 @@
1#!/bin/bash
2perf trace -s ~/libexec/perf-core/scripts/perl/rw-by-pid.pl
3
4
5
diff --git a/tools/perf/scripts/perl/bin/wakeup-latency-record b/tools/perf/scripts/perl/bin/wakeup-latency-record
new file mode 100644
index 000000000000..6abedda911a4
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/wakeup-latency-record
@@ -0,0 +1,6 @@
1#!/bin/bash
2perf record -c 1 -f -a -M -R -e sched:sched_switch -e sched:sched_wakeup
3
4
5
6
diff --git a/tools/perf/scripts/perl/bin/wakeup-latency-report b/tools/perf/scripts/perl/bin/wakeup-latency-report
new file mode 100644
index 000000000000..85769dc456eb
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/wakeup-latency-report
@@ -0,0 +1,5 @@
1#!/bin/bash
2perf trace -s ~/libexec/perf-core/scripts/perl/wakeup-latency.pl
3
4
5
diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-record b/tools/perf/scripts/perl/bin/workqueue-stats-record
new file mode 100644
index 000000000000..fce6637b19ba
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/workqueue-stats-record
@@ -0,0 +1,2 @@
1#!/bin/bash
2perf record -c 1 -f -a -M -R -e workqueue:workqueue_creation -e workqueue:workqueue_destruction -e workqueue:workqueue_execution -e workqueue:workqueue_insertion
diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-report b/tools/perf/scripts/perl/bin/workqueue-stats-report
new file mode 100644
index 000000000000..aa68435be926
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/workqueue-stats-report
@@ -0,0 +1,6 @@
1#!/bin/bash
2perf trace -s ~/libexec/perf-core/scripts/perl/workqueue-stats.pl
3
4
5
6
diff --git a/tools/perf/scripts/perl/check-perf-trace.pl b/tools/perf/scripts/perl/check-perf-trace.pl
new file mode 100644
index 000000000000..4e7dc0a407a5
--- /dev/null
+++ b/tools/perf/scripts/perl/check-perf-trace.pl
@@ -0,0 +1,106 @@
1# perf trace event handlers, generated by perf trace -g perl
2# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
3# Licensed under the terms of the GNU GPL License version 2
4
5# This script tests basic functionality such as flag and symbol
6# strings, common_xxx() callbacks into perf, begin/end handlers,
7# unhandled events, etc. Basically, if this script runs successfully
8# and displays the expected results, Perl scripting support should be OK.
9
10use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
11use lib "./Perf-Trace-Util/lib";
12use Perf::Trace::Core;
13use Perf::Trace::Context;
14use Perf::Trace::Util;
15
16sub trace_begin
17{
18 print "trace_begin\n";
19}
20
21sub trace_end
22{
23 print "trace_end\n";
24
25 print_unhandled();
26}
27
28sub irq::softirq_entry
29{
30 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
31 $common_pid, $common_comm,
32 $vec) = @_;
33
34 print_header($event_name, $common_cpu, $common_secs, $common_nsecs,
35 $common_pid, $common_comm);
36
37 print_uncommon($context);
38
39 printf("vec=%s\n",
40 symbol_str("irq::softirq_entry", "vec", $vec));
41}
42
43sub kmem::kmalloc
44{
45 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
46 $common_pid, $common_comm,
47 $call_site, $ptr, $bytes_req, $bytes_alloc,
48 $gfp_flags) = @_;
49
50 print_header($event_name, $common_cpu, $common_secs, $common_nsecs,
51 $common_pid, $common_comm);
52
53 print_uncommon($context);
54
55 printf("call_site=%p, ptr=%p, bytes_req=%u, bytes_alloc=%u, ".
56 "gfp_flags=%s\n",
57 $call_site, $ptr, $bytes_req, $bytes_alloc,
58
59 flag_str("kmem::kmalloc", "gfp_flags", $gfp_flags));
60}
61
62# print trace fields not included in handler args
63sub print_uncommon
64{
65 my ($context) = @_;
66
67 printf("common_preempt_count=%d, common_flags=%s, common_lock_depth=%d, ",
68 common_pc($context), trace_flag_str(common_flags($context)),
69 common_lock_depth($context));
70
71}
72
73my %unhandled;
74
75sub print_unhandled
76{
77 if ((scalar keys %unhandled) == 0) {
78 return;
79 }
80
81 print "\nunhandled events:\n\n";
82
83 printf("%-40s %10s\n", "event", "count");
84 printf("%-40s %10s\n", "----------------------------------------",
85 "-----------");
86
87 foreach my $event_name (keys %unhandled) {
88 printf("%-40s %10d\n", $event_name, $unhandled{$event_name});
89 }
90}
91
92sub trace_unhandled
93{
94 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
95 $common_pid, $common_comm) = @_;
96
97 $unhandled{$event_name}++;
98}
99
100sub print_header
101{
102 my ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_;
103
104 printf("%-20s %5u %05u.%09u %8u %-20s ",
105 $event_name, $cpu, $secs, $nsecs, $pid, $comm);
106}
diff --git a/tools/perf/scripts/perl/rw-by-file.pl b/tools/perf/scripts/perl/rw-by-file.pl
new file mode 100644
index 000000000000..61f91561d848
--- /dev/null
+++ b/tools/perf/scripts/perl/rw-by-file.pl
@@ -0,0 +1,105 @@
1#!/usr/bin/perl -w
2# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
3# Licensed under the terms of the GNU GPL License version 2
4
5# Display r/w activity for files read/written to for a given program
6
7# The common_* event handler fields are the most useful fields common to
8# all events. They don't necessarily correspond to the 'common_*' fields
9# in the event format files. Those fields not available as handler params
10# can be retrieved via context functions of the form common_*($context).
11
12use 5.010000;
13use strict;
14use warnings;
15
16use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
17use lib "./Perf-Trace-Util/lib";
18use Perf::Trace::Core;
19use Perf::Trace::Util;
20
21# change this to the comm of the program you're interested in
22my $for_comm = "perf";
23
24my %reads;
25my %writes;
26
27sub syscalls::sys_enter_read
28{
29 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
30 $common_pid, $common_comm, $nr, $fd, $buf, $count) = @_;
31
32 if ($common_comm eq $for_comm) {
33 $reads{$fd}{bytes_requested} += $count;
34 $reads{$fd}{total_reads}++;
35 }
36}
37
38sub syscalls::sys_enter_write
39{
40 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
41 $common_pid, $common_comm, $nr, $fd, $buf, $count) = @_;
42
43 if ($common_comm eq $for_comm) {
44 $writes{$fd}{bytes_written} += $count;
45 $writes{$fd}{total_writes}++;
46 }
47}
48
49sub trace_end
50{
51 printf("file read counts for $for_comm:\n\n");
52
53 printf("%6s %10s %10s\n", "fd", "# reads", "bytes_requested");
54 printf("%6s %10s %10s\n", "------", "----------", "-----------");
55
56 foreach my $fd (sort {$reads{$b}{bytes_requested} <=>
57 $reads{$a}{bytes_requested}} keys %reads) {
58 my $total_reads = $reads{$fd}{total_reads};
59 my $bytes_requested = $reads{$fd}{bytes_requested};
60 printf("%6u %10u %10u\n", $fd, $total_reads, $bytes_requested);
61 }
62
63 printf("\nfile write counts for $for_comm:\n\n");
64
65 printf("%6s %10s %10s\n", "fd", "# writes", "bytes_written");
66 printf("%6s %10s %10s\n", "------", "----------", "-----------");
67
68 foreach my $fd (sort {$writes{$b}{bytes_written} <=>
69 $writes{$a}{bytes_written}} keys %writes) {
70 my $total_writes = $writes{$fd}{total_writes};
71 my $bytes_written = $writes{$fd}{bytes_written};
72 printf("%6u %10u %10u\n", $fd, $total_writes, $bytes_written);
73 }
74
75 print_unhandled();
76}
77
78my %unhandled;
79
80sub print_unhandled
81{
82 if ((scalar keys %unhandled) == 0) {
83 return;
84 }
85
86 print "\nunhandled events:\n\n";
87
88 printf("%-40s %10s\n", "event", "count");
89 printf("%-40s %10s\n", "----------------------------------------",
90 "-----------");
91
92 foreach my $event_name (keys %unhandled) {
93 printf("%-40s %10d\n", $event_name, $unhandled{$event_name});
94 }
95}
96
97sub trace_unhandled
98{
99 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
100 $common_pid, $common_comm) = @_;
101
102 $unhandled{$event_name}++;
103}
104
105
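
Since the script hard-codes $for_comm, one hypothetical tweak (not part
of this patch) is to take the comm from an environment variable so the
report can be reused without editing the file; RW_COMM is a made-up name:

    # e.g.: RW_COMM=firefox perf trace -s rw-by-file.pl
    my $for_comm = $ENV{'RW_COMM'} // "perf";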
diff --git a/tools/perf/scripts/perl/rw-by-pid.pl b/tools/perf/scripts/perl/rw-by-pid.pl
new file mode 100644
index 000000000000..da601fae1a00
--- /dev/null
+++ b/tools/perf/scripts/perl/rw-by-pid.pl
@@ -0,0 +1,170 @@
1#!/usr/bin/perl -w
2# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
3# Licensed under the terms of the GNU GPL License version 2
4
5# Display r/w activity for all processes
6
7# The common_* event handler fields are the most useful fields common to
8# all events. They don't necessarily correspond to the 'common_*' fields
9# in the event format files. Those fields not available as handler params
10# can be retrieved via context functions of the form common_*($context).
11
12use 5.010000;
13use strict;
14use warnings;
15
16use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
17use lib "./Perf-Trace-Util/lib";
18use Perf::Trace::Core;
19use Perf::Trace::Util;
20
21my %reads;
22my %writes;
23
24sub syscalls::sys_exit_read
25{
26 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
27 $common_pid, $common_comm,
28 $nr, $ret) = @_;
29
30 if ($ret > 0) {
31 $reads{$common_pid}{bytes_read} += $ret;
32 } else {
33 if (!defined ($reads{$common_pid}{bytes_read})) {
34 $reads{$common_pid}{bytes_read} = 0;
35 }
36 $reads{$common_pid}{errors}{$ret}++;
37 }
38}
39
40sub syscalls::sys_enter_read
41{
42 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
43 $common_pid, $common_comm,
44 $nr, $fd, $buf, $count) = @_;
45
46 $reads{$common_pid}{bytes_requested} += $count;
47 $reads{$common_pid}{total_reads}++;
48 $reads{$common_pid}{comm} = $common_comm;
49}
50
51sub syscalls::sys_exit_write
52{
53 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
54 $common_pid, $common_comm,
55 $nr, $ret) = @_;
56
57 if ($ret <= 0) {
58 $writes{$common_pid}{errors}{$ret}++;
59 }
60}
61
62sub syscalls::sys_enter_write
63{
64 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
65 $common_pid, $common_comm,
66 $nr, $fd, $buf, $count) = @_;
67
68 $writes{$common_pid}{bytes_written} += $count;
69 $writes{$common_pid}{total_writes}++;
70 $writes{$common_pid}{comm} = $common_comm;
71}
72
73sub trace_end
74{
75 printf("read counts by pid:\n\n");
76
77 printf("%6s %20s %10s %10s %10s\n", "pid", "comm",
78 "# reads", "bytes_requested", "bytes_read");
79 printf("%6s %-20s %10s %10s %10s\n", "------", "--------------------",
80 "-----------", "----------", "----------");
81
82 foreach my $pid (sort {$reads{$b}{bytes_read} <=>
83 $reads{$a}{bytes_read}} keys %reads) {
84 my $comm = $reads{$pid}{comm};
85 my $total_reads = $reads{$pid}{total_reads};
86 my $bytes_requested = $reads{$pid}{bytes_requested};
87 my $bytes_read = $reads{$pid}{bytes_read};
88
89 printf("%6s %-20s %10s %10s %10s\n", $pid, $comm,
90 $total_reads, $bytes_requested, $bytes_read);
91 }
92
93 printf("\nfailed reads by pid:\n\n");
94
95 printf("%6s %20s %6s %10s\n", "pid", "comm", "error #", "# errors");
96 printf("%6s %20s %6s %10s\n", "------", "--------------------",
97 "------", "----------");
98
99 foreach my $pid (keys %reads) {
100 my $comm = $reads{$pid}{comm};
101 foreach my $err (sort {$a <=> $b}
102 keys %{$reads{$pid}{errors}}) {
103 my $errors = $reads{$pid}{errors}{$err};
104
105 printf("%6d %-20s %6d %10s\n", $pid, $comm, $err, $errors);
106 }
107 }
108
109 printf("\nwrite counts by pid:\n\n");
110
111 printf("%6s %20s %10s %10s\n", "pid", "comm",
112 "# writes", "bytes_written");
113 printf("%6s %-20s %10s %10s\n", "------", "--------------------",
114 "-----------", "----------");
115
116 foreach my $pid (sort {$writes{$b}{bytes_written} <=>
117 $writes{$a}{bytes_written}} keys %writes) {
118 my $comm = $writes{$pid}{comm};
119 my $total_writes = $writes{$pid}{total_writes};
120 my $bytes_written = $writes{$pid}{bytes_written};
121
122 printf("%6s %-20s %10s %10s\n", $pid, $comm,
123 $total_writes, $bytes_written);
124 }
125
126 printf("\nfailed writes by pid:\n\n");
127
128 printf("%6s %20s %6s %10s\n", "pid", "comm", "error #", "# errors");
129 printf("%6s %20s %6s %10s\n", "------", "--------------------",
130 "------", "----------");
131
132 foreach my $pid (keys %writes) {
133 my $comm = $writes{$pid}{comm};
134 foreach my $err (sort {$a <=> $b}
135 keys %{$writes{$pid}{errors}}) {
136 my $errors = $writes{$pid}{errors}{$err};
137
138 printf("%6d %-20s %6d %10s\n", $pid, $comm, $err, $errors);
139 }
140 }
141
142 print_unhandled();
143}
144
145my %unhandled;
146
147sub print_unhandled
148{
149 if ((scalar keys %unhandled) == 0) {
150 return;
151 }
152
153 print "\nunhandled events:\n\n";
154
155 printf("%-40s %10s\n", "event", "count");
156 printf("%-40s %10s\n", "----------------------------------------",
157 "-----------");
158
159 foreach my $event_name (keys %unhandled) {
160 printf("%-40s %10d\n", $event_name, $unhandled{$event_name});
161 }
162}
163
164sub trace_unhandled
165{
166 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
167 $common_pid, $common_comm) = @_;
168
169 $unhandled{$event_name}++;
170}
diff --git a/tools/perf/scripts/perl/wakeup-latency.pl b/tools/perf/scripts/perl/wakeup-latency.pl
new file mode 100644
index 000000000000..ed58ef284e23
--- /dev/null
+++ b/tools/perf/scripts/perl/wakeup-latency.pl
@@ -0,0 +1,103 @@
1#!/usr/bin/perl -w
2# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
3# Licensed under the terms of the GNU GPL License version 2
4
5# Display avg/min/max wakeup latency
6
7# The common_* event handler fields are the most useful fields common to
8# all events. They don't necessarily correspond to the 'common_*' fields
9# in the event format files. Those fields not available as handler params
10# can be retrieved via context functions of the form common_*($context).
11
12use 5.010000;
13use strict;
14use warnings;
15
16use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
17use lib "./Perf-Trace-Util/lib";
18use Perf::Trace::Core;
19use Perf::Trace::Util;
20
21my %last_wakeup;
22
23my $max_wakeup_latency;
24my $min_wakeup_latency;
25my $total_wakeup_latency;
26my $total_wakeups;
27
28sub sched::sched_switch
29{
30 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
31 $common_pid, $common_comm,
32 $prev_comm, $prev_pid, $prev_prio, $prev_state, $next_comm, $next_pid,
33 $next_prio) = @_;
34
35 my $wakeup_ts = $last_wakeup{$common_cpu}{ts};
36 if ($wakeup_ts) {
37 my $switch_ts = nsecs($common_secs, $common_nsecs);
38 my $wakeup_latency = $switch_ts - $wakeup_ts;
39 if ($wakeup_latency > $max_wakeup_latency) {
40 $max_wakeup_latency = $wakeup_latency;
41 }
42 if ($wakeup_latency < $min_wakeup_latency) {
43 $min_wakeup_latency = $wakeup_latency;
44 }
45 $total_wakeup_latency += $wakeup_latency;
46 $total_wakeups++;
47 }
48 $last_wakeup{$common_cpu}{ts} = 0;
49}
50
51sub sched::sched_wakeup
52{
53 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
54 $common_pid, $common_comm,
55 $comm, $pid, $prio, $success, $target_cpu) = @_;
56
57 $last_wakeup{$target_cpu}{ts} = nsecs($common_secs, $common_nsecs);
58}
59
60sub trace_begin
61{
62 $min_wakeup_latency = 1000000000;
63 $max_wakeup_latency = 0;
64}
65
66sub trace_end
67{
68 printf("wakeup_latency stats:\n\n");
69 print "total_wakeups: $total_wakeups\n";
70 printf("avg_wakeup_latency (ns): %u\n",
71 avg($total_wakeup_latency, $total_wakeups));
72 printf("min_wakeup_latency (ns): %u\n", $min_wakeup_latency);
73 printf("max_wakeup_latency (ns): %u\n", $max_wakeup_latency);
74
75 print_unhandled();
76}
77
78my %unhandled;
79
80sub print_unhandled
81{
82 if ((scalar keys %unhandled) == 0) {
83 return;
84 }
85
86 print "\nunhandled events:\n\n";
87
88 printf("%-40s %10s\n", "event", "count");
89 printf("%-40s %10s\n", "----------------------------------------",
90 "-----------");
91
92 foreach my $event_name (keys %unhandled) {
93 printf("%-40s %10d\n", $event_name, $unhandled{$event_name});
94 }
95}
96
97sub trace_unhandled
98{
99 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
100 $common_pid, $common_comm) = @_;
101
102 $unhandled{$event_name}++;
103}
diff --git a/tools/perf/scripts/perl/workqueue-stats.pl b/tools/perf/scripts/perl/workqueue-stats.pl
new file mode 100644
index 000000000000..511302c8a494
--- /dev/null
+++ b/tools/perf/scripts/perl/workqueue-stats.pl
@@ -0,0 +1,129 @@
1#!/usr/bin/perl -w
2# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
3# Licensed under the terms of the GNU GPL License version 2
4
5# Displays workqueue stats
6#
7# Usage:
8#
9# perf record -c 1 -f -a -R -e workqueue:workqueue_creation -e
10# workqueue:workqueue_destruction -e workqueue:workqueue_execution
11# -e workqueue:workqueue_insertion
12#
13# perf trace -p -s tools/perf/scripts/perl/workqueue-stats.pl
14
15use 5.010000;
16use strict;
17use warnings;
18
19use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
20use lib "./Perf-Trace-Util/lib";
21use Perf::Trace::Core;
22use Perf::Trace::Util;
23
24my @cpus;
25
26sub workqueue::workqueue_destruction
27{
28 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
29 $common_pid, $common_comm,
30 $thread_comm, $thread_pid) = @_;
31
32 $cpus[$common_cpu]{$thread_pid}{destroyed}++;
33 $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm;
34}
35
36sub workqueue::workqueue_creation
37{
38 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
39 $common_pid, $common_comm,
40 $thread_comm, $thread_pid, $cpu) = @_;
41
42 $cpus[$common_cpu]{$thread_pid}{created}++;
43 $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm;
44}
45
46sub workqueue::workqueue_execution
47{
48 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
49 $common_pid, $common_comm,
50 $thread_comm, $thread_pid, $func) = @_;
51
52 $cpus[$common_cpu]{$thread_pid}{executed}++;
53 $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm;
54}
55
56sub workqueue::workqueue_insertion
57{
58 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
59 $common_pid, $common_comm,
60 $thread_comm, $thread_pid, $func) = @_;
61
62 $cpus[$common_cpu]{$thread_pid}{inserted}++;
63 $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm;
64}
65
66sub trace_end
67{
68 print "workqueue work stats:\n\n";
69 my $cpu = 0;
70 printf("%3s %6s %6s\t%-20s\n", "cpu", "ins", "exec", "name");
71 printf("%3s %6s %6s\t%-20s\n", "---", "---", "----", "----");
72 foreach my $pidhash (@cpus) {
73 while ((my $pid, my $wqhash) = each %$pidhash) {
74 my $ins = $$wqhash{'inserted'};
75 my $exe = $$wqhash{'executed'};
76 my $comm = $$wqhash{'comm'};
77 if ($ins || $exe) {
78 printf("%3u %6u %6u\t%-20s\n", $cpu, $ins, $exe, $comm);
79 }
80 }
81 $cpu++;
82 }
83
84 $cpu = 0;
85 print "\nworkqueue lifecycle stats:\n\n";
86 printf("%3s %6s %6s\t%-20s\n", "cpu", "created", "destroyed", "name");
87 printf("%3s %6s %6s\t%-20s\n", "---", "-------", "---------", "----");
88 foreach my $pidhash (@cpus) {
89 while ((my $pid, my $wqhash) = each %$pidhash) {
90 my $created = $$wqhash{'created'};
91 my $destroyed = $$wqhash{'destroyed'};
92 my $comm = $$wqhash{'comm'};
93 if ($created || $destroyed) {
94 printf("%3u %6u %6u\t%-20s\n", $cpu, $created, $destroyed,
95 $comm);
96 }
97 }
98 $cpu++;
99 }
100
101 print_unhandled();
102}
103
104my %unhandled;
105
106sub print_unhandled
107{
108 if ((scalar keys %unhandled) == 0) {
109 return;
110 }
111
112 print "\nunhandled events:\n\n";
113
114 printf("%-40s %10s\n", "event", "count");
115 printf("%-40s %10s\n", "----------------------------------------",
116 "-----------");
117
118 foreach my $event_name (keys %unhandled) {
119 printf("%-40s %10d\n", $event_name, $unhandled{$event_name});
120 }
121}
122
123sub trace_unhandled
124{
125 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
126 $common_pid, $common_comm) = @_;
127
128 $unhandled{$event_name}++;
129}
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index 6f8ea9d210b6..918eb376abe3 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -1,10 +1,15 @@
1#ifndef CACHE_H 1#ifndef __PERF_CACHE_H
2#define CACHE_H 2#define __PERF_CACHE_H
3 3
4#include "util.h" 4#include "util.h"
5#include "strbuf.h" 5#include "strbuf.h"
6#include "../perf.h" 6#include "../perf.h"
7 7
8#define CMD_EXEC_PATH "--exec-path"
9#define CMD_PERF_DIR "--perf-dir="
10#define CMD_WORK_TREE "--work-tree="
11#define CMD_DEBUGFS_DIR "--debugfs-dir="
12
8#define PERF_DIR_ENVIRONMENT "PERF_DIR" 13#define PERF_DIR_ENVIRONMENT "PERF_DIR"
9#define PERF_WORK_TREE_ENVIRONMENT "PERF_WORK_TREE" 14#define PERF_WORK_TREE_ENVIRONMENT "PERF_WORK_TREE"
10#define DEFAULT_PERF_DIR_ENVIRONMENT ".perf" 15#define DEFAULT_PERF_DIR_ENVIRONMENT ".perf"
@@ -117,4 +122,4 @@ extern char *perf_pathdup(const char *fmt, ...)
117 122
118extern size_t strlcpy(char *dest, const char *src, size_t size); 123extern size_t strlcpy(char *dest, const char *src, size_t size);
119 124
120#endif /* CACHE_H */ 125#endif /* __PERF_CACHE_H */
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 3b8380f1b478..b3b71258272a 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -206,7 +206,7 @@ fill_node(struct callchain_node *node, struct ip_callchain *chain,
206 } 206 }
207 node->val_nr = chain->nr - start; 207 node->val_nr = chain->nr - start;
208 if (!node->val_nr) 208 if (!node->val_nr)
209 printf("Warning: empty node in callchain tree\n"); 209 pr_warning("Warning: empty node in callchain tree\n");
210} 210}
211 211
212static void 212static void
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 43cf3ea9e088..ad4626de4c2b 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -58,4 +58,4 @@ static inline u64 cumul_hits(struct callchain_node *node)
58int register_callchain_param(struct callchain_param *param); 58int register_callchain_param(struct callchain_param *param);
59void append_chain(struct callchain_node *root, struct ip_callchain *chain, 59void append_chain(struct callchain_node *root, struct ip_callchain *chain,
60 struct symbol **syms); 60 struct symbol **syms);
61#endif 61#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h
index 58d597564b99..24e8809210bb 100644
--- a/tools/perf/util/color.h
+++ b/tools/perf/util/color.h
@@ -1,5 +1,5 @@
1#ifndef COLOR_H 1#ifndef __PERF_COLOR_H
2#define COLOR_H 2#define __PERF_COLOR_H
3 3
4/* "\033[1;38;5;2xx;48;5;2xxm\0" is 23 bytes */ 4/* "\033[1;38;5;2xx;48;5;2xxm\0" is 23 bytes */
5#define COLOR_MAXLEN 24 5#define COLOR_MAXLEN 24
@@ -39,4 +39,4 @@ int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *bu
39int percent_color_fprintf(FILE *fp, const char *fmt, double percent); 39int percent_color_fprintf(FILE *fp, const char *fmt, double percent);
40const char *get_percent_color(double percent); 40const char *get_percent_color(double percent);
41 41
42#endif /* COLOR_H */ 42#endif /* __PERF_COLOR_H */
diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c
index 0b791bd346bc..35073621e5de 100644
--- a/tools/perf/util/ctype.c
+++ b/tools/perf/util/ctype.c
@@ -29,3 +29,11 @@ unsigned char sane_ctype[256] = {
29 A, A, A, A, A, A, A, A, A, A, A, R, R, P, P, 0, /* 112..127 */ 29 A, A, A, A, A, A, A, A, A, A, A, R, R, P, P, 0, /* 112..127 */
30 /* Nothing in the 128.. range */ 30 /* Nothing in the 128.. range */
31}; 31};
32
33const char *graph_line =
34 "_____________________________________________________________________"
35 "_____________________________________________________________________";
36const char *graph_dotted_line =
37 "---------------------------------------------------------------------"
38 "---------------------------------------------------------------------"
39 "---------------------------------------------------------------------";
diff --git a/tools/perf/util/data_map.c b/tools/perf/util/data_map.c
new file mode 100644
index 000000000000..ca0bedf637c2
--- /dev/null
+++ b/tools/perf/util/data_map.c
@@ -0,0 +1,291 @@
1#include "data_map.h"
2#include "symbol.h"
3#include "util.h"
4#include "debug.h"
5
6
7static struct perf_file_handler *curr_handler;
8static unsigned long mmap_window = 32;
9static char __cwd[PATH_MAX];
10
11static int process_event_stub(event_t *event __used)
12{
13 dump_printf(": unhandled!\n");
14 return 0;
15}
16
17void register_perf_file_handler(struct perf_file_handler *handler)
18{
19 if (!handler->process_sample_event)
20 handler->process_sample_event = process_event_stub;
21 if (!handler->process_mmap_event)
22 handler->process_mmap_event = process_event_stub;
23 if (!handler->process_comm_event)
24 handler->process_comm_event = process_event_stub;
25 if (!handler->process_fork_event)
26 handler->process_fork_event = process_event_stub;
27 if (!handler->process_exit_event)
28 handler->process_exit_event = process_event_stub;
29 if (!handler->process_lost_event)
30 handler->process_lost_event = process_event_stub;
31 if (!handler->process_read_event)
32 handler->process_read_event = process_event_stub;
33 if (!handler->process_throttle_event)
34 handler->process_throttle_event = process_event_stub;
35 if (!handler->process_unthrottle_event)
36 handler->process_unthrottle_event = process_event_stub;
37
38 curr_handler = handler;
39}
40
41static const char *event__name[] = {
42 [0] = "TOTAL",
43 [PERF_RECORD_MMAP] = "MMAP",
44 [PERF_RECORD_LOST] = "LOST",
45 [PERF_RECORD_COMM] = "COMM",
46 [PERF_RECORD_EXIT] = "EXIT",
47 [PERF_RECORD_THROTTLE] = "THROTTLE",
48 [PERF_RECORD_UNTHROTTLE] = "UNTHROTTLE",
49 [PERF_RECORD_FORK] = "FORK",
50 [PERF_RECORD_READ] = "READ",
51 [PERF_RECORD_SAMPLE] = "SAMPLE",
52};
53
54unsigned long event__total[PERF_RECORD_MAX];
55
56void event__print_totals(void)
57{
58 int i;
59 for (i = 0; i < PERF_RECORD_MAX; ++i)
60 pr_info("%10s events: %10ld\n",
61 event__name[i], event__total[i]);
62}
63
64static int
65process_event(event_t *event, unsigned long offset, unsigned long head)
66{
67 trace_event(event);
68
69 if (event->header.type < PERF_RECORD_MAX) {
70 dump_printf("%p [%p]: PERF_RECORD_%s",
71 (void *)(offset + head),
72 (void *)(long)(event->header.size),
73 event__name[event->header.type]);
74 ++event__total[0];
75 ++event__total[event->header.type];
76 }
77
78 switch (event->header.type) {
79 case PERF_RECORD_SAMPLE:
80 return curr_handler->process_sample_event(event);
81 case PERF_RECORD_MMAP:
82 return curr_handler->process_mmap_event(event);
83 case PERF_RECORD_COMM:
84 return curr_handler->process_comm_event(event);
85 case PERF_RECORD_FORK:
86 return curr_handler->process_fork_event(event);
87 case PERF_RECORD_EXIT:
88 return curr_handler->process_exit_event(event);
89 case PERF_RECORD_LOST:
90 return curr_handler->process_lost_event(event);
91 case PERF_RECORD_READ:
92 return curr_handler->process_read_event(event);
93 case PERF_RECORD_THROTTLE:
94 return curr_handler->process_throttle_event(event);
95 case PERF_RECORD_UNTHROTTLE:
96 return curr_handler->process_unthrottle_event(event);
97 default:
98 curr_handler->total_unknown++;
99 return -1;
100 }
101}
102
103int perf_header__read_build_ids(int input, off_t offset, off_t size)
104{
105 struct build_id_event bev;
106 char filename[PATH_MAX];
107 off_t limit = offset + size;
108 int err = -1;
109
110 while (offset < limit) {
111 struct dso *dso;
112 ssize_t len;
113
114 if (read(input, &bev, sizeof(bev)) != sizeof(bev))
115 goto out;
116
117 len = bev.header.size - sizeof(bev);
118 if (read(input, filename, len) != len)
119 goto out;
120
121 dso = dsos__findnew(filename);
122 if (dso != NULL)
123 dso__set_build_id(dso, &bev.build_id);
124
125 offset += bev.header.size;
126 }
127 err = 0;
128out:
129 return err;
130}
131
132int mmap_dispatch_perf_file(struct perf_header **pheader,
133 const char *input_name,
134 int force,
135 int full_paths,
136 int *cwdlen,
137 char **cwd)
138{
139 int err;
140 struct perf_header *header;
141 unsigned long head, shift;
142 unsigned long offset = 0;
143 struct stat input_stat;
144 size_t page_size;
145 u64 sample_type;
146 event_t *event;
147 uint32_t size;
148 int input;
149 char *buf;
150
151 if (curr_handler == NULL) {
152 pr_debug("Forgot to register perf file handler\n");
153 return -EINVAL;
154 }
155
156 page_size = getpagesize();
157
158 input = open(input_name, O_RDONLY);
159 if (input < 0) {
160 pr_err("Failed to open file: %s", input_name);
161 if (!strcmp(input_name, "perf.data"))
162 pr_err(" (try 'perf record' first)");
163 pr_err("\n");
164 return -errno;
165 }
166
167 if (fstat(input, &input_stat) < 0) {
168 pr_err("failed to stat file");
169 err = -errno;
170 goto out_close;
171 }
172
173 err = -EACCES;
174 if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
175 pr_err("file: %s not owned by current user or root\n",
176 input_name);
177 goto out_close;
178 }
179
180 if (input_stat.st_size == 0) {
181 pr_info("zero-sized file, nothing to do!\n");
182 goto done;
183 }
184
185 err = -ENOMEM;
186 header = perf_header__new();
187 if (header == NULL)
188 goto out_close;
189
190 err = perf_header__read(header, input);
191 if (err < 0)
192 goto out_delete;
193 *pheader = header;
194 head = header->data_offset;
195
196 sample_type = perf_header__sample_type(header);
197
198 err = -EINVAL;
199 if (curr_handler->sample_type_check &&
200 curr_handler->sample_type_check(sample_type) < 0)
201 goto out_delete;
202
203 if (!full_paths) {
204 if (getcwd(__cwd, sizeof(__cwd)) == NULL) {
205 pr_err("failed to get the current directory\n");
206 err = -errno;
207 goto out_delete;
208 }
209 *cwd = __cwd;
210 *cwdlen = strlen(*cwd);
211 } else {
212 *cwd = NULL;
213 *cwdlen = 0;
214 }
215
216 shift = page_size * (head / page_size);
217 offset += shift;
218 head -= shift;
219
220remap:
221 buf = mmap(NULL, page_size * mmap_window, PROT_READ,
222 MAP_SHARED, input, offset);
223 if (buf == MAP_FAILED) {
224 pr_err("failed to mmap file\n");
225 err = -errno;
226 goto out_delete;
227 }
228
229more:
230 event = (event_t *)(buf + head);
231
232 size = event->header.size;
233 if (!size)
234 size = 8;
235
236 if (head + event->header.size >= page_size * mmap_window) {
237 int munmap_ret;
238
239 shift = page_size * (head / page_size);
240
241 munmap_ret = munmap(buf, page_size * mmap_window);
242 assert(munmap_ret == 0);
243
244 offset += shift;
245 head -= shift;
246 goto remap;
247 }
248
249 size = event->header.size;
250
251 dump_printf("\n%p [%p]: event: %d\n",
252 (void *)(offset + head),
253 (void *)(long)event->header.size,
254 event->header.type);
255
256 if (!size || process_event(event, offset, head) < 0) {
257
258 dump_printf("%p [%p]: skipping unknown header type: %d\n",
259 (void *)(offset + head),
260 (void *)(long)(event->header.size),
261 event->header.type);
262
263 /*
264 * assume we lost track of the stream, check alignment, and
265 * increment a single u64 in the hope to catch on again 'soon'.
266 */
267
268 if (unlikely(head & 7))
269 head &= ~7ULL;
270
271 size = 8;
272 }
273
274 head += size;
275
276 if (offset + head >= header->data_offset + header->data_size)
277 goto done;
278
279 if (offset + head < (unsigned long)input_stat.st_size)
280 goto more;
281
282done:
283 err = 0;
284out_close:
285 close(input);
286
287 return err;
288out_delete:
289 perf_header__delete(header);
290 goto out_close;
291}
diff --git a/tools/perf/util/data_map.h b/tools/perf/util/data_map.h
new file mode 100644
index 000000000000..3180ff7e3633
--- /dev/null
+++ b/tools/perf/util/data_map.h
@@ -0,0 +1,32 @@
1#ifndef __PERF_DATAMAP_H
2#define __PERF_DATAMAP_H
3
4#include "event.h"
5#include "header.h"
6
7typedef int (*event_type_handler_t)(event_t *);
8
9struct perf_file_handler {
10 event_type_handler_t process_sample_event;
11 event_type_handler_t process_mmap_event;
12 event_type_handler_t process_comm_event;
13 event_type_handler_t process_fork_event;
14 event_type_handler_t process_exit_event;
15 event_type_handler_t process_lost_event;
16 event_type_handler_t process_read_event;
17 event_type_handler_t process_throttle_event;
18 event_type_handler_t process_unthrottle_event;
19 int (*sample_type_check)(u64 sample_type);
20 unsigned long total_unknown;
21};
22
23void register_perf_file_handler(struct perf_file_handler *handler);
24int mmap_dispatch_perf_file(struct perf_header **pheader,
25 const char *input_name,
26 int force,
27 int full_paths,
28 int *cwdlen,
29 char **cwd);
30int perf_header__read_build_ids(int input, off_t offset, off_t file_size);
31
32#endif
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index e8ca98fe0bd4..28d520d5a1fb 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -13,12 +13,12 @@
13int verbose = 0; 13int verbose = 0;
14int dump_trace = 0; 14int dump_trace = 0;
15 15
16int eprintf(const char *fmt, ...) 16int eprintf(int level, const char *fmt, ...)
17{ 17{
18 va_list args; 18 va_list args;
19 int ret = 0; 19 int ret = 0;
20 20
21 if (verbose) { 21 if (verbose >= level) {
22 va_start(args, fmt); 22 va_start(args, fmt);
23 ret = vfprintf(stderr, fmt, args); 23 ret = vfprintf(stderr, fmt, args);
24 va_end(args); 24 va_end(args);
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index 437eea58ce40..c6c24c522dea 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -1,8 +1,15 @@
1/* For debugging general purposes */ 1/* For debugging general purposes */
2#ifndef __PERF_DEBUG_H
3#define __PERF_DEBUG_H
4
5#include "event.h"
2 6
3extern int verbose; 7extern int verbose;
4extern int dump_trace; 8extern int dump_trace;
5 9
6int eprintf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); 10int eprintf(int level,
11 const char *fmt, ...) __attribute__((format(printf, 2, 3)));
7int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); 12int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
8void trace_event(event_t *event); 13void trace_event(event_t *event);
14
15#endif /* __PERF_DEBUG_H */
diff --git a/tools/perf/util/debugfs.c b/tools/perf/util/debugfs.c
new file mode 100644
index 000000000000..06b73ee02c49
--- /dev/null
+++ b/tools/perf/util/debugfs.c
@@ -0,0 +1,241 @@
1#include "util.h"
2#include "debugfs.h"
3#include "cache.h"
4
5static int debugfs_premounted;
6static char debugfs_mountpoint[MAX_PATH+1];
7
8static const char *debugfs_known_mountpoints[] = {
9 "/sys/kernel/debug/",
10 "/debug/",
11 0,
12};
13
14/* use this to force a umount */
15void debugfs_force_cleanup(void)
16{
17 debugfs_find_mountpoint();
18 debugfs_premounted = 0;
19 debugfs_umount();
20}
21
22/* construct a full path to a debugfs element */
23int debugfs_make_path(const char *element, char *buffer, int size)
24{
25 int len;
26
27 if (strlen(debugfs_mountpoint) == 0) {
28 buffer[0] = '\0';
29 return -1;
30 }
31
32 len = strlen(debugfs_mountpoint) + strlen(element) + 1;
33 if (len >= size)
34 return len+1;
35
36 snprintf(buffer, size-1, "%s/%s", debugfs_mountpoint, element);
37 return 0;
38}
39
40static int debugfs_found;
41
42/* find the path to the mounted debugfs */
43const char *debugfs_find_mountpoint(void)
44{
45 const char **ptr;
46 char type[100];
47 FILE *fp;
48
49 if (debugfs_found)
50 return (const char *) debugfs_mountpoint;
51
52 ptr = debugfs_known_mountpoints;
53 while (*ptr) {
54 if (debugfs_valid_mountpoint(*ptr) == 0) {
55 debugfs_found = 1;
56 strcpy(debugfs_mountpoint, *ptr);
57 return debugfs_mountpoint;
58 }
59 ptr++;
60 }
61
62 /* give up and parse /proc/mounts */
63 fp = fopen("/proc/mounts", "r");
64 if (fp == NULL)
65 die("Can't open /proc/mounts for read");
66
67 while (fscanf(fp, "%*s %"
68 STR(MAX_PATH)
69 "s %99s %*s %*d %*d\n",
70 debugfs_mountpoint, type) == 2) {
71 if (strcmp(type, "debugfs") == 0)
72 break;
73 }
74 fclose(fp);
75
76 if (strcmp(type, "debugfs") != 0)
77 return NULL;
78
79 debugfs_found = 1;
80
81 return debugfs_mountpoint;
82}
83
84/* verify that a mountpoint is actually a debugfs instance */
85
86int debugfs_valid_mountpoint(const char *debugfs)
87{
88 struct statfs st_fs;
89
90 if (statfs(debugfs, &st_fs) < 0)
91 return -ENOENT;
92 else if (st_fs.f_type != (long) DEBUGFS_MAGIC)
93 return -ENOENT;
94
95 return 0;
96}
97
98
99int debugfs_valid_entry(const char *path)
100{
101 struct stat st;
102
103 if (stat(path, &st))
104 return -errno;
105
106 return 0;
107}
108
109/* mount the debugfs somewhere */
110
111int debugfs_mount(const char *mountpoint)
112{
113 char mountcmd[128];
114
115 /* see if it's already mounted */
116 if (debugfs_find_mountpoint()) {
117 debugfs_premounted = 1;
118 return 0;
119 }
120
121 /* if not mounted and no argument */
122 if (mountpoint == NULL) {
123 /* see if environment variable set */
124 mountpoint = getenv(PERF_DEBUGFS_ENVIRONMENT);
125 /* if no environment variable, use default */
126 if (mountpoint == NULL)
127 mountpoint = "/sys/kernel/debug";
128 }
129
130 /* save the mountpoint */
131 strncpy(debugfs_mountpoint, mountpoint, sizeof(debugfs_mountpoint));
132
133 /* mount it */
134 snprintf(mountcmd, sizeof(mountcmd),
135 "/bin/mount -t debugfs debugfs %s", mountpoint);
136 return system(mountcmd);
137}
138
139/* umount the debugfs */
140
141int debugfs_umount(void)
142{
143 char umountcmd[128];
144 int ret;
145
146 /* if it was already mounted, leave it */
147 if (debugfs_premounted)
148 return 0;
149
150 /* make sure it's a valid mount point */
151 ret = debugfs_valid_mountpoint(debugfs_mountpoint);
152 if (ret)
153 return ret;
154
155 snprintf(umountcmd, sizeof(umountcmd),
156 "/bin/umount %s", debugfs_mountpoint);
157 return system(umountcmd);
158}
159
160int debugfs_write(const char *entry, const char *value)
161{
162 char path[MAX_PATH+1];
163 int ret, count;
164 int fd;
165
166 /* construct the path */
167 snprintf(path, sizeof(path), "%s/%s", debugfs_mountpoint, entry);
168
169 /* verify that it exists */
170 ret = debugfs_valid_entry(path);
171 if (ret)
172 return ret;
173
174 /* get how many chars we're going to write */
175 count = strlen(value);
176
177 /* open the debugfs entry */
178 fd = open(path, O_RDWR);
179 if (fd < 0)
180 return -errno;
181
182 while (count > 0) {
183 /* write it */
184 ret = write(fd, value, count);
185 if (ret <= 0) {
186 if (errno == EAGAIN)
187 continue;
188 close(fd);
189 return -errno;
190 }
191 count -= ret; value += ret; /* don't rewrite data already written */
192 }
193
194 /* close it */
195 close(fd);
196
197 /* return success */
198 return 0;
199}
200
201/*
202 * read a debugfs entry
203 * returns the number of chars read or a negative errno
204 */
205int debugfs_read(const char *entry, char *buffer, size_t size)
206{
207 char path[MAX_PATH+1];
208 int ret;
209 int fd;
210
211 /* construct the path */
212 snprintf(path, sizeof(path), "%s/%s", debugfs_mountpoint, entry);
213
214 /* verify that it exists */
215 ret = debugfs_valid_entry(path);
216 if (ret)
217 return ret;
218
219 /* open the debugfs entry */
220 fd = open(path, O_RDONLY);
221 if (fd < 0)
222 return -errno;
223
224 do {
225 /* read it */
226 ret = read(fd, buffer, size - 1); /* leave room for the '\0' */
227 if (ret == 0) {
228 close(fd);
229 return EOF;
230 }
231 } while (ret < 0 && errno == EAGAIN);
232
233 /* close it */
234 close(fd);
235
236 /* make *sure* there's a null character at the end */
237 buffer[ret] = '\0';
238
239 /* return the number of chars read */
240 return ret;
241}
diff --git a/tools/perf/util/debugfs.h b/tools/perf/util/debugfs.h
new file mode 100644
index 000000000000..3cd14f9ae784
--- /dev/null
+++ b/tools/perf/util/debugfs.h
@@ -0,0 +1,25 @@
1#ifndef __DEBUGFS_H__
2#define __DEBUGFS_H__
3
4#include <sys/mount.h>
5
6#ifndef MAX_PATH
7# define MAX_PATH 256
8#endif
9
10#ifndef STR
11# define _STR(x) #x
12# define STR(x) _STR(x)
13#endif
14
15extern const char *debugfs_find_mountpoint(void);
16extern int debugfs_valid_mountpoint(const char *debugfs);
17extern int debugfs_valid_entry(const char *path);
18extern int debugfs_mount(const char *mountpoint);
19extern int debugfs_umount(void);
20extern int debugfs_write(const char *entry, const char *value);
21extern int debugfs_read(const char *entry, char *buffer, size_t size);
22extern void debugfs_force_cleanup(void);
23extern int debugfs_make_path(const char *element, char *buffer, int size);
24
25#endif /* __DEBUGFS_H__ */
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
new file mode 100644
index 000000000000..414b89d1bde9
--- /dev/null
+++ b/tools/perf/util/event.c
@@ -0,0 +1,312 @@
1#include <linux/types.h>
2#include "event.h"
3#include "debug.h"
4#include "string.h"
5#include "thread.h"
6
7static pid_t event__synthesize_comm(pid_t pid, int full,
8 int (*process)(event_t *event))
9{
10 event_t ev;
11 char filename[PATH_MAX];
12 char bf[BUFSIZ];
13 FILE *fp;
14 size_t size = 0;
15 DIR *tasks;
16 struct dirent dirent, *next;
17 pid_t tgid = 0;
18
19 snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
20
21 fp = fopen(filename, "r");
22 if (fp == NULL) {
23out_race:
24 /*
25 * We raced with a task exiting - just return:
26 */
27 pr_debug("couldn't open %s\n", filename);
28 return 0;
29 }
30
31 memset(&ev.comm, 0, sizeof(ev.comm));
32 while (!ev.comm.comm[0] || !ev.comm.pid) {
33 if (fgets(bf, sizeof(bf), fp) == NULL)
34 goto out_failure;
35
36 if (memcmp(bf, "Name:", 5) == 0) {
37 char *name = bf + 5;
38 while (*name && isspace(*name))
39 ++name;
40 size = strlen(name) - 1;
41 memcpy(ev.comm.comm, name, size++);
42 } else if (memcmp(bf, "Tgid:", 5) == 0) {
43 char *tgids = bf + 5;
44 while (*tgids && isspace(*tgids))
45 ++tgids;
46 tgid = ev.comm.pid = atoi(tgids);
47 }
48 }
49
50 ev.comm.header.type = PERF_RECORD_COMM;
51 size = ALIGN(size, sizeof(u64));
52 ev.comm.header.size = sizeof(ev.comm) - (sizeof(ev.comm.comm) - size);
53
54 if (!full) {
55 ev.comm.tid = pid;
56
57 process(&ev);
58 goto out_fclose;
59 }
60
61 snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
62
63 tasks = opendir(filename);
64 if (tasks == NULL)
65 goto out_race;
66
67 while (!readdir_r(tasks, &dirent, &next) && next) {
68 char *end;
69 pid = strtol(dirent.d_name, &end, 10);
70 if (*end)
71 continue;
72
73 ev.comm.tid = pid;
74
75 process(&ev);
76 }
77 closedir(tasks);
78
79out_fclose:
80 fclose(fp);
81 return tgid;
82
83out_failure:
84 pr_warning("couldn't get COMM and tgid, malformed %s\n", filename);
85 return -1;
86}
87
88static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
89 int (*process)(event_t *event))
90{
91 char filename[PATH_MAX];
92 FILE *fp;
93
94 snprintf(filename, sizeof(filename), "/proc/%d/maps", pid);
95
96 fp = fopen(filename, "r");
97 if (fp == NULL) {
98 /*
99 * We raced with a task exiting - just return:
100 */
101 pr_debug("couldn't open %s\n", filename);
102 return -1;
103 }
104
105 while (1) {
106 char bf[BUFSIZ], *pbf = bf;
107 event_t ev = {
108 .header = { .type = PERF_RECORD_MMAP },
109 };
110 int n;
111 size_t size;
112 if (fgets(bf, sizeof(bf), fp) == NULL)
113 break;
114
115 /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
116 n = hex2u64(pbf, &ev.mmap.start);
117 if (n < 0)
118 continue;
119 pbf += n + 1;
120 n = hex2u64(pbf, &ev.mmap.len);
121 if (n < 0)
122 continue;
123 pbf += n + 3;
124 if (*pbf == 'x') { /* vm_exec */
125 char *execname = strchr(bf, '/');
126
127 /* Catch VDSO */
128 if (execname == NULL)
129 execname = strstr(bf, "[vdso]");
130
131 if (execname == NULL)
132 continue;
133
134 size = strlen(execname);
135 execname[size - 1] = '\0'; /* Remove \n */
136 memcpy(ev.mmap.filename, execname, size);
137 size = ALIGN(size, sizeof(u64));
138 ev.mmap.len -= ev.mmap.start;
139 ev.mmap.header.size = (sizeof(ev.mmap) -
140 (sizeof(ev.mmap.filename) - size));
141 ev.mmap.pid = tgid;
142 ev.mmap.tid = pid;
143
144 process(&ev);
145 }
146 }
147
148 fclose(fp);
149 return 0;
150}
151
152int event__synthesize_thread(pid_t pid, int (*process)(event_t *event))
153{
154 pid_t tgid = event__synthesize_comm(pid, 1, process);
155 if (tgid == -1)
156 return -1;
157 return event__synthesize_mmap_events(pid, tgid, process);
158}
159
160void event__synthesize_threads(int (*process)(event_t *event))
161{
162 DIR *proc;
163 struct dirent dirent, *next;
164
165 proc = opendir("/proc");
166
167 while (!readdir_r(proc, &dirent, &next) && next) {
168 char *end;
169 pid_t pid = strtol(dirent.d_name, &end, 10);
170
171 if (*end) /* only interested in proper numerical dirents */
172 continue;
173
174 event__synthesize_thread(pid, process);
175 }
176
177 closedir(proc);
178}
179
180char *event__cwd;
181int event__cwdlen;
182
183struct events_stats event__stats;
184
185int event__process_comm(event_t *self)
186{
187 struct thread *thread = threads__findnew(self->comm.pid);
188
189 dump_printf(": %s:%d\n", self->comm.comm, self->comm.pid);
190
191 if (thread == NULL || thread__set_comm(thread, self->comm.comm)) {
192 dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
193 return -1;
194 }
195
196 return 0;
197}
198
199int event__process_lost(event_t *self)
200{
201 dump_printf(": id:%Ld: lost:%Ld\n", self->lost.id, self->lost.lost);
202 event__stats.lost += self->lost.lost;
203 return 0;
204}
205
206int event__process_mmap(event_t *self)
207{
208 struct thread *thread = threads__findnew(self->mmap.pid);
209 struct map *map = map__new(&self->mmap, MAP__FUNCTION,
210 event__cwd, event__cwdlen);
211
212 dump_printf(" %d/%d: [%p(%p) @ %p]: %s\n",
213 self->mmap.pid, self->mmap.tid,
214 (void *)(long)self->mmap.start,
215 (void *)(long)self->mmap.len,
216 (void *)(long)self->mmap.pgoff,
217 self->mmap.filename);
218
219 if (thread == NULL || map == NULL)
220 dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
221 else
222 thread__insert_map(thread, map);
223
224 return 0;
225}
226
227int event__process_task(event_t *self)
228{
229 struct thread *thread = threads__findnew(self->fork.pid);
230 struct thread *parent = threads__findnew(self->fork.ppid);
231
232 dump_printf("(%d:%d):(%d:%d)\n", self->fork.pid, self->fork.tid,
233 self->fork.ppid, self->fork.ptid);
234 /*
235 * A thread clone will have the same PID for both parent and child.
236 */
237 if (thread == parent)
238 return 0;
239
240 if (self->header.type == PERF_RECORD_EXIT)
241 return 0;
242
243 if (thread == NULL || parent == NULL ||
244 thread__fork(thread, parent) < 0) {
245 dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
246 return -1;
247 }
248
249 return 0;
250}
251
252void thread__find_addr_location(struct thread *self, u8 cpumode,
253 enum map_type type, u64 addr,
254 struct addr_location *al,
255 symbol_filter_t filter)
256{
257 struct thread *thread = al->thread = self;
258
259 al->addr = addr;
260
261 if (cpumode & PERF_RECORD_MISC_KERNEL) {
262 al->level = 'k';
263 thread = kthread;
264 } else if (cpumode & PERF_RECORD_MISC_USER)
265 al->level = '.';
266 else {
267 al->level = 'H';
268 al->map = NULL;
269 al->sym = NULL;
270 return;
271 }
272try_again:
273 al->map = thread__find_map(thread, type, al->addr);
274 if (al->map == NULL) {
275 /*
276 * If this is outside of all known maps, and is a negative
277 * address, try to look it up in the kernel dso, as it might be
278 * a vsyscall or vdso (which executes in user-mode).
279 *
280 * XXX This is nasty, we should have a symbol list in the
281 * "[vdso]" dso, but for now lets use the old trick of looking
282 * in the whole kernel symbol list.
283 */
284 if ((long long)al->addr < 0 && thread != kthread) {
285 thread = kthread;
286 goto try_again;
287 }
288 al->sym = NULL;
289 } else {
290 al->addr = al->map->map_ip(al->map, al->addr);
291 al->sym = map__find_symbol(al->map, al->addr, filter);
292 }
293}
294
295int event__preprocess_sample(const event_t *self, struct addr_location *al,
296 symbol_filter_t filter)
297{
298 u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
299 struct thread *thread = threads__findnew(self->ip.pid);
300
301 if (thread == NULL)
302 return -1;
303
304 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
305
306 thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
307 self->ip.ip, al, filter);
308 dump_printf(" ...... dso: %s\n",
309 al->map ? al->map->dso->long_name :
310 al->level == 'H' ? "[hypervisor]" : "<not found>");
311 return 0;
312}
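
The translation above (al->addr = al->map->map_ip(al->map, al->addr)) is the heart of sample resolution: it rebases a runtime virtual address into the DSO's own address space before the symbol lookup. A minimal standalone sketch of that arithmetic follows; the toy_* names are invented for illustration and are not part of the perf API:

#include <assert.h>
#include <stdint.h>

struct toy_map { uint64_t start, end, pgoff; };

/* Same arithmetic as map__map_ip(): rebase a runtime address
 * onto the file/DSO address space. */
static uint64_t toy_map_ip(const struct toy_map *m, uint64_t ip)
{
        return ip - m->start + m->pgoff;
}

/* And the inverse, as map__unmap_ip(). */
static uint64_t toy_unmap_ip(const struct toy_map *m, uint64_t ip)
{
        return ip + m->start - m->pgoff;
}

int main(void)
{
        struct toy_map m = { .start = 0x400000, .end = 0x500000, .pgoff = 0x1000 };
        uint64_t sample_ip = 0x401234;

        /* 0x401234 - 0x400000 + 0x1000 == 0x2234 */
        assert(toy_map_ip(&m, sample_ip) == 0x2234);
        /* The two translations are exact inverses. */
        assert(toy_unmap_ip(&m, toy_map_ip(&m, sample_ip)) == sample_ip);
        return 0;
}

The vdso and anonymous maps instead install identity__map_ip in both directions (see the map.c hunk later in this series), since their addresses need no rebasing.
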
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 2c9c26d6ded0..a4cc8105cf67 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -1,14 +1,10 @@
1#ifndef __PERF_RECORD_H 1#ifndef __PERF_RECORD_H
2#define __PERF_RECORD_H 2#define __PERF_RECORD_H
3
3#include "../perf.h" 4#include "../perf.h"
4#include "util.h" 5#include "util.h"
5#include <linux/list.h> 6#include <linux/list.h>
6 7#include <linux/rbtree.h>
7enum {
8 SHOW_KERNEL = 1,
9 SHOW_USER = 2,
10 SHOW_HV = 4,
11};
12 8
13/* 9/*
14 * PERF_SAMPLE_IP | PERF_SAMPLE_TID | * 10 * PERF_SAMPLE_IP | PERF_SAMPLE_TID | *
@@ -65,6 +61,13 @@ struct sample_event{
65 u64 array[]; 61 u64 array[];
66}; 62};
67 63
64#define BUILD_ID_SIZE 20
65
66struct build_id_event {
67 struct perf_event_header header;
68 u8 build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))];
69 char filename[];
70};
68 71
69typedef union event_union { 72typedef union event_union {
70 struct perf_event_header header; 73 struct perf_event_header header;
@@ -77,12 +80,30 @@ typedef union event_union {
77 struct sample_event sample; 80 struct sample_event sample;
78} event_t; 81} event_t;
79 82
83struct events_stats {
84 unsigned long total;
85 unsigned long lost;
86};
87
88void event__print_totals(void);
89
90enum map_type {
91 MAP__FUNCTION = 0,
92
93 MAP__NR_TYPES,
94};
95
80struct map { 96struct map {
81 struct list_head node; 97 union {
98 struct rb_node rb_node;
99 struct list_head node;
100 };
82 u64 start; 101 u64 start;
83 u64 end; 102 u64 end;
103 enum map_type type;
84 u64 pgoff; 104 u64 pgoff;
85 u64 (*map_ip)(struct map *, u64); 105 u64 (*map_ip)(struct map *, u64);
106 u64 (*unmap_ip)(struct map *, u64);
86 struct dso *dso; 107 struct dso *dso;
87}; 108};
88 109
@@ -91,14 +112,48 @@ static inline u64 map__map_ip(struct map *map, u64 ip)
91 return ip - map->start + map->pgoff; 112 return ip - map->start + map->pgoff;
92} 113}
93 114
94static inline u64 vdso__map_ip(struct map *map __used, u64 ip) 115static inline u64 map__unmap_ip(struct map *map, u64 ip)
116{
117 return ip + map->start - map->pgoff;
118}
119
120static inline u64 identity__map_ip(struct map *map __used, u64 ip)
95{ 121{
96 return ip; 122 return ip;
97} 123}
98 124
99struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen); 125struct symbol;
126
127typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym);
128
129void map__init(struct map *self, enum map_type type,
130 u64 start, u64 end, u64 pgoff, struct dso *dso);
131struct map *map__new(struct mmap_event *event, enum map_type,
132 char *cwd, int cwdlen);
133void map__delete(struct map *self);
100struct map *map__clone(struct map *self); 134struct map *map__clone(struct map *self);
101int map__overlap(struct map *l, struct map *r); 135int map__overlap(struct map *l, struct map *r);
102size_t map__fprintf(struct map *self, FILE *fp); 136size_t map__fprintf(struct map *self, FILE *fp);
137struct symbol *map__find_symbol(struct map *self, u64 addr,
138 symbol_filter_t filter);
139void map__fixup_start(struct map *self);
140void map__fixup_end(struct map *self);
141
142int event__synthesize_thread(pid_t pid, int (*process)(event_t *event));
143void event__synthesize_threads(int (*process)(event_t *event));
144
145extern char *event__cwd;
146extern int event__cwdlen;
147extern struct events_stats event__stats;
148extern unsigned long event__total[PERF_RECORD_MAX];
149
150int event__process_comm(event_t *self);
151int event__process_lost(event_t *self);
152int event__process_mmap(event_t *self);
153int event__process_task(event_t *self);
154
155struct addr_location;
156int event__preprocess_sample(const event_t *self, struct addr_location *al,
157 symbol_filter_t filter);
103 158
104#endif 159#endif /* __PERF_RECORD_H */
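
The new build_id_event record pads its 20-byte build id out to a multiple of u64 and is followed on disk by a variable-length, 64-byte-aligned filename, so header.size must cover both parts. A hedged sketch of that sizing, using a plain uint64_t as a stand-in for perf_event_header and an ALIGN definition matching the one this series adds to its linux/kernel.h wrapper:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define BUILD_ID_SIZE 20
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))     /* a must be a power of two */

struct toy_build_id_event {
        uint64_t header_size;   /* stand-in for the real perf_event_header */
        uint8_t  build_id[ALIGN(BUILD_ID_SIZE, sizeof(uint64_t))]; /* 20 -> 24 */
        /* char filename[]; follows on disk */
};

int main(void)
{
        const char *filename = "/lib/libc-2.10.so";    /* illustrative path */
        /* The filename length is itself padded to 64 bytes before writing,
         * mirroring the build-id table writer in header.c below. */
        size_t len = ALIGN(strlen(filename) + 1, 64);

        printf("build_id field: %zu bytes\n",
               sizeof(((struct toy_build_id_event *)0)->build_id));
        printf("record size on disk: %zu bytes\n",
               sizeof(struct toy_build_id_event) + len);
        return 0;
}
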
diff --git a/tools/perf/util/exec_cmd.h b/tools/perf/util/exec_cmd.h
index effe25eb1545..31647ac92ed1 100644
--- a/tools/perf/util/exec_cmd.h
+++ b/tools/perf/util/exec_cmd.h
@@ -1,5 +1,5 @@
1#ifndef PERF_EXEC_CMD_H 1#ifndef __PERF_EXEC_CMD_H
2#define PERF_EXEC_CMD_H 2#define __PERF_EXEC_CMD_H
3 3
4extern void perf_set_argv_exec_path(const char *exec_path); 4extern void perf_set_argv_exec_path(const char *exec_path);
5extern const char *perf_extract_argv0_path(const char *path); 5extern const char *perf_extract_argv0_path(const char *path);
@@ -10,4 +10,4 @@ extern int execv_perf_cmd(const char **argv); /* NULL terminated */
10extern int execl_perf_cmd(const char *cmd, ...); 10extern int execl_perf_cmd(const char *cmd, ...);
11extern const char *system_path(const char *path); 11extern const char *system_path(const char *path);
12 12
13#endif /* PERF_EXEC_CMD_H */ 13#endif /* __PERF_EXEC_CMD_H */
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index e306857b2c2b..4805e6dfd23c 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -2,9 +2,15 @@
2#include <unistd.h> 2#include <unistd.h>
3#include <stdio.h> 3#include <stdio.h>
4#include <stdlib.h> 4#include <stdlib.h>
5#include <linux/list.h>
5 6
6#include "util.h" 7#include "util.h"
7#include "header.h" 8#include "header.h"
9#include "../perf.h"
10#include "trace-event.h"
11#include "symbol.h"
12#include "data_map.h"
13#include "debug.h"
8 14
9/* 15/*
10 * Create new perf.data header attribute: 16 * Create new perf.data header attribute:
@@ -13,32 +19,43 @@ struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr)
13{ 19{
14 struct perf_header_attr *self = malloc(sizeof(*self)); 20 struct perf_header_attr *self = malloc(sizeof(*self));
15 21
16 if (!self) 22 if (self != NULL) {
17 die("nomem"); 23 self->attr = *attr;
18 24 self->ids = 0;
19 self->attr = *attr; 25 self->size = 1;
20 self->ids = 0; 26 self->id = malloc(sizeof(u64));
21 self->size = 1; 27 if (self->id == NULL) {
22 self->id = malloc(sizeof(u64)); 28 free(self);
23 29 self = NULL;
24 if (!self->id) 30 }
25 die("nomem"); 31 }
26 32
27 return self; 33 return self;
28} 34}
29 35
30void perf_header_attr__add_id(struct perf_header_attr *self, u64 id) 36void perf_header_attr__delete(struct perf_header_attr *self)
37{
38 free(self->id);
39 free(self);
40}
41
42int perf_header_attr__add_id(struct perf_header_attr *self, u64 id)
31{ 43{
32 int pos = self->ids; 44 int pos = self->ids;
33 45
34 self->ids++; 46 self->ids++;
35 if (self->ids > self->size) { 47 if (self->ids > self->size) {
36 self->size *= 2; 48 int nsize = self->size * 2;
37 self->id = realloc(self->id, self->size * sizeof(u64)); 49 u64 *nid = realloc(self->id, nsize * sizeof(u64));
38 if (!self->id) 50
39 die("nomem"); 51 if (nid == NULL)
52 return -1;
53
54 self->size = nsize;
55 self->id = nid;
40 } 56 }
41 self->id[pos] = id; 57 self->id[pos] = id;
58 return 0;
42} 59}
43 60
44/* 61/*
@@ -46,42 +63,52 @@ void perf_header_attr__add_id(struct perf_header_attr *self, u64 id)
46 */ 63 */
47struct perf_header *perf_header__new(void) 64struct perf_header *perf_header__new(void)
48{ 65{
49 struct perf_header *self = malloc(sizeof(*self)); 66 struct perf_header *self = zalloc(sizeof(*self));
50 67
51 if (!self) 68 if (self != NULL) {
52 die("nomem"); 69 self->size = 1;
70 self->attr = malloc(sizeof(void *));
53 71
54 self->frozen = 0; 72 if (self->attr == NULL) {
73 free(self);
74 self = NULL;
75 }
76 }
55 77
56 self->attrs = 0; 78 return self;
57 self->size = 1; 79}
58 self->attr = malloc(sizeof(void *));
59 80
60 if (!self->attr) 81void perf_header__delete(struct perf_header *self)
61 die("nomem"); 82{
83 int i;
62 84
63 self->data_offset = 0; 85 for (i = 0; i < self->attrs; ++i)
64 self->data_size = 0; 86 perf_header_attr__delete(self->attr[i]);
65 87
66 return self; 88 free(self->attr);
89 free(self);
67} 90}
68 91
69void perf_header__add_attr(struct perf_header *self, 92int perf_header__add_attr(struct perf_header *self,
70 struct perf_header_attr *attr) 93 struct perf_header_attr *attr)
71{ 94{
72 int pos = self->attrs;
73
74 if (self->frozen) 95 if (self->frozen)
75 die("frozen"); 96 return -1;
76 97
77 self->attrs++; 98 if (self->attrs == self->size) {
78 if (self->attrs > self->size) { 99 int nsize = self->size * 2;
79 self->size *= 2; 100 struct perf_header_attr **nattr;
80 self->attr = realloc(self->attr, self->size * sizeof(void *)); 101
81 if (!self->attr) 102 nattr = realloc(self->attr, nsize * sizeof(void *));
82 die("nomem"); 103 if (nattr == NULL)
104 return -1;
105
106 self->size = nsize;
107 self->attr = nattr;
83 } 108 }
84 self->attr[pos] = attr; 109
110 self->attr[self->attrs++] = attr;
111 return 0;
85} 112}
86 113
87#define MAX_EVENT_NAME 64 114#define MAX_EVENT_NAME 64
@@ -97,7 +124,7 @@ static struct perf_trace_event_type *events;
97void perf_header__push_event(u64 id, const char *name) 124void perf_header__push_event(u64 id, const char *name)
98{ 125{
99 if (strlen(name) > MAX_EVENT_NAME) 126 if (strlen(name) > MAX_EVENT_NAME)
100 printf("Event %s will be truncated\n", name); 127 pr_warning("Event %s will be truncated\n", name);
101 128
102 if (!events) { 129 if (!events) {
103 events = malloc(sizeof(struct perf_trace_event_type)); 130 events = malloc(sizeof(struct perf_trace_event_type));
@@ -128,44 +155,137 @@ static const char *__perf_magic = "PERFFILE";
128 155
129#define PERF_MAGIC (*(u64 *)__perf_magic) 156#define PERF_MAGIC (*(u64 *)__perf_magic)
130 157
131struct perf_file_section {
132 u64 offset;
133 u64 size;
134};
135
136struct perf_file_attr { 158struct perf_file_attr {
137 struct perf_event_attr attr; 159 struct perf_event_attr attr;
138 struct perf_file_section ids; 160 struct perf_file_section ids;
139}; 161};
140 162
141struct perf_file_header { 163void perf_header__set_feat(struct perf_header *self, int feat)
142 u64 magic; 164{
143 u64 size; 165 set_bit(feat, self->adds_features);
144 u64 attr_size; 166}
145 struct perf_file_section attrs;
146 struct perf_file_section data;
147 struct perf_file_section event_types;
148};
149 167
150static void do_write(int fd, void *buf, size_t size) 168bool perf_header__has_feat(const struct perf_header *self, int feat)
169{
170 return test_bit(feat, self->adds_features);
171}
172
173static int do_write(int fd, const void *buf, size_t size)
151{ 174{
152 while (size) { 175 while (size) {
153 int ret = write(fd, buf, size); 176 int ret = write(fd, buf, size);
154 177
155 if (ret < 0) 178 if (ret < 0)
156 die("failed to write"); 179 return -errno;
157 180
158 size -= ret; 181 size -= ret;
159 buf += ret; 182 buf += ret;
160 } 183 }
184
185 return 0;
186}
187
188static int __dsos__write_buildid_table(struct list_head *head, int fd)
189{
190 struct dso *pos;
191
192 list_for_each_entry(pos, head, node) {
193 int err;
194 struct build_id_event b;
195 size_t len;
196
197 if (!pos->has_build_id)
198 continue;
199 len = pos->long_name_len + 1;
200 len = ALIGN(len, 64);
201 memset(&b, 0, sizeof(b));
202 memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id));
203 b.header.size = sizeof(b) + len;
204 err = do_write(fd, &b, sizeof(b));
205 if (err < 0)
206 return err;
207 err = do_write(fd, pos->long_name, len);
208 if (err < 0)
209 return err;
210 }
211
212 return 0;
161} 213}
162 214
163void perf_header__write(struct perf_header *self, int fd) 215static int dsos__write_buildid_table(int fd)
216{
217 int err = __dsos__write_buildid_table(&dsos__kernel, fd);
218 if (err == 0)
219 err = __dsos__write_buildid_table(&dsos__user, fd);
220 return err;
221}
222
223static int perf_header__adds_write(struct perf_header *self, int fd)
224{
225 int nr_sections;
226 struct perf_file_section *feat_sec;
227 int sec_size;
228 u64 sec_start;
229 int idx = 0, err;
230
231 if (dsos__read_build_ids())
232 perf_header__set_feat(self, HEADER_BUILD_ID);
233
234 nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS);
235 if (!nr_sections)
236 return 0;
237
238 feat_sec = calloc(sizeof(*feat_sec), nr_sections);
239 if (feat_sec == NULL)
240 return -ENOMEM;
241
242 sec_size = sizeof(*feat_sec) * nr_sections;
243
244 sec_start = self->data_offset + self->data_size;
245 lseek(fd, sec_start + sec_size, SEEK_SET);
246
247 if (perf_header__has_feat(self, HEADER_TRACE_INFO)) {
248 struct perf_file_section *trace_sec;
249
250 trace_sec = &feat_sec[idx++];
251
252 /* Write trace info */
253 trace_sec->offset = lseek(fd, 0, SEEK_CUR);
254 read_tracing_data(fd, attrs, nr_counters);
255 trace_sec->size = lseek(fd, 0, SEEK_CUR) - trace_sec->offset;
256 }
257
258
259 if (perf_header__has_feat(self, HEADER_BUILD_ID)) {
260 struct perf_file_section *buildid_sec;
261
262 buildid_sec = &feat_sec[idx++];
263
264 /* Write build-ids */
265 buildid_sec->offset = lseek(fd, 0, SEEK_CUR);
266 err = dsos__write_buildid_table(fd);
267 if (err < 0) {
268 pr_debug("failed to write buildid table\n");
269 goto out_free;
270 }
271 buildid_sec->size = lseek(fd, 0, SEEK_CUR) - buildid_sec->offset;
272 }
273
274 lseek(fd, sec_start, SEEK_SET);
275 err = do_write(fd, feat_sec, sec_size);
276 if (err < 0)
277 pr_debug("failed to write feature section\n");
278out_free:
279 free(feat_sec);
280 return err;
281}
282
283int perf_header__write(struct perf_header *self, int fd, bool at_exit)
164{ 284{
165 struct perf_file_header f_header; 285 struct perf_file_header f_header;
166 struct perf_file_attr f_attr; 286 struct perf_file_attr f_attr;
167 struct perf_header_attr *attr; 287 struct perf_header_attr *attr;
168 int i; 288 int i, err;
169 289
170 lseek(fd, sizeof(f_header), SEEK_SET); 290 lseek(fd, sizeof(f_header), SEEK_SET);
171 291
@@ -174,7 +294,11 @@ void perf_header__write(struct perf_header *self, int fd)
174 attr = self->attr[i]; 294 attr = self->attr[i];
175 295
176 attr->id_offset = lseek(fd, 0, SEEK_CUR); 296 attr->id_offset = lseek(fd, 0, SEEK_CUR);
177 do_write(fd, attr->id, attr->ids * sizeof(u64)); 297 err = do_write(fd, attr->id, attr->ids * sizeof(u64));
298 if (err < 0) {
299 pr_debug("failed to write perf header\n");
300 return err;
301 }
178 } 302 }
179 303
180 304
@@ -190,17 +314,31 @@ void perf_header__write(struct perf_header *self, int fd)
190 .size = attr->ids * sizeof(u64), 314 .size = attr->ids * sizeof(u64),
191 } 315 }
192 }; 316 };
193 do_write(fd, &f_attr, sizeof(f_attr)); 317 err = do_write(fd, &f_attr, sizeof(f_attr));
318 if (err < 0) {
319 pr_debug("failed to write perf header attribute\n");
320 return err;
321 }
194 } 322 }
195 323
196 self->event_offset = lseek(fd, 0, SEEK_CUR); 324 self->event_offset = lseek(fd, 0, SEEK_CUR);
197 self->event_size = event_count * sizeof(struct perf_trace_event_type); 325 self->event_size = event_count * sizeof(struct perf_trace_event_type);
198 if (events) 326 if (events) {
199 do_write(fd, events, self->event_size); 327 err = do_write(fd, events, self->event_size);
200 328 if (err < 0) {
329 pr_debug("failed to write perf header events\n");
330 return err;
331 }
332 }
201 333
202 self->data_offset = lseek(fd, 0, SEEK_CUR); 334 self->data_offset = lseek(fd, 0, SEEK_CUR);
203 335
336 if (at_exit) {
337 err = perf_header__adds_write(self, fd);
338 if (err < 0)
339 return err;
340 }
341
204 f_header = (struct perf_file_header){ 342 f_header = (struct perf_file_header){
205 .magic = PERF_MAGIC, 343 .magic = PERF_MAGIC,
206 .size = sizeof(f_header), 344 .size = sizeof(f_header),
@@ -219,11 +357,18 @@ void perf_header__write(struct perf_header *self, int fd)
219 }, 357 },
220 }; 358 };
221 359
360 memcpy(&f_header.adds_features, &self->adds_features, sizeof(self->adds_features));
361
222 lseek(fd, 0, SEEK_SET); 362 lseek(fd, 0, SEEK_SET);
223 do_write(fd, &f_header, sizeof(f_header)); 363 err = do_write(fd, &f_header, sizeof(f_header));
364 if (err < 0) {
365 pr_debug("failed to write perf header\n");
366 return err;
367 }
224 lseek(fd, self->data_offset + self->data_size, SEEK_SET); 368 lseek(fd, self->data_offset + self->data_size, SEEK_SET);
225 369
226 self->frozen = 1; 370 self->frozen = 1;
371 return 0;
227} 372}
228 373
229static void do_read(int fd, void *buf, size_t size) 374static void do_read(int fd, void *buf, size_t size)
@@ -241,22 +386,109 @@ static void do_read(int fd, void *buf, size_t size)
241 } 386 }
242} 387}
243 388
244struct perf_header *perf_header__read(int fd) 389int perf_header__process_sections(struct perf_header *self, int fd,
390 int (*process)(struct perf_file_section *self,
391 int feat, int fd))
392{
393 struct perf_file_section *feat_sec;
394 int nr_sections;
395 int sec_size;
396 int idx = 0;
397 int err = 0, feat = 1;
398
399 nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS);
400 if (!nr_sections)
401 return 0;
402
403 feat_sec = calloc(sizeof(*feat_sec), nr_sections);
404 if (!feat_sec)
405 return -1;
406
407 sec_size = sizeof(*feat_sec) * nr_sections;
408
409 lseek(fd, self->data_offset + self->data_size, SEEK_SET);
410
411 do_read(fd, feat_sec, sec_size);
412
413 while (idx < nr_sections && feat < HEADER_LAST_FEATURE) {
414 if (perf_header__has_feat(self, feat)) {
415 struct perf_file_section *sec = &feat_sec[idx++];
416
417 err = process(sec, feat, fd);
418 if (err < 0)
419 break;
420 }
421 ++feat;
422 }
423
424 free(feat_sec);
425 return err;
426};
427
428int perf_file_header__read(struct perf_file_header *self,
429 struct perf_header *ph, int fd)
430{
431 lseek(fd, 0, SEEK_SET);
432 do_read(fd, self, sizeof(*self));
433
434 if (self->magic != PERF_MAGIC ||
435 self->attr_size != sizeof(struct perf_file_attr))
436 return -1;
437
438 if (self->size != sizeof(*self)) {
439 /* Support the previous format */
440 if (self->size == offsetof(typeof(*self), adds_features))
441 bitmap_zero(self->adds_features, HEADER_FEAT_BITS);
442 else
443 return -1;
444 }
445
446 memcpy(&ph->adds_features, &self->adds_features,
447 sizeof(self->adds_features));
448
449 ph->event_offset = self->event_types.offset;
450 ph->event_size = self->event_types.size;
451 ph->data_offset = self->data.offset;
452 ph->data_size = self->data.size;
453 return 0;
454}
455
456static int perf_file_section__process(struct perf_file_section *self,
457 int feat, int fd)
458{
459 if (lseek(fd, self->offset, SEEK_SET) < 0) {
460 pr_debug("Failed to lseek to %Ld offset for feature %d, "
461 "continuing...\n", self->offset, feat);
462 return 0;
463 }
464
465 switch (feat) {
466 case HEADER_TRACE_INFO:
467 trace_report(fd);
468 break;
469
470 case HEADER_BUILD_ID:
471 if (perf_header__read_build_ids(fd, self->offset, self->size))
472 pr_debug("Failed to read buildids, continuing...\n");
473 break;
474 default:
475 pr_debug("unknown feature %d, continuing...\n", feat);
476 }
477
478 return 0;
479}
480
481int perf_header__read(struct perf_header *self, int fd)
245{ 482{
246 struct perf_header *self = perf_header__new();
247 struct perf_file_header f_header; 483 struct perf_file_header f_header;
248 struct perf_file_attr f_attr; 484 struct perf_file_attr f_attr;
249 u64 f_id; 485 u64 f_id;
250
251 int nr_attrs, nr_ids, i, j; 486 int nr_attrs, nr_ids, i, j;
252 487
253 lseek(fd, 0, SEEK_SET); 488 if (perf_file_header__read(&f_header, self, fd) < 0) {
254 do_read(fd, &f_header, sizeof(f_header)); 489 pr_debug("incompatible file format\n");
255 490 return -EINVAL;
256 if (f_header.magic != PERF_MAGIC || 491 }
257 f_header.size != sizeof(f_header) ||
258 f_header.attr_size != sizeof(f_attr))
259 die("incompatible file format");
260 492
261 nr_attrs = f_header.attrs.size / sizeof(f_attr); 493 nr_attrs = f_header.attrs.size / sizeof(f_attr);
262 lseek(fd, f_header.attrs.offset, SEEK_SET); 494 lseek(fd, f_header.attrs.offset, SEEK_SET);
@@ -269,6 +501,8 @@ struct perf_header *perf_header__read(int fd)
269 tmp = lseek(fd, 0, SEEK_CUR); 501 tmp = lseek(fd, 0, SEEK_CUR);
270 502
271 attr = perf_header_attr__new(&f_attr.attr); 503 attr = perf_header_attr__new(&f_attr.attr);
504 if (attr == NULL)
505 return -ENOMEM;
272 506
273 nr_ids = f_attr.ids.size / sizeof(u64); 507 nr_ids = f_attr.ids.size / sizeof(u64);
274 lseek(fd, f_attr.ids.offset, SEEK_SET); 508 lseek(fd, f_attr.ids.offset, SEEK_SET);
@@ -276,31 +510,34 @@ struct perf_header *perf_header__read(int fd)
276 for (j = 0; j < nr_ids; j++) { 510 for (j = 0; j < nr_ids; j++) {
277 do_read(fd, &f_id, sizeof(f_id)); 511 do_read(fd, &f_id, sizeof(f_id));
278 512
279 perf_header_attr__add_id(attr, f_id); 513 if (perf_header_attr__add_id(attr, f_id) < 0) {
514 perf_header_attr__delete(attr);
515 return -ENOMEM;
516 }
280 } 517 }
281 perf_header__add_attr(self, attr); 518 if (perf_header__add_attr(self, attr) < 0) {
519 perf_header_attr__delete(attr);
520 return -ENOMEM;
521 }
522
282 lseek(fd, tmp, SEEK_SET); 523 lseek(fd, tmp, SEEK_SET);
283 } 524 }
284 525
285 if (f_header.event_types.size) { 526 if (f_header.event_types.size) {
286 lseek(fd, f_header.event_types.offset, SEEK_SET); 527 lseek(fd, f_header.event_types.offset, SEEK_SET);
287 events = malloc(f_header.event_types.size); 528 events = malloc(f_header.event_types.size);
288 if (!events) 529 if (events == NULL)
289 die("nomem"); 530 return -ENOMEM;
290 do_read(fd, events, f_header.event_types.size); 531 do_read(fd, events, f_header.event_types.size);
291 event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type); 532 event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type);
292 } 533 }
293 self->event_offset = f_header.event_types.offset;
294 self->event_size = f_header.event_types.size;
295 534
296 self->data_offset = f_header.data.offset; 535 perf_header__process_sections(self, fd, perf_file_section__process);
297 self->data_size = f_header.data.size;
298 536
299 lseek(fd, self->data_offset, SEEK_SET); 537 lseek(fd, self->data_offset, SEEK_SET);
300 538
301 self->frozen = 1; 539 self->frozen = 1;
302 540 return 0;
303 return self;
304} 541}
305 542
306u64 perf_header__sample_type(struct perf_header *header) 543u64 perf_header__sample_type(struct perf_header *header)
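
do_write() now returns -errno instead of calling die(), but keeps the classic short-write loop: write(2) may accept fewer bytes than requested, so the cursor and remaining count are advanced until the buffer is drained. The same pattern as a standalone helper; write_all is an illustrative name, and the sketch uses a char * cursor where the original relies on GCC's void * arithmetic:

#include <errno.h>
#include <stddef.h>
#include <unistd.h>

/* Keep writing until the whole buffer is out or a real error
 * occurs; mirrors do_write() in header.c. */
static int write_all(int fd, const void *buf, size_t size)
{
        const char *p = buf;

        while (size) {
                ssize_t ret = write(fd, p, size);

                if (ret < 0)
                        return -errno;

                size -= ret;
                p += ret;
        }
        return 0;
}
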
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index a0761bc7863c..d1dbe2b79c42 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -1,10 +1,13 @@
1#ifndef _PERF_HEADER_H 1#ifndef __PERF_HEADER_H
2#define _PERF_HEADER_H 2#define __PERF_HEADER_H
3 3
4#include "../../../include/linux/perf_event.h" 4#include "../../../include/linux/perf_event.h"
5#include <sys/types.h> 5#include <sys/types.h>
6#include <stdbool.h>
6#include "types.h" 7#include "types.h"
7 8
9#include <linux/bitmap.h>
10
8struct perf_header_attr { 11struct perf_header_attr {
9 struct perf_event_attr attr; 12 struct perf_event_attr attr;
10 int ids, size; 13 int ids, size;
@@ -12,36 +15,71 @@ struct perf_header_attr {
12 off_t id_offset; 15 off_t id_offset;
13}; 16};
14 17
18enum {
19 HEADER_TRACE_INFO = 1,
20 HEADER_BUILD_ID,
21 HEADER_LAST_FEATURE,
22};
23
24#define HEADER_FEAT_BITS 256
25
26struct perf_file_section {
27 u64 offset;
28 u64 size;
29};
30
31struct perf_file_header {
32 u64 magic;
33 u64 size;
34 u64 attr_size;
35 struct perf_file_section attrs;
36 struct perf_file_section data;
37 struct perf_file_section event_types;
38 DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
39};
40
41struct perf_header;
42
43int perf_file_header__read(struct perf_file_header *self,
44 struct perf_header *ph, int fd);
45
15struct perf_header { 46struct perf_header {
16 int frozen; 47 int frozen;
17 int attrs, size; 48 int attrs, size;
18 struct perf_header_attr **attr; 49 struct perf_header_attr **attr;
19 s64 attr_offset; 50 s64 attr_offset;
20 u64 data_offset; 51 u64 data_offset;
21 u64 data_size; 52 u64 data_size;
22 u64 event_offset; 53 u64 event_offset;
23 u64 event_size; 54 u64 event_size;
55 DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
24}; 56};
25 57
26struct perf_header *perf_header__read(int fd); 58struct perf_header *perf_header__new(void);
27void perf_header__write(struct perf_header *self, int fd); 59void perf_header__delete(struct perf_header *self);
28 60
29void perf_header__add_attr(struct perf_header *self, 61int perf_header__read(struct perf_header *self, int fd);
30 struct perf_header_attr *attr); 62int perf_header__write(struct perf_header *self, int fd, bool at_exit);
63
64int perf_header__add_attr(struct perf_header *self,
65 struct perf_header_attr *attr);
31 66
32void perf_header__push_event(u64 id, const char *name); 67void perf_header__push_event(u64 id, const char *name);
33char *perf_header__find_event(u64 id); 68char *perf_header__find_event(u64 id);
34 69
70struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr);
71void perf_header_attr__delete(struct perf_header_attr *self);
35 72
36struct perf_header_attr * 73int perf_header_attr__add_id(struct perf_header_attr *self, u64 id);
37perf_header_attr__new(struct perf_event_attr *attr);
38void perf_header_attr__add_id(struct perf_header_attr *self, u64 id);
39 74
40u64 perf_header__sample_type(struct perf_header *header); 75u64 perf_header__sample_type(struct perf_header *header);
41struct perf_event_attr * 76struct perf_event_attr *
42perf_header__find_attr(u64 id, struct perf_header *header); 77perf_header__find_attr(u64 id, struct perf_header *header);
78void perf_header__set_feat(struct perf_header *self, int feat);
79bool perf_header__has_feat(const struct perf_header *self, int feat);
43 80
81int perf_header__process_sections(struct perf_header *self, int fd,
82 int (*process)(struct perf_file_section *self,
83 int feat, int fd));
44 84
45struct perf_header *perf_header__new(void); 85#endif /* __PERF_HEADER_H */
46
47#endif /* _PERF_HEADER_H */
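
HEADER_FEAT_BITS fixes the adds_features bitmap at 256 bits, which DECLARE_BITMAP turns into an array of BITS_TO_LONGS(256) unsigned longs; perf_header__set_feat() and perf_header__has_feat() are thin wrappers over set_bit()/test_bit() on that array. A self-contained illustration of the same word/bit indexing, with TOY_* names standing in for the HEADER_* feature enum:

#include <assert.h>
#include <limits.h>

#define FEAT_BITS 256
#define BITS_PER_LONG (sizeof(long) * CHAR_BIT)
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

enum { TOY_TRACE_INFO = 1, TOY_BUILD_ID };

int main(void)
{
        unsigned long feats[BITS_TO_LONGS(FEAT_BITS)] = { 0 }; /* 4 longs on LP64 */

        /* set_bit()/test_bit() arithmetic: word index nr / BITS_PER_LONG,
         * bit index nr % BITS_PER_LONG. */
        feats[TOY_BUILD_ID / BITS_PER_LONG] |=
                1UL << (TOY_BUILD_ID % BITS_PER_LONG);

        assert(feats[TOY_BUILD_ID / BITS_PER_LONG] &
               (1UL << (TOY_BUILD_ID % BITS_PER_LONG)));
        assert(!(feats[TOY_TRACE_INFO / BITS_PER_LONG] &
                 (1UL << (TOY_TRACE_INFO % BITS_PER_LONG))));
        return 0;
}
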
diff --git a/tools/perf/util/help.h b/tools/perf/util/help.h
index 7128783637b4..7f5c6dedd714 100644
--- a/tools/perf/util/help.h
+++ b/tools/perf/util/help.h
@@ -1,5 +1,5 @@
1#ifndef HELP_H 1#ifndef __PERF_HELP_H
2#define HELP_H 2#define __PERF_HELP_H
3 3
4struct cmdnames { 4struct cmdnames {
5 size_t alloc; 5 size_t alloc;
@@ -26,4 +26,4 @@ int is_in_cmdlist(struct cmdnames *c, const char *s);
26void list_commands(const char *title, struct cmdnames *main_cmds, 26void list_commands(const char *title, struct cmdnames *main_cmds,
27 struct cmdnames *other_cmds); 27 struct cmdnames *other_cmds);
28 28
29#endif /* HELP_H */ 29#endif /* __PERF_HELP_H */
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
new file mode 100644
index 000000000000..0ebf6ee16caa
--- /dev/null
+++ b/tools/perf/util/hist.c
@@ -0,0 +1,202 @@
1#include "hist.h"
2
3struct rb_root hist;
4struct rb_root collapse_hists;
5struct rb_root output_hists;
6int callchain;
7
8struct callchain_param callchain_param = {
9 .mode = CHAIN_GRAPH_REL,
10 .min_percent = 0.5
11};
12
13/*
14 * histogram, sorted on item, collects counts
15 */
16
17struct hist_entry *__hist_entry__add(struct addr_location *al,
18 struct symbol *sym_parent,
19 u64 count, bool *hit)
20{
21 struct rb_node **p = &hist.rb_node;
22 struct rb_node *parent = NULL;
23 struct hist_entry *he;
24 struct hist_entry entry = {
25 .thread = al->thread,
26 .map = al->map,
27 .sym = al->sym,
28 .ip = al->addr,
29 .level = al->level,
30 .count = count,
31 .parent = sym_parent,
32 };
33 int cmp;
34
35 while (*p != NULL) {
36 parent = *p;
37 he = rb_entry(parent, struct hist_entry, rb_node);
38
39 cmp = hist_entry__cmp(&entry, he);
40
41 if (!cmp) {
42 *hit = true;
43 return he;
44 }
45
46 if (cmp < 0)
47 p = &(*p)->rb_left;
48 else
49 p = &(*p)->rb_right;
50 }
51
52 he = malloc(sizeof(*he));
53 if (!he)
54 return NULL;
55 *he = entry;
56 rb_link_node(&he->rb_node, parent, p);
57 rb_insert_color(&he->rb_node, &hist);
58 *hit = false;
59 return he;
60}
61
62int64_t
63hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
64{
65 struct sort_entry *se;
66 int64_t cmp = 0;
67
68 list_for_each_entry(se, &hist_entry__sort_list, list) {
69 cmp = se->cmp(left, right);
70 if (cmp)
71 break;
72 }
73
74 return cmp;
75}
76
77int64_t
78hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
79{
80 struct sort_entry *se;
81 int64_t cmp = 0;
82
83 list_for_each_entry(se, &hist_entry__sort_list, list) {
84 int64_t (*f)(struct hist_entry *, struct hist_entry *);
85
86 f = se->collapse ?: se->cmp;
87
88 cmp = f(left, right);
89 if (cmp)
90 break;
91 }
92
93 return cmp;
94}
95
96void hist_entry__free(struct hist_entry *he)
97{
98 free(he);
99}
100
101/*
102 * collapse the histogram
103 */
104
105void collapse__insert_entry(struct hist_entry *he)
106{
107 struct rb_node **p = &collapse_hists.rb_node;
108 struct rb_node *parent = NULL;
109 struct hist_entry *iter;
110 int64_t cmp;
111
112 while (*p != NULL) {
113 parent = *p;
114 iter = rb_entry(parent, struct hist_entry, rb_node);
115
116 cmp = hist_entry__collapse(iter, he);
117
118 if (!cmp) {
119 iter->count += he->count;
120 hist_entry__free(he);
121 return;
122 }
123
124 if (cmp < 0)
125 p = &(*p)->rb_left;
126 else
127 p = &(*p)->rb_right;
128 }
129
130 rb_link_node(&he->rb_node, parent, p);
131 rb_insert_color(&he->rb_node, &collapse_hists);
132}
133
134void collapse__resort(void)
135{
136 struct rb_node *next;
137 struct hist_entry *n;
138
139 if (!sort__need_collapse)
140 return;
141
142 next = rb_first(&hist);
143 while (next) {
144 n = rb_entry(next, struct hist_entry, rb_node);
145 next = rb_next(&n->rb_node);
146
147 rb_erase(&n->rb_node, &hist);
148 collapse__insert_entry(n);
149 }
150}
151
152/*
153 * reverse the map, sort on count.
154 */
155
156void output__insert_entry(struct hist_entry *he, u64 min_callchain_hits)
157{
158 struct rb_node **p = &output_hists.rb_node;
159 struct rb_node *parent = NULL;
160 struct hist_entry *iter;
161
162 if (callchain)
163 callchain_param.sort(&he->sorted_chain, &he->callchain,
164 min_callchain_hits, &callchain_param);
165
166 while (*p != NULL) {
167 parent = *p;
168 iter = rb_entry(parent, struct hist_entry, rb_node);
169
170 if (he->count > iter->count)
171 p = &(*p)->rb_left;
172 else
173 p = &(*p)->rb_right;
174 }
175
176 rb_link_node(&he->rb_node, parent, p);
177 rb_insert_color(&he->rb_node, &output_hists);
178}
179
180void output__resort(u64 total_samples)
181{
182 struct rb_node *next;
183 struct hist_entry *n;
184 struct rb_root *tree = &hist;
185 u64 min_callchain_hits;
186
187 min_callchain_hits =
188 total_samples * (callchain_param.min_percent / 100);
189
190 if (sort__need_collapse)
191 tree = &collapse_hists;
192
193 next = rb_first(tree);
194
195 while (next) {
196 n = rb_entry(next, struct hist_entry, rb_node);
197 next = rb_next(&n->rb_node);
198
199 rb_erase(&n->rb_node, tree);
200 output__insert_entry(n, min_callchain_hits);
201 }
202}
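
hist.c maintains three trees: hist, keyed by the sort comparator; collapse_hists, where entries that compare equal are merged and their counts summed; and output_hists, re-sorted by count for display. The same collapse-then-resort flow, reduced to arrays and qsort() so it runs without the kernel rbtree (all names here are illustrative):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct toy_entry { const char *sym; unsigned long count; };

static int cmp_sym(const void *a, const void *b)
{
        return strcmp(((const struct toy_entry *)a)->sym,
                      ((const struct toy_entry *)b)->sym);
}

static int cmp_count_desc(const void *a, const void *b)
{
        const struct toy_entry *l = a, *r = b;
        return (l->count < r->count) - (l->count > r->count);
}

int main(void)
{
        struct toy_entry e[] = {
                { "do_page_fault", 3 }, { "schedule", 5 }, { "do_page_fault", 4 },
        };
        size_t n = sizeof(e) / sizeof(e[0]), out = 0;

        /* "collapse": sort on the key, then merge equal neighbours and
         * sum their counts -- what collapse__insert_entry() does per node. */
        qsort(e, n, sizeof(e[0]), cmp_sym);
        for (size_t i = 0; i < n; i++) {
                if (out && !strcmp(e[out - 1].sym, e[i].sym))
                        e[out - 1].count += e[i].count;
                else
                        e[out++] = e[i];
        }

        /* "output resort": order by count, highest first, as
         * output__insert_entry() does into output_hists. */
        qsort(e, out, sizeof(e[0]), cmp_count_desc);

        for (size_t i = 0; i < out; i++)
                printf("%8lu %s\n", e[i].count, e[i].sym);
        return 0;
}

Run against this sample data it prints the merged do_page_fault count (7) first, mirroring how output__resort() leaves the hottest entries at rb_first() of output_hists.
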
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
new file mode 100644
index 000000000000..3020db0c9292
--- /dev/null
+++ b/tools/perf/util/hist.h
@@ -0,0 +1,50 @@
1#ifndef __PERF_HIST_H
2#define __PERF_HIST_H
3#include "../builtin.h"
4
5#include "util.h"
6
7#include "color.h"
8#include <linux/list.h>
9#include "cache.h"
10#include <linux/rbtree.h>
11#include "symbol.h"
12#include "string.h"
13#include "callchain.h"
14#include "strlist.h"
15#include "values.h"
16
17#include "../perf.h"
18#include "debug.h"
19#include "header.h"
20
21#include "parse-options.h"
22#include "parse-events.h"
23
24#include "thread.h"
25#include "sort.h"
26
27extern struct rb_root hist;
28extern struct rb_root collapse_hists;
29extern struct rb_root output_hists;
30extern int callchain;
31extern struct callchain_param callchain_param;
32extern unsigned long total;
33extern unsigned long total_mmap;
34extern unsigned long total_comm;
35extern unsigned long total_fork;
36extern unsigned long total_unknown;
37extern unsigned long total_lost;
38
39struct hist_entry *__hist_entry__add(struct addr_location *al,
40 struct symbol *parent,
41 u64 count, bool *hit);
42extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *);
43extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *);
44extern void hist_entry__free(struct hist_entry *);
45extern void collapse__insert_entry(struct hist_entry *);
46extern void collapse__resort(void);
47extern void output__insert_entry(struct hist_entry *, u64);
48extern void output__resort(u64);
49
50#endif /* __PERF_HIST_H */
diff --git a/tools/perf/util/include/asm/asm-offsets.h b/tools/perf/util/include/asm/asm-offsets.h
new file mode 100644
index 000000000000..ed538942523d
--- /dev/null
+++ b/tools/perf/util/include/asm/asm-offsets.h
@@ -0,0 +1 @@
1/* stub */
diff --git a/tools/perf/util/include/asm/bitops.h b/tools/perf/util/include/asm/bitops.h
new file mode 100644
index 000000000000..58e9817ffae0
--- /dev/null
+++ b/tools/perf/util/include/asm/bitops.h
@@ -0,0 +1,18 @@
1#ifndef _PERF_ASM_BITOPS_H_
2#define _PERF_ASM_BITOPS_H_
3
4#include <sys/types.h>
5#include "../../types.h"
6#include <linux/compiler.h>
7
8/* CHECKME: Not sure both always match */
9#define BITS_PER_LONG __WORDSIZE
10
11#include "../../../../include/asm-generic/bitops/__fls.h"
12#include "../../../../include/asm-generic/bitops/fls.h"
13#include "../../../../include/asm-generic/bitops/fls64.h"
14#include "../../../../include/asm-generic/bitops/__ffs.h"
15#include "../../../../include/asm-generic/bitops/ffz.h"
16#include "../../../../include/asm-generic/bitops/hweight.h"
17
18#endif
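
The CHECKME above is honest: BITS_PER_LONG is simply assumed to equal __WORDSIZE. On glibc targets the two do agree, and the assumption can be pinned down at compile time with the negative-array-size trick, a pre-C11 stand-in for _Static_assert; a sketch, assuming a glibc toolchain where __WORDSIZE is visible via these headers:

#include <limits.h>
#include <stdint.h>

/* Fails to compile if __WORDSIZE ever diverges from the number
 * of bits in an unsigned long. */
typedef char bits_per_long_matches_wordsize
        [(__WORDSIZE == sizeof(long) * CHAR_BIT) ? 1 : -1];
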
diff --git a/tools/perf/util/include/asm/bug.h b/tools/perf/util/include/asm/bug.h
new file mode 100644
index 000000000000..7fcc6810adc2
--- /dev/null
+++ b/tools/perf/util/include/asm/bug.h
@@ -0,0 +1,22 @@
1#ifndef _PERF_ASM_GENERIC_BUG_H
2#define _PERF_ASM_GENERIC_BUG_H
3
4#define __WARN_printf(arg...) do { fprintf(stderr, arg); } while (0)
5
6#define WARN(condition, format...) ({ \
7 int __ret_warn_on = !!(condition); \
8 if (unlikely(__ret_warn_on)) \
9 __WARN_printf(format); \
10 unlikely(__ret_warn_on); \
11})
12
13#define WARN_ONCE(condition, format...) ({ \
14 static int __warned; \
15 int __ret_warn_once = !!(condition); \
16 \
17 if (unlikely(__ret_warn_once)) \
18 if (WARN(!__warned, format)) \
19 __warned = 1; \
20 unlikely(__ret_warn_once); \
21})
22#endif
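
These userspace re-implementations preserve the kernel calling convention: WARN() evaluates to the tested condition so it can drive an if directly, and WARN_ONCE() latches __warned so its message fires only on the first hit. A hypothetical caller, assuming unlikely() is provided (perf supplies it from its compiler wrappers) and that this header is reachable on the include path:

#include <stdio.h>

#define unlikely(x) __builtin_expect(!!(x), 0)  /* normally from compiler.h */
#include "asm/bug.h"    /* assumes tools/perf/util/include is a -I path */

int main(void)
{
        int err = -5;   /* stand-in for a failed call */

        if (WARN(err < 0, "operation failed: %d\n", err))
                fprintf(stderr, "bailed out after the warning\n");

        for (int i = 0; i < 3; i++)
                WARN_ONCE(1, "printed exactly once, not three times\n");

        return 0;
}
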
diff --git a/tools/perf/util/include/asm/byteorder.h b/tools/perf/util/include/asm/byteorder.h
new file mode 100644
index 000000000000..b722abe3a626
--- /dev/null
+++ b/tools/perf/util/include/asm/byteorder.h
@@ -0,0 +1,2 @@
1#include <asm/types.h>
2#include "../../../../include/linux/swab.h"
diff --git a/tools/perf/util/include/asm/swab.h b/tools/perf/util/include/asm/swab.h
new file mode 100644
index 000000000000..ed538942523d
--- /dev/null
+++ b/tools/perf/util/include/asm/swab.h
@@ -0,0 +1 @@
1/* stub */
diff --git a/tools/perf/util/include/asm/uaccess.h b/tools/perf/util/include/asm/uaccess.h
new file mode 100644
index 000000000000..d0f72b8fcc35
--- /dev/null
+++ b/tools/perf/util/include/asm/uaccess.h
@@ -0,0 +1,14 @@
1#ifndef _PERF_ASM_UACCESS_H_
2#define _PERF_ASM_UACCESS_H_
3
4#define __get_user(src, dest) \
5({ \
6 (src) = *dest; \
7 0; \
8})
9
10#define get_user __get_user
11
12#define access_ok(type, addr, size) 1
13
14#endif
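
In the kernel, get_user() copies a value in from userspace and may fault; this stub reduces it to a plain dereference that always reports success, which is enough for code (such as the x86 instruction decoder) compiled into perf. A minimal check of the stub's behaviour, with the macro inlined so the sketch stands alone; note the arguments read as (destination, source-pointer) despite their names:

#include <assert.h>

#define __get_user(src, dest)   \
({                              \
        (src) = *dest;          \
        0;                      \
})
#define get_user __get_user

int main(void)
{
        int value = 42, out = 0;

        /* "dest" is the pointer being read from, matching the
         * stub's (slightly confusing) parameter naming above. */
        assert(get_user(out, &value) == 0);
        assert(out == 42);
        return 0;
}
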
diff --git a/tools/perf/util/include/linux/bitmap.h b/tools/perf/util/include/linux/bitmap.h
new file mode 100644
index 000000000000..94507639a8c4
--- /dev/null
+++ b/tools/perf/util/include/linux/bitmap.h
@@ -0,0 +1,3 @@
1#include "../../../../include/linux/bitmap.h"
2#include "../../../../include/asm-generic/bitops/find.h"
3#include <linux/errno.h>
diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h
new file mode 100644
index 000000000000..8d63116e9435
--- /dev/null
+++ b/tools/perf/util/include/linux/bitops.h
@@ -0,0 +1,29 @@
1#ifndef _PERF_LINUX_BITOPS_H_
2#define _PERF_LINUX_BITOPS_H_
3
4#define __KERNEL__
5
6#define CONFIG_GENERIC_FIND_NEXT_BIT
7#define CONFIG_GENERIC_FIND_FIRST_BIT
8#include "../../../../include/linux/bitops.h"
9
10#undef __KERNEL__
11
12static inline void set_bit(int nr, unsigned long *addr)
13{
14 addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
15}
16
17static __always_inline int test_bit(unsigned int nr, const unsigned long *addr)
18{
19 return ((1UL << (nr % BITS_PER_LONG)) &
20 (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0;
21}
22
23unsigned long generic_find_next_zero_le_bit(const unsigned long *addr, unsigned
24 long size, unsigned long offset);
25
26unsigned long generic_find_next_le_bit(const unsigned long *addr, unsigned
27 long size, unsigned long offset);
28
29#endif
diff --git a/tools/perf/util/include/linux/compiler.h b/tools/perf/util/include/linux/compiler.h
new file mode 100644
index 000000000000..dfb0713ed47f
--- /dev/null
+++ b/tools/perf/util/include/linux/compiler.h
@@ -0,0 +1,10 @@
1#ifndef _PERF_LINUX_COMPILER_H_
2#define _PERF_LINUX_COMPILER_H_
3
4#ifndef __always_inline
5#define __always_inline inline
6#endif
7#define __user
8#define __attribute_const__
9
10#endif
diff --git a/tools/perf/util/include/linux/ctype.h b/tools/perf/util/include/linux/ctype.h
new file mode 100644
index 000000000000..a53d4ee1e0b7
--- /dev/null
+++ b/tools/perf/util/include/linux/ctype.h
@@ -0,0 +1 @@
1#include "../util.h"
diff --git a/tools/perf/util/include/linux/kernel.h b/tools/perf/util/include/linux/kernel.h
index a6b87390cb52..21c0274c02fa 100644
--- a/tools/perf/util/include/linux/kernel.h
+++ b/tools/perf/util/include/linux/kernel.h
@@ -1,6 +1,16 @@
1#ifndef PERF_LINUX_KERNEL_H_ 1#ifndef PERF_LINUX_KERNEL_H_
2#define PERF_LINUX_KERNEL_H_ 2#define PERF_LINUX_KERNEL_H_
3 3
4#include <stdarg.h>
5#include <stdio.h>
6#include <stdlib.h>
7#include <assert.h>
8
9#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
10
11#define ALIGN(x,a) __ALIGN_MASK(x,(typeof(x))(a)-1)
12#define __ALIGN_MASK(x,mask) (((x)+(mask))&~(mask))
13
4#ifndef offsetof 14#ifndef offsetof
5#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) 15#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
6#endif 16#endif
@@ -26,4 +36,70 @@
26 _max1 > _max2 ? _max1 : _max2; }) 36 _max1 > _max2 ? _max1 : _max2; })
27#endif 37#endif
28 38
39#ifndef min
40#define min(x, y) ({ \
41 typeof(x) _min1 = (x); \
42 typeof(y) _min2 = (y); \
43 (void) (&_min1 == &_min2); \
44 _min1 < _min2 ? _min1 : _min2; })
45#endif
46
47#ifndef BUG_ON
48#define BUG_ON(cond) assert(!(cond))
49#endif
50
51/*
52 * Both need more care to handle endianness
53 * (Don't use bitmap_copy_le() for now)
54 */
55#define cpu_to_le64(x) (x)
56#define cpu_to_le32(x) (x)
57
58static inline int
59vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
60{
61 int i;
62 ssize_t ssize = size;
63
64 i = vsnprintf(buf, size, fmt, args);
65
66 return (i >= ssize) ? (ssize - 1) : i;
67}
68
69static inline int scnprintf(char * buf, size_t size, const char * fmt, ...)
70{
71 va_list args;
72 ssize_t ssize = size;
73 int i;
74
75 va_start(args, fmt);
76 i = vsnprintf(buf, size, fmt, args);
77 va_end(args);
78
79 return (i >= ssize) ? (ssize - 1) : i;
80}
81
82static inline unsigned long
83simple_strtoul(const char *nptr, char **endptr, int base)
84{
85 return strtoul(nptr, endptr, base);
86}
87
88#ifndef pr_fmt
89#define pr_fmt(fmt) fmt
90#endif
91
92#define pr_err(fmt, ...) \
93 do { fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__); } while (0)
94#define pr_warning(fmt, ...) \
95 do { fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__); } while (0)
96#define pr_info(fmt, ...) \
97 do { fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__); } while (0)
98#define pr_debug(fmt, ...) \
99 eprintf(1, pr_fmt(fmt), ##__VA_ARGS__)
100#define pr_debugN(n, fmt, ...) \
101 eprintf(n, pr_fmt(fmt), ##__VA_ARGS__)
102#define pr_debug2(fmt, ...) pr_debugN(2, pr_fmt(fmt), ##__VA_ARGS__)
103#define pr_debug3(fmt, ...) pr_debugN(3, pr_fmt(fmt), ##__VA_ARGS__)
104
29#endif 105#endif
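
scnprintf()/vscnprintf() differ from raw snprintf() only in the return value: snprintf() reports the length the output would have had, while these clamp to the size - 1 bytes actually stored, so callers can accumulate offsets without running past the buffer. A worked comparison (buffer size and string are arbitrary):

#include <stdio.h>

int main(void)
{
        char buf[8];

        /* snprintf reports the untruncated length... */
        int want = snprintf(buf, sizeof(buf), "%s", "perf_events");
        printf("snprintf: %d (buf=\"%s\")\n", want, buf);      /* 11, "perf_ev" */

        /* ...while scnprintf-style clamping reports bytes actually stored. */
        int got = want >= (int)sizeof(buf) ? (int)sizeof(buf) - 1 : want;
        printf("scnprintf-equivalent: %d\n", got);              /* 7 */

        return 0;
}
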
diff --git a/tools/perf/util/include/linux/string.h b/tools/perf/util/include/linux/string.h
new file mode 100644
index 000000000000..3b2f5900276f
--- /dev/null
+++ b/tools/perf/util/include/linux/string.h
@@ -0,0 +1 @@
1#include <string.h>
diff --git a/tools/perf/util/include/linux/types.h b/tools/perf/util/include/linux/types.h
new file mode 100644
index 000000000000..196862a81a21
--- /dev/null
+++ b/tools/perf/util/include/linux/types.h
@@ -0,0 +1,9 @@
1#ifndef _PERF_LINUX_TYPES_H_
2#define _PERF_LINUX_TYPES_H_
3
4#include <asm/types.h>
5
6#define DECLARE_BITMAP(name,bits) \
7 unsigned long name[BITS_TO_LONGS(bits)]
8
9#endif
diff --git a/tools/perf/util/levenshtein.h b/tools/perf/util/levenshtein.h
index 0173abeef52c..b0fcb6d8a881 100644
--- a/tools/perf/util/levenshtein.h
+++ b/tools/perf/util/levenshtein.h
@@ -1,8 +1,8 @@
1#ifndef LEVENSHTEIN_H 1#ifndef __PERF_LEVENSHTEIN_H
2#define LEVENSHTEIN_H 2#define __PERF_LEVENSHTEIN_H
3 3
4int levenshtein(const char *string1, const char *string2, 4int levenshtein(const char *string1, const char *string2,
5 int swap_penalty, int substition_penalty, 5 int swap_penalty, int substition_penalty,
6 int insertion_penalty, int deletion_penalty); 6 int insertion_penalty, int deletion_penalty);
7 7
8#endif 8#endif /* __PERF_LEVENSHTEIN_H */
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 804e02382739..69f94fe9db20 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -3,6 +3,7 @@
3#include <stdlib.h> 3#include <stdlib.h>
4#include <string.h> 4#include <string.h>
5#include <stdio.h> 5#include <stdio.h>
6#include "debug.h"
6 7
7static inline int is_anon_memory(const char *filename) 8static inline int is_anon_memory(const char *filename)
8{ 9{
@@ -19,13 +20,28 @@ static int strcommon(const char *pathname, char *cwd, int cwdlen)
19 return n; 20 return n;
20} 21}
21 22
22 struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen) 23void map__init(struct map *self, enum map_type type,
24 u64 start, u64 end, u64 pgoff, struct dso *dso)
25{
26 self->type = type;
27 self->start = start;
28 self->end = end;
29 self->pgoff = pgoff;
30 self->dso = dso;
31 self->map_ip = map__map_ip;
32 self->unmap_ip = map__unmap_ip;
33 RB_CLEAR_NODE(&self->rb_node);
34}
35
36struct map *map__new(struct mmap_event *event, enum map_type type,
37 char *cwd, int cwdlen)
23{ 38{
24 struct map *self = malloc(sizeof(*self)); 39 struct map *self = malloc(sizeof(*self));
25 40
26 if (self != NULL) { 41 if (self != NULL) {
27 const char *filename = event->filename; 42 const char *filename = event->filename;
28 char newfilename[PATH_MAX]; 43 char newfilename[PATH_MAX];
44 struct dso *dso;
29 int anon; 45 int anon;
30 46
31 if (cwd) { 47 if (cwd) {
@@ -45,18 +61,15 @@ static int strcommon(const char *pathname, char *cwd, int cwdlen)
45 filename = newfilename; 61 filename = newfilename;
46 } 62 }
47 63
48 self->start = event->start; 64 dso = dsos__findnew(filename);
49 self->end = event->start + event->len; 65 if (dso == NULL)
50 self->pgoff = event->pgoff;
51
52 self->dso = dsos__findnew(filename);
53 if (self->dso == NULL)
54 goto out_delete; 66 goto out_delete;
55 67
68 map__init(self, type, event->start, event->start + event->len,
69 event->pgoff, dso);
70
56 if (self->dso == vdso || anon) 71 if (self->dso == vdso || anon)
57 self->map_ip = vdso__map_ip; 72 self->map_ip = self->unmap_ip = identity__map_ip;
58 else
59 self->map_ip = map__map_ip;
60 } 73 }
61 return self; 74 return self;
62out_delete: 75out_delete:
@@ -64,6 +77,72 @@ out_delete:
64 return NULL; 77 return NULL;
65} 78}
66 79
80void map__delete(struct map *self)
81{
82 free(self);
83}
84
85void map__fixup_start(struct map *self)
86{
87 struct rb_root *symbols = &self->dso->symbols[self->type];
88 struct rb_node *nd = rb_first(symbols);
89 if (nd != NULL) {
90 struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
91 self->start = sym->start;
92 }
93}
94
95void map__fixup_end(struct map *self)
96{
97 struct rb_root *symbols = &self->dso->symbols[self->type];
98 struct rb_node *nd = rb_last(symbols);
99 if (nd != NULL) {
100 struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
101 self->end = sym->end;
102 }
103}
104
105#define DSO__DELETED "(deleted)"
106
107struct symbol *map__find_symbol(struct map *self, u64 addr,
108 symbol_filter_t filter)
109{
110 if (!dso__loaded(self->dso, self->type)) {
111 int nr = dso__load(self->dso, self, filter);
112
113 if (nr < 0) {
114 if (self->dso->has_build_id) {
115 char sbuild_id[BUILD_ID_SIZE * 2 + 1];
116
117 build_id__sprintf(self->dso->build_id,
118 sizeof(self->dso->build_id),
119 sbuild_id);
120 pr_warning("%s with build id %s not found",
121 self->dso->long_name, sbuild_id);
122 } else
123 pr_warning("Failed to open %s",
124 self->dso->long_name);
125 pr_warning(", continuing without symbols\n");
126 return NULL;
127 } else if (nr == 0) {
128 const char *name = self->dso->long_name;
129 const size_t len = strlen(name);
130 const size_t real_len = len - sizeof(DSO__DELETED);
131
132 if (len > sizeof(DSO__DELETED) &&
133 strcmp(name + real_len + 1, DSO__DELETED) == 0) {
134 pr_warning("%.*s was updated, restart the long running apps that use it!\n",
135 (int)real_len, name);
136 } else {
137 pr_warning("no symbols found in %s, maybe install a debug package?\n", name);
138 }
139 return NULL;
140 }
141 }
142
143 return self->dso->find_symbol(self->dso, self->type, addr);
144}
145
67struct map *map__clone(struct map *self) 146struct map *map__clone(struct map *self)
68{ 147{
69 struct map *map = malloc(sizeof(*self)); 148 struct map *map = malloc(sizeof(*self));
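
The DSO__DELETED handling in map__find_symbol() leans on easy-to-misread arithmetic: sizeof(DSO__DELETED) is 10 (nine characters plus the NUL), so for a mapping the kernel renamed to "name (deleted)", name + real_len + 1 skips the separating space and lands exactly on the suffix. Checking that logic in isolation (the library path is made up):

#include <assert.h>
#include <stdio.h>
#include <string.h>

#define DSO__DELETED "(deleted)"

int main(void)
{
        /* The kernel appends " (deleted)" to maps whose file was unlinked. */
        const char *name = "/lib/libfoo.so (deleted)";
        const size_t len = strlen(name);                    /* 24 */
        const size_t real_len = len - sizeof(DSO__DELETED); /* 24 - 10 = 14 */

        /* +1 skips the separating space, landing on "(deleted)". */
        assert(len > sizeof(DSO__DELETED) &&
               strcmp(name + real_len + 1, DSO__DELETED) == 0);

        printf("%.*s was updated\n", (int)real_len, name);  /* /lib/libfoo.so */
        return 0;
}
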
diff --git a/tools/perf/util/module.c b/tools/perf/util/module.c
deleted file mode 100644
index 0d8c85defcd2..000000000000
--- a/tools/perf/util/module.c
+++ /dev/null
@@ -1,545 +0,0 @@
1#include "util.h"
2#include "../perf.h"
3#include "string.h"
4#include "module.h"
5
6#include <libelf.h>
7#include <libgen.h>
8#include <gelf.h>
9#include <elf.h>
10#include <dirent.h>
11#include <sys/utsname.h>
12
13static unsigned int crc32(const char *p, unsigned int len)
14{
15 int i;
16 unsigned int crc = 0;
17
18 while (len--) {
19 crc ^= *p++;
20 for (i = 0; i < 8; i++)
21 crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
22 }
23 return crc;
24}
25
26/* module section methods */
27
28struct sec_dso *sec_dso__new_dso(const char *name)
29{
30 struct sec_dso *self = malloc(sizeof(*self) + strlen(name) + 1);
31
32 if (self != NULL) {
33 strcpy(self->name, name);
34 self->secs = RB_ROOT;
35 self->find_section = sec_dso__find_section;
36 }
37
38 return self;
39}
40
41static void sec_dso__delete_section(struct section *self)
42{
43 free(((void *)self));
44}
45
46void sec_dso__delete_sections(struct sec_dso *self)
47{
48 struct section *pos;
49 struct rb_node *next = rb_first(&self->secs);
50
51 while (next) {
52 pos = rb_entry(next, struct section, rb_node);
53 next = rb_next(&pos->rb_node);
54 rb_erase(&pos->rb_node, &self->secs);
55 sec_dso__delete_section(pos);
56 }
57}
58
59void sec_dso__delete_self(struct sec_dso *self)
60{
61 sec_dso__delete_sections(self);
62 free(self);
63}
64
65static void sec_dso__insert_section(struct sec_dso *self, struct section *sec)
66{
67 struct rb_node **p = &self->secs.rb_node;
68 struct rb_node *parent = NULL;
69 const u64 hash = sec->hash;
70 struct section *s;
71
72 while (*p != NULL) {
73 parent = *p;
74 s = rb_entry(parent, struct section, rb_node);
75 if (hash < s->hash)
76 p = &(*p)->rb_left;
77 else
78 p = &(*p)->rb_right;
79 }
80 rb_link_node(&sec->rb_node, parent, p);
81 rb_insert_color(&sec->rb_node, &self->secs);
82}
83
84struct section *sec_dso__find_section(struct sec_dso *self, const char *name)
85{
86 struct rb_node *n;
87 u64 hash;
88 int len;
89
90 if (self == NULL)
91 return NULL;
92
93 len = strlen(name);
94 hash = crc32(name, len);
95
96 n = self->secs.rb_node;
97
98 while (n) {
99 struct section *s = rb_entry(n, struct section, rb_node);
100
101 if (hash < s->hash)
102 n = n->rb_left;
103 else if (hash > s->hash)
104 n = n->rb_right;
105 else {
106 if (!strcmp(name, s->name))
107 return s;
108 else
109 n = rb_next(&s->rb_node);
110 }
111 }
112
113 return NULL;
114}
115
116static size_t sec_dso__fprintf_section(struct section *self, FILE *fp)
117{
118 return fprintf(fp, "name:%s vma:%llx path:%s\n",
119 self->name, self->vma, self->path);
120}
121
122size_t sec_dso__fprintf(struct sec_dso *self, FILE *fp)
123{
124 size_t ret = fprintf(fp, "dso: %s\n", self->name);
125
126 struct rb_node *nd;
127 for (nd = rb_first(&self->secs); nd; nd = rb_next(nd)) {
128 struct section *pos = rb_entry(nd, struct section, rb_node);
129 ret += sec_dso__fprintf_section(pos, fp);
130 }
131
132 return ret;
133}
134
135static struct section *section__new(const char *name, const char *path)
136{
137 struct section *self = calloc(1, sizeof(*self));
138
139 if (!self)
140 goto out_failure;
141
142 self->name = calloc(1, strlen(name) + 1);
143 if (!self->name)
144 goto out_failure;
145
146 self->path = calloc(1, strlen(path) + 1);
147 if (!self->path)
148 goto out_failure;
149
150 strcpy(self->name, name);
151 strcpy(self->path, path);
152 self->hash = crc32(self->name, strlen(name));
153
154 return self;
155
156out_failure:
157 if (self) {
158 if (self->name)
159 free(self->name);
160 if (self->path)
161 free(self->path);
162 free(self);
163 }
164
165 return NULL;
166}
167
168/* module methods */
169
170struct mod_dso *mod_dso__new_dso(const char *name)
171{
172 struct mod_dso *self = malloc(sizeof(*self) + strlen(name) + 1);
173
174 if (self != NULL) {
175 strcpy(self->name, name);
176 self->mods = RB_ROOT;
177 self->find_module = mod_dso__find_module;
178 }
179
180 return self;
181}
182
183static void mod_dso__delete_module(struct module *self)
184{
185 free(((void *)self));
186}
187
188void mod_dso__delete_modules(struct mod_dso *self)
189{
190 struct module *pos;
191 struct rb_node *next = rb_first(&self->mods);
192
193 while (next) {
194 pos = rb_entry(next, struct module, rb_node);
195 next = rb_next(&pos->rb_node);
196 rb_erase(&pos->rb_node, &self->mods);
197 mod_dso__delete_module(pos);
198 }
199}
200
201void mod_dso__delete_self(struct mod_dso *self)
202{
203 mod_dso__delete_modules(self);
204 free(self);
205}
206
207static void mod_dso__insert_module(struct mod_dso *self, struct module *mod)
208{
209 struct rb_node **p = &self->mods.rb_node;
210 struct rb_node *parent = NULL;
211 const u64 hash = mod->hash;
212 struct module *m;
213
214 while (*p != NULL) {
215 parent = *p;
216 m = rb_entry(parent, struct module, rb_node);
217 if (hash < m->hash)
218 p = &(*p)->rb_left;
219 else
220 p = &(*p)->rb_right;
221 }
222 rb_link_node(&mod->rb_node, parent, p);
223 rb_insert_color(&mod->rb_node, &self->mods);
224}
225
226struct module *mod_dso__find_module(struct mod_dso *self, const char *name)
227{
228 struct rb_node *n;
229 u64 hash;
230 int len;
231
232 if (self == NULL)
233 return NULL;
234
235 len = strlen(name);
236 hash = crc32(name, len);
237
238 n = self->mods.rb_node;
239
240 while (n) {
241 struct module *m = rb_entry(n, struct module, rb_node);
242
243 if (hash < m->hash)
244 n = n->rb_left;
245 else if (hash > m->hash)
246 n = n->rb_right;
247 else {
248 if (!strcmp(name, m->name))
249 return m;
250 else
251 n = rb_next(&m->rb_node);
252 }
253 }
254
255 return NULL;
256}
257
258static size_t mod_dso__fprintf_module(struct module *self, FILE *fp)
259{
260 return fprintf(fp, "name:%s path:%s\n", self->name, self->path);
261}
262
263size_t mod_dso__fprintf(struct mod_dso *self, FILE *fp)
264{
265 struct rb_node *nd;
266 size_t ret;
267
268 ret = fprintf(fp, "dso: %s\n", self->name);
269
270 for (nd = rb_first(&self->mods); nd; nd = rb_next(nd)) {
271 struct module *pos = rb_entry(nd, struct module, rb_node);
272
273 ret += mod_dso__fprintf_module(pos, fp);
274 }
275
276 return ret;
277}
278
279static struct module *module__new(const char *name, const char *path)
280{
281 struct module *self = calloc(1, sizeof(*self));
282
283 if (!self)
284 goto out_failure;
285
286 self->name = calloc(1, strlen(name) + 1);
287 if (!self->name)
288 goto out_failure;
289
290 self->path = calloc(1, strlen(path) + 1);
291 if (!self->path)
292 goto out_failure;
293
294 strcpy(self->name, name);
295 strcpy(self->path, path);
296 self->hash = crc32(self->name, strlen(name));
297
298 return self;
299
300out_failure:
301 if (self) {
302 if (self->name)
303 free(self->name);
304 if (self->path)
305 free(self->path);
306 free(self);
307 }
308
309 return NULL;
310}
311
312static int mod_dso__load_sections(struct module *mod)
313{
314 int count = 0, path_len;
315 struct dirent *entry;
316 char *line = NULL;
317 char *dir_path;
318 DIR *dir;
319 size_t n;
320
321 path_len = strlen("/sys/module/");
322 path_len += strlen(mod->name);
323 path_len += strlen("/sections/");
324
325 dir_path = calloc(1, path_len + 1);
326 if (dir_path == NULL)
327 goto out_failure;
328
329 strcat(dir_path, "/sys/module/");
330 strcat(dir_path, mod->name);
331 strcat(dir_path, "/sections/");
332
333 dir = opendir(dir_path);
334 if (dir == NULL)
335 goto out_free;
336
337 while ((entry = readdir(dir))) {
338 struct section *section;
339 char *path, *vma;
340 int line_len;
341 FILE *file;
342
343 if (!strcmp(".", entry->d_name) || !strcmp("..", entry->d_name))
344 continue;
345
346 path = calloc(1, path_len + strlen(entry->d_name) + 1);
347 if (path == NULL)
348 break;
349 strcat(path, dir_path);
350 strcat(path, entry->d_name);
351
352 file = fopen(path, "r");
353 if (file == NULL) {
354 free(path);
355 break;
356 }
357
358 line_len = getline(&line, &n, file);
359 if (line_len < 0) {
360 free(path);
361 fclose(file);
362 break;
363 }
364
365 if (!line) {
366 free(path);
367 fclose(file);
368 break;
369 }
370
371 line[--line_len] = '\0'; /* \n */
372
373 vma = strstr(line, "0x");
374 if (!vma) {
375 free(path);
376 fclose(file);
377 break;
378 }
379 vma += 2;
380
381 section = section__new(entry->d_name, path);
382 if (!section) {
383 fprintf(stderr, "load_sections: allocation error\n");
384 free(path);
385 fclose(file);
386 break;
387 }
388
389 hex2u64(vma, &section->vma);
390 sec_dso__insert_section(mod->sections, section);
391
392 free(path);
393 fclose(file);
394 count++;
395 }
396
397 closedir(dir);
398 free(line);
399 free(dir_path);
400
401 return count;
402
403out_free:
404 free(dir_path);
405
406out_failure:
407 return count;
408}
409
410static int mod_dso__load_module_paths(struct mod_dso *self)
411{
412 struct utsname uts;
413 int count = 0, len, err = -1;
414 char *line = NULL;
415 FILE *file;
416 char *dpath, *dir;
417 size_t n;
418
419 if (uname(&uts) < 0)
420 return err;
421
422 len = strlen("/lib/modules/");
423 len += strlen(uts.release);
424 len += strlen("/modules.dep");
425
426 dpath = calloc(1, len + 1);
427 if (dpath == NULL)
428 return err;
429
430 strcat(dpath, "/lib/modules/");
431 strcat(dpath, uts.release);
432 strcat(dpath, "/modules.dep");
433
434 file = fopen(dpath, "r");
435 if (file == NULL)
436 goto out_failure;
437
438 dir = dirname(dpath);
439 if (!dir)
440 goto out_failure;
441 strcat(dir, "/");
442
443 while (!feof(file)) {
444 struct module *module;
445 char *name, *path, *tmp;
446 FILE *modfile;
447 int line_len;
448
449 line_len = getline(&line, &n, file);
450 if (line_len < 0)
451 break;
452
453 if (!line)
454 break;
455
456 line[--line_len] = '\0'; /* \n */
457
458 path = strchr(line, ':');
459 if (!path)
460 break;
461 *path = '\0';
462
463 path = strdup(line);
464 if (!path)
465 break;
466
467 if (!strstr(path, dir)) {
468 if (strncmp(path, "kernel/", 7))
469 break;
470
471 free(path);
472 path = calloc(1, strlen(dir) + strlen(line) + 1);
473 if (!path)
474 break;
475 strcat(path, dir);
476 strcat(path, line);
477 }
478
479 modfile = fopen(path, "r");
480 if (modfile == NULL)
481 break;
482 fclose(modfile);
483
484 name = strdup(path);
485 if (!name)
486 break;
487
488 name = strtok(name, "/");
489 tmp = name;
490
491 while (tmp) {
492 tmp = strtok(NULL, "/");
493 if (tmp)
494 name = tmp;
495 }
496
497 name = strsep(&name, ".");
498 if (!name)
499 break;
500
501 /* Quirk: replace '-' with '_' in all modules */
502 for (len = strlen(name); len; len--) {
503 if (*(name+len) == '-')
504 *(name+len) = '_';
505 }
506
507 module = module__new(name, path);
508 if (!module)
509 break;
510 mod_dso__insert_module(self, module);
511
512 module->sections = sec_dso__new_dso("sections");
513 if (!module->sections)
514 break;
515
516 module->active = mod_dso__load_sections(module);
517
518 if (module->active > 0)
519 count++;
520 }
521
522 if (feof(file))
523 err = count;
524 else
525 fprintf(stderr, "load_module_paths: modules.dep parsing failure!\n");
526
527out_failure:
528 if (dpath)
529 free(dpath);
530 if (file)
531 fclose(file);
532 if (line)
533 free(line);
534
535 return err;
536}
537
538int mod_dso__load_modules(struct mod_dso *dso)
539{
540 int err;
541
542 err = mod_dso__load_module_paths(dso);
543
544 return err;
545}
diff --git a/tools/perf/util/module.h b/tools/perf/util/module.h
deleted file mode 100644
index 8a592ef641ca..000000000000
--- a/tools/perf/util/module.h
+++ /dev/null
@@ -1,53 +0,0 @@
1#ifndef _PERF_MODULE_
2#define _PERF_MODULE_ 1
3
4#include <linux/types.h>
5#include "../types.h"
6#include <linux/list.h>
7#include <linux/rbtree.h>
8
9struct section {
10 struct rb_node rb_node;
11 u64 hash;
12 u64 vma;
13 char *name;
14 char *path;
15};
16
17struct sec_dso {
18 struct list_head node;
19 struct rb_root secs;
20 struct section *(*find_section)(struct sec_dso *, const char *name);
21 char name[0];
22};
23
24struct module {
25 struct rb_node rb_node;
26 u64 hash;
27 char *name;
28 char *path;
29 struct sec_dso *sections;
30 int active;
31};
32
33struct mod_dso {
34 struct list_head node;
35 struct rb_root mods;
36 struct module *(*find_module)(struct mod_dso *, const char *name);
37 char name[0];
38};
39
40struct sec_dso *sec_dso__new_dso(const char *name);
41void sec_dso__delete_sections(struct sec_dso *self);
42void sec_dso__delete_self(struct sec_dso *self);
43size_t sec_dso__fprintf(struct sec_dso *self, FILE *fp);
44struct section *sec_dso__find_section(struct sec_dso *self, const char *name);
45
46struct mod_dso *mod_dso__new_dso(const char *name);
47void mod_dso__delete_modules(struct mod_dso *self);
48void mod_dso__delete_self(struct mod_dso *self);
49size_t mod_dso__fprintf(struct mod_dso *self, FILE *fp);
50struct module *mod_dso__find_module(struct mod_dso *self, const char *name);
51int mod_dso__load_modules(struct mod_dso *dso);
52
53#endif /* _PERF_MODULE_ */
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 8cfb48cbbea0..9e5dbd66d34d 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1,4 +1,4 @@
1 1#include "../../../include/linux/hw_breakpoint.h"
2#include "util.h" 2#include "util.h"
3#include "../perf.h" 3#include "../perf.h"
4#include "parse-options.h" 4#include "parse-options.h"
@@ -7,10 +7,12 @@
7#include "string.h" 7#include "string.h"
8#include "cache.h" 8#include "cache.h"
9#include "header.h" 9#include "header.h"
10#include "debugfs.h"
10 11
11int nr_counters; 12int nr_counters;
12 13
13struct perf_event_attr attrs[MAX_COUNTERS]; 14struct perf_event_attr attrs[MAX_COUNTERS];
15char *filters[MAX_COUNTERS];
14 16
15struct event_symbol { 17struct event_symbol {
16 u8 type; 18 u8 type;
@@ -46,6 +48,8 @@ static struct event_symbol event_symbols[] = {
46 { CSW(PAGE_FAULTS_MAJ), "major-faults", "" }, 48 { CSW(PAGE_FAULTS_MAJ), "major-faults", "" },
47 { CSW(CONTEXT_SWITCHES), "context-switches", "cs" }, 49 { CSW(CONTEXT_SWITCHES), "context-switches", "cs" },
48 { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" }, 50 { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" },
51 { CSW(ALIGNMENT_FAULTS), "alignment-faults", "" },
52 { CSW(EMULATION_FAULTS), "emulation-faults", "" },
49}; 53};
50 54
51#define __PERF_EVENT_FIELD(config, name) \ 55#define __PERF_EVENT_FIELD(config, name) \
@@ -74,6 +78,8 @@ static const char *sw_event_names[] = {
74 "CPU-migrations", 78 "CPU-migrations",
75 "minor-faults", 79 "minor-faults",
76 "major-faults", 80 "major-faults",
81 "alignment-faults",
82 "emulation-faults",
77}; 83};
78 84
79#define MAX_ALIASES 8 85#define MAX_ALIASES 8
@@ -148,16 +154,6 @@ static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir)
148 154
149#define MAX_EVENT_LENGTH 512 155#define MAX_EVENT_LENGTH 512
150 156
151int valid_debugfs_mount(const char *debugfs)
152{
153 struct statfs st_fs;
154
155 if (statfs(debugfs, &st_fs) < 0)
156 return -ENOENT;
157 else if (st_fs.f_type != (long) DEBUGFS_MAGIC)
158 return -ENOENT;
159 return 0;
160}
161 157
162struct tracepoint_path *tracepoint_id_to_path(u64 config) 158struct tracepoint_path *tracepoint_id_to_path(u64 config)
163{ 159{
@@ -170,7 +166,7 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
170 char evt_path[MAXPATHLEN]; 166 char evt_path[MAXPATHLEN];
171 char dir_path[MAXPATHLEN]; 167 char dir_path[MAXPATHLEN];
172 168
173 if (valid_debugfs_mount(debugfs_path)) 169 if (debugfs_valid_mountpoint(debugfs_path))
174 return NULL; 170 return NULL;
175 171
176 sys_dir = opendir(debugfs_path); 172 sys_dir = opendir(debugfs_path);
@@ -201,7 +197,7 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
201 if (id == config) { 197 if (id == config) {
202 closedir(evt_dir); 198 closedir(evt_dir);
203 closedir(sys_dir); 199 closedir(sys_dir);
204 path = calloc(1, sizeof(path)); 200 path = zalloc(sizeof(path));
205 path->system = malloc(MAX_EVENT_LENGTH); 201 path->system = malloc(MAX_EVENT_LENGTH);
206 if (!path->system) { 202 if (!path->system) {
207 free(path); 203 free(path);
@@ -509,7 +505,7 @@ static enum event_result parse_tracepoint_event(const char **strp,
509 char sys_name[MAX_EVENT_LENGTH]; 505 char sys_name[MAX_EVENT_LENGTH];
510 unsigned int sys_length, evt_length; 506 unsigned int sys_length, evt_length;
511 507
512 if (valid_debugfs_mount(debugfs_path)) 508 if (debugfs_valid_mountpoint(debugfs_path))
513 return 0; 509 return 0;
514 510
515 evt_name = strchr(*strp, ':'); 511 evt_name = strchr(*strp, ':');
@@ -544,6 +540,81 @@ static enum event_result parse_tracepoint_event(const char **strp,
544 attr, strp); 540 attr, strp);
545} 541}
546 542
543static enum event_result
544parse_breakpoint_type(const char *type, const char **strp,
545 struct perf_event_attr *attr)
546{
547 int i;
548
549 for (i = 0; i < 3; i++) {
550 if (!type[i])
551 break;
552
553 switch (type[i]) {
554 case 'r':
555 attr->bp_type |= HW_BREAKPOINT_R;
556 break;
557 case 'w':
558 attr->bp_type |= HW_BREAKPOINT_W;
559 break;
560 case 'x':
561 attr->bp_type |= HW_BREAKPOINT_X;
562 break;
563 default:
564 return EVT_FAILED;
565 }
566 }
567 if (!attr->bp_type) /* Default */
568 attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;
569
570 *strp = type + i;
571
572 return EVT_HANDLED;
573}
574
575static enum event_result
576parse_breakpoint_event(const char **strp, struct perf_event_attr *attr)
577{
578 const char *target;
579 const char *type;
580 char *endaddr;
581 u64 addr;
582 enum event_result err;
583
584 target = strchr(*strp, ':');
585 if (!target)
586 return EVT_FAILED;
587
588 if (strncmp(*strp, "mem", target - *strp) != 0)
589 return EVT_FAILED;
590
591 target++;
592
593 addr = strtoull(target, &endaddr, 0);
594 if (target == endaddr)
595 return EVT_FAILED;
596
597 attr->bp_addr = addr;
598 *strp = endaddr;
599
600 type = strchr(target, ':');
601
602 /* If no type is defined, just rw as default */
603 if (!type) {
604 attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;
605 } else {
606 err = parse_breakpoint_type(++type, strp, attr);
607 if (err == EVT_FAILED)
608 return EVT_FAILED;
609 }
610
611 /* We should find a nice way to override the access type */
612 attr->bp_len = HW_BREAKPOINT_LEN_4;
613 attr->type = PERF_TYPE_BREAKPOINT;
614
615 return EVT_HANDLED;
616}
617
547static int check_events(const char *str, unsigned int i) 618static int check_events(const char *str, unsigned int i)
548{ 619{
549 int n; 620 int n;
@@ -677,6 +748,12 @@ parse_event_symbols(const char **str, struct perf_event_attr *attr)
677 if (ret != EVT_FAILED) 748 if (ret != EVT_FAILED)
678 goto modifier; 749 goto modifier;
679 750
751 ret = parse_breakpoint_event(str, attr);
752 if (ret != EVT_FAILED)
753 goto modifier;
754
755 fprintf(stderr, "invalid or unsupported event: '%s'\n", *str);
756 fprintf(stderr, "Run 'perf list' for a list of valid events\n");
680 return EVT_FAILED; 757 return EVT_FAILED;
681 758
682modifier: 759modifier:
@@ -708,7 +785,6 @@ static void store_event_type(const char *orgname)
708 perf_header__push_event(id, orgname); 785 perf_header__push_event(id, orgname);
709} 786}
710 787
711
712int parse_events(const struct option *opt __used, const char *str, int unset __used) 788int parse_events(const struct option *opt __used, const char *str, int unset __used)
713{ 789{
714 struct perf_event_attr attr; 790 struct perf_event_attr attr;
@@ -745,6 +821,28 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
745 return 0; 821 return 0;
746} 822}
747 823
824int parse_filter(const struct option *opt __used, const char *str,
825 int unset __used)
826{
827 int i = nr_counters - 1;
828 int len = strlen(str);
829
830 if (i < 0 || attrs[i].type != PERF_TYPE_TRACEPOINT) {
831 fprintf(stderr,
832 "-F option should follow a -e tracepoint option\n");
833 return -1;
834 }
835
836 filters[i] = malloc(len + 1);
837 if (!filters[i]) {
838 fprintf(stderr, "not enough memory to hold filter string\n");
839 return -1;
840 }
841 strcpy(filters[i], str);
842
843 return 0;
844}
845
748static const char * const event_type_descriptors[] = { 846static const char * const event_type_descriptors[] = {
749 "", 847 "",
750 "Hardware event", 848 "Hardware event",
@@ -764,7 +862,7 @@ static void print_tracepoint_events(void)
764 char evt_path[MAXPATHLEN]; 862 char evt_path[MAXPATHLEN];
765 char dir_path[MAXPATHLEN]; 863 char dir_path[MAXPATHLEN];
766 864
767 if (valid_debugfs_mount(debugfs_path)) 865 if (debugfs_valid_mountpoint(debugfs_path))
768 return; 866 return;
769 867
770 sys_dir = opendir(debugfs_path); 868 sys_dir = opendir(debugfs_path);
@@ -782,7 +880,7 @@ static void print_tracepoint_events(void)
782 for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) { 880 for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) {
783 snprintf(evt_path, MAXPATHLEN, "%s:%s", 881 snprintf(evt_path, MAXPATHLEN, "%s:%s",
784 sys_dirent.d_name, evt_dirent.d_name); 882 sys_dirent.d_name, evt_dirent.d_name);
785 fprintf(stderr, " %-42s [%s]\n", evt_path, 883 printf(" %-42s [%s]\n", evt_path,
786 event_type_descriptors[PERF_TYPE_TRACEPOINT+1]); 884 event_type_descriptors[PERF_TYPE_TRACEPOINT+1]);
787 } 885 }
788 closedir(evt_dir); 886 closedir(evt_dir);
@@ -799,8 +897,8 @@ void print_events(void)
799 unsigned int i, type, op, prev_type = -1; 897 unsigned int i, type, op, prev_type = -1;
800 char name[40]; 898 char name[40];
801 899
802 fprintf(stderr, "\n"); 900 printf("\n");
803 fprintf(stderr, "List of pre-defined events (to be used in -e):\n"); 901 printf("List of pre-defined events (to be used in -e):\n");
804 902
805 for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) { 903 for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) {
806 type = syms->type + 1; 904 type = syms->type + 1;
@@ -808,19 +906,19 @@ void print_events(void)
808 type = 0; 906 type = 0;
809 907
810 if (type != prev_type) 908 if (type != prev_type)
811 fprintf(stderr, "\n"); 909 printf("\n");
812 910
813 if (strlen(syms->alias)) 911 if (strlen(syms->alias))
814 sprintf(name, "%s OR %s", syms->symbol, syms->alias); 912 sprintf(name, "%s OR %s", syms->symbol, syms->alias);
815 else 913 else
816 strcpy(name, syms->symbol); 914 strcpy(name, syms->symbol);
817 fprintf(stderr, " %-42s [%s]\n", name, 915 printf(" %-42s [%s]\n", name,
818 event_type_descriptors[type]); 916 event_type_descriptors[type]);
819 917
820 prev_type = type; 918 prev_type = type;
821 } 919 }
822 920
823 fprintf(stderr, "\n"); 921 printf("\n");
824 for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { 922 for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
825 for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { 923 for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
826 /* skip invalid cache type */ 924 /* skip invalid cache type */
@@ -828,17 +926,20 @@ void print_events(void)
828 continue; 926 continue;
829 927
830 for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { 928 for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
831 fprintf(stderr, " %-42s [%s]\n", 929 printf(" %-42s [%s]\n",
832 event_cache_name(type, op, i), 930 event_cache_name(type, op, i),
833 event_type_descriptors[4]); 931 event_type_descriptors[4]);
834 } 932 }
835 } 933 }
836 } 934 }
837 935
838 fprintf(stderr, "\n"); 936 printf("\n");
839 fprintf(stderr, " %-42s [raw hardware event descriptor]\n", 937 printf(" %-42s [raw hardware event descriptor]\n",
840 "rNNN"); 938 "rNNN");
841 fprintf(stderr, "\n"); 939 printf("\n");
940
941 printf(" %-42s [hardware breakpoint]\n", "mem:<addr>[:access]");
942 printf("\n");
842 943
843 print_tracepoint_events(); 944 print_tracepoint_events();
844 945
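
Note: the breakpoint syntax introduced above is mem:<addr>[:access], where access is any combination of r, w and x and defaults to rw when omitted. A self-contained sketch of the same parsing logic; the BP_* values and the function name are illustrative stand-ins for the HW_BREAKPOINT_* flags and the static parser above:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum { BP_R = 1, BP_W = 2, BP_X = 4 };	/* stand-ins, not HW_BREAKPOINT_* */

static int parse_mem_event(const char *str, unsigned long long *addr, int *type)
{
	const char *p;
	char *end;

	if (strncmp(str, "mem:", 4) != 0)
		return -1;
	*addr = strtoull(str + 4, &end, 0);
	if (end == str + 4)
		return -1;
	*type = 0;
	p = strchr(str + 4, ':');
	for (p = p ? p + 1 : ""; *p; p++) {
		switch (*p) {
		case 'r': *type |= BP_R; break;
		case 'w': *type |= BP_W; break;
		case 'x': *type |= BP_X; break;
		default: return -1;
		}
	}
	if (!*type)		/* no access spec: default to read|write */
		*type = BP_R | BP_W;
	return 0;
}

int main(void)
{
	unsigned long long addr;
	int type;

	if (!parse_mem_event("mem:0xdeadbeef:rw", &addr, &type))
		printf("addr=%#llx type=%#x\n", addr, type);
	return 0;
}
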
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 30c608112845..b8c1f64bc935 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -1,5 +1,5 @@
1#ifndef _PARSE_EVENTS_H 1#ifndef __PERF_PARSE_EVENTS_H
2#define _PARSE_EVENTS_H 2#define __PERF_PARSE_EVENTS_H
3/* 3/*
4 * Parse symbolic events/counts passed in as options: 4 * Parse symbolic events/counts passed in as options:
5 */ 5 */
@@ -17,11 +17,13 @@ extern struct tracepoint_path *tracepoint_id_to_path(u64 config);
17extern int nr_counters; 17extern int nr_counters;
18 18
19extern struct perf_event_attr attrs[MAX_COUNTERS]; 19extern struct perf_event_attr attrs[MAX_COUNTERS];
20extern char *filters[MAX_COUNTERS];
20 21
21extern const char *event_name(int ctr); 22extern const char *event_name(int ctr);
22extern const char *__event_name(int type, u64 config); 23extern const char *__event_name(int type, u64 config);
23 24
24extern int parse_events(const struct option *opt, const char *str, int unset); 25extern int parse_events(const struct option *opt, const char *str, int unset);
26extern int parse_filter(const struct option *opt, const char *str, int unset);
25 27
26#define EVENTS_HELP_MAX (128*1024) 28#define EVENTS_HELP_MAX (128*1024)
27 29
@@ -31,4 +33,4 @@ extern char debugfs_path[];
31extern int valid_debugfs_mount(const char *debugfs); 33extern int valid_debugfs_mount(const char *debugfs);
32 34
33 35
34#endif /* _PARSE_EVENTS_H */ 36#endif /* __PERF_PARSE_EVENTS_H */
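
Note: parse_filter() relies on a simple positional pairing, filters[i] belongs to attrs[i], so a filter option must immediately follow the -e tracepoint it qualifies. A minimal sketch of that invariant; the MAX_COUNTERS value and helper names here are illustrative:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_COUNTERS 32		/* illustrative value */

static char *filters[MAX_COUNTERS];
static int nr_counters;

static int add_event(void) { return nr_counters++; }

static int add_filter(const char *s)
{
	int i = nr_counters - 1;	/* the filter binds to the last -e */

	if (i < 0)
		return -1;
	filters[i] = strdup(s);
	return filters[i] ? 0 : -1;
}

int main(void)
{
	int ev = add_event();

	if (!add_filter("irq == 18"))
		printf("event %d filter: %s\n", ev, filters[ev]);
	return 0;
}
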
diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h
index 2ee248ff27e5..948805af43c2 100644
--- a/tools/perf/util/parse-options.h
+++ b/tools/perf/util/parse-options.h
@@ -1,5 +1,5 @@
1#ifndef PARSE_OPTIONS_H 1#ifndef __PERF_PARSE_OPTIONS_H
2#define PARSE_OPTIONS_H 2#define __PERF_PARSE_OPTIONS_H
3 3
4enum parse_opt_type { 4enum parse_opt_type {
5 /* special types */ 5 /* special types */
@@ -174,4 +174,4 @@ extern int parse_opt_verbosity_cb(const struct option *, const char *, int);
174 174
175extern const char *parse_options_fix_filename(const char *prefix, const char *file); 175extern const char *parse_options_fix_filename(const char *prefix, const char *file);
176 176
177#endif 177#endif /* __PERF_PARSE_OPTIONS_H */
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
new file mode 100644
index 000000000000..cd7fbda5e2a5
--- /dev/null
+++ b/tools/perf/util/probe-event.c
@@ -0,0 +1,484 @@
1/*
2 * probe-event.c : perf-probe definition to kprobe_events format converter
3 *
4 * Written by Masami Hiramatsu <mhiramat@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 *
20 */
21
22#define _GNU_SOURCE
23#include <sys/utsname.h>
24#include <sys/types.h>
25#include <sys/stat.h>
26#include <fcntl.h>
27#include <errno.h>
28#include <stdio.h>
29#include <unistd.h>
30#include <stdlib.h>
31#include <string.h>
32#include <stdarg.h>
33#include <limits.h>
34
35#undef _GNU_SOURCE
36#include "event.h"
37#include "string.h"
38#include "strlist.h"
39#include "debug.h"
40#include "parse-events.h" /* For debugfs_path */
41#include "probe-event.h"
42
43#define MAX_CMDLEN 256
44#define MAX_PROBE_ARGS 128
45#define PERFPROBE_GROUP "probe"
46
 47#define semantic_error(msg ...) die("Semantic error: " msg)
48
49/* If there is no space to write, returns -E2BIG. */
50static int e_snprintf(char *str, size_t size, const char *format, ...)
51{
52 int ret;
53 va_list ap;
54 va_start(ap, format);
55 ret = vsnprintf(str, size, format, ap);
56 va_end(ap);
57 if (ret >= (int)size)
58 ret = -E2BIG;
59 return ret;
60}
61
62/* Parse probepoint definition. */
63static void parse_perf_probe_probepoint(char *arg, struct probe_point *pp)
64{
65 char *ptr, *tmp;
66 char c, nc = 0;
67 /*
68 * <Syntax>
69 * perf probe SRC:LN
70 * perf probe FUNC[+OFFS|%return][@SRC]
71 */
72
73 ptr = strpbrk(arg, ":+@%");
74 if (ptr) {
75 nc = *ptr;
76 *ptr++ = '\0';
77 }
78
79 /* Check arg is function or file and copy it */
80 if (strchr(arg, '.')) /* File */
81 pp->file = strdup(arg);
82 else /* Function */
83 pp->function = strdup(arg);
84 DIE_IF(pp->file == NULL && pp->function == NULL);
85
86 /* Parse other options */
87 while (ptr) {
88 arg = ptr;
89 c = nc;
90 ptr = strpbrk(arg, ":+@%");
91 if (ptr) {
92 nc = *ptr;
93 *ptr++ = '\0';
94 }
95 switch (c) {
96 case ':': /* Line number */
97 pp->line = strtoul(arg, &tmp, 0);
98 if (*tmp != '\0')
 99				semantic_error("There is non-digit character"
100 " in line number.");
101 break;
102 case '+': /* Byte offset from a symbol */
103 pp->offset = strtoul(arg, &tmp, 0);
104 if (*tmp != '\0')
105				semantic_error("There is non-digit character"
106 " in offset.");
107 break;
108 case '@': /* File name */
109 if (pp->file)
110 semantic_error("SRC@SRC is not allowed.");
111 pp->file = strdup(arg);
112 DIE_IF(pp->file == NULL);
113 if (ptr)
114 semantic_error("@SRC must be the last "
115 "option.");
116 break;
117 case '%': /* Probe places */
118 if (strcmp(arg, "return") == 0) {
119 pp->retprobe = 1;
120 } else /* Others not supported yet */
121 semantic_error("%%%s is not supported.", arg);
122 break;
123 default:
124 DIE_IF("Program has a bug.");
125 break;
126 }
127 }
128
129 /* Exclusion check */
130 if (pp->line && pp->offset)
131 semantic_error("Offset can't be used with line number.");
132
133 if (!pp->line && pp->file && !pp->function)
134 semantic_error("File always requires line number.");
135
136 if (pp->offset && !pp->function)
137 semantic_error("Offset requires an entry function.");
138
139 if (pp->retprobe && !pp->function)
140 semantic_error("Return probe requires an entry function.");
141
142 if ((pp->offset || pp->line) && pp->retprobe)
143 semantic_error("Offset/Line can't be used with return probe.");
144
145 pr_debug("symbol:%s file:%s line:%d offset:%d, return:%d\n",
146 pp->function, pp->file, pp->line, pp->offset, pp->retprobe);
147}
148
149/* Parse perf-probe event definition */
150int parse_perf_probe_event(const char *str, struct probe_point *pp)
151{
152 char **argv;
153 int argc, i, need_dwarf = 0;
154
155 argv = argv_split(str, &argc);
156 if (!argv)
157 die("argv_split failed.");
158 if (argc > MAX_PROBE_ARGS + 1)
159 semantic_error("Too many arguments");
160
161 /* Parse probe point */
162 parse_perf_probe_probepoint(argv[0], pp);
163 if (pp->file || pp->line)
164 need_dwarf = 1;
165
166 /* Copy arguments and ensure return probe has no C argument */
167 pp->nr_args = argc - 1;
168 pp->args = zalloc(sizeof(char *) * pp->nr_args);
169 for (i = 0; i < pp->nr_args; i++) {
170 pp->args[i] = strdup(argv[i + 1]);
171 if (!pp->args[i])
172 die("Failed to copy argument.");
173 if (is_c_varname(pp->args[i])) {
174 if (pp->retprobe)
175 semantic_error("You can't specify local"
176 " variable for kretprobe");
177 need_dwarf = 1;
178 }
179 }
180
181 argv_free(argv);
182 return need_dwarf;
183}
184
185/* Parse kprobe_events event into struct probe_point */
186void parse_trace_kprobe_event(const char *str, char **group, char **event,
187 struct probe_point *pp)
188{
189 char pr;
190 char *p;
191 int ret, i, argc;
192 char **argv;
193
194 pr_debug("Parsing kprobe_events: %s\n", str);
195 argv = argv_split(str, &argc);
196 if (!argv)
197 die("argv_split failed.");
198 if (argc < 2)
199		semantic_error("Too few arguments.");
200
201 /* Scan event and group name. */
202 ret = sscanf(argv[0], "%c:%a[^/ \t]/%a[^ \t]",
203 &pr, (float *)(void *)group, (float *)(void *)event);
204 if (ret != 3)
205 semantic_error("Failed to parse event name: %s", argv[0]);
206 pr_debug("Group:%s Event:%s probe:%c\n", *group, *event, pr);
207
208 if (!pp)
209 goto end;
210
211 pp->retprobe = (pr == 'r');
212
213 /* Scan function name and offset */
214 ret = sscanf(argv[1], "%a[^+]+%d", (float *)(void *)&pp->function, &pp->offset);
215 if (ret == 1)
216 pp->offset = 0;
217
218 /* kprobe_events doesn't have this information */
219 pp->line = 0;
220 pp->file = NULL;
221
222 pp->nr_args = argc - 2;
223 pp->args = zalloc(sizeof(char *) * pp->nr_args);
224 for (i = 0; i < pp->nr_args; i++) {
225 p = strchr(argv[i + 2], '=');
226 if (p) /* We don't need which register is assigned. */
227 *p = '\0';
228 pp->args[i] = strdup(argv[i + 2]);
229 if (!pp->args[i])
230 die("Failed to copy argument.");
231 }
232
233end:
234 argv_free(argv);
235}
236
237int synthesize_perf_probe_event(struct probe_point *pp)
238{
239 char *buf;
240 char offs[64] = "", line[64] = "";
241 int i, len, ret;
242
243 pp->probes[0] = buf = zalloc(MAX_CMDLEN);
244 if (!buf)
245 die("Failed to allocate memory by zalloc.");
246 if (pp->offset) {
247 ret = e_snprintf(offs, 64, "+%d", pp->offset);
248 if (ret <= 0)
249 goto error;
250 }
251 if (pp->line) {
252 ret = e_snprintf(line, 64, ":%d", pp->line);
253 if (ret <= 0)
254 goto error;
255 }
256
257 if (pp->function)
258 ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s", pp->function,
259 offs, pp->retprobe ? "%return" : "", line);
260 else
261		ret = e_snprintf(buf, MAX_CMDLEN, "%s%s", pp->file, line);
262 if (ret <= 0)
263 goto error;
264 len = ret;
265
266 for (i = 0; i < pp->nr_args; i++) {
267 ret = e_snprintf(&buf[len], MAX_CMDLEN - len, " %s",
268 pp->args[i]);
269 if (ret <= 0)
270 goto error;
271 len += ret;
272 }
273 pp->found = 1;
274
275 return pp->found;
276error:
277 free(pp->probes[0]);
278
279 return ret;
280}
281
282int synthesize_trace_kprobe_event(struct probe_point *pp)
283{
284 char *buf;
285 int i, len, ret;
286
287 pp->probes[0] = buf = zalloc(MAX_CMDLEN);
288 if (!buf)
289 die("Failed to allocate memory by zalloc.");
290 ret = e_snprintf(buf, MAX_CMDLEN, "%s+%d", pp->function, pp->offset);
291 if (ret <= 0)
292 goto error;
293 len = ret;
294
295 for (i = 0; i < pp->nr_args; i++) {
296 ret = e_snprintf(&buf[len], MAX_CMDLEN - len, " %s",
297 pp->args[i]);
298 if (ret <= 0)
299 goto error;
300 len += ret;
301 }
302 pp->found = 1;
303
304 return pp->found;
305error:
306 free(pp->probes[0]);
307
308 return ret;
309}
310
311static int open_kprobe_events(int flags, int mode)
312{
313 char buf[PATH_MAX];
314 int ret;
315
316 ret = e_snprintf(buf, PATH_MAX, "%s/../kprobe_events", debugfs_path);
317 if (ret < 0)
318 die("Failed to make kprobe_events path.");
319
320 ret = open(buf, flags, mode);
321 if (ret < 0) {
322 if (errno == ENOENT)
323 die("kprobe_events file does not exist -"
324			    " please rebuild with CONFIG_KPROBE_EVENT.");
325 else
326 die("Could not open kprobe_events file: %s",
327 strerror(errno));
328 }
329 return ret;
330}
331
332/* Get raw string list of current kprobe_events */
333static struct strlist *get_trace_kprobe_event_rawlist(int fd)
334{
335 int ret, idx;
336 FILE *fp;
337 char buf[MAX_CMDLEN];
338 char *p;
339 struct strlist *sl;
340
341 sl = strlist__new(true, NULL);
342
343 fp = fdopen(dup(fd), "r");
344 while (!feof(fp)) {
345 p = fgets(buf, MAX_CMDLEN, fp);
346 if (!p)
347 break;
348
349 idx = strlen(p) - 1;
350 if (p[idx] == '\n')
351 p[idx] = '\0';
352 ret = strlist__add(sl, buf);
353 if (ret < 0)
354 die("strlist__add failed: %s", strerror(-ret));
355 }
356 fclose(fp);
357
358 return sl;
359}
360
361/* Free and zero clear probe_point */
362static void clear_probe_point(struct probe_point *pp)
363{
364 int i;
365
366 if (pp->function)
367 free(pp->function);
368 if (pp->file)
369 free(pp->file);
370 for (i = 0; i < pp->nr_args; i++)
371 free(pp->args[i]);
372 if (pp->args)
373 free(pp->args);
374 for (i = 0; i < pp->found; i++)
375 free(pp->probes[i]);
376	memset(pp, 0, sizeof(*pp));
377}
378
379/* List up current perf-probe events */
380void show_perf_probe_events(void)
381{
382 unsigned int i;
383 int fd;
384 char *group, *event;
385 struct probe_point pp;
386 struct strlist *rawlist;
387 struct str_node *ent;
388
389 fd = open_kprobe_events(O_RDONLY, 0);
390 rawlist = get_trace_kprobe_event_rawlist(fd);
391 close(fd);
392
393 for (i = 0; i < strlist__nr_entries(rawlist); i++) {
394 ent = strlist__entry(rawlist, i);
395 parse_trace_kprobe_event(ent->s, &group, &event, &pp);
396 synthesize_perf_probe_event(&pp);
397 printf("[%s:%s]\t%s\n", group, event, pp.probes[0]);
398 free(group);
399 free(event);
400 clear_probe_point(&pp);
401 }
402
403 strlist__delete(rawlist);
404}
405
406/* Get current perf-probe event names */
407static struct strlist *get_perf_event_names(int fd)
408{
409 unsigned int i;
410 char *group, *event;
411 struct strlist *sl, *rawlist;
412 struct str_node *ent;
413
414 rawlist = get_trace_kprobe_event_rawlist(fd);
415
416 sl = strlist__new(false, NULL);
417 for (i = 0; i < strlist__nr_entries(rawlist); i++) {
418 ent = strlist__entry(rawlist, i);
419 parse_trace_kprobe_event(ent->s, &group, &event, NULL);
420 strlist__add(sl, event);
421 free(group);
422 }
423
424 strlist__delete(rawlist);
425
426 return sl;
427}
428
429static int write_trace_kprobe_event(int fd, const char *buf)
430{
431 int ret;
432
433 ret = write(fd, buf, strlen(buf));
434 if (ret <= 0)
435 die("Failed to create event.");
436 else
437 printf("Added new event: %s\n", buf);
438
439 return ret;
440}
441
442static void get_new_event_name(char *buf, size_t len, const char *base,
443 struct strlist *namelist)
444{
445 int i, ret;
446 for (i = 0; i < MAX_EVENT_INDEX; i++) {
447 ret = e_snprintf(buf, len, "%s_%d", base, i);
448 if (ret < 0)
449 die("snprintf() failed: %s", strerror(-ret));
450 if (!strlist__has_entry(namelist, buf))
451 break;
452 }
453 if (i == MAX_EVENT_INDEX)
454 die("Too many events are on the same function.");
455}
456
457void add_trace_kprobe_events(struct probe_point *probes, int nr_probes)
458{
459 int i, j, fd;
460 struct probe_point *pp;
461 char buf[MAX_CMDLEN];
462 char event[64];
463 struct strlist *namelist;
464
465 fd = open_kprobe_events(O_RDWR, O_APPEND);
466 /* Get current event names */
467 namelist = get_perf_event_names(fd);
468
469 for (j = 0; j < nr_probes; j++) {
470 pp = probes + j;
471 for (i = 0; i < pp->found; i++) {
472 /* Get an unused new event name */
473 get_new_event_name(event, 64, pp->function, namelist);
474 snprintf(buf, MAX_CMDLEN, "%c:%s/%s %s\n",
475 pp->retprobe ? 'r' : 'p',
476 PERFPROBE_GROUP, event,
477 pp->probes[i]);
478 write_trace_kprobe_event(fd, buf);
479 /* Add added event name to namelist */
480 strlist__add(namelist, event);
481 }
482 }
483 close(fd);
484}
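
Note: add_trace_kprobe_events() above writes one line per probe in the kprobe_events text format, "p|r:group/event probepoint [args]". A tiny sketch of the line it synthesizes for a probe on schedule; the event name is illustrative, and in perf the buffer goes to the debugfs kprobe_events file rather than stdout:

#include <stdio.h>

int main(void)
{
	char buf[256];
	int retprobe = 0;

	/* same format string as the snprintf() in add_trace_kprobe_events() */
	snprintf(buf, sizeof(buf), "%c:%s/%s %s\n",
		 retprobe ? 'r' : 'p', "probe", "schedule_0", "schedule+0");
	fputs(buf, stdout);
	return 0;
}
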
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
new file mode 100644
index 000000000000..0c6fe56fe38a
--- /dev/null
+++ b/tools/perf/util/probe-event.h
@@ -0,0 +1,18 @@
1#ifndef _PROBE_EVENT_H
2#define _PROBE_EVENT_H
3
4#include "probe-finder.h"
5#include "strlist.h"
6
7extern int parse_perf_probe_event(const char *str, struct probe_point *pp);
8extern int synthesize_perf_probe_event(struct probe_point *pp);
9extern void parse_trace_kprobe_event(const char *str, char **group,
10 char **event, struct probe_point *pp);
11extern int synthesize_trace_kprobe_event(struct probe_point *pp);
12extern void add_trace_kprobe_events(struct probe_point *probes, int nr_probes);
13extern void show_perf_probe_events(void);
14
15/* Maximum index number of event-name postfix */
16#define MAX_EVENT_INDEX 1024
17
18#endif /*_PROBE_EVENT_H */
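
Note: the probe_point inputs follow the grammar handled by parse_perf_probe_probepoint(): FUNC[+OFFS|%return][@SRC] or SRC:LN. A small standalone sketch of splitting one such spec with the same strpbrk() separator set:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	char arg[] = "schedule+8";
	char *sep = strpbrk(arg, ":+@%");	/* same separators as above */
	long offs = 0;

	if (sep && *sep == '+') {
		*sep++ = '\0';
		offs = strtol(sep, NULL, 0);
	}
	printf("function=%s offset=%ld\n", arg, offs);
	return 0;
}
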
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
new file mode 100644
index 000000000000..293cdfc1b8ca
--- /dev/null
+++ b/tools/perf/util/probe-finder.c
@@ -0,0 +1,732 @@
1/*
2 * probe-finder.c : C expression to kprobe event converter
3 *
4 * Written by Masami Hiramatsu <mhiramat@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 *
20 */
21
22#include <sys/utsname.h>
23#include <sys/types.h>
24#include <sys/stat.h>
25#include <fcntl.h>
26#include <errno.h>
27#include <stdio.h>
28#include <unistd.h>
29#include <getopt.h>
30#include <stdlib.h>
31#include <string.h>
32#include <stdarg.h>
33#include <ctype.h>
34
35#include "event.h"
36#include "debug.h"
37#include "util.h"
38#include "probe-finder.h"
39
40
41/* Dwarf_Die Linkage to parent Die */
42struct die_link {
43 struct die_link *parent; /* Parent die */
44 Dwarf_Die die; /* Current die */
45};
46
47static Dwarf_Debug __dw_debug;
48static Dwarf_Error __dw_error;
49
50/*
51 * Generic dwarf analysis helpers
52 */
53
54#define X86_32_MAX_REGS 8
55const char *x86_32_regs_table[X86_32_MAX_REGS] = {
56 "%ax",
57 "%cx",
58 "%dx",
59 "%bx",
60 "$stack", /* Stack address instead of %sp */
61 "%bp",
62 "%si",
63 "%di",
64};
65
66#define X86_64_MAX_REGS 16
67const char *x86_64_regs_table[X86_64_MAX_REGS] = {
68 "%ax",
69 "%dx",
70 "%cx",
71 "%bx",
72 "%si",
73 "%di",
74 "%bp",
75 "%sp",
76 "%r8",
77 "%r9",
78 "%r10",
79 "%r11",
80 "%r12",
81 "%r13",
82 "%r14",
83 "%r15",
84};
85
86/* TODO: switching by dwarf address size */
87#ifdef __x86_64__
88#define ARCH_MAX_REGS X86_64_MAX_REGS
89#define arch_regs_table x86_64_regs_table
90#else
91#define ARCH_MAX_REGS X86_32_MAX_REGS
92#define arch_regs_table x86_32_regs_table
93#endif
94
95/* Return architecture dependent register string (for kprobe-tracer) */
96static const char *get_arch_regstr(unsigned int n)
97{
 98	return (n < ARCH_MAX_REGS) ? arch_regs_table[n] : NULL;
99}
100
101/*
102 * Compare the tail of two strings.
103 * Return 0 if the whole of either string matches the tail of the other.
104 */
105static int strtailcmp(const char *s1, const char *s2)
106{
107 int i1 = strlen(s1);
108 int i2 = strlen(s2);
109 while (--i1 > 0 && --i2 > 0) {
110 if (s1[i1] != s2[i2])
111 return s1[i1] - s2[i2];
112 }
113 return 0;
114}
115
116/* Find the fileno of the target file. */
117static Dwarf_Unsigned cu_find_fileno(Dwarf_Die cu_die, const char *fname)
118{
119 Dwarf_Signed cnt, i;
120 Dwarf_Unsigned found = 0;
121 char **srcs;
122 int ret;
123
124 if (!fname)
125 return 0;
126
127 ret = dwarf_srcfiles(cu_die, &srcs, &cnt, &__dw_error);
128 if (ret == DW_DLV_OK) {
129 for (i = 0; i < cnt && !found; i++) {
130 if (strtailcmp(srcs[i], fname) == 0)
131 found = i + 1;
132 dwarf_dealloc(__dw_debug, srcs[i], DW_DLA_STRING);
133 }
134 for (; i < cnt; i++)
135 dwarf_dealloc(__dw_debug, srcs[i], DW_DLA_STRING);
136 dwarf_dealloc(__dw_debug, srcs, DW_DLA_LIST);
137 }
138 if (found)
139 pr_debug("found fno: %d\n", (int)found);
140 return found;
141}
142
143/* Compare diename and tname */
144static int die_compare_name(Dwarf_Die dw_die, const char *tname)
145{
146 char *name;
147 int ret;
148 ret = dwarf_diename(dw_die, &name, &__dw_error);
149 DIE_IF(ret == DW_DLV_ERROR);
150 if (ret == DW_DLV_OK) {
151 ret = strcmp(tname, name);
152 dwarf_dealloc(__dw_debug, name, DW_DLA_STRING);
153 } else
154 ret = -1;
155 return ret;
156}
157
158/* Check the address is in the subprogram(function). */
159static int die_within_subprogram(Dwarf_Die sp_die, Dwarf_Addr addr,
160 Dwarf_Signed *offs)
161{
162 Dwarf_Addr lopc, hipc;
163 int ret;
164
165 /* TODO: check ranges */
166 ret = dwarf_lowpc(sp_die, &lopc, &__dw_error);
167 DIE_IF(ret == DW_DLV_ERROR);
168 if (ret == DW_DLV_NO_ENTRY)
169 return 0;
170 ret = dwarf_highpc(sp_die, &hipc, &__dw_error);
171 DIE_IF(ret != DW_DLV_OK);
172 if (lopc <= addr && addr < hipc) {
173 *offs = addr - lopc;
174 return 1;
175 } else
176 return 0;
177}
178
179/* Check the die is inlined function */
180static Dwarf_Bool die_inlined_subprogram(Dwarf_Die dw_die)
181{
182 /* TODO: check strictly */
183 Dwarf_Bool inl;
184 int ret;
185
186 ret = dwarf_hasattr(dw_die, DW_AT_inline, &inl, &__dw_error);
187 DIE_IF(ret == DW_DLV_ERROR);
188 return inl;
189}
190
191/* Get the offset of abstract_origin */
192static Dwarf_Off die_get_abstract_origin(Dwarf_Die dw_die)
193{
194 Dwarf_Attribute attr;
195 Dwarf_Off cu_offs;
196 int ret;
197
198 ret = dwarf_attr(dw_die, DW_AT_abstract_origin, &attr, &__dw_error);
199 DIE_IF(ret != DW_DLV_OK);
200 ret = dwarf_formref(attr, &cu_offs, &__dw_error);
201 DIE_IF(ret != DW_DLV_OK);
202 dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
203 return cu_offs;
204}
205
206/* Get entry pc(or low pc, 1st entry of ranges) of the die */
207static Dwarf_Addr die_get_entrypc(Dwarf_Die dw_die)
208{
209 Dwarf_Attribute attr;
210 Dwarf_Addr addr;
211 Dwarf_Off offs;
212 Dwarf_Ranges *ranges;
213 Dwarf_Signed cnt;
214 int ret;
215
216 /* Try to get entry pc */
217 ret = dwarf_attr(dw_die, DW_AT_entry_pc, &attr, &__dw_error);
218 DIE_IF(ret == DW_DLV_ERROR);
219 if (ret == DW_DLV_OK) {
220 ret = dwarf_formaddr(attr, &addr, &__dw_error);
221 DIE_IF(ret != DW_DLV_OK);
222 dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
223 return addr;
224 }
225
226 /* Try to get low pc */
227 ret = dwarf_lowpc(dw_die, &addr, &__dw_error);
228 DIE_IF(ret == DW_DLV_ERROR);
229 if (ret == DW_DLV_OK)
230 return addr;
231
232 /* Try to get ranges */
233 ret = dwarf_attr(dw_die, DW_AT_ranges, &attr, &__dw_error);
234 DIE_IF(ret != DW_DLV_OK);
235 ret = dwarf_formref(attr, &offs, &__dw_error);
236 DIE_IF(ret != DW_DLV_OK);
237 ret = dwarf_get_ranges(__dw_debug, offs, &ranges, &cnt, NULL,
238 &__dw_error);
239 DIE_IF(ret != DW_DLV_OK);
240 addr = ranges[0].dwr_addr1;
241 dwarf_ranges_dealloc(__dw_debug, ranges, cnt);
242 return addr;
243}
244
245/*
246 * Search a Die from Die tree.
247 * Note: cur_link->die should be deallocated in this function.
248 */
249static int __search_die_tree(struct die_link *cur_link,
250 int (*die_cb)(struct die_link *, void *),
251 void *data)
252{
253 Dwarf_Die new_die;
254 struct die_link new_link;
255 int ret;
256
257 if (!die_cb)
258 return 0;
259
260 /* Check current die */
261 while (!(ret = die_cb(cur_link, data))) {
262 /* Check child die */
263 ret = dwarf_child(cur_link->die, &new_die, &__dw_error);
264 DIE_IF(ret == DW_DLV_ERROR);
265 if (ret == DW_DLV_OK) {
266 new_link.parent = cur_link;
267 new_link.die = new_die;
268 ret = __search_die_tree(&new_link, die_cb, data);
269 if (ret)
270 break;
271 }
272
273 /* Move to next sibling */
274 ret = dwarf_siblingof(__dw_debug, cur_link->die, &new_die,
275 &__dw_error);
276 DIE_IF(ret == DW_DLV_ERROR);
277 dwarf_dealloc(__dw_debug, cur_link->die, DW_DLA_DIE);
278 cur_link->die = new_die;
279 if (ret == DW_DLV_NO_ENTRY)
280 return 0;
281 }
282 dwarf_dealloc(__dw_debug, cur_link->die, DW_DLA_DIE);
283 return ret;
284}
285
286/* Search a die in its children's die tree */
287static int search_die_from_children(Dwarf_Die parent_die,
288 int (*die_cb)(struct die_link *, void *),
289 void *data)
290{
291 struct die_link new_link;
292 int ret;
293
294 new_link.parent = NULL;
295 ret = dwarf_child(parent_die, &new_link.die, &__dw_error);
296 DIE_IF(ret == DW_DLV_ERROR);
297 if (ret == DW_DLV_OK)
298 return __search_die_tree(&new_link, die_cb, data);
299 else
300 return 0;
301}
302
303/* Find a locdesc corresponding to the address */
304static int attr_get_locdesc(Dwarf_Attribute attr, Dwarf_Locdesc *desc,
305 Dwarf_Addr addr)
306{
307 Dwarf_Signed lcnt;
308 Dwarf_Locdesc **llbuf;
309 int ret, i;
310
311 ret = dwarf_loclist_n(attr, &llbuf, &lcnt, &__dw_error);
312 DIE_IF(ret != DW_DLV_OK);
313 ret = DW_DLV_NO_ENTRY;
314 for (i = 0; i < lcnt; ++i) {
315 if (llbuf[i]->ld_lopc <= addr &&
316 llbuf[i]->ld_hipc > addr) {
317 memcpy(desc, llbuf[i], sizeof(Dwarf_Locdesc));
318 desc->ld_s =
319 malloc(sizeof(Dwarf_Loc) * llbuf[i]->ld_cents);
320 DIE_IF(desc->ld_s == NULL);
321 memcpy(desc->ld_s, llbuf[i]->ld_s,
322 sizeof(Dwarf_Loc) * llbuf[i]->ld_cents);
323 ret = DW_DLV_OK;
324 break;
325 }
326 dwarf_dealloc(__dw_debug, llbuf[i]->ld_s, DW_DLA_LOC_BLOCK);
327 dwarf_dealloc(__dw_debug, llbuf[i], DW_DLA_LOCDESC);
328 }
329 /* Releasing loop */
330 for (; i < lcnt; ++i) {
331 dwarf_dealloc(__dw_debug, llbuf[i]->ld_s, DW_DLA_LOC_BLOCK);
332 dwarf_dealloc(__dw_debug, llbuf[i], DW_DLA_LOCDESC);
333 }
334 dwarf_dealloc(__dw_debug, llbuf, DW_DLA_LIST);
335 return ret;
336}
337
338/* Get decl_file attribute value (file number) */
339static Dwarf_Unsigned die_get_decl_file(Dwarf_Die sp_die)
340{
341 Dwarf_Attribute attr;
342 Dwarf_Unsigned fno;
343 int ret;
344
345 ret = dwarf_attr(sp_die, DW_AT_decl_file, &attr, &__dw_error);
346 DIE_IF(ret != DW_DLV_OK);
347 dwarf_formudata(attr, &fno, &__dw_error);
348 DIE_IF(ret != DW_DLV_OK);
349 dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
350 return fno;
351}
352
353/* Get decl_line attribute value (line number) */
354static Dwarf_Unsigned die_get_decl_line(Dwarf_Die sp_die)
355{
356 Dwarf_Attribute attr;
357 Dwarf_Unsigned lno;
358 int ret;
359
360 ret = dwarf_attr(sp_die, DW_AT_decl_line, &attr, &__dw_error);
361 DIE_IF(ret != DW_DLV_OK);
362 dwarf_formudata(attr, &lno, &__dw_error);
363 DIE_IF(ret != DW_DLV_OK);
364 dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
365 return lno;
366}
367
368/*
369 * Probe finder related functions
370 */
371
372/* Show a location */
373static void show_location(Dwarf_Loc *loc, struct probe_finder *pf)
374{
375 Dwarf_Small op;
376 Dwarf_Unsigned regn;
377 Dwarf_Signed offs;
378 int deref = 0, ret;
379 const char *regs;
380
381 op = loc->lr_atom;
382
383 /* If this is based on frame buffer, set the offset */
384 if (op == DW_OP_fbreg) {
385 deref = 1;
386 offs = (Dwarf_Signed)loc->lr_number;
387 op = pf->fbloc.ld_s[0].lr_atom;
388 loc = &pf->fbloc.ld_s[0];
389 } else
390 offs = 0;
391
392 if (op >= DW_OP_breg0 && op <= DW_OP_breg31) {
393 regn = op - DW_OP_breg0;
394 offs += (Dwarf_Signed)loc->lr_number;
395 deref = 1;
396 } else if (op >= DW_OP_reg0 && op <= DW_OP_reg31) {
397 regn = op - DW_OP_reg0;
398 } else if (op == DW_OP_bregx) {
399 regn = loc->lr_number;
400 offs += (Dwarf_Signed)loc->lr_number2;
401 deref = 1;
402 } else if (op == DW_OP_regx) {
403 regn = loc->lr_number;
404 } else
405 die("Dwarf_OP %d is not supported.\n", op);
406
407 regs = get_arch_regstr(regn);
408 if (!regs)
409 die("%lld exceeds max register number.\n", regn);
410
411 if (deref)
412 ret = snprintf(pf->buf, pf->len,
413 " %s=%+lld(%s)", pf->var, offs, regs);
414 else
415 ret = snprintf(pf->buf, pf->len, " %s=%s", pf->var, regs);
416 DIE_IF(ret < 0);
417 DIE_IF(ret >= pf->len);
418}
419
420/* Show a variable in kprobe event format */
421static void show_variable(Dwarf_Die vr_die, struct probe_finder *pf)
422{
423 Dwarf_Attribute attr;
424 Dwarf_Locdesc ld;
425 int ret;
426
427 ret = dwarf_attr(vr_die, DW_AT_location, &attr, &__dw_error);
428 if (ret != DW_DLV_OK)
429 goto error;
430 ret = attr_get_locdesc(attr, &ld, (pf->addr - pf->cu_base));
431 if (ret != DW_DLV_OK)
432 goto error;
433 /* TODO? */
434 DIE_IF(ld.ld_cents != 1);
435 show_location(&ld.ld_s[0], pf);
436 free(ld.ld_s);
437 dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
438 return ;
439error:
440 die("Failed to find the location of %s at this address.\n"
441 " Perhaps, it has been optimized out.\n", pf->var);
442}
443
444static int variable_callback(struct die_link *dlink, void *data)
445{
446 struct probe_finder *pf = (struct probe_finder *)data;
447 Dwarf_Half tag;
448 int ret;
449
450 ret = dwarf_tag(dlink->die, &tag, &__dw_error);
451 DIE_IF(ret == DW_DLV_ERROR);
452 if ((tag == DW_TAG_formal_parameter ||
453 tag == DW_TAG_variable) &&
454 (die_compare_name(dlink->die, pf->var) == 0)) {
455 show_variable(dlink->die, pf);
456 return 1;
457 }
458 /* TODO: Support struct members and arrays */
459 return 0;
460}
461
462/* Find a variable in a subprogram die */
463static void find_variable(Dwarf_Die sp_die, struct probe_finder *pf)
464{
465 int ret;
466
467 if (!is_c_varname(pf->var)) {
468 /* Output raw parameters */
469 ret = snprintf(pf->buf, pf->len, " %s", pf->var);
470 DIE_IF(ret < 0);
471 DIE_IF(ret >= pf->len);
472 return ;
473 }
474
475 pr_debug("Searching '%s' variable in context.\n", pf->var);
476 /* Search child die for local variables and parameters. */
477 ret = search_die_from_children(sp_die, variable_callback, pf);
478 if (!ret)
479 die("Failed to find '%s' in this function.\n", pf->var);
480}
481
482/* Get a frame base on the address */
483static void get_current_frame_base(Dwarf_Die sp_die, struct probe_finder *pf)
484{
485 Dwarf_Attribute attr;
486 int ret;
487
488 ret = dwarf_attr(sp_die, DW_AT_frame_base, &attr, &__dw_error);
489 DIE_IF(ret != DW_DLV_OK);
490 ret = attr_get_locdesc(attr, &pf->fbloc, (pf->addr - pf->cu_base));
491 DIE_IF(ret != DW_DLV_OK);
492 dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
493}
494
495static void free_current_frame_base(struct probe_finder *pf)
496{
497 free(pf->fbloc.ld_s);
498 memset(&pf->fbloc, 0, sizeof(Dwarf_Locdesc));
499}
500
501/* Show a probe point to output buffer */
502static void show_probepoint(Dwarf_Die sp_die, Dwarf_Signed offs,
503 struct probe_finder *pf)
504{
505 struct probe_point *pp = pf->pp;
506 char *name;
507 char tmp[MAX_PROBE_BUFFER];
508 int ret, i, len;
509
510 /* Output name of probe point */
511 ret = dwarf_diename(sp_die, &name, &__dw_error);
512 DIE_IF(ret == DW_DLV_ERROR);
513 if (ret == DW_DLV_OK) {
514 ret = snprintf(tmp, MAX_PROBE_BUFFER, "%s+%u", name,
515 (unsigned int)offs);
516 /* Copy the function name if possible */
517 if (!pp->function) {
518 pp->function = strdup(name);
519 pp->offset = offs;
520 }
521 dwarf_dealloc(__dw_debug, name, DW_DLA_STRING);
522 } else {
523 /* This function has no name. */
524 ret = snprintf(tmp, MAX_PROBE_BUFFER, "0x%llx", pf->addr);
525 if (!pp->function) {
526 /* TODO: Use _stext */
527 pp->function = strdup("");
528 pp->offset = (int)pf->addr;
529 }
530 }
531 DIE_IF(ret < 0);
532 DIE_IF(ret >= MAX_PROBE_BUFFER);
533 len = ret;
534 pr_debug("Probe point found: %s\n", tmp);
535
536 /* Find each argument */
537 get_current_frame_base(sp_die, pf);
538 for (i = 0; i < pp->nr_args; i++) {
539 pf->var = pp->args[i];
540 pf->buf = &tmp[len];
541 pf->len = MAX_PROBE_BUFFER - len;
542 find_variable(sp_die, pf);
543 len += strlen(pf->buf);
544 }
545 free_current_frame_base(pf);
546
547 pp->probes[pp->found] = strdup(tmp);
548 pp->found++;
549}
550
551static int probeaddr_callback(struct die_link *dlink, void *data)
552{
553 struct probe_finder *pf = (struct probe_finder *)data;
554 Dwarf_Half tag;
555 Dwarf_Signed offs;
556 int ret;
557
558 ret = dwarf_tag(dlink->die, &tag, &__dw_error);
559 DIE_IF(ret == DW_DLV_ERROR);
560 /* Check the address is in this subprogram */
561 if (tag == DW_TAG_subprogram &&
562 die_within_subprogram(dlink->die, pf->addr, &offs)) {
563 show_probepoint(dlink->die, offs, pf);
564 return 1;
565 }
566 return 0;
567}
568
569/* Find probe point from its line number */
570static void find_by_line(struct probe_finder *pf)
571{
572 Dwarf_Signed cnt, i, clm;
573 Dwarf_Line *lines;
574 Dwarf_Unsigned lineno = 0;
575 Dwarf_Addr addr;
576 Dwarf_Unsigned fno;
577 int ret;
578
579 ret = dwarf_srclines(pf->cu_die, &lines, &cnt, &__dw_error);
580 DIE_IF(ret != DW_DLV_OK);
581
582 for (i = 0; i < cnt; i++) {
583 ret = dwarf_line_srcfileno(lines[i], &fno, &__dw_error);
584 DIE_IF(ret != DW_DLV_OK);
585 if (fno != pf->fno)
586 continue;
587
588 ret = dwarf_lineno(lines[i], &lineno, &__dw_error);
589 DIE_IF(ret != DW_DLV_OK);
590 if (lineno != pf->lno)
591 continue;
592
593 ret = dwarf_lineoff(lines[i], &clm, &__dw_error);
594 DIE_IF(ret != DW_DLV_OK);
595
596 ret = dwarf_lineaddr(lines[i], &addr, &__dw_error);
597 DIE_IF(ret != DW_DLV_OK);
598 pr_debug("Probe line found: line[%d]:%u,%d addr:0x%llx\n",
599 (int)i, (unsigned)lineno, (int)clm, addr);
600 pf->addr = addr;
601		/* Search a real subprogram including this line. */
602 ret = search_die_from_children(pf->cu_die,
603 probeaddr_callback, pf);
604 if (ret == 0)
605 die("Probe point is not found in subprograms.\n");
606 /* Continuing, because target line might be inlined. */
607 }
608 dwarf_srclines_dealloc(__dw_debug, lines, cnt);
609}
610
611/* Search function from function name */
612static int probefunc_callback(struct die_link *dlink, void *data)
613{
614 struct probe_finder *pf = (struct probe_finder *)data;
615 struct probe_point *pp = pf->pp;
616 struct die_link *lk;
617 Dwarf_Signed offs;
618 Dwarf_Half tag;
619 int ret;
620
621 ret = dwarf_tag(dlink->die, &tag, &__dw_error);
622 DIE_IF(ret == DW_DLV_ERROR);
623 if (tag == DW_TAG_subprogram) {
624 if (die_compare_name(dlink->die, pp->function) == 0) {
625 if (pp->line) { /* Function relative line */
626 pf->fno = die_get_decl_file(dlink->die);
627 pf->lno = die_get_decl_line(dlink->die)
628 + pp->line;
629 find_by_line(pf);
630 return 1;
631 }
632 if (die_inlined_subprogram(dlink->die)) {
633 /* Inlined function, save it. */
634 ret = dwarf_die_CU_offset(dlink->die,
635 &pf->inl_offs,
636 &__dw_error);
637 DIE_IF(ret != DW_DLV_OK);
638 pr_debug("inline definition offset %lld\n",
639 pf->inl_offs);
640 return 0; /* Continue to search */
641 }
642 /* Get probe address */
643 pf->addr = die_get_entrypc(dlink->die);
644 pf->addr += pp->offset;
645 /* TODO: Check the address in this function */
646 show_probepoint(dlink->die, pp->offset, pf);
647 return 1; /* Exit; no same symbol in this CU. */
648 }
649 } else if (tag == DW_TAG_inlined_subroutine && pf->inl_offs) {
650 if (die_get_abstract_origin(dlink->die) == pf->inl_offs) {
651 /* Get probe address */
652 pf->addr = die_get_entrypc(dlink->die);
653 pf->addr += pp->offset;
654 pr_debug("found inline addr: 0x%llx\n", pf->addr);
655 /* Inlined function. Get a real subprogram */
656 for (lk = dlink->parent; lk != NULL; lk = lk->parent) {
657 tag = 0;
658				ret = dwarf_tag(lk->die, &tag, &__dw_error);
659 DIE_IF(ret == DW_DLV_ERROR);
660 if (tag == DW_TAG_subprogram &&
661 !die_inlined_subprogram(lk->die))
662 goto found;
663 }
664 die("Failed to find real subprogram.\n");
665found:
666 /* Get offset from subprogram */
667 ret = die_within_subprogram(lk->die, pf->addr, &offs);
668 DIE_IF(!ret);
669 show_probepoint(lk->die, offs, pf);
670 /* Continue to search */
671 }
672 }
673 return 0;
674}
675
676static void find_by_func(struct probe_finder *pf)
677{
678 search_die_from_children(pf->cu_die, probefunc_callback, pf);
679}
680
681/* Find a probe point */
682int find_probepoint(int fd, struct probe_point *pp)
683{
684 Dwarf_Half addr_size = 0;
685 Dwarf_Unsigned next_cuh = 0;
686 int cu_number = 0, ret;
687 struct probe_finder pf = {.pp = pp};
688
689 ret = dwarf_init(fd, DW_DLC_READ, 0, 0, &__dw_debug, &__dw_error);
690 if (ret != DW_DLV_OK) {
691 pr_warning("No dwarf info found in the vmlinux - please rebuild with CONFIG_DEBUG_INFO.\n");
692 return -ENOENT;
693 }
694
695 pp->found = 0;
696 while (++cu_number) {
697 /* Search CU (Compilation Unit) */
698 ret = dwarf_next_cu_header(__dw_debug, NULL, NULL, NULL,
699 &addr_size, &next_cuh, &__dw_error);
700 DIE_IF(ret == DW_DLV_ERROR);
701 if (ret == DW_DLV_NO_ENTRY)
702 break;
703
704 /* Get the DIE(Debugging Information Entry) of this CU */
705 ret = dwarf_siblingof(__dw_debug, 0, &pf.cu_die, &__dw_error);
706 DIE_IF(ret != DW_DLV_OK);
707
708 /* Check if target file is included. */
709 if (pp->file)
710 pf.fno = cu_find_fileno(pf.cu_die, pp->file);
711
712 if (!pp->file || pf.fno) {
713 /* Save CU base address (for frame_base) */
714 ret = dwarf_lowpc(pf.cu_die, &pf.cu_base, &__dw_error);
715 DIE_IF(ret == DW_DLV_ERROR);
716 if (ret == DW_DLV_NO_ENTRY)
717 pf.cu_base = 0;
718 if (pp->function)
719 find_by_func(&pf);
720 else {
721 pf.lno = pp->line;
722 find_by_line(&pf);
723 }
724 }
725 dwarf_dealloc(__dw_debug, pf.cu_die, DW_DLA_DIE);
726 }
727 ret = dwarf_finish(__dw_debug, &__dw_error);
728 DIE_IF(ret != DW_DLV_OK);
729
730 return pp->found;
731}
732
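
Note: strtailcmp() above is what lets a user-supplied file name such as sched.c match the full source paths recorded in DWARF. A self-contained check of its tail-matching semantics, with the function copied verbatim and a throwaway main:

#include <stdio.h>
#include <string.h>

static int strtailcmp(const char *s1, const char *s2)
{
	int i1 = strlen(s1);
	int i2 = strlen(s2);

	while (--i1 > 0 && --i2 > 0) {
		if (s1[i1] != s2[i2])
			return s1[i1] - s2[i2];
	}
	return 0;
}

int main(void)
{
	/* prints 0: "sched.c" is the tail of the longer path */
	printf("%d\n", strtailcmp("kernel/sched.c", "sched.c"));
	/* prints nonzero: tails diverge */
	printf("%d\n", strtailcmp("kernel/sched.c", "fork.c"));
	return 0;
}
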
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
new file mode 100644
index 000000000000..bdebca6697d2
--- /dev/null
+++ b/tools/perf/util/probe-finder.h
@@ -0,0 +1,57 @@
1#ifndef _PROBE_FINDER_H
2#define _PROBE_FINDER_H
3
4#define MAX_PATH_LEN 256
5#define MAX_PROBE_BUFFER 1024
6#define MAX_PROBES 128
7
8static inline int is_c_varname(const char *name)
9{
10 /* TODO */
11 return isalpha(name[0]) || name[0] == '_';
12}
13
14struct probe_point {
15 /* Inputs */
16 char *file; /* File name */
17 int line; /* Line number */
18
19 char *function; /* Function name */
20 int offset; /* Offset bytes */
21
22 int nr_args; /* Number of arguments */
23 char **args; /* Arguments */
24
25 int retprobe; /* Return probe */
26
27 /* Output */
28 int found; /* Number of found probe points */
29 char *probes[MAX_PROBES]; /* Output buffers (will be allocated)*/
30};
31
32#ifndef NO_LIBDWARF
33extern int find_probepoint(int fd, struct probe_point *pp);
34
35#include <libdwarf/dwarf.h>
36#include <libdwarf/libdwarf.h>
37
38struct probe_finder {
39 struct probe_point *pp; /* Target probe point */
40
41 /* For function searching */
42 Dwarf_Addr addr; /* Address */
43 Dwarf_Unsigned fno; /* File number */
44 Dwarf_Unsigned lno; /* Line number */
45 Dwarf_Off inl_offs; /* Inline offset */
46 Dwarf_Die cu_die; /* Current CU */
47
48 /* For variable searching */
49 Dwarf_Addr cu_base; /* Current CU base address */
50 Dwarf_Locdesc fbloc; /* Location of Current Frame Base */
51 const char *var; /* Current variable name */
52 char *buf; /* Current output buffer */
53 int len; /* Length of output buffer */
54};
55#endif /* NO_LIBDWARF */
56
57#endif /*_PROBE_FINDER_H */
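
Note: is_c_varname() is deliberately loose; anything starting with a letter or underscore is treated as a C variable and routed through the DWARF search, while register-style arguments such as %ax pass through raw. A quick standalone check (note the ctype.h dependency the header leaves to its includer):

#include <ctype.h>
#include <stdio.h>

static int is_c_varname(const char *name)
{
	return isalpha(name[0]) || name[0] == '_';
}

int main(void)
{
	printf("%d %d\n", is_c_varname("prev"), is_c_varname("%ax"));
	return 0;	/* prints "1 0" */
}
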
diff --git a/tools/perf/util/quote.h b/tools/perf/util/quote.h
index a5454a1d1c13..b6a019733919 100644
--- a/tools/perf/util/quote.h
+++ b/tools/perf/util/quote.h
@@ -1,5 +1,5 @@
1#ifndef QUOTE_H 1#ifndef __PERF_QUOTE_H
2#define QUOTE_H 2#define __PERF_QUOTE_H
3 3
4#include <stddef.h> 4#include <stddef.h>
5#include <stdio.h> 5#include <stdio.h>
@@ -65,4 +65,4 @@ extern void perl_quote_print(FILE *stream, const char *src);
65extern void python_quote_print(FILE *stream, const char *src); 65extern void python_quote_print(FILE *stream, const char *src);
66extern void tcl_quote_print(FILE *stream, const char *src); 66extern void tcl_quote_print(FILE *stream, const char *src);
67 67
68#endif 68#endif /* __PERF_QUOTE_H */
diff --git a/tools/perf/util/run-command.h b/tools/perf/util/run-command.h
index cc1837deba88..d79028727ce2 100644
--- a/tools/perf/util/run-command.h
+++ b/tools/perf/util/run-command.h
@@ -1,5 +1,5 @@
1#ifndef RUN_COMMAND_H 1#ifndef __PERF_RUN_COMMAND_H
2#define RUN_COMMAND_H 2#define __PERF_RUN_COMMAND_H
3 3
4enum { 4enum {
5 ERR_RUN_COMMAND_FORK = 10000, 5 ERR_RUN_COMMAND_FORK = 10000,
@@ -85,4 +85,4 @@ struct async {
85int start_async(struct async *async); 85int start_async(struct async *async);
86int finish_async(struct async *async); 86int finish_async(struct async *async);
87 87
88#endif 88#endif /* __PERF_RUN_COMMAND_H */
diff --git a/tools/perf/util/sigchain.h b/tools/perf/util/sigchain.h
index 618083bce0c6..1a53c11265fd 100644
--- a/tools/perf/util/sigchain.h
+++ b/tools/perf/util/sigchain.h
@@ -1,5 +1,5 @@
1#ifndef SIGCHAIN_H 1#ifndef __PERF_SIGCHAIN_H
2#define SIGCHAIN_H 2#define __PERF_SIGCHAIN_H
3 3
4typedef void (*sigchain_fun)(int); 4typedef void (*sigchain_fun)(int);
5 5
@@ -8,4 +8,4 @@ int sigchain_pop(int sig);
8 8
9void sigchain_push_common(sigchain_fun f); 9void sigchain_push_common(sigchain_fun f);
10 10
11#endif /* SIGCHAIN_H */ 11#endif /* __PERF_SIGCHAIN_H */
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
new file mode 100644
index 000000000000..b490354d1b23
--- /dev/null
+++ b/tools/perf/util/sort.c
@@ -0,0 +1,290 @@
1#include "sort.h"
2
3regex_t parent_regex;
4char default_parent_pattern[] = "^sys_|^do_page_fault";
5char *parent_pattern = default_parent_pattern;
6char default_sort_order[] = "comm,dso,symbol";
7char *sort_order = default_sort_order;
8int sort__need_collapse = 0;
9int sort__has_parent = 0;
10
11enum sort_type sort__first_dimension;
12
13unsigned int dsos__col_width;
14unsigned int comms__col_width;
15unsigned int threads__col_width;
16static unsigned int parent_symbol__col_width;
 17char *field_sep;
18
19LIST_HEAD(hist_entry__sort_list);
20
21struct sort_entry sort_thread = {
22 .header = "Command: Pid",
23 .cmp = sort__thread_cmp,
24 .print = sort__thread_print,
25 .width = &threads__col_width,
26};
27
28struct sort_entry sort_comm = {
29 .header = "Command",
30 .cmp = sort__comm_cmp,
31 .collapse = sort__comm_collapse,
32 .print = sort__comm_print,
33 .width = &comms__col_width,
34};
35
36struct sort_entry sort_dso = {
37 .header = "Shared Object",
38 .cmp = sort__dso_cmp,
39 .print = sort__dso_print,
40 .width = &dsos__col_width,
41};
42
43struct sort_entry sort_sym = {
44 .header = "Symbol",
45 .cmp = sort__sym_cmp,
46 .print = sort__sym_print,
47};
48
49struct sort_entry sort_parent = {
50 .header = "Parent symbol",
51 .cmp = sort__parent_cmp,
52 .print = sort__parent_print,
53 .width = &parent_symbol__col_width,
54};
55
56struct sort_dimension {
57 const char *name;
58 struct sort_entry *entry;
59 int taken;
60};
61
62static struct sort_dimension sort_dimensions[] = {
63 { .name = "pid", .entry = &sort_thread, },
64 { .name = "comm", .entry = &sort_comm, },
65 { .name = "dso", .entry = &sort_dso, },
66 { .name = "symbol", .entry = &sort_sym, },
67 { .name = "parent", .entry = &sort_parent, },
68};
69
70int64_t cmp_null(void *l, void *r)
71{
72 if (!l && !r)
73 return 0;
74 else if (!l)
75 return -1;
76 else
77 return 1;
78}
79
80/* --sort pid */
81
82int64_t
83sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
84{
85 return right->thread->pid - left->thread->pid;
86}
87
88int repsep_fprintf(FILE *fp, const char *fmt, ...)
89{
90 int n;
91 va_list ap;
92
93 va_start(ap, fmt);
94 if (!field_sep)
95 n = vfprintf(fp, fmt, ap);
96 else {
97 char *bf = NULL;
98 n = vasprintf(&bf, fmt, ap);
99 if (n > 0) {
100 char *sep = bf;
101
102 while (1) {
103 sep = strchr(sep, *field_sep);
104 if (sep == NULL)
105 break;
106 *sep = '.';
107 }
108 }
109		if (n >= 0) fputs(bf, fp);
110 free(bf);
111 }
112 va_end(ap);
113 return n;
114}
115
116size_t
117sort__thread_print(FILE *fp, struct hist_entry *self, unsigned int width)
118{
119 return repsep_fprintf(fp, "%*s:%5d", width - 6,
120 self->thread->comm ?: "", self->thread->pid);
121}
122
123size_t
124sort__comm_print(FILE *fp, struct hist_entry *self, unsigned int width)
125{
126 return repsep_fprintf(fp, "%*s", width, self->thread->comm);
127}
128
129/* --sort dso */
130
131int64_t
132sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
133{
134 struct dso *dso_l = left->map ? left->map->dso : NULL;
135 struct dso *dso_r = right->map ? right->map->dso : NULL;
136 const char *dso_name_l, *dso_name_r;
137
138 if (!dso_l || !dso_r)
139 return cmp_null(dso_l, dso_r);
140
141 if (verbose) {
142 dso_name_l = dso_l->long_name;
143 dso_name_r = dso_r->long_name;
144 } else {
145 dso_name_l = dso_l->short_name;
146 dso_name_r = dso_r->short_name;
147 }
148
149 return strcmp(dso_name_l, dso_name_r);
150}
151
152size_t
153sort__dso_print(FILE *fp, struct hist_entry *self, unsigned int width)
154{
155 if (self->map && self->map->dso) {
156 const char *dso_name = !verbose ? self->map->dso->short_name :
157 self->map->dso->long_name;
158 return repsep_fprintf(fp, "%-*s", width, dso_name);
159 }
160
161 return repsep_fprintf(fp, "%*llx", width, (u64)self->ip);
162}
163
164/* --sort symbol */
165
166int64_t
167sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
168{
169 u64 ip_l, ip_r;
170
171 if (left->sym == right->sym)
172 return 0;
173
174 ip_l = left->sym ? left->sym->start : left->ip;
175 ip_r = right->sym ? right->sym->start : right->ip;
176
177 return (int64_t)(ip_r - ip_l);
178}
179
180
181size_t
182sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used)
183{
184 size_t ret = 0;
185
186 if (verbose) {
187 char o = self->map ? dso__symtab_origin(self->map->dso) : '!';
188 ret += repsep_fprintf(fp, "%#018llx %c ", (u64)self->ip, o);
189 }
190
191 ret += repsep_fprintf(fp, "[%c] ", self->level);
192 if (self->sym)
193 ret += repsep_fprintf(fp, "%s", self->sym->name);
194 else
195 ret += repsep_fprintf(fp, "%#016llx", (u64)self->ip);
196
197 return ret;
198}
199
200/* --sort comm */
201
202int64_t
203sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
204{
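	/*
	 * Note: comparing pids here is intentional and cheap; the actual
	 * comm strings are only compared at collapse time, in
	 * sort__comm_collapse() below.
	 */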
205 return right->thread->pid - left->thread->pid;
206}
207
208int64_t
209sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
210{
211 char *comm_l = left->thread->comm;
212 char *comm_r = right->thread->comm;
213
214 if (!comm_l || !comm_r)
215 return cmp_null(comm_l, comm_r);
216
217 return strcmp(comm_l, comm_r);
218}
219
220/* --sort parent */
221
222int64_t
223sort__parent_cmp(struct hist_entry *left, struct hist_entry *right)
224{
225 struct symbol *sym_l = left->parent;
226 struct symbol *sym_r = right->parent;
227
228 if (!sym_l || !sym_r)
229 return cmp_null(sym_l, sym_r);
230
231 return strcmp(sym_l->name, sym_r->name);
232}
233
234size_t
235sort__parent_print(FILE *fp, struct hist_entry *self, unsigned int width)
236{
237 return repsep_fprintf(fp, "%-*s", width,
238 self->parent ? self->parent->name : "[other]");
239}
240
241int sort_dimension__add(const char *tok)
242{
243 unsigned int i;
244
245 for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) {
246 struct sort_dimension *sd = &sort_dimensions[i];
247
248 if (sd->taken)
249 continue;
250
251 if (strncasecmp(tok, sd->name, strlen(tok)))
252 continue;
253
254 if (sd->entry->collapse)
255 sort__need_collapse = 1;
256
257 if (sd->entry == &sort_parent) {
258 int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
259 if (ret) {
260 char err[BUFSIZ];
261
262 regerror(ret, &parent_regex, err, sizeof(err));
263 fprintf(stderr, "Invalid regex: %s\n%s",
264 parent_pattern, err);
265 exit(-1);
266 }
267 sort__has_parent = 1;
268 }
269
270 if (list_empty(&hist_entry__sort_list)) {
271 if (!strcmp(sd->name, "pid"))
272 sort__first_dimension = SORT_PID;
273 else if (!strcmp(sd->name, "comm"))
274 sort__first_dimension = SORT_COMM;
275 else if (!strcmp(sd->name, "dso"))
276 sort__first_dimension = SORT_DSO;
277 else if (!strcmp(sd->name, "symbol"))
278 sort__first_dimension = SORT_SYM;
279 else if (!strcmp(sd->name, "parent"))
280 sort__first_dimension = SORT_PARENT;
281 }
282
283 list_add_tail(&sd->entry->list, &hist_entry__sort_list);
284 sd->taken = 1;
285
286 return 0;
287 }
288
289 return -ESRCH;
290}
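/*
 * Illustrative sketch, not part of the patch: how a --sort string would
 * be fed to sort_dimension__add(). The real tokenizing loop lives in the
 * report builtins; this hypothetical helper just splits on ',' and
 * registers each key in order.
 */
static int sort_order_example(char *sort_order_str)
{
	char *tok, *saveptr = NULL;

	for (tok = strtok_r(sort_order_str, ",", &saveptr); tok;
	     tok = strtok_r(NULL, ",", &saveptr)) {
		if (sort_dimension__add(tok) < 0) {
			pr_err("Unknown --sort key: '%s'\n", tok);
			return -1;
		}
	}
	return 0;
}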
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
new file mode 100644
index 000000000000..333e664ff45f
--- /dev/null
+++ b/tools/perf/util/sort.h
@@ -0,0 +1,99 @@
1#ifndef __PERF_SORT_H
2#define __PERF_SORT_H
3#include "../builtin.h"
4
5#include "util.h"
6
7#include "color.h"
8#include <linux/list.h>
9#include "cache.h"
10#include <linux/rbtree.h>
11#include "symbol.h"
12#include "string.h"
13#include "callchain.h"
14#include "strlist.h"
15#include "values.h"
16
17#include "../perf.h"
18#include "debug.h"
19#include "header.h"
20
21#include "parse-options.h"
22#include "parse-events.h"
23
24#include "thread.h"
25#include "sort.h"
26
27extern regex_t parent_regex;
28extern char *sort_order;
29extern char default_parent_pattern[];
30extern char *parent_pattern;
31extern char default_sort_order[];
32extern int sort__need_collapse;
33extern int sort__has_parent;
34extern char *field_sep;
35extern struct sort_entry sort_comm;
36extern struct sort_entry sort_dso;
37extern struct sort_entry sort_sym;
38extern struct sort_entry sort_parent;
39extern unsigned int dsos__col_width;
40extern unsigned int comms__col_width;
41extern unsigned int threads__col_width;
42extern enum sort_type sort__first_dimension;
43
44struct hist_entry {
45 struct rb_node rb_node;
46 u64 count;
47 struct thread *thread;
48 struct map *map;
49 struct symbol *sym;
50 u64 ip;
51 char level;
52 struct symbol *parent;
53 struct callchain_node callchain;
54 struct rb_root sorted_chain;
55};
56
57enum sort_type {
58 SORT_PID,
59 SORT_COMM,
60 SORT_DSO,
61 SORT_SYM,
62 SORT_PARENT
63};
64
65/*
66 * configurable sorting bits
67 */
68
69struct sort_entry {
70 struct list_head list;
71
72 const char *header;
73
74 int64_t (*cmp)(struct hist_entry *, struct hist_entry *);
75 int64_t (*collapse)(struct hist_entry *, struct hist_entry *);
76 size_t (*print)(FILE *fp, struct hist_entry *, unsigned int width);
77 unsigned int *width;
78 bool elide;
79};
80
81extern struct sort_entry sort_thread;
82extern struct list_head hist_entry__sort_list;
83
84extern int repsep_fprintf(FILE *fp, const char *fmt, ...);
85extern size_t sort__thread_print(FILE *, struct hist_entry *, unsigned int);
86extern size_t sort__comm_print(FILE *, struct hist_entry *, unsigned int);
87extern size_t sort__dso_print(FILE *, struct hist_entry *, unsigned int);
88extern size_t sort__sym_print(FILE *, struct hist_entry *, unsigned int __used);
89extern int64_t cmp_null(void *, void *);
90extern int64_t sort__thread_cmp(struct hist_entry *, struct hist_entry *);
91extern int64_t sort__comm_cmp(struct hist_entry *, struct hist_entry *);
92extern int64_t sort__comm_collapse(struct hist_entry *, struct hist_entry *);
93extern int64_t sort__dso_cmp(struct hist_entry *, struct hist_entry *);
94extern int64_t sort__sym_cmp(struct hist_entry *, struct hist_entry *);
95extern int64_t sort__parent_cmp(struct hist_entry *, struct hist_entry *);
96extern size_t sort__parent_print(FILE *, struct hist_entry *, unsigned int);
97extern int sort_dimension__add(const char *);
98
99#endif /* __PERF_SORT_H */
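/*
 * Illustrative sketch, not part of the patch: what a new dimension would
 * look like against this interface. The "cpu" key is purely hypothetical
 * and assumes struct hist_entry grew a ->cpu field; a real dimension
 * would also need an entry in sort_dimensions[] in sort.c.
 */
static int64_t sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right)
{
	return right->cpu - left->cpu;		/* hypothetical field */
}

static size_t sort__cpu_print(FILE *fp, struct hist_entry *self,
			      unsigned int width)
{
	return repsep_fprintf(fp, "%*d", width, self->cpu);
}

struct sort_entry sort_cpu = {
	.header	= "CPU",
	.cmp	= sort__cpu_cmp,
	.print	= sort__cpu_print,
};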
diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h
index d2aa86c014c1..a3d121d6c83e 100644
--- a/tools/perf/util/strbuf.h
+++ b/tools/perf/util/strbuf.h
@@ -1,5 +1,5 @@
1#ifndef STRBUF_H 1#ifndef __PERF_STRBUF_H
2#define STRBUF_H 2#define __PERF_STRBUF_H
3 3
4/* 4/*
5 * Strbuf's can be used in many ways: as a byte array, or to store arbitrary 5 * Strbuf's can be used in many ways: as a byte array, or to store arbitrary
@@ -134,4 +134,4 @@ extern int launch_editor(const char *path, struct strbuf *buffer, const char *co
134extern int strbuf_branchname(struct strbuf *sb, const char *name); 134extern int strbuf_branchname(struct strbuf *sb, const char *name);
135extern int strbuf_check_branch_ref(struct strbuf *sb, const char *name); 135extern int strbuf_check_branch_ref(struct strbuf *sb, const char *name);
136 136
137#endif /* STRBUF_H */ 137#endif /* __PERF_STRBUF_H */
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index c93eca9a7be3..f24a8cc933d5 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -1,4 +1,5 @@
1#include "string.h" 1#include "string.h"
2#include "util.h"
2 3
3static int hex(char ch) 4static int hex(char ch)
4{ 5{
@@ -32,3 +33,196 @@ int hex2u64(const char *ptr, u64 *long_val)
32 33
33 return p - ptr; 34 return p - ptr;
34} 35}
36
37char *strxfrchar(char *s, char from, char to)
38{
39 char *p = s;
40
41 while ((p = strchr(p, from)) != NULL)
42 *p++ = to;
43
44 return s;
45}
46
47#define K 1024LL
48/*
49 * perf_atoll()
50 * Parse (\d+)(b|B|kb|KB|mb|MB|gb|GB|tb|TB) (e.g. "256MB")
51 * and return its numeric value in bytes
52 */
53s64 perf_atoll(const char *str)
54{
55 unsigned int i;
56 s64 length = -1, unit = 1;
57
58 if (!isdigit(str[0]))
59 goto out_err;
60
61 for (i = 1; i < strlen(str); i++) {
62 switch (str[i]) {
63 case 'B':
64 case 'b':
65 break;
66 case 'K':
67 if (str[i + 1] != 'B')
68 goto out_err;
69 else
70 goto kilo;
71 case 'k':
72 if (str[i + 1] != 'b')
73 goto out_err;
74kilo:
75 unit = K;
76 break;
77 case 'M':
78 if (str[i + 1] != 'B')
79 goto out_err;
80 else
81 goto mega;
82 case 'm':
83 if (str[i + 1] != 'b')
84 goto out_err;
85mega:
86 unit = K * K;
87 break;
88 case 'G':
89 if (str[i + 1] != 'B')
90 goto out_err;
91 else
92 goto giga;
93 case 'g':
94 if (str[i + 1] != 'b')
95 goto out_err;
96giga:
97 unit = K * K * K;
98 break;
99 case 'T':
100 if (str[i + 1] != 'B')
101 goto out_err;
102 else
103 goto tera;
104 case 't':
105 if (str[i + 1] != 'b')
106 goto out_err;
107tera:
108 unit = K * K * K * K;
109 break;
110 case '\0': /* bare number, no unit suffix */
111 unit = 1;
112 break;
113 default:
114 if (!isdigit(str[i]))
115 goto out_err;
116 break;
117 }
118 }
119
120 length = atoll(str) * unit;
121 goto out;
122
123out_err:
124 length = -1;
125out:
126 return length;
127}
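/*
 * Expected behaviour of the parser above, for reference:
 *	perf_atoll("256MB") == 256LL * 1024 * 1024
 *	perf_atoll("4kb")   == 4096
 *	perf_atoll("12")    == 12
 *	perf_atoll("12Mb")  == -1	(mixed-case suffixes are rejected)
 */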
128
129/*
130 * Helper function for splitting a string into an argv-like array.
131 * Originally copied from lib/argv_split.c.
132 */
133static const char *skip_sep(const char *cp)
134{
135 while (*cp && isspace(*cp))
136 cp++;
137
138 return cp;
139}
140
141static const char *skip_arg(const char *cp)
142{
143 while (*cp && !isspace(*cp))
144 cp++;
145
146 return cp;
147}
148
149static int count_argc(const char *str)
150{
151 int count = 0;
152
153 while (*str) {
154 str = skip_sep(str);
155 if (*str) {
156 count++;
157 str = skip_arg(str);
158 }
159 }
160
161 return count;
162}
163
164/**
165 * argv_free - free an argv
166 * @argv: the argument vector to be freed
167 *
168 * Frees an argv and the strings it points to.
169 */
170void argv_free(char **argv)
171{
172 char **p;
173 for (p = argv; *p; p++)
174 free(*p);
175
176 free(argv);
177}
178
179/**
180 * argv_split - split a string at whitespace, returning an argv
181 * @str: the string to be split
182 * @argcp: returned argument count
183 *
184 * Returns an array of pointers to strings which are split out from
185 * @str. This is performed by strictly splitting on white-space; no
186 * quote processing is performed. Multiple whitespace characters are
187 * considered to be a single argument separator. The returned array
188 * is always NULL-terminated. Returns NULL on memory allocation
189 * failure.
190 */
191char **argv_split(const char *str, int *argcp)
192{
193 int argc = count_argc(str);
194 char **argv = zalloc(sizeof(*argv) * (argc+1));
195 char **argvp;
196
197 if (argv == NULL)
198 goto out;
199
200 if (argcp)
201 *argcp = argc;
202
203 argvp = argv;
204
205 while (*str) {
206 str = skip_sep(str);
207
208 if (*str) {
209 const char *p = str;
210 char *t;
211
212 str = skip_arg(str);
213
214 t = strndup(p, str-p);
215 if (t == NULL)
216 goto fail;
217 *argvp++ = t;
218 }
219 }
220 *argvp = NULL;
221
222out:
223 return argv;
224
225fail:
226 argv_free(argv);
227 return NULL;
228}
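/*
 * Illustrative sketch, not part of the patch: a typical round trip
 * through the two helpers above.
 */
static void argv_split_example(void)
{
	int argc, i;
	char **argv = argv_split("do_fork  pid comm", &argc);

	if (argv == NULL)
		return;
	for (i = 0; i < argc; i++)	/* prints the three words */
		printf("argv[%d] = '%s'\n", i, argv[i]);
	argv_free(argv);
}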
diff --git a/tools/perf/util/string.h b/tools/perf/util/string.h
index bf39dfadfd24..bfecec265a1a 100644
--- a/tools/perf/util/string.h
+++ b/tools/perf/util/string.h
@@ -1,11 +1,15 @@
1#ifndef _PERF_STRING_H_ 1#ifndef __PERF_STRING_H_
2#define _PERF_STRING_H_ 2#define __PERF_STRING_H_
3 3
4#include "types.h" 4#include "types.h"
5 5
6int hex2u64(const char *ptr, u64 *val); 6int hex2u64(const char *ptr, u64 *val);
7char *strxfrchar(char *s, char from, char to);
8s64 perf_atoll(const char *str);
9char **argv_split(const char *str, int *argcp);
10void argv_free(char **argv);
7 11
8#define _STR(x) #x 12#define _STR(x) #x
9#define STR(x) _STR(x) 13#define STR(x) _STR(x)
10 14
11#endif 15#endif /* __PERF_STRING_H_ */
diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h
index 921818e44a54..cb4659306d7b 100644
--- a/tools/perf/util/strlist.h
+++ b/tools/perf/util/strlist.h
@@ -1,5 +1,5 @@
1#ifndef STRLIST_H_ 1#ifndef __PERF_STRLIST_H
2#define STRLIST_H_ 2#define __PERF_STRLIST_H
3 3
4#include <linux/rbtree.h> 4#include <linux/rbtree.h>
5#include <stdbool.h> 5#include <stdbool.h>
@@ -36,4 +36,4 @@ static inline unsigned int strlist__nr_entries(const struct strlist *self)
36} 36}
37 37
38int strlist__parse_list(struct strlist *self, const char *s); 38int strlist__parse_list(struct strlist *self, const char *s);
39#endif /* STRLIST_H_ */ 39#endif /* __PERF_STRLIST_H */
diff --git a/tools/perf/util/svghelper.h b/tools/perf/util/svghelper.h
index cd93195aedb3..e0781989cc31 100644
--- a/tools/perf/util/svghelper.h
+++ b/tools/perf/util/svghelper.h
@@ -1,5 +1,5 @@
1#ifndef _INCLUDE_GUARD_SVG_HELPER_ 1#ifndef __PERF_SVGHELPER_H
2#define _INCLUDE_GUARD_SVG_HELPER_ 2#define __PERF_SVGHELPER_H
3 3
4#include "types.h" 4#include "types.h"
5 5
@@ -25,4 +25,4 @@ extern void svg_close(void);
25 25
26extern int svg_page_width; 26extern int svg_page_width;
27 27
28#endif 28#endif /* __PERF_SVGHELPER_H */
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 226f44a2357d..fffcb937cdcb 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -2,14 +2,20 @@
2#include "../perf.h" 2#include "../perf.h"
3#include "string.h" 3#include "string.h"
4#include "symbol.h" 4#include "symbol.h"
5#include "thread.h"
5 6
6#include "debug.h" 7#include "debug.h"
7 8
9#include <asm/bug.h>
8#include <libelf.h> 10#include <libelf.h>
9#include <gelf.h> 11#include <gelf.h>
10#include <elf.h> 12#include <elf.h>
13#include <limits.h>
14#include <sys/utsname.h>
11 15
12const char *sym_hist_filter; 16#ifndef NT_GNU_BUILD_ID
17#define NT_GNU_BUILD_ID 3
18#endif
13 19
14enum dso_origin { 20enum dso_origin {
15 DSO__ORIG_KERNEL = 0, 21 DSO__ORIG_KERNEL = 0,
@@ -18,94 +24,189 @@ enum dso_origin {
18 DSO__ORIG_UBUNTU, 24 DSO__ORIG_UBUNTU,
19 DSO__ORIG_BUILDID, 25 DSO__ORIG_BUILDID,
20 DSO__ORIG_DSO, 26 DSO__ORIG_DSO,
27 DSO__ORIG_KMODULE,
21 DSO__ORIG_NOT_FOUND, 28 DSO__ORIG_NOT_FOUND,
22}; 29};
23 30
24static struct symbol *symbol__new(u64 start, u64 len, 31static void dsos__add(struct list_head *head, struct dso *dso);
25 const char *name, unsigned int priv_size, 32static struct map *thread__find_map_by_name(struct thread *self, char *name);
26 u64 obj_start, int v) 33static struct map *map__new2(u64 start, struct dso *dso, enum map_type type);
34struct symbol *dso__find_symbol(struct dso *self, enum map_type type, u64 addr);
35static int dso__load_kernel_sym(struct dso *self, struct map *map,
36 struct thread *thread, symbol_filter_t filter);
37unsigned int symbol__priv_size;
38static int vmlinux_path__nr_entries;
39static char **vmlinux_path;
40
41static struct symbol_conf symbol_conf__defaults = {
42 .use_modules = true,
43 .try_vmlinux_path = true,
44};
45
46static struct thread kthread_mem;
47struct thread *kthread = &kthread_mem;
48
49bool dso__loaded(const struct dso *self, enum map_type type)
27{ 50{
28 size_t namelen = strlen(name) + 1; 51 return self->loaded & (1 << type);
29 struct symbol *self = calloc(1, priv_size + sizeof(*self) + namelen); 52}
30 53
31 if (!self) 54static void dso__set_loaded(struct dso *self, enum map_type type)
32 return NULL; 55{
56 self->loaded |= (1 << type);
57}
33 58
34 if (v >= 2) 59static void symbols__fixup_end(struct rb_root *self)
35 printf("new symbol: %016Lx [%08lx]: %s, hist: %p, obj_start: %p\n", 60{
36 (u64)start, (unsigned long)len, name, self->hist, (void *)(unsigned long)obj_start); 61 struct rb_node *nd, *prevnd = rb_first(self);
62 struct symbol *curr, *prev;
63
64 if (prevnd == NULL)
65 return;
37 66
38 self->obj_start= obj_start; 67 curr = rb_entry(prevnd, struct symbol, rb_node);
39 self->hist = NULL;
40 self->hist_sum = 0;
41 68
42 if (sym_hist_filter && !strcmp(name, sym_hist_filter)) 69 for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) {
43 self->hist = calloc(sizeof(u64), len); 70 prev = curr;
71 curr = rb_entry(nd, struct symbol, rb_node);
44 72
45 if (priv_size) { 73 if (prev->end == prev->start)
46 memset(self, 0, priv_size); 74 prev->end = curr->start - 1;
47 self = ((void *)self) + priv_size;
48 } 75 }
76
77 /* Last entry */
78 if (curr->end == curr->start)
79 curr->end = roundup(curr->start, 4096);
80}
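/*
 * Worked example for symbols__fixup_end(): kallsyms only provides start
 * addresses, so every symbol arrives with end == start. Given sys_foo at
 * c0100000 and sys_bar at c0100200:
 *	sys_foo->end becomes c01001ff	(next symbol's start - 1)
 *	sys_bar->end becomes c0101000	(start rounded up to 4 KiB)
 */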
81
82static void __thread__fixup_maps_end(struct thread *self, enum map_type type)
83{
84 struct map *prev, *curr;
85 struct rb_node *nd, *prevnd = rb_first(&self->maps[type]);
86
87 if (prevnd == NULL)
88 return;
89
90 curr = rb_entry(prevnd, struct map, rb_node);
91
92 for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) {
93 prev = curr;
94 curr = rb_entry(nd, struct map, rb_node);
95 prev->end = curr->start - 1;
96 }
97
98 /*
99 * We don't have the actual symbols yet, so guess the
100 * last map's final address.
101 */
102 curr->end = ~0UL;
103}
104
105static void thread__fixup_maps_end(struct thread *self)
106{
107 int i;
108 for (i = 0; i < MAP__NR_TYPES; ++i)
109 __thread__fixup_maps_end(self, i);
110}
111
112static struct symbol *symbol__new(u64 start, u64 len, const char *name)
113{
114 size_t namelen = strlen(name) + 1;
115 struct symbol *self = zalloc(symbol__priv_size +
116 sizeof(*self) + namelen);
117 if (self == NULL)
118 return NULL;
119
120 if (symbol__priv_size)
121 self = ((void *)self) + symbol__priv_size;
122
49 self->start = start; 123 self->start = start;
50 self->end = len ? start + len - 1 : start; 124 self->end = len ? start + len - 1 : start;
125
126 pr_debug3("%s: %s %#Lx-%#Lx\n", __func__, name, start, self->end);
127
51 memcpy(self->name, name, namelen); 128 memcpy(self->name, name, namelen);
52 129
53 return self; 130 return self;
54} 131}
55 132
56static void symbol__delete(struct symbol *self, unsigned int priv_size) 133static void symbol__delete(struct symbol *self)
57{ 134{
58 free(((void *)self) - priv_size); 135 free(((void *)self) - symbol__priv_size);
59} 136}
60 137
61static size_t symbol__fprintf(struct symbol *self, FILE *fp) 138static size_t symbol__fprintf(struct symbol *self, FILE *fp)
62{ 139{
63 if (!self->module) 140 return fprintf(fp, " %llx-%llx %s\n",
64 return fprintf(fp, " %llx-%llx %s\n",
65 self->start, self->end, self->name); 141 self->start, self->end, self->name);
66 else
67 return fprintf(fp, " %llx-%llx %s \t[%s]\n",
68 self->start, self->end, self->name, self->module->name);
69} 142}
70 143
71struct dso *dso__new(const char *name, unsigned int sym_priv_size) 144static void dso__set_long_name(struct dso *self, char *name)
145{
146 if (name == NULL)
147 return;
148 self->long_name = name;
149 self->long_name_len = strlen(name);
150}
151
152static void dso__set_basename(struct dso *self)
153{
154 self->short_name = basename(self->long_name);
155}
156
157struct dso *dso__new(const char *name)
72{ 158{
73 struct dso *self = malloc(sizeof(*self) + strlen(name) + 1); 159 struct dso *self = malloc(sizeof(*self) + strlen(name) + 1);
74 160
75 if (self != NULL) { 161 if (self != NULL) {
162 int i;
76 strcpy(self->name, name); 163 strcpy(self->name, name);
77 self->syms = RB_ROOT; 164 dso__set_long_name(self, self->name);
78 self->sym_priv_size = sym_priv_size; 165 self->short_name = self->name;
166 for (i = 0; i < MAP__NR_TYPES; ++i)
167 self->symbols[i] = RB_ROOT;
79 self->find_symbol = dso__find_symbol; 168 self->find_symbol = dso__find_symbol;
80 self->slen_calculated = 0; 169 self->slen_calculated = 0;
81 self->origin = DSO__ORIG_NOT_FOUND; 170 self->origin = DSO__ORIG_NOT_FOUND;
171 self->loaded = 0;
172 self->has_build_id = 0;
82 } 173 }
83 174
84 return self; 175 return self;
85} 176}
86 177
87static void dso__delete_symbols(struct dso *self) 178static void symbols__delete(struct rb_root *self)
88{ 179{
89 struct symbol *pos; 180 struct symbol *pos;
90 struct rb_node *next = rb_first(&self->syms); 181 struct rb_node *next = rb_first(self);
91 182
92 while (next) { 183 while (next) {
93 pos = rb_entry(next, struct symbol, rb_node); 184 pos = rb_entry(next, struct symbol, rb_node);
94 next = rb_next(&pos->rb_node); 185 next = rb_next(&pos->rb_node);
95 rb_erase(&pos->rb_node, &self->syms); 186 rb_erase(&pos->rb_node, self);
96 symbol__delete(pos, self->sym_priv_size); 187 symbol__delete(pos);
97 } 188 }
98} 189}
99 190
100void dso__delete(struct dso *self) 191void dso__delete(struct dso *self)
101{ 192{
102 dso__delete_symbols(self); 193 int i;
194 for (i = 0; i < MAP__NR_TYPES; ++i)
195 symbols__delete(&self->symbols[i]);
196 if (self->long_name != self->name)
197 free(self->long_name);
103 free(self); 198 free(self);
104} 199}
105 200
106static void dso__insert_symbol(struct dso *self, struct symbol *sym) 201void dso__set_build_id(struct dso *self, void *build_id)
107{ 202{
108 struct rb_node **p = &self->syms.rb_node; 203 memcpy(self->build_id, build_id, sizeof(self->build_id));
204 self->has_build_id = 1;
205}
206
207static void symbols__insert(struct rb_root *self, struct symbol *sym)
208{
209 struct rb_node **p = &self->rb_node;
109 struct rb_node *parent = NULL; 210 struct rb_node *parent = NULL;
110 const u64 ip = sym->start; 211 const u64 ip = sym->start;
111 struct symbol *s; 212 struct symbol *s;
@@ -119,17 +220,17 @@ static void dso__insert_symbol(struct dso *self, struct symbol *sym)
119 p = &(*p)->rb_right; 220 p = &(*p)->rb_right;
120 } 221 }
121 rb_link_node(&sym->rb_node, parent, p); 222 rb_link_node(&sym->rb_node, parent, p);
122 rb_insert_color(&sym->rb_node, &self->syms); 223 rb_insert_color(&sym->rb_node, self);
123} 224}
124 225
125struct symbol *dso__find_symbol(struct dso *self, u64 ip) 226static struct symbol *symbols__find(struct rb_root *self, u64 ip)
126{ 227{
127 struct rb_node *n; 228 struct rb_node *n;
128 229
129 if (self == NULL) 230 if (self == NULL)
130 return NULL; 231 return NULL;
131 232
132 n = self->syms.rb_node; 233 n = self->rb_node;
133 234
134 while (n) { 235 while (n) {
135 struct symbol *s = rb_entry(n, struct symbol, rb_node); 236 struct symbol *s = rb_entry(n, struct symbol, rb_node);
@@ -145,12 +246,42 @@ struct symbol *dso__find_symbol(struct dso *self, u64 ip)
145 return NULL; 246 return NULL;
146} 247}
147 248
148size_t dso__fprintf(struct dso *self, FILE *fp) 249struct symbol *dso__find_symbol(struct dso *self, enum map_type type, u64 addr)
149{ 250{
150 size_t ret = fprintf(fp, "dso: %s\n", self->name); 251 return symbols__find(&self->symbols[type], addr);
252}
253
254int build_id__sprintf(u8 *self, int len, char *bf)
255{
256 char *bid = bf;
257 u8 *raw = self;
258 int i;
259
260 for (i = 0; i < len; ++i) {
261 sprintf(bid, "%02x", *raw);
262 ++raw;
263 bid += 2;
264 }
265
266 return raw - self;
267}
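/*
 * Illustrative sketch, not part of the patch: rendering a build-id as
 * hex. Assumes BUILD_ID_SIZE is the 20-byte SHA-1 size from symbol.h;
 * the output buffer needs two characters per byte plus a trailing NUL.
 */
static void build_id_example(const u8 *bid)
{
	char hex[BUILD_ID_SIZE * 2 + 1];

	build_id__sprintf((u8 *)bid, BUILD_ID_SIZE, hex);
	printf("build-id: %s\n", hex);
}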
268
269size_t dso__fprintf_buildid(struct dso *self, FILE *fp)
270{
271 char sbuild_id[BUILD_ID_SIZE * 2 + 1];
272
273 build_id__sprintf(self->build_id, sizeof(self->build_id), sbuild_id);
274 return fprintf(fp, "%s", sbuild_id);
275}
151 276
277size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp)
278{
152 struct rb_node *nd; 279 struct rb_node *nd;
153 for (nd = rb_first(&self->syms); nd; nd = rb_next(nd)) { 280 size_t ret = fprintf(fp, "dso: %s (", self->short_name);
281
282 ret += dso__fprintf_buildid(self, fp);
283 ret += fprintf(fp, ")\n");
284 for (nd = rb_first(&self->symbols[type]); nd; nd = rb_next(nd)) {
154 struct symbol *pos = rb_entry(nd, struct symbol, rb_node); 285 struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
155 ret += symbol__fprintf(pos, fp); 286 ret += symbol__fprintf(pos, fp);
156 } 287 }
@@ -158,13 +289,17 @@ size_t dso__fprintf(struct dso *self, FILE *fp)
158 return ret; 289 return ret;
159} 290}
160 291
161static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int v) 292/*
293 * Loads the function entries in /proc/kallsyms into kernel_map->dso,
294 * so that in the next step we can set each symbol's ->end address and
295 * then call dso__split_kallsyms.
296 */
297static int dso__load_all_kallsyms(struct dso *self, struct map *map)
162{ 298{
163 struct rb_node *nd, *prevnd;
164 char *line = NULL; 299 char *line = NULL;
165 size_t n; 300 size_t n;
301 struct rb_root *root = &self->symbols[map->type];
166 FILE *file = fopen("/proc/kallsyms", "r"); 302 FILE *file = fopen("/proc/kallsyms", "r");
167 int count = 0;
168 303
169 if (file == NULL) 304 if (file == NULL)
170 goto out_failure; 305 goto out_failure;
@@ -174,6 +309,7 @@ static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int v)
174 struct symbol *sym; 309 struct symbol *sym;
175 int line_len, len; 310 int line_len, len;
176 char symbol_type; 311 char symbol_type;
312 char *symbol_name;
177 313
178 line_len = getline(&line, &n, file); 314 line_len = getline(&line, &n, file);
179 if (line_len < 0) 315 if (line_len < 0)
@@ -196,44 +332,26 @@ static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int v)
196 */ 332 */
197 if (symbol_type != 'T' && symbol_type != 'W') 333 if (symbol_type != 'T' && symbol_type != 'W')
198 continue; 334 continue;
335
336 symbol_name = line + len + 2;
199 /* 337 /*
200 * Well fix up the end later, when we have all sorted. 338 * Will fix up the end later, when we have all symbols sorted.
201 */ 339 */
202 sym = symbol__new(start, 0xdead, line + len + 2, 340 sym = symbol__new(start, 0, symbol_name);
203 self->sym_priv_size, 0, v);
204 341
205 if (sym == NULL) 342 if (sym == NULL)
206 goto out_delete_line; 343 goto out_delete_line;
207 344 /*
208 if (filter && filter(self, sym)) 345 * We will pass the symbols to the filter later, in
209 symbol__delete(sym, self->sym_priv_size); 346 * map__split_kallsyms, when we have split the maps per module
210 else { 347 */
211 dso__insert_symbol(self, sym); 348 symbols__insert(root, sym);
212 count++;
213 }
214 }
215
216 /*
217 * Now that we have all sorted out, just set the ->end of all
218 * symbols
219 */
220 prevnd = rb_first(&self->syms);
221
222 if (prevnd == NULL)
223 goto out_delete_line;
224
225 for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) {
226 struct symbol *prev = rb_entry(prevnd, struct symbol, rb_node),
227 *curr = rb_entry(nd, struct symbol, rb_node);
228
229 prev->end = curr->start - 1;
230 prevnd = nd;
231 } 349 }
232 350
233 free(line); 351 free(line);
234 fclose(file); 352 fclose(file);
235 353
236 return count; 354 return 0;
237 355
238out_delete_line: 356out_delete_line:
239 free(line); 357 free(line);
@@ -241,14 +359,114 @@ out_failure:
241 return -1; 359 return -1;
242} 360}
243 361
244static int dso__load_perf_map(struct dso *self, symbol_filter_t filter, int v) 362/*
363 * Split the symbols into maps, making sure there are no overlaps, i.e. the
364 * kernel range is broken in several maps, named [kernel].N, as we don't have
365 * the original ELF section names vmlinux have.
366 */
367static int dso__split_kallsyms(struct dso *self, struct map *map, struct thread *thread,
368 symbol_filter_t filter)
369{
370 struct map *curr_map = map;
371 struct symbol *pos;
372 int count = 0;
373 struct rb_root *root = &self->symbols[map->type];
374 struct rb_node *next = rb_first(root);
375 int kernel_range = 0;
376
377 while (next) {
378 char *module;
379
380 pos = rb_entry(next, struct symbol, rb_node);
381 next = rb_next(&pos->rb_node);
382
383 module = strchr(pos->name, '\t');
384 if (module) {
385 if (!thread->use_modules)
386 goto discard_symbol;
387
388 *module++ = '\0';
389
390 if (strcmp(self->name, module)) {
391 curr_map = thread__find_map_by_name(thread, module);
392 if (curr_map == NULL) {
393 pr_debug("/proc/{kallsyms,modules} "
394 "inconsistency!\n");
395 return -1;
396 }
397 }
398 /*
399 * So that we look just like we get from .ko files,
400 * i.e. not prelinked, relative to map->start.
401 */
402 pos->start = curr_map->map_ip(curr_map, pos->start);
403 pos->end = curr_map->map_ip(curr_map, pos->end);
404 } else if (curr_map != map) {
405 char dso_name[PATH_MAX];
406 struct dso *dso;
407
408 snprintf(dso_name, sizeof(dso_name), "[kernel].%d",
409 kernel_range++);
410
411 dso = dso__new(dso_name);
412 if (dso == NULL)
413 return -1;
414
415 curr_map = map__new2(pos->start, dso, map->type);
416 if (curr_map == NULL) {
417 dso__delete(dso);
418 return -1;
419 }
420
421 curr_map->map_ip = curr_map->unmap_ip = identity__map_ip;
422 __thread__insert_map(thread, curr_map);
424 }
425
426 if (filter && filter(curr_map, pos)) {
427discard_symbol: rb_erase(&pos->rb_node, root);
428 symbol__delete(pos);
429 } else {
430 if (curr_map != map) {
431 rb_erase(&pos->rb_node, root);
432 symbols__insert(&curr_map->dso->symbols[curr_map->type], pos);
433 }
434 count++;
435 }
436 }
437
438 return count;
439}
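/*
 * For reference, the kallsyms lines this function splits look like:
 *	c01000c0 T do_fork			-> stays in the kernel map
 *	f8a01000 t e1000_probe	[e1000]		-> moved to the [e1000] map
 * and kernel symbols falling outside the main text range are put into
 * synthetic "[kernel].N" maps created with map__new2() above.
 */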
440
441
442static int dso__load_kallsyms(struct dso *self, struct map *map,
443 struct thread *thread, symbol_filter_t filter)
444{
445 if (dso__load_all_kallsyms(self, map) < 0)
446 return -1;
447
448 symbols__fixup_end(&self->symbols[map->type]);
449 self->origin = DSO__ORIG_KERNEL;
450
451 return dso__split_kallsyms(self, map, thread, filter);
452}
453
454size_t kernel_maps__fprintf(FILE *fp)
455{
456 size_t printed = fprintf(fp, "Kernel maps:\n");
457 printed += thread__fprintf_maps(kthread, fp);
458 return printed + fprintf(fp, "END kernel maps\n");
459}
460
461static int dso__load_perf_map(struct dso *self, struct map *map,
462 symbol_filter_t filter)
245{ 463{
246 char *line = NULL; 464 char *line = NULL;
247 size_t n; 465 size_t n;
248 FILE *file; 466 FILE *file;
249 int nr_syms = 0; 467 int nr_syms = 0;
250 468
251 file = fopen(self->name, "r"); 469 file = fopen(self->long_name, "r");
252 if (file == NULL) 470 if (file == NULL)
253 goto out_failure; 471 goto out_failure;
254 472
@@ -278,16 +496,15 @@ static int dso__load_perf_map(struct dso *self, symbol_filter_t filter, int v)
278 if (len + 2 >= line_len) 496 if (len + 2 >= line_len)
279 continue; 497 continue;
280 498
281 sym = symbol__new(start, size, line + len, 499 sym = symbol__new(start, size, line + len);
282 self->sym_priv_size, start, v);
283 500
284 if (sym == NULL) 501 if (sym == NULL)
285 goto out_delete_line; 502 goto out_delete_line;
286 503
287 if (filter && filter(self, sym)) 504 if (filter && filter(map, sym))
288 symbol__delete(sym, self->sym_priv_size); 505 symbol__delete(sym);
289 else { 506 else {
290 dso__insert_symbol(self, sym); 507 symbols__insert(&self->symbols[map->type], sym);
291 nr_syms++; 508 nr_syms++;
292 } 509 }
293 } 510 }
@@ -393,7 +610,8 @@ static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
393 * And always look at the original dso, not at debuginfo packages, that 610 * And always look at the original dso, not at debuginfo packages, that
394 * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS). 611 * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS).
395 */ 612 */
396static int dso__synthesize_plt_symbols(struct dso *self, int v) 613static int dso__synthesize_plt_symbols(struct dso *self, struct map *map,
614 symbol_filter_t filter)
397{ 615{
398 uint32_t nr_rel_entries, idx; 616 uint32_t nr_rel_entries, idx;
399 GElf_Sym sym; 617 GElf_Sym sym;
@@ -409,7 +627,7 @@ static int dso__synthesize_plt_symbols(struct dso *self, int v)
409 Elf *elf; 627 Elf *elf;
410 int nr = 0, symidx, fd, err = 0; 628 int nr = 0, symidx, fd, err = 0;
411 629
412 fd = open(self->name, O_RDONLY); 630 fd = open(self->long_name, O_RDONLY);
413 if (fd < 0) 631 if (fd < 0)
414 goto out; 632 goto out;
415 633
@@ -477,12 +695,16 @@ static int dso__synthesize_plt_symbols(struct dso *self, int v)
477 "%s@plt", elf_sym__name(&sym, symstrs)); 695 "%s@plt", elf_sym__name(&sym, symstrs));
478 696
479 f = symbol__new(plt_offset, shdr_plt.sh_entsize, 697 f = symbol__new(plt_offset, shdr_plt.sh_entsize,
480 sympltname, self->sym_priv_size, 0, v); 698 sympltname);
481 if (!f) 699 if (!f)
482 goto out_elf_end; 700 goto out_elf_end;
483 701
484 dso__insert_symbol(self, f); 702 if (filter && filter(map, f))
485 ++nr; 703 symbol__delete(f);
704 else {
705 symbols__insert(&self->symbols[map->type], f);
706 ++nr;
707 }
486 } 708 }
487 } else if (shdr_rel_plt.sh_type == SHT_REL) { 709 } else if (shdr_rel_plt.sh_type == SHT_REL) {
488 GElf_Rel pos_mem, *pos; 710 GElf_Rel pos_mem, *pos;
@@ -495,12 +717,16 @@ static int dso__synthesize_plt_symbols(struct dso *self, int v)
495 "%s@plt", elf_sym__name(&sym, symstrs)); 717 "%s@plt", elf_sym__name(&sym, symstrs));
496 718
497 f = symbol__new(plt_offset, shdr_plt.sh_entsize, 719 f = symbol__new(plt_offset, shdr_plt.sh_entsize,
498 sympltname, self->sym_priv_size, 0, v); 720 sympltname);
499 if (!f) 721 if (!f)
500 goto out_elf_end; 722 goto out_elf_end;
501 723
502 dso__insert_symbol(self, f); 724 if (filter && filter(map, f))
503 ++nr; 725 symbol__delete(f);
726 else {
727 symbols__insert(&self->symbols[map->type], f);
728 ++nr;
729 }
504 } 730 }
505 } 731 }
506 732
@@ -513,14 +739,18 @@ out_close:
513 if (err == 0) 739 if (err == 0)
514 return nr; 740 return nr;
515out: 741out:
516 fprintf(stderr, "%s: problems reading %s PLT info.\n", 742 pr_warning("%s: problems reading %s PLT info.\n",
517 __func__, self->name); 743 __func__, self->long_name);
518 return 0; 744 return 0;
519} 745}
520 746
521static int dso__load_sym(struct dso *self, int fd, const char *name, 747static int dso__load_sym(struct dso *self, struct map *map,
522 symbol_filter_t filter, int v, struct module *mod) 748 struct thread *thread, const char *name, int fd,
749 symbol_filter_t filter, int kernel, int kmodule)
523{ 750{
751 struct map *curr_map = map;
752 struct dso *curr_dso = self;
753 size_t dso_name_len = strlen(self->short_name);
524 Elf_Data *symstrs, *secstrs; 754 Elf_Data *symstrs, *secstrs;
525 uint32_t nr_syms; 755 uint32_t nr_syms;
526 int err = -1; 756 int err = -1;
@@ -531,19 +761,16 @@ static int dso__load_sym(struct dso *self, int fd, const char *name,
531 GElf_Sym sym; 761 GElf_Sym sym;
532 Elf_Scn *sec, *sec_strndx; 762 Elf_Scn *sec, *sec_strndx;
533 Elf *elf; 763 Elf *elf;
534 int nr = 0, kernel = !strcmp("[kernel]", self->name); 764 int nr = 0;
535 765
536 elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); 766 elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
537 if (elf == NULL) { 767 if (elf == NULL) {
538 if (v) 768 pr_err("%s: cannot read %s ELF file.\n", __func__, name);
539 fprintf(stderr, "%s: cannot read %s ELF file.\n",
540 __func__, name);
541 goto out_close; 769 goto out_close;
542 } 770 }
543 771
544 if (gelf_getehdr(elf, &ehdr) == NULL) { 772 if (gelf_getehdr(elf, &ehdr) == NULL) {
545 if (v) 773 pr_err("%s: cannot get elf header.\n", __func__);
546 fprintf(stderr, "%s: cannot get elf header.\n", __func__);
547 goto out_elf_end; 774 goto out_elf_end;
548 } 775 }
549 776
@@ -587,9 +814,7 @@ static int dso__load_sym(struct dso *self, int fd, const char *name,
587 elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) { 814 elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) {
588 struct symbol *f; 815 struct symbol *f;
589 const char *elf_name; 816 const char *elf_name;
590 char *demangled; 817 char *demangled = NULL;
591 u64 obj_start;
592 struct section *section = NULL;
593 int is_label = elf_sym__is_label(&sym); 818 int is_label = elf_sym__is_label(&sym);
594 const char *section_name; 819 const char *section_name;
595 820
@@ -605,52 +830,85 @@ static int dso__load_sym(struct dso *self, int fd, const char *name,
605 if (is_label && !elf_sec__is_text(&shdr, secstrs)) 830 if (is_label && !elf_sec__is_text(&shdr, secstrs))
606 continue; 831 continue;
607 832
833 elf_name = elf_sym__name(&sym, symstrs);
608 section_name = elf_sec__name(&shdr, secstrs); 834 section_name = elf_sec__name(&shdr, secstrs);
609 obj_start = sym.st_value;
610 835
611 if (self->adjust_symbols) { 836 if (kernel || kmodule) {
612 if (v >= 2) 837 char dso_name[PATH_MAX];
613 printf("adjusting symbol: st_value: %Lx sh_addr: %Lx sh_offset: %Lx\n",
614 (u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset);
615 838
616 sym.st_value -= shdr.sh_addr - shdr.sh_offset; 839 if (strcmp(section_name,
617 } 840 curr_dso->short_name + dso_name_len) == 0)
841 goto new_symbol;
618 842
619 if (mod) { 843 if (strcmp(section_name, ".text") == 0) {
620 section = mod->sections->find_section(mod->sections, section_name); 844 curr_map = map;
621 if (section) 845 curr_dso = self;
622 sym.st_value += section->vma; 846 goto new_symbol;
623 else {
624 fprintf(stderr, "dso__load_sym() module %s lookup of %s failed\n",
625 mod->name, section_name);
626 goto out_elf_end;
627 } 847 }
848
849 snprintf(dso_name, sizeof(dso_name),
850 "%s%s", self->short_name, section_name);
851
852 curr_map = thread__find_map_by_name(thread, dso_name);
853 if (curr_map == NULL) {
854 u64 start = sym.st_value;
855
856 if (kmodule)
857 start += map->start + shdr.sh_offset;
858
859 curr_dso = dso__new(dso_name);
860 if (curr_dso == NULL)
861 goto out_elf_end;
862 curr_map = map__new2(start, curr_dso,
863 MAP__FUNCTION);
864 if (curr_map == NULL) {
865 dso__delete(curr_dso);
866 goto out_elf_end;
867 }
868 curr_map->map_ip = identity__map_ip;
869 curr_map->unmap_ip = identity__map_ip;
870 curr_dso->origin = DSO__ORIG_KERNEL;
871 __thread__insert_map(kthread, curr_map);
872 dsos__add(&dsos__kernel, curr_dso);
873 } else
874 curr_dso = curr_map->dso;
875
876 goto new_symbol;
877 }
878
879 if (curr_dso->adjust_symbols) {
880 pr_debug2("adjusting symbol: st_value: %Lx sh_addr: "
881 "%Lx sh_offset: %Lx\n", (u64)sym.st_value,
882 (u64)shdr.sh_addr, (u64)shdr.sh_offset);
883 sym.st_value -= shdr.sh_addr - shdr.sh_offset;
628 } 884 }
629 /* 885 /*
630 * We need to figure out if the object was created from C++ sources 886 * We need to figure out if the object was created from C++ sources
631 * DWARF DW_compile_unit has this, but we don't always have access 887 * DWARF DW_compile_unit has this, but we don't always have access
632 * to it... 888 * to it...
633 */ 889 */
634 elf_name = elf_sym__name(&sym, symstrs);
635 demangled = bfd_demangle(NULL, elf_name, DMGL_PARAMS | DMGL_ANSI); 890 demangled = bfd_demangle(NULL, elf_name, DMGL_PARAMS | DMGL_ANSI);
636 if (demangled != NULL) 891 if (demangled != NULL)
637 elf_name = demangled; 892 elf_name = demangled;
638 893new_symbol:
639 f = symbol__new(sym.st_value, sym.st_size, elf_name, 894 f = symbol__new(sym.st_value, sym.st_size, elf_name);
640 self->sym_priv_size, obj_start, v);
641 free(demangled); 895 free(demangled);
642 if (!f) 896 if (!f)
643 goto out_elf_end; 897 goto out_elf_end;
644 898
645 if (filter && filter(self, f)) 899 if (filter && filter(curr_map, f))
646 symbol__delete(f, self->sym_priv_size); 900 symbol__delete(f);
647 else { 901 else {
648 f->module = mod; 902 symbols__insert(&curr_dso->symbols[curr_map->type], f);
649 dso__insert_symbol(self, f);
650 nr++; 903 nr++;
651 } 904 }
652 } 905 }
653 906
907 /*
908 * For misannotated, zeroed, ASM function sizes.
909 */
910 if (nr > 0)
911 symbols__fixup_end(&self->symbols[map->type]);
654 err = nr; 912 err = nr;
655out_elf_end: 913out_elf_end:
656 elf_end(elf); 914 elf_end(elf);
@@ -658,63 +916,153 @@ out_close:
658 return err; 916 return err;
659} 917}
660 918
661#define BUILD_ID_SIZE 128 919static bool dso__build_id_equal(const struct dso *self, u8 *build_id)
920{
921 return memcmp(self->build_id, build_id, sizeof(self->build_id)) == 0;
922}
662 923
663static char *dso__read_build_id(struct dso *self, int v) 924static bool __dsos__read_build_ids(struct list_head *head)
664{ 925{
665 int i; 926 bool have_build_id = false;
927 struct dso *pos;
928
929 list_for_each_entry(pos, head, node)
930 if (filename__read_build_id(pos->long_name, pos->build_id,
931 sizeof(pos->build_id)) > 0) {
932 have_build_id = true;
933 pos->has_build_id = true;
934 }
935
936 return have_build_id;
937}
938
939bool dsos__read_build_ids(void)
940{
941 return __dsos__read_build_ids(&dsos__kernel) ||
942 __dsos__read_build_ids(&dsos__user);
943}
944
945/*
946 * Align offset to 4 bytes as needed for note name and descriptor data.
947 */
948#define NOTE_ALIGN(n) (((n) + 3) & -4U)
949
950int filename__read_build_id(const char *filename, void *bf, size_t size)
951{
952 int fd, err = -1;
666 GElf_Ehdr ehdr; 953 GElf_Ehdr ehdr;
667 GElf_Shdr shdr; 954 GElf_Shdr shdr;
668 Elf_Data *build_id_data; 955 Elf_Data *data;
669 Elf_Scn *sec; 956 Elf_Scn *sec;
670 char *build_id = NULL, *bid; 957 Elf_Kind ek;
671 unsigned char *raw; 958 void *ptr;
672 Elf *elf; 959 Elf *elf;
673 int fd = open(self->name, O_RDONLY);
674 960
961 if (size < BUILD_ID_SIZE)
962 goto out;
963
964 fd = open(filename, O_RDONLY);
675 if (fd < 0) 965 if (fd < 0)
676 goto out; 966 goto out;
677 967
678 elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); 968 elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
679 if (elf == NULL) { 969 if (elf == NULL) {
680 if (v) 970 pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename);
681 fprintf(stderr, "%s: cannot read %s ELF file.\n",
682 __func__, self->name);
683 goto out_close; 971 goto out_close;
684 } 972 }
685 973
974 ek = elf_kind(elf);
975 if (ek != ELF_K_ELF)
976 goto out_elf_end;
977
686 if (gelf_getehdr(elf, &ehdr) == NULL) { 978 if (gelf_getehdr(elf, &ehdr) == NULL) {
687 if (v) 979 pr_err("%s: cannot get elf header.\n", __func__);
688 fprintf(stderr, "%s: cannot get elf header.\n", __func__);
689 goto out_elf_end; 980 goto out_elf_end;
690 } 981 }
691 982
692 sec = elf_section_by_name(elf, &ehdr, &shdr, ".note.gnu.build-id", NULL); 983 sec = elf_section_by_name(elf, &ehdr, &shdr,
693 if (sec == NULL) 984 ".note.gnu.build-id", NULL);
694 goto out_elf_end; 985 if (sec == NULL) {
986 sec = elf_section_by_name(elf, &ehdr, &shdr,
987 ".notes", NULL);
988 if (sec == NULL)
989 goto out_elf_end;
990 }
695 991
696 build_id_data = elf_getdata(sec, NULL); 992 data = elf_getdata(sec, NULL);
697 if (build_id_data == NULL) 993 if (data == NULL)
698 goto out_elf_end;
699 build_id = malloc(BUILD_ID_SIZE);
700 if (build_id == NULL)
701 goto out_elf_end; 994 goto out_elf_end;
702 raw = build_id_data->d_buf + 16;
703 bid = build_id;
704 995
705 for (i = 0; i < 20; ++i) { 996 ptr = data->d_buf;
706 sprintf(bid, "%02x", *raw); 997 while (ptr < (data->d_buf + data->d_size)) {
707 ++raw; 998 GElf_Nhdr *nhdr = ptr;
708 bid += 2; 999 int namesz = NOTE_ALIGN(nhdr->n_namesz),
1000 descsz = NOTE_ALIGN(nhdr->n_descsz);
1001 const char *name;
1002
1003 ptr += sizeof(*nhdr);
1004 name = ptr;
1005 ptr += namesz;
1006 if (nhdr->n_type == NT_GNU_BUILD_ID &&
1007 nhdr->n_namesz == sizeof("GNU")) {
1008 if (memcmp(name, "GNU", sizeof("GNU")) == 0) {
1009 memcpy(bf, ptr, BUILD_ID_SIZE);
1010 err = BUILD_ID_SIZE;
1011 break;
1012 }
1013 }
1014 ptr += descsz;
709 } 1015 }
710 if (v >= 2)
711 printf("%s(%s): %s\n", __func__, self->name, build_id);
712out_elf_end: 1016out_elf_end:
713 elf_end(elf); 1017 elf_end(elf);
714out_close: 1018out_close:
715 close(fd); 1019 close(fd);
716out: 1020out:
717 return build_id; 1021 return err;
1022}
1023
1024int sysfs__read_build_id(const char *filename, void *build_id, size_t size)
1025{
1026 int fd, err = -1;
1027
1028 if (size < BUILD_ID_SIZE)
1029 goto out;
1030
1031 fd = open(filename, O_RDONLY);
1032 if (fd < 0)
1033 goto out;
1034
1035 while (1) {
1036 char bf[BUFSIZ];
1037 GElf_Nhdr nhdr;
1038 int namesz, descsz;
1039
1040 if (read(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr))
1041 break;
1042
1043 namesz = NOTE_ALIGN(nhdr.n_namesz);
1044 descsz = NOTE_ALIGN(nhdr.n_descsz);
1045 if (nhdr.n_type == NT_GNU_BUILD_ID &&
1046 nhdr.n_namesz == sizeof("GNU")) {
1047 if (read(fd, bf, namesz) != namesz)
1048 break;
1049 if (memcmp(bf, "GNU", sizeof("GNU")) == 0) {
1050 if (read(fd, build_id,
1051 BUILD_ID_SIZE) == BUILD_ID_SIZE) {
1052 err = 0;
1053 break;
1054 }
1055 } else if (read(fd, bf, descsz) != descsz)
1056 break;
1057 } else {
1058 int n = namesz + descsz;
1059 if (read(fd, bf, n) != n)
1060 break;
1061 }
1062 }
1063 close(fd);
1064out:
1065 return err;
718} 1066}
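/*
 * Illustrative sketch, not part of the patch: both readers above fill
 * the same fixed-size buffer, e.g.
 *	u8 bid[BUILD_ID_SIZE];
 *	sysfs__read_build_id("/sys/kernel/notes", bid, sizeof(bid));
 * which is how thread__create_kernel_map() (sysfs notes) and
 * dsos__read_build_ids() (ELF files on disk) use them further down.
 */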
719 1067
720char dso__symtab_origin(const struct dso *self) 1068char dso__symtab_origin(const struct dso *self)
@@ -726,6 +1074,7 @@ char dso__symtab_origin(const struct dso *self)
726 [DSO__ORIG_UBUNTU] = 'u', 1074 [DSO__ORIG_UBUNTU] = 'u',
727 [DSO__ORIG_BUILDID] = 'b', 1075 [DSO__ORIG_BUILDID] = 'b',
728 [DSO__ORIG_DSO] = 'd', 1076 [DSO__ORIG_DSO] = 'd',
1077 [DSO__ORIG_KMODULE] = 'K',
729 }; 1078 };
730 1079
731 if (self == NULL || self->origin == DSO__ORIG_NOT_FOUND) 1080 if (self == NULL || self->origin == DSO__ORIG_NOT_FOUND)
@@ -733,20 +1082,27 @@ char dso__symtab_origin(const struct dso *self)
733 return origin[self->origin]; 1082 return origin[self->origin];
734} 1083}
735 1084
736int dso__load(struct dso *self, symbol_filter_t filter, int v) 1085int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
737{ 1086{
738 int size = PATH_MAX; 1087 int size = PATH_MAX;
739 char *name = malloc(size), *build_id = NULL; 1088 char *name;
1089 u8 build_id[BUILD_ID_SIZE];
740 int ret = -1; 1090 int ret = -1;
741 int fd; 1091 int fd;
742 1092
1093 dso__set_loaded(self, map->type);
1094
1095 if (self->kernel)
1096 return dso__load_kernel_sym(self, map, kthread, filter);
1097
1098 name = malloc(size);
743 if (!name) 1099 if (!name)
744 return -1; 1100 return -1;
745 1101
746 self->adjust_symbols = 0; 1102 self->adjust_symbols = 0;
747 1103
748 if (strncmp(self->name, "/tmp/perf-", 10) == 0) { 1104 if (strncmp(self->name, "/tmp/perf-", 10) == 0) {
749 ret = dso__load_perf_map(self, filter, v); 1105 ret = dso__load_perf_map(self, map, filter);
750 self->origin = ret > 0 ? DSO__ORIG_JAVA_JIT : 1106 self->origin = ret > 0 ? DSO__ORIG_JAVA_JIT :
751 DSO__ORIG_NOT_FOUND; 1107 DSO__ORIG_NOT_FOUND;
752 return ret; 1108 return ret;
@@ -759,34 +1115,50 @@ more:
759 self->origin++; 1115 self->origin++;
760 switch (self->origin) { 1116 switch (self->origin) {
761 case DSO__ORIG_FEDORA: 1117 case DSO__ORIG_FEDORA:
762 snprintf(name, size, "/usr/lib/debug%s.debug", self->name); 1118 snprintf(name, size, "/usr/lib/debug%s.debug",
1119 self->long_name);
763 break; 1120 break;
764 case DSO__ORIG_UBUNTU: 1121 case DSO__ORIG_UBUNTU:
765 snprintf(name, size, "/usr/lib/debug%s", self->name); 1122 snprintf(name, size, "/usr/lib/debug%s",
1123 self->long_name);
766 break; 1124 break;
767 case DSO__ORIG_BUILDID: 1125 case DSO__ORIG_BUILDID:
768 build_id = dso__read_build_id(self, v); 1126 if (filename__read_build_id(self->long_name, build_id,
769 if (build_id != NULL) { 1127 sizeof(build_id))) {
1128 char build_id_hex[BUILD_ID_SIZE * 2 + 1];
1129
1130 build_id__sprintf(build_id, sizeof(build_id),
1131 build_id_hex);
770 snprintf(name, size, 1132 snprintf(name, size,
771 "/usr/lib/debug/.build-id/%.2s/%s.debug", 1133 "/usr/lib/debug/.build-id/%.2s/%s.debug",
772 build_id, build_id + 2); 1134 build_id_hex, build_id_hex + 2);
773 free(build_id); 1135 if (self->has_build_id)
1136 goto compare_build_id;
774 break; 1137 break;
775 } 1138 }
776 self->origin++; 1139 self->origin++;
777 /* Fall thru */ 1140 /* Fall thru */
778 case DSO__ORIG_DSO: 1141 case DSO__ORIG_DSO:
779 snprintf(name, size, "%s", self->name); 1142 snprintf(name, size, "%s", self->long_name);
780 break; 1143 break;
781 1144
782 default: 1145 default:
783 goto out; 1146 goto out;
784 } 1147 }
785 1148
1149 if (self->has_build_id) {
1150 if (filename__read_build_id(name, build_id,
1151 sizeof(build_id)) < 0)
1152 goto more;
1153compare_build_id:
1154 if (!dso__build_id_equal(self, build_id))
1155 goto more;
1156 }
1157
786 fd = open(name, O_RDONLY); 1158 fd = open(name, O_RDONLY);
787 } while (fd < 0); 1159 } while (fd < 0);
788 1160
789 ret = dso__load_sym(self, fd, name, filter, v, NULL); 1161 ret = dso__load_sym(self, map, NULL, name, fd, filter, 0, 0);
790 close(fd); 1162 close(fd);
791 1163
792 /* 1164 /*
@@ -796,7 +1168,7 @@ more:
796 goto more; 1168 goto more;
797 1169
798 if (ret > 0) { 1170 if (ret > 0) {
799 int nr_plt = dso__synthesize_plt_symbols(self, v); 1171 int nr_plt = dso__synthesize_plt_symbols(self, map, filter);
800 if (nr_plt > 0) 1172 if (nr_plt > 0)
801 ret += nr_plt; 1173 ret += nr_plt;
802 } 1174 }
@@ -807,151 +1179,279 @@ out:
807 return ret; 1179 return ret;
808} 1180}
809 1181
810static int dso__load_module(struct dso *self, struct mod_dso *mods, const char *name, 1182static struct map *thread__find_map_by_name(struct thread *self, char *name)
811 symbol_filter_t filter, int v)
812{ 1183{
813 struct module *mod = mod_dso__find_module(mods, name); 1184 struct rb_node *nd;
814 int err = 0, fd;
815 1185
816 if (mod == NULL || !mod->active) 1186 for (nd = rb_first(&self->maps[MAP__FUNCTION]); nd; nd = rb_next(nd)) {
817 return err; 1187 struct map *map = rb_entry(nd, struct map, rb_node);
818 1188
819 fd = open(mod->path, O_RDONLY); 1189 if (map->dso && strcmp(map->dso->name, name) == 0)
1190 return map;
1191 }
820 1192
821 if (fd < 0) 1193 return NULL;
822 return err; 1194}
823 1195
824 err = dso__load_sym(self, fd, name, filter, v, mod); 1196static int dsos__set_modules_path_dir(char *dirname)
825 close(fd); 1197{
1198 struct dirent *dent;
1199 DIR *dir = opendir(dirname);
826 1200
827 return err; 1201 if (!dir) {
1202 pr_debug("%s: cannot open %s dir\n", __func__, dirname);
1203 return -1;
1204 }
1205
1206 while ((dent = readdir(dir)) != NULL) {
1207 char path[PATH_MAX];
1208
1209 if (dent->d_type == DT_DIR) {
1210 if (!strcmp(dent->d_name, ".") ||
1211 !strcmp(dent->d_name, ".."))
1212 continue;
1213
1214 snprintf(path, sizeof(path), "%s/%s",
1215 dirname, dent->d_name);
1216 if (dsos__set_modules_path_dir(path) < 0)
1217 goto failure;
1218 } else {
1219 char *dot = strrchr(dent->d_name, '.'),
1220 dso_name[PATH_MAX];
1221 struct map *map;
1222 char *long_name;
1223
1224 if (dot == NULL || strcmp(dot, ".ko"))
1225 continue;
1226 snprintf(dso_name, sizeof(dso_name), "[%.*s]",
1227 (int)(dot - dent->d_name), dent->d_name);
1228
1229 strxfrchar(dso_name, '-', '_');
1230 map = thread__find_map_by_name(kthread, dso_name);
1231 if (map == NULL)
1232 continue;
1233
1234 snprintf(path, sizeof(path), "%s/%s",
1235 dirname, dent->d_name);
1236
1237 long_name = strdup(path);
1238 if (long_name == NULL)
1239 goto failure;
1240 dso__set_long_name(map->dso, long_name);
1241 }
1242 }
1243
1244 return 0;
1245failure:
1246 closedir(dir);
1247 return -1;
828} 1248}
829 1249
830int dso__load_modules(struct dso *self, symbol_filter_t filter, int v) 1250static int dsos__set_modules_path(void)
831{ 1251{
832 struct mod_dso *mods = mod_dso__new_dso("modules"); 1252 struct utsname uts;
833 struct module *pos; 1253 char modules_path[PATH_MAX];
834 struct rb_node *next;
835 int err, count = 0;
836 1254
837 err = mod_dso__load_modules(mods); 1255 if (uname(&uts) < 0)
838 1256 return -1;
839 if (err <= 0)
840 return err;
841 1257
842 /* 1258 snprintf(modules_path, sizeof(modules_path), "/lib/modules/%s/kernel",
843 * Iterate over modules, and load active symbols. 1259 uts.release);
844 */
845 next = rb_first(&mods->mods);
846 while (next) {
847 pos = rb_entry(next, struct module, rb_node);
848 err = dso__load_module(self, mods, pos->name, filter, v);
849 1260
850 if (err < 0) 1261 return dsos__set_modules_path_dir(modules_path);
851 break; 1262}
852 1263
853 next = rb_next(&pos->rb_node); 1264/*
854 count += err; 1265 * Constructor variant for modules (where we know from /proc/modules where
855 } 1266 * they are loaded) and for vmlinux, where only after we load all the
1267 * symbols we'll know where it starts and ends.
1268 */
1269static struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
1270{
1271 struct map *self = malloc(sizeof(*self));
856 1272
857 if (err < 0) { 1273 if (self != NULL) {
858 mod_dso__delete_modules(mods); 1274 /*
859 mod_dso__delete_self(mods); 1275 * ->end will be filled after we load all the symbols
860 return err; 1276 */
1277 map__init(self, type, start, 0, 0, dso);
861 } 1278 }
862 1279
863 return count; 1280 return self;
864} 1281}
865 1282
866static inline void dso__fill_symbol_holes(struct dso *self) 1283static int thread__create_module_maps(struct thread *self)
867{ 1284{
868 struct symbol *prev = NULL; 1285 char *line = NULL;
869 struct rb_node *nd; 1286 size_t n;
1287 FILE *file = fopen("/proc/modules", "r");
1288 struct map *map;
870 1289
871 for (nd = rb_last(&self->syms); nd; nd = rb_prev(nd)) { 1290 if (file == NULL)
872 struct symbol *pos = rb_entry(nd, struct symbol, rb_node); 1291 return -1;
873 1292
874 if (prev) { 1293 while (!feof(file)) {
875 u64 hole = 0; 1294 char name[PATH_MAX];
876 int alias = pos->start == prev->start; 1295 u64 start;
1296 struct dso *dso;
1297 char *sep;
1298 int line_len;
877 1299
878 if (!alias) 1300 line_len = getline(&line, &n, file);
879 hole = prev->start - pos->end - 1; 1301 if (line_len < 0)
1302 break;
880 1303
881 if (hole || alias) { 1304 if (!line)
882 if (alias) 1305 goto out_failure;
883 pos->end = prev->end; 1306
884 else if (hole) 1307 line[--line_len] = '\0'; /* \n */
885 pos->end = prev->start - 1; 1308
886 } 1309 sep = strrchr(line, 'x');
1310 if (sep == NULL)
1311 continue;
1312
1313 hex2u64(sep + 1, &start);
1314
1315 sep = strchr(line, ' ');
1316 if (sep == NULL)
1317 continue;
1318
1319 *sep = '\0';
1320
1321 snprintf(name, sizeof(name), "[%s]", line);
1322 dso = dso__new(name);
1323
1324 if (dso == NULL)
1325 goto out_delete_line;
1326
1327 map = map__new2(start, dso, MAP__FUNCTION);
1328 if (map == NULL) {
1329 dso__delete(dso);
1330 goto out_delete_line;
887 } 1331 }
888 prev = pos; 1332
1333 snprintf(name, sizeof(name),
1334 "/sys/module/%s/notes/.note.gnu.build-id", line);
1335 if (sysfs__read_build_id(name, dso->build_id,
1336 sizeof(dso->build_id)) == 0)
1337 dso->has_build_id = true;
1338
1339 dso->origin = DSO__ORIG_KMODULE;
1340 __thread__insert_map(self, map);
1341 dsos__add(&dsos__kernel, dso);
889 } 1342 }
1343
1344 free(line);
1345 fclose(file);
1346
1347 return dsos__set_modules_path();
1348
1349out_delete_line:
1350 free(line);
1351out_failure:
1352 return -1;
890} 1353}
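/*
 * For reference, the /proc/modules lines consumed above look like:
 *	e1000 122880 0 - Live 0xf8a01000
 * strrchr(line, 'x') locates the load address, the first ' ' terminates
 * the module name, and the resulting map is registered as "[e1000]".
 */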
891 1354
892static int dso__load_vmlinux(struct dso *self, const char *vmlinux, 1355static int dso__load_vmlinux(struct dso *self, struct map *map, struct thread *thread,
893 symbol_filter_t filter, int v) 1356 const char *vmlinux, symbol_filter_t filter)
894{ 1357{
895 int err, fd = open(vmlinux, O_RDONLY); 1358 int err = -1, fd;
896 1359
897 if (fd < 0) 1360 if (self->has_build_id) {
898 return -1; 1361 u8 build_id[BUILD_ID_SIZE];
899 1362
900 err = dso__load_sym(self, fd, vmlinux, filter, v, NULL); 1363 if (filename__read_build_id(vmlinux, build_id,
1364 sizeof(build_id)) < 0) {
1365 pr_debug("No build_id in %s, ignoring it\n", vmlinux);
1366 return -1;
1367 }
1368 if (!dso__build_id_equal(self, build_id)) {
1369 char expected_build_id[BUILD_ID_SIZE * 2 + 1],
1370 vmlinux_build_id[BUILD_ID_SIZE * 2 + 1];
1371
1372 build_id__sprintf(self->build_id,
1373 sizeof(self->build_id),
1374 expected_build_id);
1375 build_id__sprintf(build_id, sizeof(build_id),
1376 vmlinux_build_id);
1377 pr_debug("build_id in %s is %s while expected is %s, "
1378 "ignoring it\n", vmlinux, vmlinux_build_id,
1379 expected_build_id);
1380 return -1;
1381 }
1382 }
901 1383
902 if (err > 0) 1384 fd = open(vmlinux, O_RDONLY);
903 dso__fill_symbol_holes(self); 1385 if (fd < 0)
1386 return -1;
904 1387
1388 dso__set_loaded(self, map->type);
1389 err = dso__load_sym(self, map, thread, self->long_name, fd, filter, 1, 0);
905 close(fd); 1390 close(fd);
906 1391
907 return err; 1392 return err;
908} 1393}
909 1394
910int dso__load_kernel(struct dso *self, const char *vmlinux, 1395static int dso__load_kernel_sym(struct dso *self, struct map *map,
911 symbol_filter_t filter, int v, int use_modules) 1396 struct thread *thread, symbol_filter_t filter)
912{ 1397{
913 int err = -1; 1398 int err;
914 1399 bool is_kallsyms;
915 if (vmlinux) { 1400
916 err = dso__load_vmlinux(self, vmlinux, filter, v); 1401 if (vmlinux_path != NULL) {
917 if (err > 0 && use_modules) { 1402 int i;
918 int syms = dso__load_modules(self, filter, v); 1403 pr_debug("Looking at the vmlinux_path (%d entries long)\n",
919 1404 vmlinux_path__nr_entries);
920 if (syms < 0) { 1405 for (i = 0; i < vmlinux_path__nr_entries; ++i) {
921 fprintf(stderr, "dso__load_modules failed!\n"); 1406 err = dso__load_vmlinux(self, map, thread,
922 return syms; 1407 vmlinux_path[i], filter);
1408 if (err > 0) {
1409 pr_debug("Using %s for symbols\n",
1410 vmlinux_path[i]);
1411 dso__set_long_name(self,
1412 strdup(vmlinux_path[i]));
1413 goto out_fixup;
923 } 1414 }
924 err += syms;
925 } 1415 }
926 } 1416 }
927 1417
928 if (err <= 0) 1418 is_kallsyms = self->long_name[0] == '[';
929 err = dso__load_kallsyms(self, filter, v); 1419 if (is_kallsyms)
1420 goto do_kallsyms;
930 1421
931 if (err > 0) 1422 err = dso__load_vmlinux(self, map, thread, self->long_name, filter);
932 self->origin = DSO__ORIG_KERNEL; 1423 if (err <= 0) {
1424 pr_info("The file %s cannot be used, "
1425 "trying to use /proc/kallsyms...", self->long_name);
1426do_kallsyms:
1427 err = dso__load_kallsyms(self, map, thread, filter);
1428 if (err > 0 && !is_kallsyms)
1429 dso__set_long_name(self, strdup("[kernel.kallsyms]"));
1430 }
1431
1432 if (err > 0) {
1433out_fixup:
1434 map__fixup_start(map);
1435 map__fixup_end(map);
1436 }
933 1437
934 return err; 1438 return err;
935} 1439}
936 1440
937LIST_HEAD(dsos); 1441LIST_HEAD(dsos__user);
938struct dso *kernel_dso; 1442LIST_HEAD(dsos__kernel);
939struct dso *vdso; 1443struct dso *vdso;
940struct dso *hypervisor_dso;
941
942const char *vmlinux_name = "vmlinux";
943int modules;
944 1444
945static void dsos__add(struct dso *dso) 1445static void dsos__add(struct list_head *head, struct dso *dso)
946{ 1446{
947 list_add_tail(&dso->node, &dsos); 1447 list_add_tail(&dso->node, head);
948} 1448}
949 1449
950static struct dso *dsos__find(const char *name) 1450static struct dso *dsos__find(struct list_head *head, const char *name)
951{ 1451{
952 struct dso *pos; 1452 struct dso *pos;
953 1453
954 list_for_each_entry(pos, &dsos, node) 1454 list_for_each_entry(pos, head, node)
955 if (strcmp(pos->name, name) == 0) 1455 if (strcmp(pos->name, name) == 0)
956 return pos; 1456 return pos;
957 return NULL; 1457 return NULL;
@@ -959,79 +1459,170 @@ static struct dso *dsos__find(const char *name)
959 1459
960struct dso *dsos__findnew(const char *name) 1460struct dso *dsos__findnew(const char *name)
961{ 1461{
962 struct dso *dso = dsos__find(name); 1462 struct dso *dso = dsos__find(&dsos__user, name);
963 int nr;
964
965 if (dso)
966 return dso;
967
968 dso = dso__new(name, 0);
969 if (!dso)
970 goto out_delete_dso;
971 1463
972 nr = dso__load(dso, NULL, verbose); 1464 if (!dso) {
973 if (nr < 0) { 1465 dso = dso__new(name);
974 eprintf("Failed to open: %s\n", name); 1466 if (dso != NULL) {
975 goto out_delete_dso; 1467 dsos__add(&dsos__user, dso);
1468 dso__set_basename(dso);
1469 }
976 } 1470 }
977 if (!nr)
978 eprintf("No symbols found in: %s, maybe install a debug package?\n", name);
979
980 dsos__add(dso);
981 1471
982 return dso; 1472 return dso;
1473}
983 1474
984out_delete_dso: 1475static void __dsos__fprintf(struct list_head *head, FILE *fp)
985 dso__delete(dso); 1476{
986 return NULL; 1477 struct dso *pos;
1478
1479 list_for_each_entry(pos, head, node) {
1480 int i;
1481 for (i = 0; i < MAP__NR_TYPES; ++i)
1482 dso__fprintf(pos, i, fp);
1483 }
987} 1484}
988 1485
989void dsos__fprintf(FILE *fp) 1486void dsos__fprintf(FILE *fp)
990{ 1487{
1488 __dsos__fprintf(&dsos__kernel, fp);
1489 __dsos__fprintf(&dsos__user, fp);
1490}
1491
1492static size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp)
1493{
991 struct dso *pos; 1494 struct dso *pos;
1495 size_t ret = 0;
992 1496
993 list_for_each_entry(pos, &dsos, node) 1497 list_for_each_entry(pos, head, node) {
994 dso__fprintf(pos, fp); 1498 ret += dso__fprintf_buildid(pos, fp);
1499 ret += fprintf(fp, " %s\n", pos->long_name);
1500 }
1501 return ret;
995} 1502}
996 1503
997static struct symbol *vdso__find_symbol(struct dso *dso, u64 ip) 1504size_t dsos__fprintf_buildid(FILE *fp)
998{ 1505{
999 return dso__find_symbol(dso, ip); 1506 return (__dsos__fprintf_buildid(&dsos__kernel, fp) +
1507 __dsos__fprintf_buildid(&dsos__user, fp));
1000} 1508}
1001 1509
1002int load_kernel(void) 1510static int thread__create_kernel_map(struct thread *self, const char *vmlinux)
1003{ 1511{
1004 int err; 1512 struct map *kmap;
1513 struct dso *kernel = dso__new(vmlinux ?: "[kernel.kallsyms]");
1005 1514
1006 kernel_dso = dso__new("[kernel]", 0); 1515 if (kernel == NULL)
1007 if (!kernel_dso)
1008 return -1; 1516 return -1;
1009 1517
1010 err = dso__load_kernel(kernel_dso, vmlinux_name, NULL, verbose, modules); 1518 kmap = map__new2(0, kernel, MAP__FUNCTION);
1011 if (err <= 0) { 1519 if (kmap == NULL)
1012 dso__delete(kernel_dso); 1520 goto out_delete_kernel_dso;
1013 kernel_dso = NULL;
1014 } else
1015 dsos__add(kernel_dso);
1016 1521
1017 vdso = dso__new("[vdso]", 0); 1522 kmap->map_ip = kmap->unmap_ip = identity__map_ip;
1018 if (!vdso) 1523 kernel->short_name = "[kernel]";
1019 return -1; 1524 kernel->kernel = 1;
1020 1525
1021 vdso->find_symbol = vdso__find_symbol; 1526 vdso = dso__new("[vdso]");
1527 if (vdso == NULL)
1528 goto out_delete_kernel_map;
1529 dso__set_loaded(vdso, MAP__FUNCTION);
1022 1530
1023 dsos__add(vdso); 1531 if (sysfs__read_build_id("/sys/kernel/notes", kernel->build_id,
1532 sizeof(kernel->build_id)) == 0)
1533 kernel->has_build_id = true;
1024 1534
1025 hypervisor_dso = dso__new("[hypervisor]", 0); 1535 __thread__insert_map(self, kmap);
1026 if (!hypervisor_dso) 1536 dsos__add(&dsos__kernel, kernel);
1027 return -1; 1537 dsos__add(&dsos__user, vdso);
1028 dsos__add(hypervisor_dso);
1029 1538
1030 return err; 1539 return 0;
1540
1541out_delete_kernel_map:
1542 map__delete(kmap);
1543out_delete_kernel_dso:
1544 dso__delete(kernel);
1545 return -1;
1546}
1547
1548static void vmlinux_path__exit(void)
1549{
1550 while (--vmlinux_path__nr_entries >= 0) {
1551 free(vmlinux_path[vmlinux_path__nr_entries]);
1552 vmlinux_path[vmlinux_path__nr_entries] = NULL;
1553 }
1554
1555 free(vmlinux_path);
1556 vmlinux_path = NULL;
1031} 1557}
1032 1558
1559static int vmlinux_path__init(void)
1560{
1561 struct utsname uts;
1562 char bf[PATH_MAX];
1563
1564 if (uname(&uts) < 0)
1565 return -1;
1566
1567 vmlinux_path = malloc(sizeof(char *) * 5);
1568 if (vmlinux_path == NULL)
1569 return -1;
1570
1571 vmlinux_path[vmlinux_path__nr_entries] = strdup("vmlinux");
1572 if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
1573 goto out_fail;
1574 ++vmlinux_path__nr_entries;
1575 vmlinux_path[vmlinux_path__nr_entries] = strdup("/boot/vmlinux");
1576 if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
1577 goto out_fail;
1578 ++vmlinux_path__nr_entries;
1579 snprintf(bf, sizeof(bf), "/boot/vmlinux-%s", uts.release);
1580 vmlinux_path[vmlinux_path__nr_entries] = strdup(bf);
1581 if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
1582 goto out_fail;
1583 ++vmlinux_path__nr_entries;
1584 snprintf(bf, sizeof(bf), "/lib/modules/%s/build/vmlinux", uts.release);
1585 vmlinux_path[vmlinux_path__nr_entries] = strdup(bf);
1586 if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
1587 goto out_fail;
1588 ++vmlinux_path__nr_entries;
1589 snprintf(bf, sizeof(bf), "/usr/lib/debug/lib/modules/%s/vmlinux",
1590 uts.release);
1591 vmlinux_path[vmlinux_path__nr_entries] = strdup(bf);
1592 if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
1593 goto out_fail;
1594 ++vmlinux_path__nr_entries;
1595
1596 return 0;
1597
1598out_fail:
1599 vmlinux_path__exit();
1600 return -1;
1601}
1033 1602
1034void symbol__init(void) 1603int symbol__init(struct symbol_conf *conf)
1035{ 1604{
1605 const struct symbol_conf *pconf = conf ?: &symbol_conf__defaults;
1606
1036 elf_version(EV_CURRENT); 1607 elf_version(EV_CURRENT);
1608 symbol__priv_size = pconf->priv_size;
1609 thread__init(kthread, 0);
1610
1611 if (pconf->try_vmlinux_path && vmlinux_path__init() < 0)
1612 return -1;
1613
1614 if (thread__create_kernel_map(kthread, pconf->vmlinux_name) < 0) {
1615 vmlinux_path__exit();
1616 return -1;
1617 }
1618
1619 kthread->use_modules = pconf->use_modules;
1620 if (pconf->use_modules && thread__create_module_maps(kthread) < 0)
1621 pr_debug("Failed to load list of modules in use, "
1622 "continuing...\n");
1623 /*
1624 * Now that we have all the maps created, just set the ->end of them:
1625 */
1626 thread__fixup_maps_end(kthread);
1627 return 0;
1037} 1628}
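
The rewritten entry point replaces the old symbol__init(void)/load_kernel() pair: callers now hand in a struct symbol_conf, or NULL to take the defaults. A minimal caller sketch under that assumption (struct sym_priv is a hypothetical per-tool type):

	struct symbol_conf conf = {
		.priv_size        = sizeof(struct sym_priv), /* hypothetical */
		.try_vmlinux_path = true,  /* walk the vmlinux_path candidates above */
		.use_modules      = true,  /* also create module maps on kthread */
		.vmlinux_name     = NULL,  /* no explicit vmlinux given */
	};

	if (symbol__init(&conf) < 0)
		return -1; /* vmlinux_path or kernel map setup failed */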
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 829da9edba64..17003efa0b39 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -1,11 +1,11 @@
1#ifndef _PERF_SYMBOL_ 1#ifndef __PERF_SYMBOL
2#define _PERF_SYMBOL_ 1 2#define __PERF_SYMBOL 1
3 3
4#include <linux/types.h> 4#include <linux/types.h>
5#include <stdbool.h>
5#include "types.h" 6#include "types.h"
6#include <linux/list.h> 7#include <linux/list.h>
7#include <linux/rbtree.h> 8#include <linux/rbtree.h>
8#include "module.h"
9#include "event.h" 9#include "event.h"
10 10
11#ifdef HAVE_CPLUS_DEMANGLE 11#ifdef HAVE_CPLUS_DEMANGLE
@@ -46,57 +46,75 @@ struct symbol {
46 struct rb_node rb_node; 46 struct rb_node rb_node;
47 u64 start; 47 u64 start;
48 u64 end; 48 u64 end;
49 u64 obj_start;
50 u64 hist_sum;
51 u64 *hist;
52 struct module *module;
53 void *priv;
54 char name[0]; 49 char name[0];
55}; 50};
56 51
52struct symbol_conf {
53 unsigned short priv_size;
54 bool try_vmlinux_path,
55 use_modules;
56 const char *vmlinux_name;
57};
58
59extern unsigned int symbol__priv_size;
60
61static inline void *symbol__priv(struct symbol *self)
62{
63 return ((void *)self) - symbol__priv_size;
64}
65
66struct addr_location {
67 struct thread *thread;
68 struct map *map;
69 struct symbol *sym;
70 u64 addr;
71 char level;
72};
73
57struct dso { 74struct dso {
58 struct list_head node; 75 struct list_head node;
59 struct rb_root syms; 76 struct rb_root symbols[MAP__NR_TYPES];
60 struct symbol *(*find_symbol)(struct dso *, u64 ip); 77 struct symbol *(*find_symbol)(struct dso *self,
61 unsigned int sym_priv_size; 78 enum map_type type, u64 addr);
62 unsigned char adjust_symbols; 79 u8 adjust_symbols:1;
63 unsigned char slen_calculated; 80 u8 slen_calculated:1;
81 u8 has_build_id:1;
82 u8 kernel:1;
64 unsigned char origin; 83 unsigned char origin;
84 u8 loaded;
85 u8 build_id[BUILD_ID_SIZE];
86 u16 long_name_len;
87 const char *short_name;
88 char *long_name;
65 char name[0]; 89 char name[0];
66}; 90};
67 91
68extern const char *sym_hist_filter; 92struct dso *dso__new(const char *name);
69
70typedef int (*symbol_filter_t)(struct dso *self, struct symbol *sym);
71
72struct dso *dso__new(const char *name, unsigned int sym_priv_size);
73void dso__delete(struct dso *self); 93void dso__delete(struct dso *self);
74 94
75static inline void *dso__sym_priv(struct dso *self, struct symbol *sym) 95bool dso__loaded(const struct dso *self, enum map_type type);
76{
77 return ((void *)sym) - self->sym_priv_size;
78}
79
80struct symbol *dso__find_symbol(struct dso *self, u64 ip);
81 96
82int dso__load_kernel(struct dso *self, const char *vmlinux,
83 symbol_filter_t filter, int verbose, int modules);
84int dso__load_modules(struct dso *self, symbol_filter_t filter, int verbose);
85int dso__load(struct dso *self, symbol_filter_t filter, int verbose);
86struct dso *dsos__findnew(const char *name); 97struct dso *dsos__findnew(const char *name);
98int dso__load(struct dso *self, struct map *map, symbol_filter_t filter);
87void dsos__fprintf(FILE *fp); 99void dsos__fprintf(FILE *fp);
100size_t dsos__fprintf_buildid(FILE *fp);
88 101
89size_t dso__fprintf(struct dso *self, FILE *fp); 102size_t dso__fprintf_buildid(struct dso *self, FILE *fp);
103size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp);
90char dso__symtab_origin(const struct dso *self); 104char dso__symtab_origin(const struct dso *self);
105void dso__set_build_id(struct dso *self, void *build_id);
106
107int filename__read_build_id(const char *filename, void *bf, size_t size);
108int sysfs__read_build_id(const char *filename, void *bf, size_t size);
109bool dsos__read_build_ids(void);
110int build_id__sprintf(u8 *self, int len, char *bf);
91 111
92int load_kernel(void); 112size_t kernel_maps__fprintf(FILE *fp);
93 113
94void symbol__init(void); 114int symbol__init(struct symbol_conf *conf);
95 115
96extern struct list_head dsos; 116struct thread;
97extern struct dso *kernel_dso; 117struct thread *kthread;
118extern struct list_head dsos__user, dsos__kernel;
98extern struct dso *vdso; 119extern struct dso *vdso;
99extern struct dso *hypervisor_dso; 120#endif /* __PERF_SYMBOL */
100extern const char *vmlinux_name;
101extern int modules;
102#endif /* _PERF_SYMBOL_ */
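
Note how symbol__priv() steps back symbol__priv_size bytes from the symbol pointer: the private area is expected to sit immediately before the struct symbol it belongs to. A sketch of an allocator honouring that layout (illustrative, not the actual symbol.c implementation):

	static struct symbol *symbol__new(u64 start, u64 len, const char *name)
	{
		size_t namelen = strlen(name) + 1;
		struct symbol *self = calloc(1, symbol__priv_size +
						sizeof(*self) + namelen);

		if (self == NULL)
			return NULL;
		/* skip the priv area so symbol__priv() can step back into it */
		self = ((void *)self) + symbol__priv_size;
		self->start = start;
		self->end   = len ? start + len - 1 : 0;
		memcpy(self->name, name, namelen);
		return self;
	}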
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 45efb5db0d19..603f5610861b 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -6,16 +6,29 @@
6#include "util.h" 6#include "util.h"
7#include "debug.h" 7#include "debug.h"
8 8
9static struct rb_root threads;
10static struct thread *last_match;
11
12void thread__init(struct thread *self, pid_t pid)
13{
14 int i;
15 self->pid = pid;
16 self->comm = NULL;
17 for (i = 0; i < MAP__NR_TYPES; ++i) {
18 self->maps[i] = RB_ROOT;
19 INIT_LIST_HEAD(&self->removed_maps[i]);
20 }
21}
22
9static struct thread *thread__new(pid_t pid) 23static struct thread *thread__new(pid_t pid)
10{ 24{
11 struct thread *self = calloc(1, sizeof(*self)); 25 struct thread *self = zalloc(sizeof(*self));
12 26
13 if (self != NULL) { 27 if (self != NULL) {
14 self->pid = pid; 28 thread__init(self, pid);
15 self->comm = malloc(32); 29 self->comm = malloc(32);
16 if (self->comm) 30 if (self->comm)
17 snprintf(self->comm, 32, ":%d", self->pid); 31 snprintf(self->comm, 32, ":%d", self->pid);
18 INIT_LIST_HEAD(&self->maps);
19 } 32 }
20 33
21 return self; 34 return self;
@@ -29,21 +42,84 @@ int thread__set_comm(struct thread *self, const char *comm)
29 return self->comm ? 0 : -ENOMEM; 42 return self->comm ? 0 : -ENOMEM;
30} 43}
31 44
32static size_t thread__fprintf(struct thread *self, FILE *fp) 45int thread__comm_len(struct thread *self)
46{
47 if (!self->comm_len) {
48 if (!self->comm)
49 return 0;
50 self->comm_len = strlen(self->comm);
51 }
52
53 return self->comm_len;
54}
55
56static const char *map_type__name[MAP__NR_TYPES] = {
57 [MAP__FUNCTION] = "Functions",
58};
59
60static size_t __thread__fprintf_maps(struct thread *self,
61 enum map_type type, FILE *fp)
62{
63 size_t printed = fprintf(fp, "%s:\n", map_type__name[type]);
64 struct rb_node *nd;
65
66 for (nd = rb_first(&self->maps[type]); nd; nd = rb_next(nd)) {
67 struct map *pos = rb_entry(nd, struct map, rb_node);
68 printed += fprintf(fp, "Map:");
69 printed += map__fprintf(pos, fp);
70 if (verbose > 1) {
71 printed += dso__fprintf(pos->dso, type, fp);
72 printed += fprintf(fp, "--\n");
73 }
74 }
75
76 return printed;
77}
78
79size_t thread__fprintf_maps(struct thread *self, FILE *fp)
80{
81 size_t printed = 0, i;
82 for (i = 0; i < MAP__NR_TYPES; ++i)
83 printed += __thread__fprintf_maps(self, i, fp);
84 return printed;
85}
86
87static size_t __thread__fprintf_removed_maps(struct thread *self,
88 enum map_type type, FILE *fp)
33{ 89{
34 struct map *pos; 90 struct map *pos;
35 size_t ret = fprintf(fp, "Thread %d %s\n", self->pid, self->comm); 91 size_t printed = 0;
92
93 list_for_each_entry(pos, &self->removed_maps[type], node) {
94 printed += fprintf(fp, "Map:");
95 printed += map__fprintf(pos, fp);
96 if (verbose > 1) {
97 printed += dso__fprintf(pos->dso, type, fp);
98 printed += fprintf(fp, "--\n");
99 }
100 }
101 return printed;
102}
36 103
37 list_for_each_entry(pos, &self->maps, node) 104static size_t thread__fprintf_removed_maps(struct thread *self, FILE *fp)
38 ret += map__fprintf(pos, fp); 105{
106 size_t printed = 0, i;
107 for (i = 0; i < MAP__NR_TYPES; ++i)
108 printed += __thread__fprintf_removed_maps(self, i, fp);
109 return printed;
110}
39 111
40 return ret; 112static size_t thread__fprintf(struct thread *self, FILE *fp)
113{
114 size_t printed = fprintf(fp, "Thread %d %s\n", self->pid, self->comm);
 115 printed += thread__fprintf_maps(self, fp);
116 printed += fprintf(fp, "Removed maps:\n");
117 return printed + thread__fprintf_removed_maps(self, fp);
41} 118}
42 119
43struct thread * 120struct thread *threads__findnew(pid_t pid)
44threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match)
45{ 121{
46 struct rb_node **p = &threads->rb_node; 122 struct rb_node **p = &threads.rb_node;
47 struct rb_node *parent = NULL; 123 struct rb_node *parent = NULL;
48 struct thread *th; 124 struct thread *th;
49 125
@@ -52,15 +128,15 @@ threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match)
52 * so most of the time we dont have to look up 128 * so most of the time we dont have to look up
53 * the full rbtree: 129 * the full rbtree:
54 */ 130 */
55 if (*last_match && (*last_match)->pid == pid) 131 if (last_match && last_match->pid == pid)
56 return *last_match; 132 return last_match;
57 133
58 while (*p != NULL) { 134 while (*p != NULL) {
59 parent = *p; 135 parent = *p;
60 th = rb_entry(parent, struct thread, rb_node); 136 th = rb_entry(parent, struct thread, rb_node);
61 137
62 if (th->pid == pid) { 138 if (th->pid == pid) {
63 *last_match = th; 139 last_match = th;
64 return th; 140 return th;
65 } 141 }
66 142
@@ -73,17 +149,16 @@ threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match)
73 th = thread__new(pid); 149 th = thread__new(pid);
74 if (th != NULL) { 150 if (th != NULL) {
75 rb_link_node(&th->rb_node, parent, p); 151 rb_link_node(&th->rb_node, parent, p);
76 rb_insert_color(&th->rb_node, threads); 152 rb_insert_color(&th->rb_node, &threads);
77 *last_match = th; 153 last_match = th;
78 } 154 }
79 155
80 return th; 156 return th;
81} 157}
82 158
83struct thread * 159struct thread *register_idle_thread(void)
84register_idle_thread(struct rb_root *threads, struct thread **last_match)
85{ 160{
86 struct thread *thread = threads__findnew(0, threads, last_match); 161 struct thread *thread = threads__findnew(0);
87 162
88 if (!thread || thread__set_comm(thread, "swapper")) { 163 if (!thread || thread__set_comm(thread, "swapper")) {
89 fprintf(stderr, "problem inserting idle task.\n"); 164 fprintf(stderr, "problem inserting idle task.\n");
@@ -93,79 +168,116 @@ register_idle_thread(struct rb_root *threads, struct thread **last_match)
93 return thread; 168 return thread;
94} 169}
95 170
96void thread__insert_map(struct thread *self, struct map *map) 171static void thread__remove_overlappings(struct thread *self, struct map *map)
97{ 172{
98 struct map *pos, *tmp; 173 struct rb_root *root = &self->maps[map->type];
174 struct rb_node *next = rb_first(root);
99 175
100 list_for_each_entry_safe(pos, tmp, &self->maps, node) { 176 while (next) {
101 if (map__overlap(pos, map)) { 177 struct map *pos = rb_entry(next, struct map, rb_node);
102 if (verbose >= 2) { 178 next = rb_next(&pos->rb_node);
103 printf("overlapping maps:\n");
104 map__fprintf(map, stdout);
105 map__fprintf(pos, stdout);
106 }
107 179
108 if (map->start <= pos->start && map->end > pos->start) 180 if (!map__overlap(pos, map))
109 pos->start = map->end; 181 continue;
110 182
111 if (map->end >= pos->end && map->start < pos->end) 183 if (verbose >= 2) {
112 pos->end = map->start; 184 fputs("overlapping maps:\n", stderr);
185 map__fprintf(map, stderr);
186 map__fprintf(pos, stderr);
187 }
113 188
114 if (verbose >= 2) { 189 rb_erase(&pos->rb_node, root);
115 printf("after collision:\n"); 190 /*
116 map__fprintf(pos, stdout); 191 * We may have references to this map, for instance in some
117 } 192 * hist_entry instances, so just move them to a separate
193 * list.
194 */
195 list_add_tail(&pos->node, &self->removed_maps[map->type]);
196 }
197}
118 198
119 if (pos->start >= pos->end) { 199void maps__insert(struct rb_root *maps, struct map *map)
120 list_del_init(&pos->node); 200{
121 free(pos); 201 struct rb_node **p = &maps->rb_node;
122 } 202 struct rb_node *parent = NULL;
123 } 203 const u64 ip = map->start;
204 struct map *m;
205
206 while (*p != NULL) {
207 parent = *p;
208 m = rb_entry(parent, struct map, rb_node);
209 if (ip < m->start)
210 p = &(*p)->rb_left;
211 else
212 p = &(*p)->rb_right;
124 } 213 }
125 214
126 list_add_tail(&map->node, &self->maps); 215 rb_link_node(&map->rb_node, parent, p);
216 rb_insert_color(&map->rb_node, maps);
127} 217}
128 218
129int thread__fork(struct thread *self, struct thread *parent) 219struct map *maps__find(struct rb_root *maps, u64 ip)
130{ 220{
131 struct map *map; 221 struct rb_node **p = &maps->rb_node;
222 struct rb_node *parent = NULL;
223 struct map *m;
132 224
133 if (self->comm) 225 while (*p != NULL) {
134 free(self->comm); 226 parent = *p;
135 self->comm = strdup(parent->comm); 227 m = rb_entry(parent, struct map, rb_node);
136 if (!self->comm) 228 if (ip < m->start)
137 return -ENOMEM; 229 p = &(*p)->rb_left;
230 else if (ip > m->end)
231 p = &(*p)->rb_right;
232 else
233 return m;
234 }
235
236 return NULL;
237}
238
239void thread__insert_map(struct thread *self, struct map *map)
240{
241 thread__remove_overlappings(self, map);
242 maps__insert(&self->maps[map->type], map);
243}
138 244
139 list_for_each_entry(map, &parent->maps, node) { 245static int thread__clone_maps(struct thread *self, struct thread *parent,
246 enum map_type type)
247{
248 struct rb_node *nd;
249 for (nd = rb_first(&parent->maps[type]); nd; nd = rb_next(nd)) {
250 struct map *map = rb_entry(nd, struct map, rb_node);
140 struct map *new = map__clone(map); 251 struct map *new = map__clone(map);
141 if (!new) 252 if (new == NULL)
142 return -ENOMEM; 253 return -ENOMEM;
143 thread__insert_map(self, new); 254 thread__insert_map(self, new);
144 } 255 }
145
146 return 0; 256 return 0;
147} 257}
148 258
149struct map *thread__find_map(struct thread *self, u64 ip) 259int thread__fork(struct thread *self, struct thread *parent)
150{ 260{
151 struct map *pos; 261 int i;
152 262
153 if (self == NULL) 263 if (self->comm)
154 return NULL; 264 free(self->comm);
155 265 self->comm = strdup(parent->comm);
156 list_for_each_entry(pos, &self->maps, node) 266 if (!self->comm)
157 if (ip >= pos->start && ip <= pos->end) 267 return -ENOMEM;
158 return pos;
159 268
160 return NULL; 269 for (i = 0; i < MAP__NR_TYPES; ++i)
270 if (thread__clone_maps(self, parent, i) < 0)
271 return -ENOMEM;
272 return 0;
161} 273}
162 274
163size_t threads__fprintf(FILE *fp, struct rb_root *threads) 275size_t threads__fprintf(FILE *fp)
164{ 276{
165 size_t ret = 0; 277 size_t ret = 0;
166 struct rb_node *nd; 278 struct rb_node *nd;
167 279
168 for (nd = rb_first(threads); nd; nd = rb_next(nd)) { 280 for (nd = rb_first(&threads); nd; nd = rb_next(nd)) {
169 struct thread *pos = rb_entry(nd, struct thread, rb_node); 281 struct thread *pos = rb_entry(nd, struct thread, rb_node);
170 282
171 ret += thread__fprintf(pos, fp); 283 ret += thread__fprintf(pos, fp);
@@ -173,3 +285,15 @@ size_t threads__fprintf(FILE *fp, struct rb_root *threads)
173 285
174 return ret; 286 return ret;
175} 287}
288
289struct symbol *thread__find_symbol(struct thread *self,
290 enum map_type type, u64 addr,
291 symbol_filter_t filter)
292{
293 struct map *map = thread__find_map(self, type, addr);
294
295 if (map != NULL)
296 return map__find_symbol(map, map->map_ip(map, addr), filter);
297
298 return NULL;
299}
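
thread__find_symbol() is now a two-step lookup: find the map covering the address in the per-type rbtree, translate the address through the map's map_ip(), then search the backing DSO. A hedged usage sketch, with sample_pid and sample_ip standing in for fields of a decoded perf event:

	struct thread *thread = threads__findnew(sample_pid);
	struct symbol *sym = thread__find_symbol(thread, MAP__FUNCTION,
						 sample_ip, NULL);

	if (sym != NULL)
		printf("%#Lx: %s\n", sample_ip, sym->name);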
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 32aea3c1c2ad..686d6e914d9e 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -1,22 +1,56 @@
1#ifndef __PERF_THREAD_H
2#define __PERF_THREAD_H
3
1#include <linux/rbtree.h> 4#include <linux/rbtree.h>
2#include <linux/list.h>
3#include <unistd.h> 5#include <unistd.h>
4#include "symbol.h" 6#include "symbol.h"
5 7
6struct thread { 8struct thread {
7 struct rb_node rb_node; 9 struct rb_node rb_node;
8 struct list_head maps; 10 struct rb_root maps[MAP__NR_TYPES];
11 struct list_head removed_maps[MAP__NR_TYPES];
9 pid_t pid; 12 pid_t pid;
13 bool use_modules;
10 char shortname[3]; 14 char shortname[3];
11 char *comm; 15 char *comm;
16 int comm_len;
12}; 17};
13 18
19void thread__init(struct thread *self, pid_t pid);
14int thread__set_comm(struct thread *self, const char *comm); 20int thread__set_comm(struct thread *self, const char *comm);
15struct thread * 21int thread__comm_len(struct thread *self);
16threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match); 22struct thread *threads__findnew(pid_t pid);
17struct thread * 23struct thread *register_idle_thread(void);
18register_idle_thread(struct rb_root *threads, struct thread **last_match);
19void thread__insert_map(struct thread *self, struct map *map); 24void thread__insert_map(struct thread *self, struct map *map);
20int thread__fork(struct thread *self, struct thread *parent); 25int thread__fork(struct thread *self, struct thread *parent);
21struct map *thread__find_map(struct thread *self, u64 ip); 26size_t thread__fprintf_maps(struct thread *self, FILE *fp);
22size_t threads__fprintf(FILE *fp, struct rb_root *threads); 27size_t threads__fprintf(FILE *fp);
28
29void maps__insert(struct rb_root *maps, struct map *map);
30struct map *maps__find(struct rb_root *maps, u64 addr);
31
32static inline struct map *thread__find_map(struct thread *self,
33 enum map_type type, u64 addr)
34{
35 return self ? maps__find(&self->maps[type], addr) : NULL;
36}
37
38static inline void __thread__insert_map(struct thread *self, struct map *map)
39{
40 maps__insert(&self->maps[map->type], map);
41}
42
43void thread__find_addr_location(struct thread *self, u8 cpumode,
44 enum map_type type, u64 addr,
45 struct addr_location *al,
46 symbol_filter_t filter);
47struct symbol *thread__find_symbol(struct thread *self,
48 enum map_type type, u64 addr,
49 symbol_filter_t filter);
50
51static inline struct symbol *
52thread__find_function(struct thread *self, u64 addr, symbol_filter_t filter)
53{
54 return thread__find_symbol(self, MAP__FUNCTION, addr, filter);
55}
56#endif /* __PERF_THREAD_H */
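
The two insertion helpers differ only in overlap handling: thread__insert_map() first evicts overlapping maps to removed_maps, while __thread__insert_map() links the map directly. That is why thread__create_kernel_map() in symbol.c uses the raw variant for the freshly built kernel map, roughly:

	/* a brand-new kernel map cannot overlap anything yet */
	struct map *kmap = map__new2(0, kernel_dso, MAP__FUNCTION); /* kernel_dso: assumed name */

	if (kmap != NULL)
		__thread__insert_map(kthread, kmap);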
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index af4b0573b37f..cace35595530 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -33,11 +33,11 @@
33#include <ctype.h> 33#include <ctype.h>
34#include <errno.h> 34#include <errno.h>
35#include <stdbool.h> 35#include <stdbool.h>
36#include <linux/kernel.h>
36 37
37#include "../perf.h" 38#include "../perf.h"
38#include "trace-event.h" 39#include "trace-event.h"
39 40
40
41#define VERSION "0.5" 41#define VERSION "0.5"
42 42
43#define _STR(x) #x 43#define _STR(x) #x
@@ -483,27 +483,33 @@ static struct tracepoint_path *
483get_tracepoints_path(struct perf_event_attr *pattrs, int nb_events) 483get_tracepoints_path(struct perf_event_attr *pattrs, int nb_events)
484{ 484{
485 struct tracepoint_path path, *ppath = &path; 485 struct tracepoint_path path, *ppath = &path;
486 int i; 486 int i, nr_tracepoints = 0;
487 487
488 for (i = 0; i < nb_events; i++) { 488 for (i = 0; i < nb_events; i++) {
489 if (pattrs[i].type != PERF_TYPE_TRACEPOINT) 489 if (pattrs[i].type != PERF_TYPE_TRACEPOINT)
490 continue; 490 continue;
491 ++nr_tracepoints;
491 ppath->next = tracepoint_id_to_path(pattrs[i].config); 492 ppath->next = tracepoint_id_to_path(pattrs[i].config);
492 if (!ppath->next) 493 if (!ppath->next)
493 die("%s\n", "No memory to alloc tracepoints list"); 494 die("%s\n", "No memory to alloc tracepoints list");
494 ppath = ppath->next; 495 ppath = ppath->next;
495 } 496 }
496 497
497 return path.next; 498 return nr_tracepoints > 0 ? path.next : NULL;
498} 499}
499void read_tracing_data(struct perf_event_attr *pattrs, int nb_events) 500
501int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events)
500{ 502{
501 char buf[BUFSIZ]; 503 char buf[BUFSIZ];
502 struct tracepoint_path *tps; 504 struct tracepoint_path *tps = get_tracepoints_path(pattrs, nb_events);
505
506 /*
507 * What? No tracepoints? No sense writing anything here, bail out.
508 */
509 if (tps == NULL)
510 return -1;
503 511
504 output_fd = open(output_file, O_WRONLY | O_CREAT | O_TRUNC | O_LARGEFILE, 0644); 512 output_fd = fd;
505 if (output_fd < 0)
506 die("creating file '%s'", output_file);
507 513
508 buf[0] = 23; 514 buf[0] = 23;
509 buf[1] = 8; 515 buf[1] = 8;
@@ -530,11 +536,11 @@ void read_tracing_data(struct perf_event_attr *pattrs, int nb_events)
530 page_size = getpagesize(); 536 page_size = getpagesize();
531 write_or_die(&page_size, 4); 537 write_or_die(&page_size, 4);
532 538
533 tps = get_tracepoints_path(pattrs, nb_events);
534
535 read_header_files(); 539 read_header_files();
536 read_ftrace_files(tps); 540 read_ftrace_files(tps);
537 read_event_files(tps); 541 read_event_files(tps);
538 read_proc_kallsyms(); 542 read_proc_kallsyms();
539 read_ftrace_printk(); 543 read_ftrace_printk();
544
545 return 0;
540} 546}
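
read_tracing_data() now writes into a caller-supplied descriptor and reports whether any tracepoint events were present, instead of opening its own output file and dying on failure. A hedged caller sketch (attrs and nr_events are assumed to come from the recording session):

	int fd = open("perf.data", O_WRONLY | O_CREAT | O_TRUNC, 0644);

	if (fd < 0)
		return -1;
	if (read_tracing_data(fd, attrs, nr_events) < 0)
		pr_debug("no tracepoint events, no tracing data written\n");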
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 55c9659a56e2..0302405aa2ca 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -40,12 +40,19 @@ int header_page_size_size;
40int header_page_data_offset; 40int header_page_data_offset;
41int header_page_data_size; 41int header_page_data_size;
42 42
43int latency_format;
44
43static char *input_buf; 45static char *input_buf;
44static unsigned long long input_buf_ptr; 46static unsigned long long input_buf_ptr;
45static unsigned long long input_buf_siz; 47static unsigned long long input_buf_siz;
46 48
47static int cpus; 49static int cpus;
48static int long_size; 50static int long_size;
51static int is_flag_field;
52static int is_symbolic_field;
53
54static struct format_field *
55find_any_field(struct event *event, const char *name);
49 56
50static void init_input_buf(char *buf, unsigned long long size) 57static void init_input_buf(char *buf, unsigned long long size)
51{ 58{
@@ -284,18 +291,19 @@ void parse_ftrace_printk(char *file, unsigned int size __unused)
284 char *line; 291 char *line;
285 char *next = NULL; 292 char *next = NULL;
286 char *addr_str; 293 char *addr_str;
287 int ret;
288 int i; 294 int i;
289 295
290 line = strtok_r(file, "\n", &next); 296 line = strtok_r(file, "\n", &next);
291 while (line) { 297 while (line) {
298 addr_str = strsep(&line, ":");
299 if (!line) {
300 warning("error parsing print strings");
301 break;
302 }
292 item = malloc_or_die(sizeof(*item)); 303 item = malloc_or_die(sizeof(*item));
293 ret = sscanf(line, "%as : %as",
294 (float *)(void *)&addr_str, /* workaround gcc warning */
295 (float *)(void *)&item->printk);
296 item->addr = strtoull(addr_str, NULL, 16); 304 item->addr = strtoull(addr_str, NULL, 16);
297 free(addr_str); 305 /* fmt still has a space, skip it */
298 306 item->printk = strdup(line+1);
299 item->next = list; 307 item->next = list;
300 list = item; 308 list = item;
301 line = strtok_r(NULL, "\n", &next); 309 line = strtok_r(NULL, "\n", &next);
@@ -522,7 +530,10 @@ static enum event_type __read_token(char **tok)
522 last_ch = ch; 530 last_ch = ch;
523 ch = __read_char(); 531 ch = __read_char();
524 buf[i++] = ch; 532 buf[i++] = ch;
525 } while (ch != quote_ch && last_ch != '\\'); 533 /* the '\' '\' will cancel itself */
534 if (ch == '\\' && last_ch == '\\')
535 last_ch = 0;
536 } while (ch != quote_ch || last_ch == '\\');
526 /* remove the last quote */ 537 /* remove the last quote */
527 i--; 538 i--;
528 goto out; 539 goto out;
@@ -610,7 +621,7 @@ static enum event_type read_token_item(char **tok)
610static int test_type(enum event_type type, enum event_type expect) 621static int test_type(enum event_type type, enum event_type expect)
611{ 622{
612 if (type != expect) { 623 if (type != expect) {
613 die("Error: expected type %d but read %d", 624 warning("Error: expected type %d but read %d",
614 expect, type); 625 expect, type);
615 return -1; 626 return -1;
616 } 627 }
@@ -621,13 +632,13 @@ static int test_type_token(enum event_type type, char *token,
621 enum event_type expect, const char *expect_tok) 632 enum event_type expect, const char *expect_tok)
622{ 633{
623 if (type != expect) { 634 if (type != expect) {
624 die("Error: expected type %d but read %d", 635 warning("Error: expected type %d but read %d",
625 expect, type); 636 expect, type);
626 return -1; 637 return -1;
627 } 638 }
628 639
629 if (strcmp(token, expect_tok) != 0) { 640 if (strcmp(token, expect_tok) != 0) {
630 die("Error: expected '%s' but read '%s'", 641 warning("Error: expected '%s' but read '%s'",
631 expect_tok, token); 642 expect_tok, token);
632 return -1; 643 return -1;
633 } 644 }
@@ -665,7 +676,7 @@ static int __read_expected(enum event_type expect, const char *str, int newline_
665 676
666 free_token(token); 677 free_token(token);
667 678
668 return 0; 679 return ret;
669} 680}
670 681
671static int read_expected(enum event_type expect, const char *str) 682static int read_expected(enum event_type expect, const char *str)
@@ -682,10 +693,10 @@ static char *event_read_name(void)
682{ 693{
683 char *token; 694 char *token;
684 695
685 if (read_expected(EVENT_ITEM, (char *)"name") < 0) 696 if (read_expected(EVENT_ITEM, "name") < 0)
686 return NULL; 697 return NULL;
687 698
688 if (read_expected(EVENT_OP, (char *)":") < 0) 699 if (read_expected(EVENT_OP, ":") < 0)
689 return NULL; 700 return NULL;
690 701
691 if (read_expect_type(EVENT_ITEM, &token) < 0) 702 if (read_expect_type(EVENT_ITEM, &token) < 0)
@@ -703,10 +714,10 @@ static int event_read_id(void)
703 char *token; 714 char *token;
704 int id; 715 int id;
705 716
706 if (read_expected_item(EVENT_ITEM, (char *)"ID") < 0) 717 if (read_expected_item(EVENT_ITEM, "ID") < 0)
707 return -1; 718 return -1;
708 719
709 if (read_expected(EVENT_OP, (char *)":") < 0) 720 if (read_expected(EVENT_OP, ":") < 0)
710 return -1; 721 return -1;
711 722
712 if (read_expect_type(EVENT_ITEM, &token) < 0) 723 if (read_expect_type(EVENT_ITEM, &token) < 0)
@@ -721,6 +732,24 @@ static int event_read_id(void)
721 return -1; 732 return -1;
722} 733}
723 734
735static int field_is_string(struct format_field *field)
736{
 737 if ((field->flags & FIELD_IS_ARRAY) &&
 738 (strstr(field->type, "char") || strstr(field->type, "u8") ||
 739 strstr(field->type, "s8")))
740 return 1;
741
742 return 0;
743}
744
745static int field_is_dynamic(struct format_field *field)
746{
 747 if (strstr(field->type, "__data_loc"))
748 return 1;
749
750 return 0;
751}
752
724static int event_read_fields(struct event *event, struct format_field **fields) 753static int event_read_fields(struct event *event, struct format_field **fields)
725{ 754{
726 struct format_field *field = NULL; 755 struct format_field *field = NULL;
@@ -738,7 +767,7 @@ static int event_read_fields(struct event *event, struct format_field **fields)
738 767
739 count++; 768 count++;
740 769
741 if (test_type_token(type, token, EVENT_ITEM, (char *)"field")) 770 if (test_type_token(type, token, EVENT_ITEM, "field"))
742 goto fail; 771 goto fail;
743 free_token(token); 772 free_token(token);
744 773
@@ -753,7 +782,7 @@ static int event_read_fields(struct event *event, struct format_field **fields)
753 type = read_token(&token); 782 type = read_token(&token);
754 } 783 }
755 784
756 if (test_type_token(type, token, EVENT_OP, (char *)":") < 0) 785 if (test_type_token(type, token, EVENT_OP, ":") < 0)
757 return -1; 786 return -1;
758 787
759 if (read_expect_type(EVENT_ITEM, &token) < 0) 788 if (read_expect_type(EVENT_ITEM, &token) < 0)
@@ -865,14 +894,20 @@ static int event_read_fields(struct event *event, struct format_field **fields)
865 free(brackets); 894 free(brackets);
866 } 895 }
867 896
868 if (test_type_token(type, token, EVENT_OP, (char *)";")) 897 if (field_is_string(field)) {
898 field->flags |= FIELD_IS_STRING;
899 if (field_is_dynamic(field))
900 field->flags |= FIELD_IS_DYNAMIC;
901 }
902
903 if (test_type_token(type, token, EVENT_OP, ";"))
869 goto fail; 904 goto fail;
870 free_token(token); 905 free_token(token);
871 906
872 if (read_expected(EVENT_ITEM, (char *)"offset") < 0) 907 if (read_expected(EVENT_ITEM, "offset") < 0)
873 goto fail_expect; 908 goto fail_expect;
874 909
875 if (read_expected(EVENT_OP, (char *)":") < 0) 910 if (read_expected(EVENT_OP, ":") < 0)
876 goto fail_expect; 911 goto fail_expect;
877 912
878 if (read_expect_type(EVENT_ITEM, &token)) 913 if (read_expect_type(EVENT_ITEM, &token))
@@ -880,13 +915,13 @@ static int event_read_fields(struct event *event, struct format_field **fields)
880 field->offset = strtoul(token, NULL, 0); 915 field->offset = strtoul(token, NULL, 0);
881 free_token(token); 916 free_token(token);
882 917
883 if (read_expected(EVENT_OP, (char *)";") < 0) 918 if (read_expected(EVENT_OP, ";") < 0)
884 goto fail_expect; 919 goto fail_expect;
885 920
886 if (read_expected(EVENT_ITEM, (char *)"size") < 0) 921 if (read_expected(EVENT_ITEM, "size") < 0)
887 goto fail_expect; 922 goto fail_expect;
888 923
889 if (read_expected(EVENT_OP, (char *)":") < 0) 924 if (read_expected(EVENT_OP, ":") < 0)
890 goto fail_expect; 925 goto fail_expect;
891 926
892 if (read_expect_type(EVENT_ITEM, &token)) 927 if (read_expect_type(EVENT_ITEM, &token))
@@ -894,11 +929,34 @@ static int event_read_fields(struct event *event, struct format_field **fields)
894 field->size = strtoul(token, NULL, 0); 929 field->size = strtoul(token, NULL, 0);
895 free_token(token); 930 free_token(token);
896 931
897 if (read_expected(EVENT_OP, (char *)";") < 0) 932 if (read_expected(EVENT_OP, ";") < 0)
898 goto fail_expect; 933 goto fail_expect;
899 934
900 if (read_expect_type(EVENT_NEWLINE, &token) < 0) 935 type = read_token(&token);
901 goto fail; 936 if (type != EVENT_NEWLINE) {
937 /* newer versions of the kernel have a "signed" type */
938 if (test_type_token(type, token, EVENT_ITEM, "signed"))
939 goto fail;
940
941 free_token(token);
942
943 if (read_expected(EVENT_OP, ":") < 0)
944 goto fail_expect;
945
946 if (read_expect_type(EVENT_ITEM, &token))
947 goto fail;
948
949 if (strtoul(token, NULL, 0))
950 field->flags |= FIELD_IS_SIGNED;
951
952 free_token(token);
953 if (read_expected(EVENT_OP, ";") < 0)
954 goto fail_expect;
955
956 if (read_expect_type(EVENT_NEWLINE, &token))
957 goto fail;
958 }
959
902 free_token(token); 960 free_token(token);
903 961
904 *fields = field; 962 *fields = field;
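
For reference, the field: lines this parser consumes look like the following; newer kernels append the signed: attribute handled above (values illustrative, in the style of a debugfs events/*/format file):

	field:unsigned short common_type;	offset:0;	size:2;	signed:0;
	field:int common_pid;	offset:4;	size:4;	signed:1;
	field:char comm[16];	offset:12;	size:16;	signed:1;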
@@ -921,10 +979,10 @@ static int event_read_format(struct event *event)
921 char *token; 979 char *token;
922 int ret; 980 int ret;
923 981
924 if (read_expected_item(EVENT_ITEM, (char *)"format") < 0) 982 if (read_expected_item(EVENT_ITEM, "format") < 0)
925 return -1; 983 return -1;
926 984
927 if (read_expected(EVENT_OP, (char *)":") < 0) 985 if (read_expected(EVENT_OP, ":") < 0)
928 return -1; 986 return -1;
929 987
930 if (read_expect_type(EVENT_NEWLINE, &token)) 988 if (read_expect_type(EVENT_NEWLINE, &token))
@@ -984,7 +1042,7 @@ process_cond(struct event *event, struct print_arg *top, char **tok)
984 1042
985 *tok = NULL; 1043 *tok = NULL;
986 type = process_arg(event, left, &token); 1044 type = process_arg(event, left, &token);
987 if (test_type_token(type, token, EVENT_OP, (char *)":")) 1045 if (test_type_token(type, token, EVENT_OP, ":"))
988 goto out_free; 1046 goto out_free;
989 1047
990 arg->op.op = token; 1048 arg->op.op = token;
@@ -1004,6 +1062,35 @@ out_free:
1004 return EVENT_ERROR; 1062 return EVENT_ERROR;
1005} 1063}
1006 1064
1065static enum event_type
1066process_array(struct event *event, struct print_arg *top, char **tok)
1067{
1068 struct print_arg *arg;
1069 enum event_type type;
1070 char *token = NULL;
1071
1072 arg = malloc_or_die(sizeof(*arg));
1073 memset(arg, 0, sizeof(*arg));
1074
1075 *tok = NULL;
1076 type = process_arg(event, arg, &token);
1077 if (test_type_token(type, token, EVENT_OP, "]"))
1078 goto out_free;
1079
1080 top->op.right = arg;
1081
1082 free_token(token);
1083 type = read_token_item(&token);
1084 *tok = token;
1085
1086 return type;
1087
1088out_free:
1089 free_token(*tok);
1090 free_arg(arg);
1091 return EVENT_ERROR;
1092}
1093
1007static int get_op_prio(char *op) 1094static int get_op_prio(char *op)
1008{ 1095{
1009 if (!op[1]) { 1096 if (!op[1]) {
@@ -1128,6 +1215,8 @@ process_op(struct event *event, struct print_arg *arg, char **tok)
1128 strcmp(token, "*") == 0 || 1215 strcmp(token, "*") == 0 ||
1129 strcmp(token, "^") == 0 || 1216 strcmp(token, "^") == 0 ||
1130 strcmp(token, "/") == 0 || 1217 strcmp(token, "/") == 0 ||
1218 strcmp(token, "<") == 0 ||
1219 strcmp(token, ">") == 0 ||
1131 strcmp(token, "==") == 0 || 1220 strcmp(token, "==") == 0 ||
1132 strcmp(token, "!=") == 0) { 1221 strcmp(token, "!=") == 0) {
1133 1222
@@ -1144,17 +1233,46 @@ process_op(struct event *event, struct print_arg *arg, char **tok)
1144 1233
1145 right = malloc_or_die(sizeof(*right)); 1234 right = malloc_or_die(sizeof(*right));
1146 1235
1147 type = process_arg(event, right, tok); 1236 type = read_token_item(&token);
1237 *tok = token;
1238
1239 /* could just be a type pointer */
1240 if ((strcmp(arg->op.op, "*") == 0) &&
1241 type == EVENT_DELIM && (strcmp(token, ")") == 0)) {
1242 if (left->type != PRINT_ATOM)
1243 die("bad pointer type");
 1244 left->atom.atom = realloc(left->atom.atom,
 1245 strlen(left->atom.atom) + 3);
1246 strcat(left->atom.atom, " *");
1247 *arg = *left;
1248 free(arg);
1249
1250 return type;
1251 }
1252
1253 type = process_arg_token(event, right, tok, type);
1148 1254
1149 arg->op.right = right; 1255 arg->op.right = right;
1150 1256
1257 } else if (strcmp(token, "[") == 0) {
1258
1259 left = malloc_or_die(sizeof(*left));
1260 *left = *arg;
1261
1262 arg->type = PRINT_OP;
1263 arg->op.op = token;
1264 arg->op.left = left;
1265
1266 arg->op.prio = 0;
1267 type = process_array(event, arg, tok);
1268
1151 } else { 1269 } else {
1152 die("unknown op '%s'", token); 1270 warning("unknown op '%s'", token);
1271 event->flags |= EVENT_FL_FAILED;
1153 /* the arg is now the left side */ 1272 /* the arg is now the left side */
1154 return EVENT_NONE; 1273 return EVENT_NONE;
1155 } 1274 }
1156 1275
1157
1158 if (type == EVENT_OP) { 1276 if (type == EVENT_OP) {
1159 int prio; 1277 int prio;
1160 1278
@@ -1178,7 +1296,7 @@ process_entry(struct event *event __unused, struct print_arg *arg,
1178 char *field; 1296 char *field;
1179 char *token; 1297 char *token;
1180 1298
1181 if (read_expected(EVENT_OP, (char *)"->") < 0) 1299 if (read_expected(EVENT_OP, "->") < 0)
1182 return EVENT_ERROR; 1300 return EVENT_ERROR;
1183 1301
1184 if (read_expect_type(EVENT_ITEM, &token) < 0) 1302 if (read_expect_type(EVENT_ITEM, &token) < 0)
@@ -1188,6 +1306,16 @@ process_entry(struct event *event __unused, struct print_arg *arg,
1188 arg->type = PRINT_FIELD; 1306 arg->type = PRINT_FIELD;
1189 arg->field.name = field; 1307 arg->field.name = field;
1190 1308
1309 if (is_flag_field) {
1310 arg->field.field = find_any_field(event, arg->field.name);
1311 arg->field.field->flags |= FIELD_IS_FLAG;
1312 is_flag_field = 0;
1313 } else if (is_symbolic_field) {
1314 arg->field.field = find_any_field(event, arg->field.name);
1315 arg->field.field->flags |= FIELD_IS_SYMBOLIC;
1316 is_symbolic_field = 0;
1317 }
1318
1191 type = read_token(&token); 1319 type = read_token(&token);
1192 *tok = token; 1320 *tok = token;
1193 1321
@@ -1338,14 +1466,14 @@ process_fields(struct event *event, struct print_flag_sym **list, char **tok)
1338 do { 1466 do {
1339 free_token(token); 1467 free_token(token);
1340 type = read_token_item(&token); 1468 type = read_token_item(&token);
1341 if (test_type_token(type, token, EVENT_OP, (char *)"{")) 1469 if (test_type_token(type, token, EVENT_OP, "{"))
1342 break; 1470 break;
1343 1471
1344 arg = malloc_or_die(sizeof(*arg)); 1472 arg = malloc_or_die(sizeof(*arg));
1345 1473
1346 free_token(token); 1474 free_token(token);
1347 type = process_arg(event, arg, &token); 1475 type = process_arg(event, arg, &token);
1348 if (test_type_token(type, token, EVENT_DELIM, (char *)",")) 1476 if (test_type_token(type, token, EVENT_DELIM, ","))
1349 goto out_free; 1477 goto out_free;
1350 1478
1351 field = malloc_or_die(sizeof(*field)); 1479 field = malloc_or_die(sizeof(*field));
@@ -1356,7 +1484,7 @@ process_fields(struct event *event, struct print_flag_sym **list, char **tok)
1356 1484
1357 free_token(token); 1485 free_token(token);
1358 type = process_arg(event, arg, &token); 1486 type = process_arg(event, arg, &token);
1359 if (test_type_token(type, token, EVENT_OP, (char *)"}")) 1487 if (test_type_token(type, token, EVENT_OP, "}"))
1360 goto out_free; 1488 goto out_free;
1361 1489
1362 value = arg_eval(arg); 1490 value = arg_eval(arg);
@@ -1391,13 +1519,13 @@ process_flags(struct event *event, struct print_arg *arg, char **tok)
1391 memset(arg, 0, sizeof(*arg)); 1519 memset(arg, 0, sizeof(*arg));
1392 arg->type = PRINT_FLAGS; 1520 arg->type = PRINT_FLAGS;
1393 1521
1394 if (read_expected_item(EVENT_DELIM, (char *)"(") < 0) 1522 if (read_expected_item(EVENT_DELIM, "(") < 0)
1395 return EVENT_ERROR; 1523 return EVENT_ERROR;
1396 1524
1397 field = malloc_or_die(sizeof(*field)); 1525 field = malloc_or_die(sizeof(*field));
1398 1526
1399 type = process_arg(event, field, &token); 1527 type = process_arg(event, field, &token);
1400 if (test_type_token(type, token, EVENT_DELIM, (char *)",")) 1528 if (test_type_token(type, token, EVENT_DELIM, ","))
1401 goto out_free; 1529 goto out_free;
1402 1530
1403 arg->flags.field = field; 1531 arg->flags.field = field;
@@ -1408,11 +1536,11 @@ process_flags(struct event *event, struct print_arg *arg, char **tok)
1408 type = read_token_item(&token); 1536 type = read_token_item(&token);
1409 } 1537 }
1410 1538
1411 if (test_type_token(type, token, EVENT_DELIM, (char *)",")) 1539 if (test_type_token(type, token, EVENT_DELIM, ","))
1412 goto out_free; 1540 goto out_free;
1413 1541
1414 type = process_fields(event, &arg->flags.flags, &token); 1542 type = process_fields(event, &arg->flags.flags, &token);
1415 if (test_type_token(type, token, EVENT_DELIM, (char *)")")) 1543 if (test_type_token(type, token, EVENT_DELIM, ")"))
1416 goto out_free; 1544 goto out_free;
1417 1545
1418 free_token(token); 1546 free_token(token);
@@ -1434,19 +1562,19 @@ process_symbols(struct event *event, struct print_arg *arg, char **tok)
1434 memset(arg, 0, sizeof(*arg)); 1562 memset(arg, 0, sizeof(*arg));
1435 arg->type = PRINT_SYMBOL; 1563 arg->type = PRINT_SYMBOL;
1436 1564
1437 if (read_expected_item(EVENT_DELIM, (char *)"(") < 0) 1565 if (read_expected_item(EVENT_DELIM, "(") < 0)
1438 return EVENT_ERROR; 1566 return EVENT_ERROR;
1439 1567
1440 field = malloc_or_die(sizeof(*field)); 1568 field = malloc_or_die(sizeof(*field));
1441 1569
1442 type = process_arg(event, field, &token); 1570 type = process_arg(event, field, &token);
1443 if (test_type_token(type, token, EVENT_DELIM, (char *)",")) 1571 if (test_type_token(type, token, EVENT_DELIM, ","))
1444 goto out_free; 1572 goto out_free;
1445 1573
1446 arg->symbol.field = field; 1574 arg->symbol.field = field;
1447 1575
1448 type = process_fields(event, &arg->symbol.symbols, &token); 1576 type = process_fields(event, &arg->symbol.symbols, &token);
1449 if (test_type_token(type, token, EVENT_DELIM, (char *)")")) 1577 if (test_type_token(type, token, EVENT_DELIM, ")"))
1450 goto out_free; 1578 goto out_free;
1451 1579
1452 free_token(token); 1580 free_token(token);
@@ -1463,7 +1591,6 @@ process_paren(struct event *event, struct print_arg *arg, char **tok)
1463{ 1591{
1464 struct print_arg *item_arg; 1592 struct print_arg *item_arg;
1465 enum event_type type; 1593 enum event_type type;
1466 int ptr_cast = 0;
1467 char *token; 1594 char *token;
1468 1595
1469 type = process_arg(event, arg, &token); 1596 type = process_arg(event, arg, &token);
@@ -1471,28 +1598,13 @@ process_paren(struct event *event, struct print_arg *arg, char **tok)
1471 if (type == EVENT_ERROR) 1598 if (type == EVENT_ERROR)
1472 return EVENT_ERROR; 1599 return EVENT_ERROR;
1473 1600
1474 if (type == EVENT_OP) { 1601 if (type == EVENT_OP)
1475 /* handle the ptr casts */ 1602 type = process_op(event, arg, &token);
1476 if (!strcmp(token, "*")) {
1477 /*
1478 * FIXME: should we zapp whitespaces before ')' ?
1479 * (may require a peek_token_item())
1480 */
1481 if (__peek_char() == ')') {
1482 ptr_cast = 1;
1483 free_token(token);
1484 type = read_token_item(&token);
1485 }
1486 }
1487 if (!ptr_cast) {
1488 type = process_op(event, arg, &token);
1489 1603
1490 if (type == EVENT_ERROR) 1604 if (type == EVENT_ERROR)
1491 return EVENT_ERROR; 1605 return EVENT_ERROR;
1492 }
1493 }
1494 1606
1495 if (test_type_token(type, token, EVENT_DELIM, (char *)")")) { 1607 if (test_type_token(type, token, EVENT_DELIM, ")")) {
1496 free_token(token); 1608 free_token(token);
1497 return EVENT_ERROR; 1609 return EVENT_ERROR;
1498 } 1610 }
@@ -1516,13 +1628,6 @@ process_paren(struct event *event, struct print_arg *arg, char **tok)
1516 item_arg = malloc_or_die(sizeof(*item_arg)); 1628 item_arg = malloc_or_die(sizeof(*item_arg));
1517 1629
1518 arg->type = PRINT_TYPE; 1630 arg->type = PRINT_TYPE;
1519 if (ptr_cast) {
1520 char *old = arg->atom.atom;
1521
1522 arg->atom.atom = malloc_or_die(strlen(old + 3));
1523 sprintf(arg->atom.atom, "%s *", old);
1524 free(old);
1525 }
1526 arg->typecast.type = arg->atom.atom; 1631 arg->typecast.type = arg->atom.atom;
1527 arg->typecast.item = item_arg; 1632 arg->typecast.item = item_arg;
1528 type = process_arg_token(event, item_arg, &token, type); 1633 type = process_arg_token(event, item_arg, &token, type);
@@ -1540,7 +1645,7 @@ process_str(struct event *event __unused, struct print_arg *arg, char **tok)
1540 enum event_type type; 1645 enum event_type type;
1541 char *token; 1646 char *token;
1542 1647
1543 if (read_expected(EVENT_DELIM, (char *)"(") < 0) 1648 if (read_expected(EVENT_DELIM, "(") < 0)
1544 return EVENT_ERROR; 1649 return EVENT_ERROR;
1545 1650
1546 if (read_expect_type(EVENT_ITEM, &token) < 0) 1651 if (read_expect_type(EVENT_ITEM, &token) < 0)
@@ -1550,7 +1655,7 @@ process_str(struct event *event __unused, struct print_arg *arg, char **tok)
1550 arg->string.string = token; 1655 arg->string.string = token;
1551 arg->string.offset = -1; 1656 arg->string.offset = -1;
1552 1657
1553 if (read_expected(EVENT_DELIM, (char *)")") < 0) 1658 if (read_expected(EVENT_DELIM, ")") < 0)
1554 return EVENT_ERROR; 1659 return EVENT_ERROR;
1555 1660
1556 type = read_token(&token); 1661 type = read_token(&token);
@@ -1578,9 +1683,11 @@ process_arg_token(struct event *event, struct print_arg *arg,
1578 type = process_entry(event, arg, &token); 1683 type = process_entry(event, arg, &token);
1579 } else if (strcmp(token, "__print_flags") == 0) { 1684 } else if (strcmp(token, "__print_flags") == 0) {
1580 free_token(token); 1685 free_token(token);
1686 is_flag_field = 1;
1581 type = process_flags(event, arg, &token); 1687 type = process_flags(event, arg, &token);
1582 } else if (strcmp(token, "__print_symbolic") == 0) { 1688 } else if (strcmp(token, "__print_symbolic") == 0) {
1583 free_token(token); 1689 free_token(token);
1690 is_symbolic_field = 1;
1584 type = process_symbols(event, arg, &token); 1691 type = process_symbols(event, arg, &token);
1585 } else if (strcmp(token, "__get_str") == 0) { 1692 } else if (strcmp(token, "__get_str") == 0) {
1586 free_token(token); 1693 free_token(token);
@@ -1637,12 +1744,18 @@ process_arg_token(struct event *event, struct print_arg *arg,
1637 1744
1638static int event_read_print_args(struct event *event, struct print_arg **list) 1745static int event_read_print_args(struct event *event, struct print_arg **list)
1639{ 1746{
1640 enum event_type type; 1747 enum event_type type = EVENT_ERROR;
1641 struct print_arg *arg; 1748 struct print_arg *arg;
1642 char *token; 1749 char *token;
1643 int args = 0; 1750 int args = 0;
1644 1751
1645 do { 1752 do {
1753 if (type == EVENT_NEWLINE) {
1754 free_token(token);
1755 type = read_token_item(&token);
1756 continue;
1757 }
1758
1646 arg = malloc_or_die(sizeof(*arg)); 1759 arg = malloc_or_die(sizeof(*arg));
1647 memset(arg, 0, sizeof(*arg)); 1760 memset(arg, 0, sizeof(*arg));
1648 1761
@@ -1683,18 +1796,19 @@ static int event_read_print(struct event *event)
1683 char *token; 1796 char *token;
1684 int ret; 1797 int ret;
1685 1798
1686 if (read_expected_item(EVENT_ITEM, (char *)"print") < 0) 1799 if (read_expected_item(EVENT_ITEM, "print") < 0)
1687 return -1; 1800 return -1;
1688 1801
1689 if (read_expected(EVENT_ITEM, (char *)"fmt") < 0) 1802 if (read_expected(EVENT_ITEM, "fmt") < 0)
1690 return -1; 1803 return -1;
1691 1804
1692 if (read_expected(EVENT_OP, (char *)":") < 0) 1805 if (read_expected(EVENT_OP, ":") < 0)
1693 return -1; 1806 return -1;
1694 1807
1695 if (read_expect_type(EVENT_DQUOTE, &token) < 0) 1808 if (read_expect_type(EVENT_DQUOTE, &token) < 0)
1696 goto fail; 1809 goto fail;
1697 1810
1811 concat:
1698 event->print_fmt.format = token; 1812 event->print_fmt.format = token;
1699 event->print_fmt.args = NULL; 1813 event->print_fmt.args = NULL;
1700 1814
@@ -1704,7 +1818,22 @@ static int event_read_print(struct event *event)
1704 if (type == EVENT_NONE) 1818 if (type == EVENT_NONE)
1705 return 0; 1819 return 0;
1706 1820
 1707 if (test_type_token(type, token, EVENT_DELIM, (char *)",")) 1821 /* Handle concatenation of print lines */
1822 if (type == EVENT_DQUOTE) {
1823 char *cat;
1824
1825 cat = malloc_or_die(strlen(event->print_fmt.format) +
1826 strlen(token) + 1);
1827 strcpy(cat, event->print_fmt.format);
1828 strcat(cat, token);
1829 free_token(token);
1830 free_token(event->print_fmt.format);
1831 event->print_fmt.format = NULL;
1832 token = cat;
1833 goto concat;
1834 }
1835
1836 if (test_type_token(type, token, EVENT_DELIM, ","))
1708 goto fail; 1837 goto fail;
1709 1838
1710 free_token(token); 1839 free_token(token);
@@ -1713,7 +1842,7 @@ static int event_read_print(struct event *event)
1713 if (ret < 0) 1842 if (ret < 0)
1714 return -1; 1843 return -1;
1715 1844
1716 return 0; 1845 return ret;
1717 1846
1718 fail: 1847 fail:
1719 free_token(token); 1848 free_token(token);
@@ -1759,7 +1888,7 @@ find_any_field(struct event *event, const char *name)
1759 return find_field(event, name); 1888 return find_field(event, name);
1760} 1889}
1761 1890
1762static unsigned long long read_size(void *ptr, int size) 1891unsigned long long read_size(void *ptr, int size)
1763{ 1892{
1764 switch (size) { 1893 switch (size) {
1765 case 1: 1894 case 1:
@@ -1822,37 +1951,67 @@ static int get_common_info(const char *type, int *offset, int *size)
1822 return 0; 1951 return 0;
1823} 1952}
1824 1953
1825int trace_parse_common_type(void *data) 1954static int __parse_common(void *data, int *size, int *offset,
1955 const char *name)
1826{ 1956{
1827 static int type_offset;
1828 static int type_size;
1829 int ret; 1957 int ret;
1830 1958
1831 if (!type_size) { 1959 if (!*size) {
1832 ret = get_common_info("common_type", 1960 ret = get_common_info(name, offset, size);
1833 &type_offset,
1834 &type_size);
1835 if (ret < 0) 1961 if (ret < 0)
1836 return ret; 1962 return ret;
1837 } 1963 }
1838 return read_size(data + type_offset, type_size); 1964 return read_size(data + *offset, *size);
1965}
1966
1967int trace_parse_common_type(void *data)
1968{
1969 static int type_offset;
1970 static int type_size;
1971
1972 return __parse_common(data, &type_size, &type_offset,
1973 "common_type");
1839} 1974}
1840 1975
1841static int parse_common_pid(void *data) 1976int trace_parse_common_pid(void *data)
1842{ 1977{
1843 static int pid_offset; 1978 static int pid_offset;
1844 static int pid_size; 1979 static int pid_size;
1980
1981 return __parse_common(data, &pid_size, &pid_offset,
1982 "common_pid");
1983}
1984
1985int parse_common_pc(void *data)
1986{
1987 static int pc_offset;
1988 static int pc_size;
1989
1990 return __parse_common(data, &pc_size, &pc_offset,
1991 "common_preempt_count");
1992}
1993
1994int parse_common_flags(void *data)
1995{
1996 static int flags_offset;
1997 static int flags_size;
1998
1999 return __parse_common(data, &flags_size, &flags_offset,
2000 "common_flags");
2001}
2002
2003int parse_common_lock_depth(void *data)
2004{
2005 static int ld_offset;
2006 static int ld_size;
1845 int ret; 2007 int ret;
1846 2008
1847 if (!pid_size) { 2009 ret = __parse_common(data, &ld_size, &ld_offset,
1848 ret = get_common_info("common_pid", 2010 "common_lock_depth");
1849 &pid_offset, 2011 if (ret < 0)
1850 &pid_size); 2012 return -1;
1851 if (ret < 0)
1852 return ret;
1853 }
1854 2013
1855 return read_size(data + pid_offset, pid_size); 2014 return ret;
1856} 2015}
1857 2016
1858struct event *trace_find_event(int id) 2017struct event *trace_find_event(int id)
@@ -1866,11 +2025,20 @@ struct event *trace_find_event(int id)
1866 return event; 2025 return event;
1867} 2026}
1868 2027
2028struct event *trace_find_next_event(struct event *event)
2029{
2030 if (!event)
2031 return event_list;
2032
2033 return event->next;
2034}
2035
1869static unsigned long long eval_num_arg(void *data, int size, 2036static unsigned long long eval_num_arg(void *data, int size,
1870 struct event *event, struct print_arg *arg) 2037 struct event *event, struct print_arg *arg)
1871{ 2038{
1872 unsigned long long val = 0; 2039 unsigned long long val = 0;
1873 unsigned long long left, right; 2040 unsigned long long left, right;
2041 struct print_arg *larg;
1874 2042
1875 switch (arg->type) { 2043 switch (arg->type) {
1876 case PRINT_NULL: 2044 case PRINT_NULL:
@@ -1897,6 +2065,26 @@ static unsigned long long eval_num_arg(void *data, int size,
1897 return 0; 2065 return 0;
1898 break; 2066 break;
1899 case PRINT_OP: 2067 case PRINT_OP:
2068 if (strcmp(arg->op.op, "[") == 0) {
2069 /*
2070 * Arrays are special, since we don't want
2071 * to read the arg as is.
2072 */
2073 if (arg->op.left->type != PRINT_FIELD)
2074 goto default_op; /* oops, all bets off */
2075 larg = arg->op.left;
2076 if (!larg->field.field) {
2077 larg->field.field =
2078 find_any_field(event, larg->field.name);
2079 if (!larg->field.field)
2080 die("field %s not found", larg->field.name);
2081 }
2082 right = eval_num_arg(data, size, event, arg->op.right);
2083 val = read_size(data + larg->field.field->offset +
2084 right * long_size, long_size);
2085 break;
2086 }
2087 default_op:
1900 left = eval_num_arg(data, size, event, arg->op.left); 2088 left = eval_num_arg(data, size, event, arg->op.left);
1901 right = eval_num_arg(data, size, event, arg->op.right); 2089 right = eval_num_arg(data, size, event, arg->op.right);
1902 switch (arg->op.op[0]) { 2090 switch (arg->op.op[0]) {
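
The "[" branch matters for print formats that index into an array field: instead of evaluating the field as a scalar, it reads one long-sized element at the computed index. An illustrative format line in the style of the syscall enter events:

	print fmt: "NR %ld (%lx)", REC->id, REC->args[0]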
@@ -1947,6 +2135,12 @@ static unsigned long long eval_num_arg(void *data, int size,
1947 die("unknown op '%s'", arg->op.op); 2135 die("unknown op '%s'", arg->op.op);
1948 val = left == right; 2136 val = left == right;
1949 break; 2137 break;
2138 case '-':
2139 val = left - right;
2140 break;
2141 case '+':
2142 val = left + right;
2143 break;
1950 default: 2144 default:
1951 die("unknown op '%s'", arg->op.op); 2145 die("unknown op '%s'", arg->op.op);
1952 } 2146 }
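
With the '[' special case plus the new '-' and '+' arms, eval_num_arg() can evaluate print fmt arguments of the form REC->array[index]. A worked example of the array branch, assuming a field at offset 16, a long_size of 8, and an index expression that evaluates to 2 (all values hypothetical):

        right = 2;                                /* eval_num_arg(arg->op.right) */
        val = read_size(data + 16 + 2 * 8, 8);    /* the third long of the array */
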
@@ -1978,7 +2172,7 @@ static const struct flag flags[] = {
1978 { "HRTIMER_RESTART", 1 }, 2172 { "HRTIMER_RESTART", 1 },
1979}; 2173};
1980 2174
1981static unsigned long long eval_flag(const char *flag) 2175unsigned long long eval_flag(const char *flag)
1982{ 2176{
1983 int i; 2177 int i;
1984 2178
@@ -2145,8 +2339,9 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc
2145 case 'u': 2339 case 'u':
2146 case 'x': 2340 case 'x':
2147 case 'i': 2341 case 'i':
2148 bptr = (void *)(((unsigned long)bptr + (long_size - 1)) & 2342 /* the pointers are always 4-byte aligned */
2149 ~(long_size - 1)); 2343 bptr = (void *)(((unsigned long)bptr + 3) &
2344 ~3);
2150 switch (ls) { 2345 switch (ls) {
2151 case 0: 2346 case 0:
2152 case 1: 2347 case 1:
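
The (x + 3) & ~3 idiom rounds an address up to the next multiple of 4, replacing the old long_size-based rounding because bprintk arguments are packed on 4-byte boundaries even on 64-bit hosts. The arithmetic on two hypothetical addresses:

        (0x1005 + 3) & ~3  ==  0x1008    /* unaligned: rounded up      */
        (0x1008 + 3) & ~3  ==  0x1008    /* already aligned: unchanged */
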
@@ -2270,7 +2465,27 @@ static void pretty_print(void *data, int size, struct event *event)
2270 2465
2271 for (; *ptr; ptr++) { 2466 for (; *ptr; ptr++) {
2272 ls = 0; 2467 ls = 0;
2273 if (*ptr == '%') { 2468 if (*ptr == '\\') {
2469 ptr++;
2470 switch (*ptr) {
2471 case 'n':
2472 printf("\n");
2473 break;
2474 case 't':
2475 printf("\t");
2476 break;
2477 case 'r':
2478 printf("\r");
2479 break;
2480 case '\\':
2481 printf("\\");
2482 break;
2483 default:
2484 printf("%c", *ptr);
2485 break;
2486 }
2487
2488 } else if (*ptr == '%') {
2274 saveptr = ptr; 2489 saveptr = ptr;
2275 show_func = 0; 2490 show_func = 0;
2276 cont_process: 2491 cont_process:
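
Print format strings read from the event format files carry escapes as literal two-character sequences (a backslash followed by 'n', 't', 'r', or another backslash), which this new branch decodes before printing. For a hypothetical print fmt ending in "%s\n":

        input from the format file:  '%' 's' '\' 'n'   (backslash and 'n' are two characters)
        output of pretty_print():    the expanded string followed by a real newline
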
@@ -2377,6 +2592,41 @@ static inline int log10_cpu(int nb)
2377 return 1; 2592 return 1;
2378} 2593}
2379 2594
2595static void print_lat_fmt(void *data, int size __unused)
2596{
2597 unsigned int lat_flags;
2598 unsigned int pc;
2599 int lock_depth;
2600 int hardirq;
2601 int softirq;
2602
2603 lat_flags = parse_common_flags(data);
2604 pc = parse_common_pc(data);
2605 lock_depth = parse_common_lock_depth(data);
2606
2607 hardirq = lat_flags & TRACE_FLAG_HARDIRQ;
2608 softirq = lat_flags & TRACE_FLAG_SOFTIRQ;
2609
2610 printf("%c%c%c",
2611 (lat_flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
2612 (lat_flags & TRACE_FLAG_IRQS_NOSUPPORT) ?
2613 'X' : '.',
2614 (lat_flags & TRACE_FLAG_NEED_RESCHED) ?
2615 'N' : '.',
2616 (hardirq && softirq) ? 'H' :
2617 hardirq ? 'h' : softirq ? 's' : '.');
2618
2619 if (pc)
2620 printf("%x", pc);
2621 else
2622 printf(".");
2623
2624 if (lock_depth < 0)
2625 printf(".");
2626 else
2627 printf("%d", lock_depth);
2628}
2629
2380/* taken from Linux, written by Frederic Weisbecker */ 2630/* taken from Linux, written by Frederic Weisbecker */
2381static void print_graph_cpu(int cpu) 2631static void print_graph_cpu(int cpu)
2382{ 2632{
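
print_lat_fmt() reproduces ftrace's latency-format columns: irqs-off ('d', or 'X' where the arch cannot report it), need-resched ('N'), and interrupt context ('h' hardirq, 's' softirq, 'H' both), followed by the preempt count in hex and the lock depth, with '.' for each inactive column. A hypothetical record taken in hardirq context with interrupts disabled, a preempt count of 1, and no lock held renders as:

        d.h1.
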
@@ -2452,7 +2702,7 @@ get_return_for_leaf(int cpu, int cur_pid, unsigned long long cur_func,
2452 if (!(event->flags & EVENT_FL_ISFUNCRET)) 2702 if (!(event->flags & EVENT_FL_ISFUNCRET))
2453 return NULL; 2703 return NULL;
2454 2704
2455 pid = parse_common_pid(next->data); 2705 pid = trace_parse_common_pid(next->data);
2456 field = find_field(event, "func"); 2706 field = find_field(event, "func");
2457 if (!field) 2707 if (!field)
2458 die("function return does not have field func"); 2708 die("function return does not have field func");
@@ -2620,6 +2870,11 @@ pretty_print_func_ent(void *data, int size, struct event *event,
2620 2870
2621 printf(" | "); 2871 printf(" | ");
2622 2872
2873 if (latency_format) {
2874 print_lat_fmt(data, size);
2875 printf(" | ");
2876 }
2877
2623 field = find_field(event, "func"); 2878 field = find_field(event, "func");
2624 if (!field) 2879 if (!field)
2625 die("function entry does not have func field"); 2880 die("function entry does not have func field");
@@ -2663,6 +2918,11 @@ pretty_print_func_ret(void *data, int size __unused, struct event *event,
2663 2918
2664 printf(" | "); 2919 printf(" | ");
2665 2920
2921 if (latency_format) {
2922 print_lat_fmt(data, size);
2923 printf(" | ");
2924 }
2925
2666 field = find_field(event, "rettime"); 2926 field = find_field(event, "rettime");
2667 if (!field) 2927 if (!field)
2668 die("can't find rettime in return graph"); 2928 die("can't find rettime in return graph");
@@ -2724,19 +2984,30 @@ void print_event(int cpu, void *data, int size, unsigned long long nsecs,
2724 2984
2725 event = trace_find_event(type); 2985 event = trace_find_event(type);
2726 if (!event) { 2986 if (!event) {
2727 printf("ug! no event found for type %d\n", type); 2987 warning("ug! no event found for type %d", type);
2728 return; 2988 return;
2729 } 2989 }
2730 2990
2731 pid = parse_common_pid(data); 2991 pid = trace_parse_common_pid(data);
2732 2992
2733 if (event->flags & (EVENT_FL_ISFUNCENT | EVENT_FL_ISFUNCRET)) 2993 if (event->flags & (EVENT_FL_ISFUNCENT | EVENT_FL_ISFUNCRET))
2734 return pretty_print_func_graph(data, size, event, cpu, 2994 return pretty_print_func_graph(data, size, event, cpu,
2735 pid, comm, secs, usecs); 2995 pid, comm, secs, usecs);
2736 2996
2737 printf("%16s-%-5d [%03d] %5lu.%09Lu: %s: ", 2997 if (latency_format) {
2738 comm, pid, cpu, 2998 printf("%8.8s-%-5d %3d",
2739 secs, nsecs, event->name); 2999 comm, pid, cpu);
3000 print_lat_fmt(data, size);
3001 } else
3002 printf("%16s-%-5d [%03d]", comm, pid, cpu);
3003
3004 printf(" %5lu.%06lu: %s: ", secs, usecs, event->name);
3005
3006 if (event->flags & EVENT_FL_FAILED) {
3007 printf("EVENT '%s' FAILED TO PARSE\n",
3008 event->name);
3009 return;
3010 }
2740 3011
2741 pretty_print(data, size, event); 3012 pretty_print(data, size, event);
2742 printf("\n"); 3013 printf("\n");
@@ -2807,46 +3078,71 @@ static void print_args(struct print_arg *args)
2807 } 3078 }
2808} 3079}
2809 3080
2810static void parse_header_field(char *type, 3081static void parse_header_field(const char *field,
2811 int *offset, int *size) 3082 int *offset, int *size)
2812{ 3083{
2813 char *token; 3084 char *token;
3085 int type;
2814 3086
2815 if (read_expected(EVENT_ITEM, (char *)"field") < 0) 3087 if (read_expected(EVENT_ITEM, "field") < 0)
2816 return; 3088 return;
2817 if (read_expected(EVENT_OP, (char *)":") < 0) 3089 if (read_expected(EVENT_OP, ":") < 0)
2818 return; 3090 return;
3091
2819 /* type */ 3092 /* type */
2820 if (read_expect_type(EVENT_ITEM, &token) < 0) 3093 if (read_expect_type(EVENT_ITEM, &token) < 0)
2821 return; 3094 goto fail;
2822 free_token(token); 3095 free_token(token);
2823 3096
2824 if (read_expected(EVENT_ITEM, type) < 0) 3097 if (read_expected(EVENT_ITEM, field) < 0)
2825 return; 3098 return;
2826 if (read_expected(EVENT_OP, (char *)";") < 0) 3099 if (read_expected(EVENT_OP, ";") < 0)
2827 return; 3100 return;
2828 if (read_expected(EVENT_ITEM, (char *)"offset") < 0) 3101 if (read_expected(EVENT_ITEM, "offset") < 0)
2829 return; 3102 return;
2830 if (read_expected(EVENT_OP, (char *)":") < 0) 3103 if (read_expected(EVENT_OP, ":") < 0)
2831 return; 3104 return;
2832 if (read_expect_type(EVENT_ITEM, &token) < 0) 3105 if (read_expect_type(EVENT_ITEM, &token) < 0)
2833 return; 3106 goto fail;
2834 *offset = atoi(token); 3107 *offset = atoi(token);
2835 free_token(token); 3108 free_token(token);
2836 if (read_expected(EVENT_OP, (char *)";") < 0) 3109 if (read_expected(EVENT_OP, ";") < 0)
2837 return; 3110 return;
2838 if (read_expected(EVENT_ITEM, (char *)"size") < 0) 3111 if (read_expected(EVENT_ITEM, "size") < 0)
2839 return; 3112 return;
2840 if (read_expected(EVENT_OP, (char *)":") < 0) 3113 if (read_expected(EVENT_OP, ":") < 0)
2841 return; 3114 return;
2842 if (read_expect_type(EVENT_ITEM, &token) < 0) 3115 if (read_expect_type(EVENT_ITEM, &token) < 0)
2843 return; 3116 goto fail;
2844 *size = atoi(token); 3117 *size = atoi(token);
2845 free_token(token); 3118 free_token(token);
2846 if (read_expected(EVENT_OP, (char *)";") < 0) 3119 if (read_expected(EVENT_OP, ";") < 0)
2847 return;
2848 if (read_expect_type(EVENT_NEWLINE, &token) < 0)
2849 return; 3120 return;
3121 type = read_token(&token);
3122 if (type != EVENT_NEWLINE) {
3123 /* newer versions of the kernel have a "signed" type */
3124 if (type != EVENT_ITEM)
3125 goto fail;
3126
3127 if (strcmp(token, "signed") != 0)
3128 goto fail;
3129
3130 free_token(token);
3131
3132 if (read_expected(EVENT_OP, ":") < 0)
3133 return;
3134
3135 if (read_expect_type(EVENT_ITEM, &token))
3136 goto fail;
3137
3138 free_token(token);
3139 if (read_expected(EVENT_OP, ";") < 0)
3140 return;
3141
3142 if (read_expect_type(EVENT_NEWLINE, &token))
3143 goto fail;
3144 }
3145 fail:
2850 free_token(token); 3146 free_token(token);
2851} 3147}
2852 3148
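
For reference, each parse_header_field() call consumes one line of the header-page description, and on newer kernels every field carries the trailing signed: qualifier that the new token handling accepts. A representative input line (values hypothetical, shape matching debugfs tracing/events/header_page):

        field: u64 timestamp;   offset:0;       size:8; signed:0;
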
@@ -2854,11 +3150,11 @@ int parse_header_page(char *buf, unsigned long size)
2854{ 3150{
2855 init_input_buf(buf, size); 3151 init_input_buf(buf, size);
2856 3152
2857 parse_header_field((char *)"timestamp", &header_page_ts_offset, 3153 parse_header_field("timestamp", &header_page_ts_offset,
2858 &header_page_ts_size); 3154 &header_page_ts_size);
2859 parse_header_field((char *)"commit", &header_page_size_offset, 3155 parse_header_field("commit", &header_page_size_offset,
2860 &header_page_size_size); 3156 &header_page_size_size);
2861 parse_header_field((char *)"data", &header_page_data_offset, 3157 parse_header_field("data", &header_page_data_offset,
2862 &header_page_data_size); 3158 &header_page_data_size);
2863 3159
2864 return 0; 3160 return 0;
@@ -2909,6 +3205,9 @@ int parse_ftrace_file(char *buf, unsigned long size)
2909 if (ret < 0) 3205 if (ret < 0)
2910 die("failed to read ftrace event print fmt"); 3206 die("failed to read ftrace event print fmt");
2911 3207
3208 /* New ftrace handles args */
3209 if (ret > 0)
3210 return 0;
2912 /* 3211 /*
2913 * The arguments for ftrace files are parsed by the fields. 3212 * The arguments for ftrace files are parsed by the fields.
2914 * Set up the fields as their arguments. 3213 * Set up the fields as their arguments.
@@ -2926,7 +3225,7 @@ int parse_ftrace_file(char *buf, unsigned long size)
2926 return 0; 3225 return 0;
2927} 3226}
2928 3227
2929int parse_event_file(char *buf, unsigned long size, char *system__unused __unused) 3228int parse_event_file(char *buf, unsigned long size, char *sys)
2930{ 3229{
2931 struct event *event; 3230 struct event *event;
2932 int ret; 3231 int ret;
@@ -2946,12 +3245,18 @@ int parse_event_file(char *buf, unsigned long size, char *system__unused __unuse
2946 die("failed to read event id"); 3245 die("failed to read event id");
2947 3246
2948 ret = event_read_format(event); 3247 ret = event_read_format(event);
2949 if (ret < 0) 3248 if (ret < 0) {
2950 die("failed to read event format"); 3249 warning("failed to read event format for %s", event->name);
3250 goto event_failed;
3251 }
2951 3252
2952 ret = event_read_print(event); 3253 ret = event_read_print(event);
2953 if (ret < 0) 3254 if (ret < 0) {
2954 die("failed to read event print fmt"); 3255 warning("failed to read event print fmt for %s", event->name);
3256 goto event_failed;
3257 }
3258
3259 event->system = strdup(sys);
2955 3260
2956#define PRINT_ARGS 0 3261#define PRINT_ARGS 0
2957 if (PRINT_ARGS && event->print_fmt.args) 3262 if (PRINT_ARGS && event->print_fmt.args)
@@ -2959,6 +3264,12 @@ int parse_event_file(char *buf, unsigned long size, char *system__unused __unuse
2959 3264
2960 add_event(event); 3265 add_event(event);
2961 return 0; 3266 return 0;
3267
3268 event_failed:
3269 event->flags |= EVENT_FL_FAILED;
3270 /* still add it even if it failed */
3271 add_event(event);
3272 return -1;
2962} 3273}
2963 3274
2964void parse_set_info(int nr_cpus, int long_sz) 3275void parse_set_info(int nr_cpus, int long_sz)
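
Recording the subsystem name in event->system is what lets consumers build qualified event names. The Perl glue in the new file below does exactly that, e.g. (the event name is hypothetical):

        sprintf(handler, "%s::%s", event->system, event->name);  /* "sched::sched_switch" */
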
diff --git a/tools/perf/util/trace-event-perl.c b/tools/perf/util/trace-event-perl.c
new file mode 100644
index 000000000000..51e833fd58c3
--- /dev/null
+++ b/tools/perf/util/trace-event-perl.c
@@ -0,0 +1,598 @@
1/*
2 * trace-event-perl. Feed perf trace events to an embedded Perl interpreter.
3 *
4 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
21
22#include <stdio.h>
23#include <stdlib.h>
24#include <string.h>
25#include <ctype.h>
26#include <errno.h>
27
28#include "../perf.h"
29#include "util.h"
30#include "trace-event.h"
31#include "trace-event-perl.h"
32
33void xs_init(pTHX);
34
35void boot_Perf__Trace__Context(pTHX_ CV *cv);
36void boot_DynaLoader(pTHX_ CV *cv);
37
38void xs_init(pTHX)
39{
40 const char *file = __FILE__;
41 dXSUB_SYS;
42
43 newXS("Perf::Trace::Context::bootstrap", boot_Perf__Trace__Context,
44 file);
45 newXS("DynaLoader::boot_DynaLoader", boot_DynaLoader, file);
46}
47
48INTERP my_perl;
49
50#define FTRACE_MAX_EVENT \
51 ((1 << (sizeof(unsigned short) * 8)) - 1)
52
53struct event *events[FTRACE_MAX_EVENT];
54
55static struct scripting_context *scripting_context;
56
57static char *cur_field_name;
58static int zero_flag_atom;
59
60static void define_symbolic_value(const char *ev_name,
61 const char *field_name,
62 const char *field_value,
63 const char *field_str)
64{
65 unsigned long long value;
66 dSP;
67
68 value = eval_flag(field_value);
69
70 ENTER;
71 SAVETMPS;
72 PUSHMARK(SP);
73
74 XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
75 XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
76 XPUSHs(sv_2mortal(newSVuv(value)));
77 XPUSHs(sv_2mortal(newSVpv(field_str, 0)));
78
79 PUTBACK;
80 if (get_cv("main::define_symbolic_value", 0))
81 call_pv("main::define_symbolic_value", G_SCALAR);
82 SPAGAIN;
83 PUTBACK;
84 FREETMPS;
85 LEAVE;
86}
87
88static void define_symbolic_values(struct print_flag_sym *field,
89 const char *ev_name,
90 const char *field_name)
91{
92 define_symbolic_value(ev_name, field_name, field->value, field->str);
93 if (field->next)
94 define_symbolic_values(field->next, ev_name, field_name);
95}
96
97static void define_symbolic_field(const char *ev_name,
98 const char *field_name)
99{
100 dSP;
101
102 ENTER;
103 SAVETMPS;
104 PUSHMARK(SP);
105
106 XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
107 XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
108
109 PUTBACK;
110 if (get_cv("main::define_symbolic_field", 0))
111 call_pv("main::define_symbolic_field", G_SCALAR);
112 SPAGAIN;
113 PUTBACK;
114 FREETMPS;
115 LEAVE;
116}
117
118static void define_flag_value(const char *ev_name,
119 const char *field_name,
120 const char *field_value,
121 const char *field_str)
122{
123 unsigned long long value;
124 dSP;
125
126 value = eval_flag(field_value);
127
128 ENTER;
129 SAVETMPS;
130 PUSHMARK(SP);
131
132 XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
133 XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
134 XPUSHs(sv_2mortal(newSVuv(value)));
135 XPUSHs(sv_2mortal(newSVpv(field_str, 0)));
136
137 PUTBACK;
138 if (get_cv("main::define_flag_value", 0))
139 call_pv("main::define_flag_value", G_SCALAR);
140 SPAGAIN;
141 PUTBACK;
142 FREETMPS;
143 LEAVE;
144}
145
146static void define_flag_values(struct print_flag_sym *field,
147 const char *ev_name,
148 const char *field_name)
149{
150 define_flag_value(ev_name, field_name, field->value, field->str);
151 if (field->next)
152 define_flag_values(field->next, ev_name, field_name);
153}
154
155static void define_flag_field(const char *ev_name,
156 const char *field_name,
157 const char *delim)
158{
159 dSP;
160
161 ENTER;
162 SAVETMPS;
163 PUSHMARK(SP);
164
165 XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
166 XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
167 XPUSHs(sv_2mortal(newSVpv(delim, 0)));
168
169 PUTBACK;
170 if (get_cv("main::define_flag_field", 0))
171 call_pv("main::define_flag_field", G_SCALAR);
172 SPAGAIN;
173 PUTBACK;
174 FREETMPS;
175 LEAVE;
176}
177
178static void define_event_symbols(struct event *event,
179 const char *ev_name,
180 struct print_arg *args)
181{
182 switch (args->type) {
183 case PRINT_NULL:
184 break;
185 case PRINT_ATOM:
186 define_flag_value(ev_name, cur_field_name, "0",
187 args->atom.atom);
188 zero_flag_atom = 0;
189 break;
190 case PRINT_FIELD:
191 if (cur_field_name)
192 free(cur_field_name);
193 cur_field_name = strdup(args->field.name);
194 break;
195 case PRINT_FLAGS:
196 define_event_symbols(event, ev_name, args->flags.field);
197 define_flag_field(ev_name, cur_field_name, args->flags.delim);
198 define_flag_values(args->flags.flags, ev_name, cur_field_name);
199 break;
200 case PRINT_SYMBOL:
201 define_event_symbols(event, ev_name, args->symbol.field);
202 define_symbolic_field(ev_name, cur_field_name);
203 define_symbolic_values(args->symbol.symbols, ev_name,
204 cur_field_name);
205 break;
206 case PRINT_STRING:
207 break;
208 case PRINT_TYPE:
209 define_event_symbols(event, ev_name, args->typecast.item);
210 break;
211 case PRINT_OP:
212 if (strcmp(args->op.op, ":") == 0)
213 zero_flag_atom = 1;
214 define_event_symbols(event, ev_name, args->op.left);
215 define_event_symbols(event, ev_name, args->op.right);
216 break;
217 default:
218 /* we should warn... */
219 return;
220 }
221
222 if (args->next)
223 define_event_symbols(event, ev_name, args->next);
224}
225
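/*
 * Illustration (hypothetical event): a print fmt argument such as
 *
 *     __print_flags(REC->flags, "|", { 1, "HRTIMER_RESTART" })
 *
 * reaches the PRINT_FLAGS case above and produces
 *
 *     define_flag_field("timer::hrtimer_expire_exit", "flags", "|");
 *     define_flag_value("timer::hrtimer_expire_exit", "flags", "1",
 *                       "HRTIMER_RESTART");
 *
 * so the script side can later render the value symbolically via
 * flag_str().
 */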
226static inline struct event *find_cache_event(int type)
227{
228 static char ev_name[256];
229 struct event *event;
230
231 if (events[type])
232 return events[type];
233
234 events[type] = event = trace_find_event(type);
235 if (!event)
236 return NULL;
237
238 sprintf(ev_name, "%s::%s", event->system, event->name);
239
240 define_event_symbols(event, ev_name, event->print_fmt.args);
241
242 return event;
243}
244
245int common_pc(struct scripting_context *context)
246{
247 int pc;
248
249 pc = parse_common_pc(context->event_data);
250
251 return pc;
252}
253
254int common_flags(struct scripting_context *context)
255{
256 int flags;
257
258 flags = parse_common_flags(context->event_data);
259
260 return flags;
261}
262
263int common_lock_depth(struct scripting_context *context)
264{
265 int lock_depth;
266
267 lock_depth = parse_common_lock_depth(context->event_data);
268
269 return lock_depth;
270}
271
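/*
 * These three helpers are the C side of the Perf::Trace::Context xsubs:
 * a handler that received $context can fetch the header fields that are
 * not passed as arguments, e.g. (script side, see Context.pm):
 *
 *     my $pc = common_pc($context);
 */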
272static void perl_process_event(int cpu, void *data,
273 int size __attribute((unused)),
274 unsigned long long nsecs, char *comm)
275{
276 struct format_field *field;
277 static char handler[256];
278 unsigned long long val;
279 unsigned long s, ns;
280 struct event *event;
281 int type;
282 int pid;
283
284 dSP;
285
286 type = trace_parse_common_type(data);
287
288 event = find_cache_event(type);
289 if (!event)
290 die("ug! no event found for type %d", type);
291
292 pid = trace_parse_common_pid(data);
293
294 sprintf(handler, "%s::%s", event->system, event->name);
295
296 s = nsecs / NSECS_PER_SEC;
297 ns = nsecs - s * NSECS_PER_SEC;
298
299 scripting_context->event_data = data;
300
301 ENTER;
302 SAVETMPS;
303 PUSHMARK(SP);
304
305 XPUSHs(sv_2mortal(newSVpv(handler, 0)));
306 XPUSHs(sv_2mortal(newSViv(PTR2IV(scripting_context))));
307 XPUSHs(sv_2mortal(newSVuv(cpu)));
308 XPUSHs(sv_2mortal(newSVuv(s)));
309 XPUSHs(sv_2mortal(newSVuv(ns)));
310 XPUSHs(sv_2mortal(newSViv(pid)));
311 XPUSHs(sv_2mortal(newSVpv(comm, 0)));
312
313 /* common fields other than pid can be accessed via xsub fns */
314
315 for (field = event->format.fields; field; field = field->next) {
316 if (field->flags & FIELD_IS_STRING) {
317 int offset;
318 if (field->flags & FIELD_IS_DYNAMIC) {
319 offset = *(int *)(data + field->offset);
320 offset &= 0xffff;
321 } else
322 offset = field->offset;
323 XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0)));
324 } else { /* FIELD_IS_NUMERIC */
325 val = read_size(data + field->offset, field->size);
326 if (field->flags & FIELD_IS_SIGNED) {
327 XPUSHs(sv_2mortal(newSViv(val)));
328 } else {
329 XPUSHs(sv_2mortal(newSVuv(val)));
330 }
331 }
332 }
333
334 PUTBACK;
335
336 if (get_cv(handler, 0))
337 call_pv(handler, G_SCALAR);
338 else if (get_cv("main::trace_unhandled", 0)) {
339 XPUSHs(sv_2mortal(newSVpv(handler, 0)));
340 XPUSHs(sv_2mortal(newSViv(PTR2IV(scripting_context))));
341 XPUSHs(sv_2mortal(newSVuv(cpu)));
342 XPUSHs(sv_2mortal(newSVuv(nsecs)));
343 XPUSHs(sv_2mortal(newSViv(pid)));
344 XPUSHs(sv_2mortal(newSVpv(comm, 0)));
345 call_pv("main::trace_unhandled", G_SCALAR);
346 }
347 SPAGAIN;
348 PUTBACK;
349 FREETMPS;
350 LEAVE;
351}
352
353static void run_start_sub(void)
354{
355 dSP; /* access to Perl stack */
356 PUSHMARK(SP);
357
358 if (get_cv("main::trace_begin", 0))
359 call_pv("main::trace_begin", G_DISCARD | G_NOARGS);
360}
361
362/*
363 * Start trace script
364 */
365static int perl_start_script(const char *script)
366{
367 const char *command_line[2] = { "", NULL };
368
369 command_line[1] = script;
370
371 my_perl = perl_alloc();
372 perl_construct(my_perl);
373
374 if (perl_parse(my_perl, xs_init, 2, (char **)command_line,
375 (char **)NULL))
376 return -1;
377
378 perl_run(my_perl);
379 if (SvTRUE(ERRSV))
380 return -1;
381
382 run_start_sub();
383
384 fprintf(stderr, "perf trace started with Perl script %s\n\n", script);
385
386 return 0;
387}
388
389/*
390 * Stop trace script
391 */
392static int perl_stop_script(void)
393{
394 dSP; /* access to Perl stack */
395 PUSHMARK(SP);
396
397 if (get_cv("main::trace_end", 0))
398 call_pv("main::trace_end", G_DISCARD | G_NOARGS);
399
400 perl_destruct(my_perl);
401 perl_free(my_perl);
402
403 fprintf(stderr, "\nperf trace Perl script stopped\n");
404
405 return 0;
406}
407
408static int perl_generate_script(const char *outfile)
409{
410 struct event *event = NULL;
411 struct format_field *f;
412 char fname[PATH_MAX];
413 int not_first, count;
414 FILE *ofp;
415
416 sprintf(fname, "%s.pl", outfile);
417 ofp = fopen(fname, "w");
418 if (ofp == NULL) {
419 fprintf(stderr, "couldn't open %s\n", fname);
420 return -1;
421 }
422
423 fprintf(ofp, "# perf trace event handlers, "
424 "generated by perf trace -g perl\n");
425
426 fprintf(ofp, "# Licensed under the terms of the GNU GPL"
427 " License version 2\n\n");
428
429 fprintf(ofp, "# The common_* event handler fields are the most useful "
430 "fields common to\n");
431
432 fprintf(ofp, "# all events. They don't necessarily correspond to "
433 "the 'common_*' fields\n");
434
435 fprintf(ofp, "# in the format files. Those fields not available as "
436 "handler params can\n");
437
438 fprintf(ofp, "# be retrieved using Perl functions of the form "
439 "common_*($context).\n");
440
441 fprintf(ofp, "# See Context.pm for the list of available "
442 "functions.\n\n");
443
444 fprintf(ofp, "use lib \"$ENV{'PERF_EXEC_PATH'}/scripts/perl/"
445 "Perf-Trace-Util/lib\";\n");
446
447 fprintf(ofp, "use lib \"./Perf-Trace-Util/lib\";\n");
448 fprintf(ofp, "use Perf::Trace::Core;\n");
449 fprintf(ofp, "use Perf::Trace::Context;\n");
450 fprintf(ofp, "use Perf::Trace::Util;\n\n");
451
452 fprintf(ofp, "sub trace_begin\n{\n\t# optional\n}\n\n");
453 fprintf(ofp, "sub trace_end\n{\n\t# optional\n}\n\n");
454
455 while ((event = trace_find_next_event(event))) {
456 fprintf(ofp, "sub %s::%s\n{\n", event->system, event->name);
457 fprintf(ofp, "\tmy (");
458
459 fprintf(ofp, "$event_name, ");
460 fprintf(ofp, "$context, ");
461 fprintf(ofp, "$common_cpu, ");
462 fprintf(ofp, "$common_secs, ");
463 fprintf(ofp, "$common_nsecs,\n");
464 fprintf(ofp, "\t $common_pid, ");
465 fprintf(ofp, "$common_comm,\n\t ");
466
467 not_first = 0;
468 count = 0;
469
470 for (f = event->format.fields; f; f = f->next) {
471 if (not_first++)
472 fprintf(ofp, ", ");
473 if (++count % 5 == 0)
474 fprintf(ofp, "\n\t ");
475
476 fprintf(ofp, "$%s", f->name);
477 }
478 fprintf(ofp, ") = @_;\n\n");
479
480 fprintf(ofp, "\tprint_header($event_name, $common_cpu, "
481 "$common_secs, $common_nsecs,\n\t "
482 "$common_pid, $common_comm);\n\n");
483
484 fprintf(ofp, "\tprintf(\"");
485
486 not_first = 0;
487 count = 0;
488
489 for (f = event->format.fields; f; f = f->next) {
490 if (not_first++)
491 fprintf(ofp, ", ");
492 if (count && count % 4 == 0) {
493 fprintf(ofp, "\".\n\t \"");
494 }
495 count++;
496
497 fprintf(ofp, "%s=", f->name);
498 if (f->flags & FIELD_IS_STRING ||
499 f->flags & FIELD_IS_FLAG ||
500 f->flags & FIELD_IS_SYMBOLIC)
501 fprintf(ofp, "%%s");
502 else if (f->flags & FIELD_IS_SIGNED)
503 fprintf(ofp, "%%d");
504 else
505 fprintf(ofp, "%%u");
506 }
507
508 fprintf(ofp, "\\n\",\n\t ");
509
510 not_first = 0;
511 count = 0;
512
513 for (f = event->format.fields; f; f = f->next) {
514 if (not_first++)
515 fprintf(ofp, ", ");
516
517 if (++count % 5 == 0)
518 fprintf(ofp, "\n\t ");
519
520 if (f->flags & FIELD_IS_FLAG) {
521 if ((count - 1) % 5 != 0) {
522 fprintf(ofp, "\n\t ");
523 count = 4;
524 }
525 fprintf(ofp, "flag_str(\"");
526 fprintf(ofp, "%s::%s\", ", event->system,
527 event->name);
528 fprintf(ofp, "\"%s\", $%s)", f->name,
529 f->name);
530 } else if (f->flags & FIELD_IS_SYMBOLIC) {
531 if ((count - 1) % 5 != 0) {
532 fprintf(ofp, "\n\t ");
533 count = 4;
534 }
535 fprintf(ofp, "symbol_str(\"");
536 fprintf(ofp, "%s::%s\", ", event->system,
537 event->name);
538 fprintf(ofp, "\"%s\", $%s)", f->name,
539 f->name);
540 } else
541 fprintf(ofp, "$%s", f->name);
542 }
543
544 fprintf(ofp, ");\n");
545 fprintf(ofp, "}\n\n");
546 }
547
548 fprintf(ofp, "sub trace_unhandled\n{\n\tmy ($event_name, $context, "
549 "$common_cpu, $common_secs, $common_nsecs,\n\t "
550 "$common_pid, $common_comm) = @_;\n\n");
551
552 fprintf(ofp, "\tprint_header($event_name, $common_cpu, "
553 "$common_secs, $common_nsecs,\n\t $common_pid, "
554 "$common_comm);\n}\n\n");
555
556 fprintf(ofp, "sub print_header\n{\n"
557 "\tmy ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_;\n\n"
558 "\tprintf(\"%%-20s %%5u %%05u.%%09u %%8u %%-20s \",\n\t "
559 "$event_name, $cpu, $secs, $nsecs, $pid, $comm);\n}");
560
561 fclose(ofp);
562
563 fprintf(stderr, "generated Perl script: %s\n", fname);
564
565 return 0;
566}
567
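/*
 * For a hypothetical sched::sched_wakeup event with fields comm, pid,
 * prio, success and target_cpu, the generator above emits a handler
 * shaped like:
 *
 *     sub sched::sched_wakeup
 *     {
 *         my ($event_name, $context, $common_cpu, $common_secs,
 *             $common_nsecs, $common_pid, $common_comm,
 *             $comm, $pid, $prio, $success, $target_cpu) = @_;
 *
 *         print_header($event_name, $common_cpu, $common_secs,
 *                      $common_nsecs, $common_pid, $common_comm);
 *         ...
 *     }
 */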
568struct scripting_ops perl_scripting_ops = {
569 .name = "Perl",
570 .start_script = perl_start_script,
571 .stop_script = perl_stop_script,
572 .process_event = perl_process_event,
573 .generate_script = perl_generate_script,
574};
575
576#ifdef NO_LIBPERL
577void setup_perl_scripting(void)
578{
579 fprintf(stderr, "Perl scripting not supported."
580 " Install libperl and rebuild perf to enable it. e.g. "
581 "apt-get install libperl-dev (ubuntu), yum install "
582 "perl-ExtUtils-Embed (Fedora), etc.\n");
583}
584#else
585void setup_perl_scripting(void)
586{
587 int err;
588 err = script_spec_register("Perl", &perl_scripting_ops);
589 if (err)
590 die("error registering Perl script extension");
591
592 err = script_spec_register("pl", &perl_scripting_ops);
593 if (err)
594 die("error registering pl script extension");
595
596 scripting_context = malloc(sizeof(struct scripting_context));
597}
598#endif
diff --git a/tools/perf/util/trace-event-perl.h b/tools/perf/util/trace-event-perl.h
new file mode 100644
index 000000000000..8fe0d866fe1a
--- /dev/null
+++ b/tools/perf/util/trace-event-perl.h
@@ -0,0 +1,51 @@
1#ifndef __PERF_TRACE_EVENT_PERL_H
2#define __PERF_TRACE_EVENT_PERL_H
3#ifdef NO_LIBPERL
4typedef int INTERP;
5#define dSP
6#define ENTER
7#define SAVETMPS
8#define PUTBACK
9#define SPAGAIN
10#define FREETMPS
11#define LEAVE
12#define SP
13#define ERRSV
14#define G_SCALAR (0)
15#define G_DISCARD (0)
16#define G_NOARGS (0)
17#define PUSHMARK(a)
18#define SvTRUE(a) (0)
19#define XPUSHs(s)
20#define sv_2mortal(a)
21#define newSVpv(a,b)
22#define newSVuv(a)
23#define newSViv(a)
24#define get_cv(a,b) (0)
25#define call_pv(a,b) (0)
26#define perl_alloc() (0)
27#define perl_construct(a) (0)
28#define perl_parse(a,b,c,d,e) (0)
29#define perl_run(a) (0)
30#define perl_destruct(a) (0)
31#define perl_free(a) (0)
32#define pTHX void
33#define CV void
34#define dXSUB_SYS
35#define pTHX_
36static inline void newXS(const char *a, void *b, const char *c) {}
37#else
38#include <EXTERN.h>
39#include <perl.h>
40typedef PerlInterpreter * INTERP;
41#endif
42
43struct scripting_context {
44 void *event_data;
45};
46
47int common_pc(struct scripting_context *context);
48int common_flags(struct scripting_context *context);
49int common_lock_depth(struct scripting_context *context);
50
51#endif /* __PERF_TRACE_EVENT_PERL_H */
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index 1b5c847d2c22..342dfdd43f87 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -458,9 +458,8 @@ struct record *trace_read_data(int cpu)
458 return data; 458 return data;
459} 459}
460 460
461void trace_report(void) 461void trace_report(int fd)
462{ 462{
463 const char *input_file = "trace.info";
464 char buf[BUFSIZ]; 463 char buf[BUFSIZ];
465 char test[] = { 23, 8, 68 }; 464 char test[] = { 23, 8, 68 };
466 char *version; 465 char *version;
@@ -468,17 +467,15 @@ void trace_report(void)
468 int show_funcs = 0; 467 int show_funcs = 0;
469 int show_printk = 0; 468 int show_printk = 0;
470 469
471 input_fd = open(input_file, O_RDONLY); 470 input_fd = fd;
472 if (input_fd < 0)
473 die("opening '%s'\n", input_file);
474 471
475 read_or_die(buf, 3); 472 read_or_die(buf, 3);
476 if (memcmp(buf, test, 3) != 0) 473 if (memcmp(buf, test, 3) != 0)
477 die("not an trace data file"); 474 die("no trace data in the file");
478 475
479 read_or_die(buf, 7); 476 read_or_die(buf, 7);
480 if (memcmp(buf, "tracing", 7) != 0) 477 if (memcmp(buf, "tracing", 7) != 0)
481 die("not a trace file (missing tracing)"); 478 die("not a trace file (missing 'tracing' tag)");
482 479
483 version = read_string(); 480 version = read_string();
484 if (show_version) 481 if (show_version)
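
trace_report() now takes an already-open descriptor instead of opening a hardcoded trace.info, so callers can hand it any file. A hedged sketch of a caller (input_name is hypothetical):

        int fd = open(input_name, O_RDONLY);

        if (fd < 0)
                die("opening '%s'", input_name);
        trace_report(fd);
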
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index 693f815c9429..81698d5e6503 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -1,5 +1,5 @@
1#ifndef _TRACE_EVENTS_H 1#ifndef __PERF_TRACE_EVENTS_H
2#define _TRACE_EVENTS_H 2#define __PERF_TRACE_EVENTS_H
3 3
4#include "parse-events.h" 4#include "parse-events.h"
5 5
@@ -26,6 +26,11 @@ enum {
26enum format_flags { 26enum format_flags {
27 FIELD_IS_ARRAY = 1, 27 FIELD_IS_ARRAY = 1,
28 FIELD_IS_POINTER = 2, 28 FIELD_IS_POINTER = 2,
29 FIELD_IS_SIGNED = 4,
30 FIELD_IS_STRING = 8,
31 FIELD_IS_DYNAMIC = 16,
32 FIELD_IS_FLAG = 32,
33 FIELD_IS_SYMBOLIC = 64,
29}; 34};
30 35
31struct format_field { 36struct format_field {
@@ -132,15 +137,18 @@ struct event {
132 int flags; 137 int flags;
133 struct format format; 138 struct format format;
134 struct print_fmt print_fmt; 139 struct print_fmt print_fmt;
140 char *system;
135}; 141};
136 142
137enum { 143enum {
138 EVENT_FL_ISFTRACE = 1, 144 EVENT_FL_ISFTRACE = 0x01,
139 EVENT_FL_ISPRINT = 2, 145 EVENT_FL_ISPRINT = 0x02,
140 EVENT_FL_ISBPRINT = 4, 146 EVENT_FL_ISBPRINT = 0x04,
141 EVENT_FL_ISFUNC = 8, 147 EVENT_FL_ISFUNC = 0x08,
142 EVENT_FL_ISFUNCENT = 16, 148 EVENT_FL_ISFUNCENT = 0x10,
143 EVENT_FL_ISFUNCRET = 32, 149 EVENT_FL_ISFUNCRET = 0x20,
150
151 EVENT_FL_FAILED = 0x80000000
144}; 152};
145 153
146struct record { 154struct record {
@@ -154,7 +162,7 @@ struct record *trace_read_data(int cpu);
154 162
155void parse_set_info(int nr_cpus, int long_sz); 163void parse_set_info(int nr_cpus, int long_sz);
156 164
157void trace_report(void); 165void trace_report(int fd);
158 166
159void *malloc_or_die(unsigned int size); 167void *malloc_or_die(unsigned int size);
160 168
@@ -166,7 +174,7 @@ void print_funcs(void);
166void print_printk(void); 174void print_printk(void);
167 175
168int parse_ftrace_file(char *buf, unsigned long size); 176int parse_ftrace_file(char *buf, unsigned long size);
169int parse_event_file(char *buf, unsigned long size, char *system); 177int parse_event_file(char *buf, unsigned long size, char *sys);
170void print_event(int cpu, void *data, int size, unsigned long long nsecs, 178void print_event(int cpu, void *data, int size, unsigned long long nsecs,
171 char *comm); 179 char *comm);
172 180
@@ -233,13 +241,45 @@ extern int header_page_size_size;
233extern int header_page_data_offset; 241extern int header_page_data_offset;
234extern int header_page_data_size; 242extern int header_page_data_size;
235 243
244extern int latency_format;
245
236int parse_header_page(char *buf, unsigned long size); 246int parse_header_page(char *buf, unsigned long size);
237int trace_parse_common_type(void *data); 247int trace_parse_common_type(void *data);
248int trace_parse_common_pid(void *data);
249int parse_common_pc(void *data);
250int parse_common_flags(void *data);
251int parse_common_lock_depth(void *data);
238struct event *trace_find_event(int id); 252struct event *trace_find_event(int id);
253struct event *trace_find_next_event(struct event *event);
254unsigned long long read_size(void *ptr, int size);
239unsigned long long 255unsigned long long
240raw_field_value(struct event *event, const char *name, void *data); 256raw_field_value(struct event *event, const char *name, void *data);
241void *raw_field_ptr(struct event *event, const char *name, void *data); 257void *raw_field_ptr(struct event *event, const char *name, void *data);
258unsigned long long eval_flag(const char *flag);
259
260int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events);
261
262/* taken from kernel/trace/trace.h */
263enum trace_flag_type {
264 TRACE_FLAG_IRQS_OFF = 0x01,
265 TRACE_FLAG_IRQS_NOSUPPORT = 0x02,
266 TRACE_FLAG_NEED_RESCHED = 0x04,
267 TRACE_FLAG_HARDIRQ = 0x08,
268 TRACE_FLAG_SOFTIRQ = 0x10,
269};
270
271struct scripting_ops {
272 const char *name;
273 int (*start_script) (const char *);
274 int (*stop_script) (void);
275 void (*process_event) (int cpu, void *data, int size,
276 unsigned long long nsecs, char *comm);
277 int (*generate_script) (const char *outfile);
278};
279
280int script_spec_register(const char *spec, struct scripting_ops *ops);
242 281
243void read_tracing_data(struct perf_event_attr *pattrs, int nb_events); 282extern struct scripting_ops perl_scripting_ops;
283void setup_perl_scripting(void);
244 284
245#endif /* _TRACE_EVENTS_H */ 285#endif /* __PERF_TRACE_EVENTS_H */
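
The scripting_ops/script_spec_register() pair is a generic plug-in point; Perl is simply its first registrant. A hedged sketch of wiring up another interpreter the same way setup_perl_scripting() does (all my_* names hypothetical):

        static struct scripting_ops my_scripting_ops = {
                .name            = "MyLang",
                .start_script    = my_start_script,
                .stop_script     = my_stop_script,
                .process_event   = my_process_event,
                .generate_script = my_generate_script,
        };

        if (script_spec_register("ml", &my_scripting_ops))
                die("error registering ml script extension");
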
diff --git a/tools/perf/util/types.h b/tools/perf/util/types.h
index 5e75f9005940..7d6b8331f898 100644
--- a/tools/perf/util/types.h
+++ b/tools/perf/util/types.h
@@ -1,5 +1,5 @@
1#ifndef _PERF_TYPES_H 1#ifndef __PERF_TYPES_H
2#define _PERF_TYPES_H 2#define __PERF_TYPES_H
3 3
4/* 4/*
5 * We define u64 as unsigned long long for every architecture 5 * We define u64 as unsigned long long for every architecture
@@ -14,4 +14,4 @@ typedef signed short s16;
14typedef unsigned char u8; 14typedef unsigned char u8;
15typedef signed char s8; 15typedef signed char s8;
16 16
17#endif /* _PERF_TYPES_H */ 17#endif /* __PERF_TYPES_H */
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 9de2329dd44d..c673d8825883 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -84,6 +84,9 @@
84#include <iconv.h> 84#include <iconv.h>
85#endif 85#endif
86 86
87extern const char *graph_line;
88extern const char *graph_dotted_line;
89
87/* On most systems <limits.h> would have given us this, but 90/* On most systems <limits.h> would have given us this, but
88 * not on some systems (e.g. GNU/Hurd). 91 * not on some systems (e.g. GNU/Hurd).
89 */ 92 */
@@ -134,6 +137,15 @@ extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1,
134extern int error(const char *err, ...) __attribute__((format (printf, 1, 2))); 137extern int error(const char *err, ...) __attribute__((format (printf, 1, 2)));
135extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); 138extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2)));
136 139
140#include "../../../include/linux/stringify.h"
141
142#define DIE_IF(cnd) \
143 do { if (cnd) \
144 die(" at (" __FILE__ ":" __stringify(__LINE__) "): " \
145 __stringify(cnd) "\n"); \
146 } while (0)
147
148
137extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN); 149extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN);
138 150
139extern int prefixcmp(const char *str, const char *prefix); 151extern int prefixcmp(const char *str, const char *prefix);
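
DIE_IF() stringifies both the location and the failed condition at compile time, so a check reports itself. A hypothetical call site and the message it would die with:

        DIE_IF(ret < 0);     /* in builtin-foo.c, line 42 */

         at (builtin-foo.c:42): ret < 0
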
@@ -278,17 +290,15 @@ static inline char *gitstrchrnul(const char *s, int c)
278 * Wrappers: 290 * Wrappers:
279 */ 291 */
280extern char *xstrdup(const char *str); 292extern char *xstrdup(const char *str);
281extern void *xmalloc(size_t size); 293extern void *xmalloc(size_t size) __attribute__((weak));
282extern void *xmemdupz(const void *data, size_t len); 294extern void *xmemdupz(const void *data, size_t len);
283extern char *xstrndup(const char *str, size_t len); 295extern char *xstrndup(const char *str, size_t len);
284extern void *xrealloc(void *ptr, size_t size); 296extern void *xrealloc(void *ptr, size_t size) __attribute__((weak));
285extern void *xcalloc(size_t nmemb, size_t size); 297
286extern void *xmmap(void *start, size_t length, int prot, int flags, int fd, off_t offset); 298static inline void *zalloc(size_t size)
287extern ssize_t xread(int fd, void *buf, size_t len); 299{
288extern ssize_t xwrite(int fd, const void *buf, size_t len); 300 return calloc(1, size);
289extern int xdup(int fd); 301}
290extern FILE *xfdopen(int fd, const char *mode);
291extern int xmkstemp(char *template);
292 302
293static inline size_t xsize_t(off_t len) 303static inline size_t xsize_t(off_t len)
294{ 304{
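
zalloc() replaces the die-on-failure x*() family with a plain zero-initializing wrapper around calloc(), so callers check for failure themselves. A typical (hypothetical) call site:

        struct scripting_context *ctx = zalloc(sizeof(*ctx));

        if (!ctx)
                die("out of memory");
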
@@ -306,6 +316,7 @@ static inline int has_extension(const char *filename, const char *ext)
306#undef isascii 316#undef isascii
307#undef isspace 317#undef isspace
308#undef isdigit 318#undef isdigit
319#undef isxdigit
309#undef isalpha 320#undef isalpha
310#undef isprint 321#undef isprint
311#undef isalnum 322#undef isalnum
@@ -323,6 +334,8 @@ extern unsigned char sane_ctype[256];
323#define isascii(x) (((x) & ~0x7f) == 0) 334#define isascii(x) (((x) & ~0x7f) == 0)
324#define isspace(x) sane_istest(x,GIT_SPACE) 335#define isspace(x) sane_istest(x,GIT_SPACE)
325#define isdigit(x) sane_istest(x,GIT_DIGIT) 336#define isdigit(x) sane_istest(x,GIT_DIGIT)
337#define isxdigit(x) \
338 (sane_istest(toupper(x), GIT_ALPHA | GIT_DIGIT) && toupper(x) < 'G')
326#define isalpha(x) sane_istest(x,GIT_ALPHA) 339#define isalpha(x) sane_istest(x,GIT_ALPHA)
327#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) 340#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
328#define isprint(x) sane_istest(x,GIT_PRINT) 341#define isprint(x) sane_istest(x,GIT_PRINT)
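
The new isxdigit() leans on the existing sane_ctype table: a character qualifies if its upper-cased form is alphanumeric and sorts below 'G', which confines the alphabetic range to A-F. Two worked cases:

        isxdigit('e'):  toupper = 'E', alpha, 'E' < 'G'      =>  true
        isxdigit('g'):  toupper = 'G', alpha, not 'G' < 'G'  =>  false
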
diff --git a/tools/perf/util/values.h b/tools/perf/util/values.h
index cadf8cf2a590..2fa967e1a88a 100644
--- a/tools/perf/util/values.h
+++ b/tools/perf/util/values.h
@@ -1,5 +1,5 @@
1#ifndef _PERF_VALUES_H 1#ifndef __PERF_VALUES_H
2#define _PERF_VALUES_H 2#define __PERF_VALUES_H
3 3
4#include "types.h" 4#include "types.h"
5 5
@@ -24,4 +24,4 @@ void perf_read_values_add_value(struct perf_read_values *values,
24void perf_read_values_display(FILE *fp, struct perf_read_values *values, 24void perf_read_values_display(FILE *fp, struct perf_read_values *values,
25 int raw); 25 int raw);
26 26
27#endif /* _PERF_VALUES_H */ 27#endif /* __PERF_VALUES_H */
diff --git a/tools/perf/util/wrapper.c b/tools/perf/util/wrapper.c
index 4574ac28396f..bf44ca85d23b 100644
--- a/tools/perf/util/wrapper.c
+++ b/tools/perf/util/wrapper.c
@@ -79,43 +79,12 @@ void *xrealloc(void *ptr, size_t size)
79 return ret; 79 return ret;
80} 80}
81 81
82void *xcalloc(size_t nmemb, size_t size)
83{
84 void *ret = calloc(nmemb, size);
85 if (!ret && (!nmemb || !size))
86 ret = calloc(1, 1);
87 if (!ret) {
88 release_pack_memory(nmemb * size, -1);
89 ret = calloc(nmemb, size);
90 if (!ret && (!nmemb || !size))
91 ret = calloc(1, 1);
92 if (!ret)
93 die("Out of memory, calloc failed");
94 }
95 return ret;
96}
97
98void *xmmap(void *start, size_t length,
99 int prot, int flags, int fd, off_t offset)
100{
101 void *ret = mmap(start, length, prot, flags, fd, offset);
102 if (ret == MAP_FAILED) {
103 if (!length)
104 return NULL;
105 release_pack_memory(length, fd);
106 ret = mmap(start, length, prot, flags, fd, offset);
107 if (ret == MAP_FAILED)
108 die("Out of memory? mmap failed: %s", strerror(errno));
109 }
110 return ret;
111}
112
113/* 82/*
114 * xread() is the same as read(), but it automatically restarts read() 83
115 * operations with a recoverable error (EAGAIN and EINTR). xread() 84 * operations with a recoverable error (EAGAIN and EINTR). xread()
116 * DOES NOT GUARANTEE that "len" bytes are read even if the data is available. 85
117 */ 86 */
118ssize_t xread(int fd, void *buf, size_t len) 87static ssize_t xread(int fd, void *buf, size_t len)
119{ 88{
120 ssize_t nr; 89 ssize_t nr;
121 while (1) { 90 while (1) {
@@ -131,7 +100,7 @@ ssize_t xread(int fd, void *buf, size_t len)
131 * operations with a recoverable error (EAGAIN and EINTR). xwrite() DOES NOT 100 * operations with a recoverable error (EAGAIN and EINTR). xwrite() DOES NOT
132 * GUARANTEE that "len" bytes are written even if the operation is successful. 101
133 */ 102 */
134ssize_t xwrite(int fd, const void *buf, size_t len) 103static ssize_t xwrite(int fd, const void *buf, size_t len)
135{ 104{
136 ssize_t nr; 105 ssize_t nr;
137 while (1) { 106 while (1) {
@@ -179,29 +148,3 @@ ssize_t write_in_full(int fd, const void *buf, size_t count)
179 148
180 return total; 149 return total;
181} 150}
182
183int xdup(int fd)
184{
185 int ret = dup(fd);
186 if (ret < 0)
187 die("dup failed: %s", strerror(errno));
188 return ret;
189}
190
191FILE *xfdopen(int fd, const char *mode)
192{
193 FILE *stream = fdopen(fd, mode);
194 if (stream == NULL)
195 die("Out of memory? fdopen failed: %s", strerror(errno));
196 return stream;
197}
198
199int xmkstemp(char *template)
200{
201 int fd;
202
203 fd = mkstemp(template);
204 if (fd < 0)
205 die("Unable to create temporary file: %s", strerror(errno));
206 return fd;
207}