author		Linus Torvalds <torvalds@linux-foundation.org>	2012-03-20 13:29:15 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-03-20 13:29:15 -0400
commit		9c2b957db1772ebf942ae7a9346b14eba6c8ca66 (patch)
tree		0dbb83e57260ea7fc0dc421f214d5f1b26262005
parent		0bbfcaff9b2a69c71a95e6902253487ab30cb498 (diff)
parent		bea95c152dee1791dd02cbc708afbb115bb00f9a (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf events changes for v3.4 from Ingo Molnar:

 - New "hardware based branch profiling" feature both on the kernel and
   the tooling side, on CPUs that support it.  (modern x86 Intel CPUs
   with the 'LBR' hardware feature currently.)

   This new feature is basically a sophisticated 'magnifying glass' for
   branch execution - something that is pretty difficult to extract from
   regular, function histogram centric profiles.

   The simplest mode is activated via 'perf record -b', and the result
   looks like this in perf report:

	$ perf record -b any_call,u -e cycles:u branchy

	$ perf report -b --sort=symbol
	    52.34%  [.] main                   [.] f1
	    24.04%  [.] f1                     [.] f3
	    23.60%  [.] f1                     [.] f2
	     0.01%  [k] _IO_new_file_xsputn    [k] _IO_file_overflow
	     0.01%  [k] _IO_vfprintf_internal  [k] _IO_new_file_xsputn
	     0.01%  [k] _IO_vfprintf_internal  [k] strchrnul
	     0.01%  [k] __printf               [k] _IO_vfprintf_internal
	     0.01%  [k] main                   [k] __printf

   This output shows from/to branch columns and shows the highest
   percentage (from,to) jump combinations - i.e. the most likely taken
   branches in the system.  "branches" can also include function calls
   and any other synchronous and asynchronous transitions of the
   instruction pointer that are not 'next instruction' - such as system
   calls, traps, interrupts, etc.

   This feature comes with (hopefully intuitive) flat ascii and TUI
   support in perf report.

 - Various 'perf annotate' visual improvements for us assembly junkies.
   It will now recognize function calls in the TUI and by hitting enter
   you can follow the call (recursively) and back, amongst other
   improvements.

 - Multiple threads/processes recording support in perf record, perf
   stat, perf top - which is activated via a comma-list of PIDs:

	perf top -p 21483,21485
	perf stat -p 21483,21485 -ddd
	perf record -p 21483,21485

 - Support for per UID views, via the --uid parameter to perf top, perf
   report, etc.  For example 'perf top --uid mingo' will only show the
   tasks that I am running, excluding other users, root, etc.

 - Jump label restructurings and improvements - this includes the
   factoring out of the (hopefully much clearer)
   include/linux/static_key.h generic facility:

	struct static_key key = STATIC_KEY_INIT_FALSE;

	...

	if (static_key_false(&key))
		do unlikely code
	else
		do likely code

	...

	static_key_slow_inc(&key);
	...
	static_key_slow_dec(&key);
	...

   The static_key_false() branch will be generated into the code with as
   little impact to the likely code path as possible.  The
   static_key_slow_*() APIs flip the branch via live kernel code
   patching.

   This facility can now be used more widely within the kernel to
   micro-optimize hot branches whose likelihood matches the static-key
   usage and fast/slow cost patterns.

 - SW function tracer improvements: perf support and filtering support.

 - Various hardenings of the perf.data ABI, to make older perf.data's
   smoother on newer tool versions, to make new features integrate more
   smoothly, to support cross-endian recording/analyzing workflows
   better, etc.

 - Restructuring of the kprobes code, the splitting out of 'optprobes',
   and a corner case bugfix.

 - Allow the tracing of kernel console output (printk).

 - Improvements/fixes to user-space RDPMC support, allowing user-space
   self-profiling code to extract PMU counts without performing any
   system calls, while playing nice with the kernel side.

 - 'perf bench' improvements

 - ... and lots of internal restructurings, cleanups and fixes that made
   these features possible.
And, as usual this list is incomplete as there were also lots of other
improvements.

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (120 commits)
  perf report: Fix annotate double quit issue in branch view mode
  perf report: Remove duplicate annotate choice in branch view mode
  perf/x86: Prettify pmu config literals
  perf report: Enable TUI in branch view mode
  perf report: Auto-detect branch stack sampling mode
  perf record: Add HEADER_BRANCH_STACK tag
  perf record: Provide default branch stack sampling mode option
  perf tools: Make perf able to read files from older ABIs
  perf tools: Fix ABI compatibility bug in print_event_desc()
  perf tools: Enable reading of perf.data files from different ABI rev
  perf: Add ABI reference sizes
  perf report: Add support for taken branch sampling
  perf record: Add support for sampling taken branch
  perf tools: Add code to support PERF_SAMPLE_BRANCH_STACK
  x86/kprobes: Split out optprobe related code to kprobes-opt.c
  x86/kprobes: Fix a bug which can modify kernel code permanently
  x86/kprobes: Fix instruction recovery on optimized path
  perf: Add callback to flush branch_stack on context switch
  perf: Disable PERF_SAMPLE_BRANCH_* when not supported
  perf/x86: Add LBR software filter support for Intel CPUs
  ...
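[ Editor's note: to make the user-space RDPMC bullet above concrete,
  here is a minimal self-monitoring sketch.  It is illustrative only and
  not part of this merge: it assumes x86 (the 'rdpmc' instruction),
  calls perf_event_open() via syscall(2) since glibc provides no
  wrapper, and follows the seqlock-style read protocol documented for
  struct perf_event_mmap_page.  Error handling is kept minimal. ]

#include <linux/perf_event.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>

/* read counter 'counter' directly from the PMU, no system call */
static uint64_t rdpmc(uint32_t counter)
{
        uint32_t lo, hi;
        asm volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (counter));
        return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
        struct perf_event_attr attr = {
                .type   = PERF_TYPE_HARDWARE,
                .size   = sizeof(attr),
                .config = PERF_COUNT_HW_INSTRUCTIONS,
        };
        struct perf_event_mmap_page *pc;
        uint64_t count;
        uint32_t seq;
        int fd;

        fd = syscall(__NR_perf_event_open, &attr, 0 /* self */, -1, -1, 0);
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }
        pc = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ, MAP_SHARED, fd, 0);
        if (pc == MAP_FAILED) {
                perror("mmap");
                return 1;
        }

        /* lockless read: retry if the kernel updated the page mid-read */
        do {
                seq = pc->lock;
                asm volatile("" ::: "memory");
                count = pc->offset;
                if (pc->index)          /* index == 0: rdpmc not usable */
                        count += rdpmc(pc->index - 1);
                asm volatile("" ::: "memory");
        } while (pc->lock != seq);

        printf("instructions so far: %llu\n", (unsigned long long)count);
        return 0;
}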
-rw-r--r--  Documentation/lockup-watchdogs.txt | 63
-rw-r--r--  Documentation/nmi_watchdog.txt | 83
-rw-r--r--  Documentation/static-keys.txt | 286
-rw-r--r--  Documentation/trace/ftrace.txt | 7
-rw-r--r--  arch/Kconfig | 29
-rw-r--r--  arch/alpha/kernel/perf_event.c | 4
-rw-r--r--  arch/arm/include/asm/perf_event.h | 4
-rw-r--r--  arch/arm/kernel/perf_event.c | 4
-rw-r--r--  arch/frv/include/asm/perf_event.h | 2
-rw-r--r--  arch/hexagon/include/asm/perf_event.h | 2
-rw-r--r--  arch/ia64/include/asm/paravirt.h | 6
-rw-r--r--  arch/ia64/kernel/paravirt.c | 4
-rw-r--r--  arch/mips/include/asm/jump_label.h | 2
-rw-r--r--  arch/mips/kernel/perf_event_mipsxx.c | 4
-rw-r--r--  arch/powerpc/include/asm/jump_label.h | 2
-rw-r--r--  arch/powerpc/include/asm/perf_event_server.h | 2
-rw-r--r--  arch/powerpc/kernel/perf_event.c | 10
-rw-r--r--  arch/s390/include/asm/jump_label.h | 2
-rw-r--r--  arch/s390/include/asm/perf_event.h | 1
-rw-r--r--  arch/sh/kernel/perf_event.c | 4
-rw-r--r--  arch/sparc/include/asm/jump_label.h | 2
-rw-r--r--  arch/sparc/kernel/perf_event.c | 4
-rw-r--r--  arch/x86/include/asm/inat.h | 5
-rw-r--r--  arch/x86/include/asm/insn.h | 18
-rw-r--r--  arch/x86/include/asm/jump_label.h | 6
-rw-r--r--  arch/x86/include/asm/msr-index.h | 7
-rw-r--r--  arch/x86/include/asm/paravirt.h | 6
-rw-r--r--  arch/x86/include/asm/perf_event.h | 2
-rw-r--r--  arch/x86/kernel/Makefile | 1
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 3
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 167
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h | 50
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c | 3
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 141
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c | 22
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_lbr.c | 526
-rw-r--r--  arch/x86/kernel/kprobes-common.h | 102
-rw-r--r--  arch/x86/kernel/kprobes-opt.c | 512
-rw-r--r--  arch/x86/kernel/kprobes.c | 664
-rw-r--r--  arch/x86/kernel/kvm.c | 4
-rw-r--r--  arch/x86/kernel/paravirt.c | 4
-rw-r--r--  arch/x86/kernel/process.c | 24
-rw-r--r--  arch/x86/kvm/mmu_audit.c | 8
-rw-r--r--  arch/x86/lib/inat.c | 36
-rw-r--r--  arch/x86/lib/insn.c | 13
-rw-r--r--  drivers/cpuidle/cpuidle.c | 8
-rw-r--r--  fs/exec.c | 9
-rw-r--r--  include/linux/ftrace.h | 77
-rw-r--r--  include/linux/ftrace_event.h | 9
-rw-r--r--  include/linux/interrupt.h | 7
-rw-r--r--  include/linux/jump_label.h | 162
-rw-r--r--  include/linux/netdevice.h | 4
-rw-r--r--  include/linux/netfilter.h | 6
-rw-r--r--  include/linux/perf_event.h | 108
-rw-r--r--  include/linux/static_key.h | 1
-rw-r--r--  include/linux/tracepoint.h | 28
-rw-r--r--  include/net/sock.h | 6
-rw-r--r--  include/trace/events/power.h | 2
-rw-r--r--  include/trace/events/printk.h | 41
-rw-r--r--  include/trace/events/sched.h | 27
-rw-r--r--  include/trace/events/signal.h | 85
-rw-r--r--  kernel/events/core.c | 246
-rw-r--r--  kernel/events/hw_breakpoint.c | 13
-rw-r--r--  kernel/irq/chip.c | 2
-rw-r--r--  kernel/jump_label.c | 135
-rw-r--r--  kernel/printk.c | 5
-rw-r--r--  kernel/sched/core.c | 18
-rw-r--r--  kernel/sched/fair.c | 8
-rw-r--r--  kernel/sched/sched.h | 14
-rw-r--r--  kernel/signal.c | 28
-rw-r--r--  kernel/softirq.c | 6
-rw-r--r--  kernel/trace/ftrace.c | 134
-rw-r--r--  kernel/trace/trace.c | 6
-rw-r--r--  kernel/trace/trace.h | 38
-rw-r--r--  kernel/trace/trace_entries.h | 54
-rw-r--r--  kernel/trace/trace_event_perf.c | 208
-rw-r--r--  kernel/trace/trace_events.c | 12
-rw-r--r--  kernel/trace/trace_events_filter.c | 175
-rw-r--r--  kernel/trace/trace_export.c | 64
-rw-r--r--  kernel/trace/trace_kprobe.c | 8
-rw-r--r--  kernel/trace/trace_output.c | 12
-rw-r--r--  kernel/trace/trace_syscalls.c | 22
-rw-r--r--  kernel/tracepoint.c | 20
-rw-r--r--  kernel/watchdog.c | 24
-rw-r--r--  lib/Kconfig.debug | 18
-rw-r--r--  net/core/dev.c | 24
-rw-r--r--  net/core/net-sysfs.c | 4
-rw-r--r--  net/core/sock.c | 4
-rw-r--r--  net/core/sysctl_net_core.c | 4
-rw-r--r--  net/ipv4/tcp_memcontrol.c | 6
-rw-r--r--  net/netfilter/core.c | 6
-rw-r--r--  tools/perf/Documentation/Makefile | 86
-rw-r--r--  tools/perf/Documentation/perf-lock.txt | 20
-rw-r--r--  tools/perf/Documentation/perf-record.txt | 38
-rw-r--r--  tools/perf/Documentation/perf-report.txt | 10
-rw-r--r--  tools/perf/Documentation/perf-script.txt | 5
-rw-r--r--  tools/perf/Documentation/perf-stat.txt | 4
-rw-r--r--  tools/perf/Documentation/perf-top.txt | 8
-rw-r--r--  tools/perf/MANIFEST | 1
-rw-r--r--  tools/perf/Makefile | 26
-rw-r--r--  tools/perf/bench/bench.h | 1
-rw-r--r--  tools/perf/bench/mem-memcpy-x86-64-asm-def.h | 8
-rw-r--r--  tools/perf/bench/mem-memcpy-x86-64-asm.S | 6
-rw-r--r--  tools/perf/bench/mem-memcpy.c | 12
-rw-r--r--  tools/perf/bench/mem-memset-arch.h | 12
-rw-r--r--  tools/perf/bench/mem-memset-x86-64-asm-def.h | 12
-rw-r--r--  tools/perf/bench/mem-memset-x86-64-asm.S | 13
-rw-r--r--  tools/perf/bench/mem-memset.c | 297
-rw-r--r--  tools/perf/builtin-bench.c | 3
-rw-r--r--  tools/perf/builtin-lock.c | 4
-rw-r--r--  tools/perf/builtin-probe.c | 12
-rw-r--r--  tools/perf/builtin-record.c | 152
-rw-r--r--  tools/perf/builtin-report.c | 178
-rw-r--r--  tools/perf/builtin-script.c | 80
-rw-r--r--  tools/perf/builtin-stat.c | 41
-rw-r--r--  tools/perf/builtin-test.c | 188
-rw-r--r--  tools/perf/builtin-top.c | 45
-rw-r--r--  tools/perf/perf.h | 26
-rwxr-xr-x  tools/perf/python/twatch.py | 2
-rw-r--r--  tools/perf/util/annotate.c | 2
-rw-r--r--  tools/perf/util/bitmap.c | 10
-rw-r--r--  tools/perf/util/cpumap.c | 11
-rw-r--r--  tools/perf/util/cpumap.h | 4
-rw-r--r--  tools/perf/util/ctype.c | 2
-rw-r--r--  tools/perf/util/debugfs.c | 141
-rw-r--r--  tools/perf/util/debugfs.h | 6
-rw-r--r--  tools/perf/util/event.h | 1
-rw-r--r--  tools/perf/util/evlist.c | 17
-rw-r--r--  tools/perf/util/evlist.h | 9
-rw-r--r--  tools/perf/util/evsel.c | 22
-rw-r--r--  tools/perf/util/header.c | 588
-rw-r--r--  tools/perf/util/header.h | 3
-rw-r--r--  tools/perf/util/hist.c | 122
-rw-r--r--  tools/perf/util/hist.h | 13
-rw-r--r--  tools/perf/util/include/asm/dwarf2.h | 4
-rw-r--r--  tools/perf/util/include/linux/bitmap.h | 11
-rw-r--r--  tools/perf/util/map.c | 15
-rw-r--r--  tools/perf/util/map.h | 1
-rw-r--r--  tools/perf/util/probe-event.c | 33
-rw-r--r--  tools/perf/util/probe-finder.c | 1
-rw-r--r--  tools/perf/util/python-ext-sources | 19
-rw-r--r--  tools/perf/util/python.c | 10
-rw-r--r--  tools/perf/util/scripting-engines/trace-event-python.c | 1
-rw-r--r--  tools/perf/util/session.c | 126
-rw-r--r--  tools/perf/util/session.h | 6
-rw-r--r--  tools/perf/util/setup.py | 8
-rw-r--r--  tools/perf/util/sort.c | 287
-rw-r--r--  tools/perf/util/sort.h | 11
-rw-r--r--  tools/perf/util/symbol.c | 24
-rw-r--r--  tools/perf/util/symbol.h | 24
-rw-r--r--  tools/perf/util/sysfs.c | 60
-rw-r--r--  tools/perf/util/sysfs.h | 6
-rw-r--r--  tools/perf/util/thread_map.c | 237
-rw-r--r--  tools/perf/util/thread_map.h | 11
-rw-r--r--  tools/perf/util/top.c | 13
-rw-r--r--  tools/perf/util/top.h | 6
-rw-r--r--  tools/perf/util/trace-event-parse.c | 13
-rw-r--r--  tools/perf/util/trace-event-read.c | 1
-rw-r--r--  tools/perf/util/trace-event-scripting.c | 1
-rw-r--r--  tools/perf/util/ui/browsers/annotate.c | 18
-rw-r--r--  tools/perf/util/ui/browsers/hists.c | 105
-rw-r--r--  tools/perf/util/ui/browsers/map.c | 2
-rw-r--r--  tools/perf/util/usage.c | 39
-rw-r--r--  tools/perf/util/util.c | 2
-rw-r--r--  tools/perf/util/util.h | 6
165 files changed, 6107 insertions(+), 1984 deletions(-)
diff --git a/Documentation/lockup-watchdogs.txt b/Documentation/lockup-watchdogs.txt
new file mode 100644
index 000000000000..d2a36602ca8d
--- /dev/null
+++ b/Documentation/lockup-watchdogs.txt
@@ -0,0 +1,63 @@
+===============================================================
+Softlockup detector and hardlockup detector (aka nmi_watchdog)
+===============================================================
+
+The Linux kernel can act as a watchdog to detect both soft and hard
+lockups.
+
+A 'softlockup' is defined as a bug that causes the kernel to loop in
+kernel mode for more than 20 seconds (see "Implementation" below for
+details), without giving other tasks a chance to run. The current
+stack trace is displayed upon detection and, by default, the system
+will stay locked up. Alternatively, the kernel can be configured to
+panic; a sysctl, "kernel.softlockup_panic", a kernel parameter,
+"softlockup_panic" (see "Documentation/kernel-parameters.txt" for
+details), and a compile option, "BOOTPARAM_SOFTLOCKUP_PANIC", are
+provided for this.
+
+A 'hardlockup' is defined as a bug that causes the CPU to loop in
+kernel mode for more than 10 seconds (see "Implementation" below for
+details), without letting other interrupts have a chance to run.
+Similarly to the softlockup case, the current stack trace is displayed
+upon detection and the system will stay locked up unless the default
+behavior is changed, which can be done through a compile time knob,
+"BOOTPARAM_HARDLOCKUP_PANIC", and a kernel parameter, "nmi_watchdog"
+(see "Documentation/kernel-parameters.txt" for details).
+
+The panic option can be used in combination with panic_timeout (this
+timeout is set through the confusingly named "kernel.panic" sysctl),
+to cause the system to reboot automatically after a specified amount
+of time.
+
+=== Implementation ===
+
+The soft and hard lockup detectors are built on top of the hrtimer and
+perf subsystems, respectively. A direct consequence of this is that,
+in principle, they should work in any architecture where these
+subsystems are present.
+
+A periodic hrtimer runs to generate interrupts and kick the watchdog
+task. An NMI perf event is generated every "watchdog_thresh"
+(compile-time initialized to 10 and configurable through sysctl of the
+same name) seconds to check for hardlockups. If any CPU in the system
+does not receive any hrtimer interrupt during that time the
+'hardlockup detector' (the handler for the NMI perf event) will
+generate a kernel warning or call panic, depending on the
+configuration.
+
+The watchdog task is a high priority kernel thread that updates a
+timestamp every time it is scheduled. If that timestamp is not updated
+for 2*watchdog_thresh seconds (the softlockup threshold) the
+'softlockup detector' (coded inside the hrtimer callback function)
+will dump useful debug information to the system log, after which it
+will call panic if it was instructed to do so or resume execution of
+other kernel code.
+
+The period of the hrtimer is 2*watchdog_thresh/5, which means it has
+two or three chances to generate an interrupt before the hardlockup
+detector kicks in.
+
+As explained above, a kernel knob is provided that allows
+administrators to configure the period of the hrtimer and the perf
+event. The right value for a particular environment is a trade-off
+between fast response to lockups and detection overhead.
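[ Editor's note: a quick worked example of the arithmetic above, using
  the default watchdog_thresh=10: the hrtimer fires every 2*10/5 = 4
  seconds, the hardlockup detector checks a 10 second window, and the
  softlockup threshold is 2*10 = 20 seconds - which is where the 10 and
  20 second figures quoted at the top of this file come from. ]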
diff --git a/Documentation/nmi_watchdog.txt b/Documentation/nmi_watchdog.txt
deleted file mode 100644
index bf9f80a98282..000000000000
--- a/Documentation/nmi_watchdog.txt
+++ /dev/null
@@ -1,83 +0,0 @@
-
-[NMI watchdog is available for x86 and x86-64 architectures]
-
-Is your system locking up unpredictably? No keyboard activity, just
-a frustrating complete hard lockup? Do you want to help us debug
-such lockups? If all yes then this document is definitely for you.
-
-On many x86/x86-64 type hardware there is a feature that enables
-us to generate 'watchdog NMI interrupts'. (NMI: Non Maskable Interrupt
-which get executed even if the system is otherwise locked up hard).
-This can be used to debug hard kernel lockups. By executing periodic
-NMI interrupts, the kernel can monitor whether any CPU has locked up,
-and print out debugging messages if so.
-
-In order to use the NMI watchdog, you need to have APIC support in your
-kernel. For SMP kernels, APIC support gets compiled in automatically. For
-UP, enable either CONFIG_X86_UP_APIC (Processor type and features -> Local
-APIC support on uniprocessors) or CONFIG_X86_UP_IOAPIC (Processor type and
-features -> IO-APIC support on uniprocessors) in your kernel config.
-CONFIG_X86_UP_APIC is for uniprocessor machines without an IO-APIC.
-CONFIG_X86_UP_IOAPIC is for a uniprocessor with an IO-APIC. [Note: certain
-kernel debugging options, such as Kernel Stack Meter or Kernel Tracer,
-may implicitly disable the NMI watchdog.]
-
-For x86-64, the needed APIC is always compiled in.
-
-Using local APIC (nmi_watchdog=2) needs the first performance register, so
-you can't use it for other purposes (such as high precision performance
-profiling.) However, at least oprofile and the perfctr driver disable the
-local APIC NMI watchdog automatically.
-
-To actually enable the NMI watchdog, use the 'nmi_watchdog=N' boot
-parameter. Eg. the relevant lilo.conf entry:
-
-   append="nmi_watchdog=1"
-
-For SMP machines and UP machines with an IO-APIC use nmi_watchdog=1.
-For UP machines without an IO-APIC use nmi_watchdog=2, this only works
-for some processor types. If in doubt, boot with nmi_watchdog=1 and
-check the NMI count in /proc/interrupts; if the count is zero then
-reboot with nmi_watchdog=2 and check the NMI count. If it is still
-zero then log a problem, you probably have a processor that needs to be
-added to the nmi code.
-
-A 'lockup' is the following scenario: if any CPU in the system does not
-execute the periodic local timer interrupt for more than 5 seconds, then
-the NMI handler generates an oops and kills the process. This
-'controlled crash' (and the resulting kernel messages) can be used to
-debug the lockup. Thus whenever the lockup happens, wait 5 seconds and
-the oops will show up automatically. If the kernel produces no messages
-then the system has crashed so hard (eg. hardware-wise) that either it
-cannot even accept NMI interrupts, or the crash has made the kernel
-unable to print messages.
-
-Be aware that when using local APIC, the frequency of NMI interrupts
-it generates depends on the system load. The local APIC NMI watchdog,
-lacking a better source, uses the "cycles unhalted" event. As you may
-guess it doesn't tick when the CPU is in the halted state (which happens
-when the system is idle), but if your system locks up on anything but the
-"hlt" processor instruction, the watchdog will trigger very soon as the
-"cycles unhalted" event will happen every clock tick. If it locks up on
-"hlt", then you are out of luck -- the event will not happen at all and the
-watchdog won't trigger. This is a shortcoming of the local APIC watchdog
--- unfortunately there is no "clock ticks" event that would work all the
-time. The I/O APIC watchdog is driven externally and has no such shortcoming.
-But its NMI frequency is much higher, resulting in a more significant hit
-to the overall system performance.
-
-On x86 nmi_watchdog is disabled by default so you have to enable it with
-a boot time parameter.
-
-It's possible to disable the NMI watchdog in run-time by writing "0" to
-/proc/sys/kernel/nmi_watchdog. Writing "1" to the same file will re-enable
-the NMI watchdog. Notice that you still need to use "nmi_watchdog=" parameter
-at boot time.
-
-NOTE: In kernels prior to 2.4.2-ac18 the NMI-oopser is enabled unconditionally
-on x86 SMP boxes.
-
-[ feel free to send bug reports, suggestions and patches to
-  Ingo Molnar <mingo@redhat.com> or the Linux SMP mailing
-  list at <linux-smp@vger.kernel.org> ]
-
diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt
new file mode 100644
index 000000000000..d93f3c00f245
--- /dev/null
+++ b/Documentation/static-keys.txt
@@ -0,0 +1,286 @@
+                        Static Keys
+                        -----------
+
+By: Jason Baron <jbaron@redhat.com>
+
+0) Abstract
+
+Static keys allow the inclusion of seldom used features in
+performance-sensitive fast-path kernel code, via a GCC feature and a code
+patching technique. A quick example:
+
+	struct static_key key = STATIC_KEY_INIT_FALSE;
+
+	...
+
+	if (static_key_false(&key))
+		do unlikely code
+	else
+		do likely code
+
+	...
+	static_key_slow_inc(&key);
+	...
+	static_key_slow_dec(&key);
+	...
+
+The static_key_false() branch will be generated into the code with as little
+impact to the likely code path as possible.
+
+
+1) Motivation
+
+
+Currently, tracepoints are implemented using a conditional branch. The
+conditional check requires checking a global variable for each tracepoint.
+Although the overhead of this check is small, it increases when the memory
+cache comes under pressure (memory cache lines for these global variables may
+be shared with other memory accesses). As we increase the number of tracepoints
+in the kernel this overhead may become more of an issue. In addition,
+tracepoints are often dormant (disabled) and provide no direct kernel
+functionality. Thus, it is highly desirable to reduce their impact as much as
+possible. Although tracepoints are the original motivation for this work, other
+kernel code paths should be able to make use of the static keys facility.
+
+
+2) Solution
+
+
+gcc (v4.5) adds a new 'asm goto' statement that allows branching to a label:
+
+http://gcc.gnu.org/ml/gcc-patches/2009-07/msg01556.html
+
+Using the 'asm goto', we can create branches that are either taken or not taken
+by default, without the need to check memory. Then, at run-time, we can patch
+the branch site to change the branch direction.
+
+For example, if we have a simple branch that is disabled by default:
+
+	if (static_key_false(&key))
+		printk("I am the true branch\n");
+
+Thus, by default the 'printk' will not be emitted. And the code generated will
+consist of a single atomic 'no-op' instruction (5 bytes on x86), in the
+straight-line code path. When the branch is 'flipped', we will patch the
+'no-op' in the straight-line codepath with a 'jump' instruction to the
+out-of-line true branch. Thus, changing branch direction is expensive but
+branch selection is basically 'free'. That is the basic tradeoff of this
+optimization.
+
+This lowlevel patching mechanism is called 'jump label patching', and it gives
+the basis for the static keys facility.
+
+3) Static key label API, usage and examples:
+
+
+In order to make use of this optimization you must first define a key:
+
+	struct static_key key;
+
+Which is initialized as:
+
+	struct static_key key = STATIC_KEY_INIT_TRUE;
+
+or:
+
+	struct static_key key = STATIC_KEY_INIT_FALSE;
+
+If the key is not initialized, it defaults to false. The 'struct static_key'
+must be a 'global'. That is, it can't be allocated on the stack or dynamically
+allocated at run-time.
+
+The key is then used in code as:
+
+	if (static_key_false(&key))
+		do unlikely code
+	else
+		do likely code
+
+Or:
+
+	if (static_key_true(&key))
+		do likely code
+	else
+		do unlikely code
+
+A key that is initialized via 'STATIC_KEY_INIT_FALSE', must be used in a
+'static_key_false()' construct. Likewise, a key initialized via
+'STATIC_KEY_INIT_TRUE' must be used in a 'static_key_true()' construct. A
+single key can be used in many branches, but all the branches must match the
+way that the key has been initialized.
+
+The branch(es) can then be switched via:
+
+	static_key_slow_inc(&key);
+	...
+	static_key_slow_dec(&key);
+
+Thus, 'static_key_slow_inc()' means 'make the branch true', and
+'static_key_slow_dec()' means 'make the branch false' with appropriate
+reference counting. For example, if the key is initialized true, a
+static_key_slow_dec(), will switch the branch to false. And a subsequent
+static_key_slow_inc(), will change the branch back to true. Likewise, if the
+key is initialized false, a 'static_key_slow_inc()', will change the branch to
+true. And then a 'static_key_slow_dec()', will again make the branch false.
+
+An example usage in the kernel is the implementation of tracepoints:
+
+	static inline void trace_##name(proto)				\
+	{								\
+		if (static_key_false(&__tracepoint_##name.key))		\
+			__DO_TRACE(&__tracepoint_##name,		\
+				TP_PROTO(data_proto),			\
+				TP_ARGS(data_args),			\
+				TP_CONDITION(cond));			\
+	}
+
+Tracepoints are disabled by default, and can be placed in performance critical
+pieces of the kernel. Thus, by using a static key, the tracepoints can have
+absolutely minimal impact when not in use.
+
+
+4) Architecture level code patching interface, 'jump labels'
+
+
+There are a few functions and macros that architectures must implement in order
+to take advantage of this optimization. If there is no architecture support, we
+simply fall back to a traditional, load, test, and jump sequence.
+
+* select HAVE_ARCH_JUMP_LABEL, see: arch/x86/Kconfig
+
+* #define JUMP_LABEL_NOP_SIZE, see: arch/x86/include/asm/jump_label.h
+
+* __always_inline bool arch_static_branch(struct static_key *key), see:
+    arch/x86/include/asm/jump_label.h
+
+* void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type),
+    see: arch/x86/kernel/jump_label.c
+
+* __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type),
+    see: arch/x86/kernel/jump_label.c
+
+
+* struct jump_entry, see: arch/x86/include/asm/jump_label.h
+
+
+5) Static keys / jump label analysis, results (x86_64):
+
+
+As an example, let's add the following branch to 'getppid()', such that the
+system call now looks like:
+
+SYSCALL_DEFINE0(getppid)
+{
+	int pid;
+
++	if (static_key_false(&key))
++		printk("I am the true branch\n");
+
+	rcu_read_lock();
+	pid = task_tgid_vnr(rcu_dereference(current->real_parent));
+	rcu_read_unlock();
+
+	return pid;
+}
+
+The resulting instructions with jump labels generated by GCC are:
+
+ffffffff81044290 <sys_getppid>:
+ffffffff81044290:       55                      push   %rbp
+ffffffff81044291:       48 89 e5                mov    %rsp,%rbp
+ffffffff81044294:       e9 00 00 00 00          jmpq   ffffffff81044299 <sys_getppid+0x9>
+ffffffff81044299:       65 48 8b 04 25 c0 b6    mov    %gs:0xb6c0,%rax
+ffffffff810442a0:       00 00
+ffffffff810442a2:       48 8b 80 80 02 00 00    mov    0x280(%rax),%rax
+ffffffff810442a9:       48 8b 80 b0 02 00 00    mov    0x2b0(%rax),%rax
+ffffffff810442b0:       48 8b b8 e8 02 00 00    mov    0x2e8(%rax),%rdi
+ffffffff810442b7:       e8 f4 d9 00 00          callq  ffffffff81051cb0 <pid_vnr>
+ffffffff810442bc:       5d                      pop    %rbp
+ffffffff810442bd:       48 98                   cltq
+ffffffff810442bf:       c3                      retq
+ffffffff810442c0:       48 c7 c7 e3 54 98 81    mov    $0xffffffff819854e3,%rdi
+ffffffff810442c7:       31 c0                   xor    %eax,%eax
+ffffffff810442c9:       e8 71 13 6d 00          callq  ffffffff8171563f <printk>
+ffffffff810442ce:       eb c9                   jmp    ffffffff81044299 <sys_getppid+0x9>
+
+Without the jump label optimization it looks like:
+
+ffffffff810441f0 <sys_getppid>:
+ffffffff810441f0:       8b 05 8a 52 d8 00       mov    0xd8528a(%rip),%eax        # ffffffff81dc9480 <key>
+ffffffff810441f6:       55                      push   %rbp
+ffffffff810441f7:       48 89 e5                mov    %rsp,%rbp
+ffffffff810441fa:       85 c0                   test   %eax,%eax
+ffffffff810441fc:       75 27                   jne    ffffffff81044225 <sys_getppid+0x35>
+ffffffff810441fe:       65 48 8b 04 25 c0 b6    mov    %gs:0xb6c0,%rax
+ffffffff81044205:       00 00
+ffffffff81044207:       48 8b 80 80 02 00 00    mov    0x280(%rax),%rax
+ffffffff8104420e:       48 8b 80 b0 02 00 00    mov    0x2b0(%rax),%rax
+ffffffff81044215:       48 8b b8 e8 02 00 00    mov    0x2e8(%rax),%rdi
+ffffffff8104421c:       e8 2f da 00 00          callq  ffffffff81051c50 <pid_vnr>
+ffffffff81044221:       5d                      pop    %rbp
+ffffffff81044222:       48 98                   cltq
+ffffffff81044224:       c3                      retq
+ffffffff81044225:       48 c7 c7 13 53 98 81    mov    $0xffffffff81985313,%rdi
+ffffffff8104422c:       31 c0                   xor    %eax,%eax
+ffffffff8104422e:       e8 60 0f 6d 00          callq  ffffffff81715193 <printk>
+ffffffff81044233:       eb c9                   jmp    ffffffff810441fe <sys_getppid+0xe>
+ffffffff81044235:       66 66 2e 0f 1f 84 00    data32 nopw %cs:0x0(%rax,%rax,1)
+ffffffff8104423c:       00 00 00 00
+
+Thus, the disabled jump label case adds a 'mov', 'test' and 'jne' instruction
+vs. the jump label case just has a 'no-op' or 'jmp 0'. (The jmp 0, is patched
+to a 5 byte atomic no-op instruction at boot-time.) Thus, the disabled jump
+label case adds:
+
+6 (mov) + 2 (test) + 2 (jne) = 10 - 5 (5 byte jump 0) = 5 additional bytes.
+
+If we then include the padding bytes, the jump label code saves 16 total bytes
+of instruction memory for this small function. In this case the non-jump label
+function is 80 bytes long. Thus, we have saved 20% of the instruction
+footprint. We can in fact improve this even further, since the 5-byte no-op
+really can be a 2-byte no-op since we can reach the branch with a 2-byte jmp.
+However, we have not yet implemented optimal no-op sizes (they are currently
+hard-coded).
+
+Since there are a number of static key API uses in the scheduler paths,
+'pipe-test' (also known as 'perf bench sched pipe') can be used to show the
+performance improvement. Testing done on 3.3.0-rc2:
+
+jump label disabled:
+
+ Performance counter stats for 'bash -c /tmp/pipe-test' (50 runs):
+
+        855.700314 task-clock                #    0.534 CPUs utilized            ( +-  0.11% )
+           200,003 context-switches          #    0.234 M/sec                    ( +-  0.00% )
+                 0 CPU-migrations            #    0.000 M/sec                    ( +- 39.58% )
+               487 page-faults               #    0.001 M/sec                    ( +-  0.02% )
+     1,474,374,262 cycles                    #    1.723 GHz                      ( +-  0.17% )
+   <not supported> stalled-cycles-frontend
+   <not supported> stalled-cycles-backend
+     1,178,049,567 instructions              #    0.80  insns per cycle          ( +-  0.06% )
+       208,368,926 branches                  #  243.507 M/sec                    ( +-  0.06% )
+         5,569,188 branch-misses             #    2.67% of all branches          ( +-  0.54% )
+
+       1.601607384 seconds time elapsed                                          ( +-  0.07% )
+
+jump label enabled:
+
+ Performance counter stats for 'bash -c /tmp/pipe-test' (50 runs):
+
+        841.043185 task-clock                #    0.533 CPUs utilized            ( +-  0.12% )
+           200,004 context-switches          #    0.238 M/sec                    ( +-  0.00% )
+                 0 CPU-migrations            #    0.000 M/sec                    ( +- 40.87% )
+               487 page-faults               #    0.001 M/sec                    ( +-  0.05% )
+     1,432,559,428 cycles                    #    1.703 GHz                      ( +-  0.18% )
+   <not supported> stalled-cycles-frontend
+   <not supported> stalled-cycles-backend
+     1,175,363,994 instructions              #    0.82  insns per cycle          ( +-  0.04% )
+       206,859,359 branches                  #  245.956 M/sec                    ( +-  0.04% )
+         4,884,119 branch-misses             #    2.36% of all branches          ( +-  0.85% )
+
+       1.579384366 seconds time elapsed
+
+The percentage of saved branches is .7%, and we've saved 12% on
+'branch-misses'. This is where we would expect to get the most savings, since
+this optimization is about reducing the number of branches. In addition, we've
+saved .2% on instructions, and 2.8% on cycles and 1.4% on elapsed time.
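[ Editor's note: the 'asm goto' mechanism described in section 2 above
  can be demonstrated outside the kernel. The stand-alone sketch below
  is illustrative only and not part of this patch: the section name
  '.my_jump_table' and the helper name are made up, and nothing here
  patches code at run-time - it only shows how the straight-line path
  compiles down to a single 5-byte nop, mirroring the pattern in
  arch/x86/include/asm/jump_label.h. Builds with GCC >= 4.5 on x86-64. ]

#include <stdio.h>

static inline __attribute__((always_inline)) int my_static_branch(void)
{
        /* 0f 1f 44 00 00 is the 5-byte x86 nop the kernel uses */
        asm goto("1: .byte 0x0f, 0x1f, 0x44, 0x00, 0x00\n\t"
                 ".pushsection .my_jump_table, \"aw\"\n\t"
                 ".quad 1b, %l[l_yes]\n\t"   /* record <nop addr, target addr> */
                 ".popsection\n\t"
                 : : : : l_yes);
        return 0;       /* default: fall through, cost is one nop */
l_yes:
        return 1;       /* reachable only if the nop were patched to a jmp */
}

int main(void)
{
        if (my_static_branch())
                puts("unlikely path (needs run-time patching to reach)");
        else
                puts("likely path: straight-line code, one nop");
        return 0;
}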
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 1ebc24cf9a55..6f51fed45f2d 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -226,6 +226,13 @@ Here is the list of current tracers that may be configured.
 	  Traces and records the max latency that it takes for
 	  the highest priority task to get scheduled after
 	  it has been woken up.
+	  Traces all tasks as an average developer would expect.
+
+  "wakeup_rt"
+
+	  Traces and records the max latency that it takes for just
+	  RT tasks (as the current "wakeup" does). This is useful
+	  for those interested in wake up timings of RT tasks.
 
   "hw-branch-tracer"
 
diff --git a/arch/Kconfig b/arch/Kconfig
index 4f55c736be11..5b448a74d0f7 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -47,18 +47,29 @@ config KPROBES
 	  If in doubt, say "N".
 
 config JUMP_LABEL
-	bool "Optimize trace point call sites"
+	bool "Optimize very unlikely/likely branches"
 	depends on HAVE_ARCH_JUMP_LABEL
 	help
+	  This option enables a transparent branch optimization that
+	  makes certain almost-always-true or almost-always-false branch
+	  conditions even cheaper to execute within the kernel.
+
+	  Certain performance-sensitive kernel code, such as trace points,
+	  scheduler functionality, networking code and KVM have such
+	  branches and include support for this optimization technique.
+
 	  If it is detected that the compiler has support for "asm goto",
-	  the kernel will compile trace point locations with just a
-	  nop instruction. When trace points are enabled, the nop will
-	  be converted to a jump to the trace function. This technique
-	  lowers overhead and stress on the branch prediction of the
-	  processor.
-
-	  On i386, options added to the compiler flags may increase
-	  the size of the kernel slightly.
+	  the kernel will compile such branches with just a nop
+	  instruction. When the condition flag is toggled to true, the
+	  nop will be converted to a jump instruction to execute the
+	  conditional block of instructions.
+
+	  This technique lowers overhead and stress on the branch prediction
+	  of the processor and generally makes the kernel faster. The update
+	  of the condition is slower, but those are always very rare.
+
+	  ( On 32-bit x86, the necessary options added to the compiler
+	    flags may increase the size of the kernel slightly. )
 
 config OPTPROBES
 	def_bool y
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 8143cd7cdbfb..0dae252f7a33 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -685,6 +685,10 @@ static int alpha_pmu_event_init(struct perf_event *event)
 {
 	int err;
 
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	switch (event->attr.type) {
 	case PERF_TYPE_RAW:
 	case PERF_TYPE_HARDWARE:
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
index 99cfe3607989..7523340afb8a 100644
--- a/arch/arm/include/asm/perf_event.h
+++ b/arch/arm/include/asm/perf_event.h
@@ -12,10 +12,6 @@
 #ifndef __ARM_PERF_EVENT_H__
 #define __ARM_PERF_EVENT_H__
 
-/* ARM performance counters start from 1 (in the cp15 accesses) so use the
- * same indexes here for consistency. */
-#define PERF_EVENT_INDEX_OFFSET 1
-
 /* ARM perf PMU IDs for use by internal perf clients. */
 enum arm_perf_pmu_ids {
 	ARM_PERF_PMU_ID_XSCALE1 = 0,
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index b2abfa18f137..8a89d3b7626b 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -539,6 +539,10 @@ static int armpmu_event_init(struct perf_event *event)
 	int err = 0;
 	atomic_t *active_events = &armpmu->active_events;
 
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	if (armpmu->map_event(event) == -ENOENT)
 		return -ENOENT;
 
diff --git a/arch/frv/include/asm/perf_event.h b/arch/frv/include/asm/perf_event.h
index a69e0155d146..c52ea5546b5b 100644
--- a/arch/frv/include/asm/perf_event.h
+++ b/arch/frv/include/asm/perf_event.h
@@ -12,6 +12,4 @@
 #ifndef _ASM_PERF_EVENT_H
 #define _ASM_PERF_EVENT_H
 
-#define PERF_EVENT_INDEX_OFFSET 0
-
 #endif /* _ASM_PERF_EVENT_H */
diff --git a/arch/hexagon/include/asm/perf_event.h b/arch/hexagon/include/asm/perf_event.h
index 6c2910f91180..8b8526b491c7 100644
--- a/arch/hexagon/include/asm/perf_event.h
+++ b/arch/hexagon/include/asm/perf_event.h
@@ -19,6 +19,4 @@
 #ifndef _ASM_PERF_EVENT_H
 #define _ASM_PERF_EVENT_H
 
-#define PERF_EVENT_INDEX_OFFSET 0
-
 #endif /* _ASM_PERF_EVENT_H */
diff --git a/arch/ia64/include/asm/paravirt.h b/arch/ia64/include/asm/paravirt.h
index 32551d304cd7..b149b88ea795 100644
--- a/arch/ia64/include/asm/paravirt.h
+++ b/arch/ia64/include/asm/paravirt.h
@@ -281,9 +281,9 @@ paravirt_init_missing_ticks_accounting(int cpu)
 		pv_time_ops.init_missing_ticks_accounting(cpu);
 }
 
-struct jump_label_key;
-extern struct jump_label_key paravirt_steal_enabled;
-extern struct jump_label_key paravirt_steal_rq_enabled;
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
 
 static inline int
 paravirt_do_steal_accounting(unsigned long *new_itm)
diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c
index 100868216c55..1b22f6de2932 100644
--- a/arch/ia64/kernel/paravirt.c
+++ b/arch/ia64/kernel/paravirt.c
@@ -634,8 +634,8 @@ struct pv_irq_ops pv_irq_ops = {
  * pv_time_ops
  * time operations
  */
-struct jump_label_key paravirt_steal_enabled;
-struct jump_label_key paravirt_steal_rq_enabled;
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
 
 static int
 ia64_native_do_steal_accounting(unsigned long *new_itm)
diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h
index 1881b316ca45..4d6d77ed9b9d 100644
--- a/arch/mips/include/asm/jump_label.h
+++ b/arch/mips/include/asm/jump_label.h
@@ -20,7 +20,7 @@
 #define WORD_INSN ".word"
 #endif
 
-static __always_inline bool arch_static_branch(struct jump_label_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key)
 {
 	asm goto("1:\tnop\n\t"
 		"nop\n\t"
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index e3b897acfbc0..811084f4e422 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -606,6 +606,10 @@ static int mipspmu_event_init(struct perf_event *event)
 {
 	int err = 0;
 
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	switch (event->attr.type) {
 	case PERF_TYPE_RAW:
 	case PERF_TYPE_HARDWARE:
diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h
index 938986e412f1..ae098c438f00 100644
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -17,7 +17,7 @@
 #define JUMP_ENTRY_TYPE		stringify_in_c(FTR_ENTRY_LONG)
 #define JUMP_LABEL_NOP_SIZE	4
 
-static __always_inline bool arch_static_branch(struct jump_label_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key)
 {
 	asm goto("1:\n\t"
 		 "nop\n\t"
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 8f1df1208d23..1a8093fa8f71 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -61,8 +61,6 @@ struct pt_regs;
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 
-#define PERF_EVENT_INDEX_OFFSET	1
-
 /*
  * Only override the default definitions in include/linux/perf_event.h
  * if we have hardware PMU support.
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 64483fde95c6..c2e27ede07ec 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -1084,6 +1084,10 @@ static int power_pmu_event_init(struct perf_event *event)
 	if (!ppmu)
 		return -ENOENT;
 
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	switch (event->attr.type) {
 	case PERF_TYPE_HARDWARE:
 		ev = event->attr.config;
@@ -1193,6 +1197,11 @@ static int power_pmu_event_init(struct perf_event *event)
 	return err;
 }
 
+static int power_pmu_event_idx(struct perf_event *event)
+{
+	return event->hw.idx;
+}
+
 struct pmu power_pmu = {
 	.pmu_enable	= power_pmu_enable,
 	.pmu_disable	= power_pmu_disable,
@@ -1205,6 +1214,7 @@ struct pmu power_pmu = {
 	.start_txn	= power_pmu_start_txn,
 	.cancel_txn	= power_pmu_cancel_txn,
 	.commit_txn	= power_pmu_commit_txn,
+	.event_idx	= power_pmu_event_idx,
 };
 
 /*
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 95a6cf2b5b67..6c32190dc73e 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -13,7 +13,7 @@
 #define ASM_ALIGN ".balign 4"
 #endif
 
-static __always_inline bool arch_static_branch(struct jump_label_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key)
 {
 	asm goto("0:	brcl 0,0\n"
 		".pushsection __jump_table, \"aw\"\n"
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index a75f168d2718..4eb444edbe49 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -6,4 +6,3 @@
 
 /* Empty, just to avoid compiling error */
 
-#define PERF_EVENT_INDEX_OFFSET 0
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 10b14e3a7eb8..068b8a2759b5 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -310,6 +310,10 @@ static int sh_pmu_event_init(struct perf_event *event)
 {
 	int err;
 
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	switch (event->attr.type) {
 	case PERF_TYPE_RAW:
 	case PERF_TYPE_HW_CACHE:
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
index fc73a82366f8..5080d16a832f 100644
--- a/arch/sparc/include/asm/jump_label.h
+++ b/arch/sparc/include/asm/jump_label.h
@@ -7,7 +7,7 @@
 
 #define JUMP_LABEL_NOP_SIZE 4
 
-static __always_inline bool arch_static_branch(struct jump_label_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key)
 {
 	asm goto("1:\n\t"
 		 "nop\n\t"
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 614da624330c..8e16a4a21582 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1105,6 +1105,10 @@ static int sparc_pmu_event_init(struct perf_event *event)
 	if (atomic_read(&nmi_active) < 0)
 		return -ENODEV;
 
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	switch (attr->type) {
 	case PERF_TYPE_HARDWARE:
 		if (attr->config >= sparc_pmu->max_events)
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h
index 205b063e3e32..74a2e312e8a2 100644
--- a/arch/x86/include/asm/inat.h
+++ b/arch/x86/include/asm/inat.h
@@ -97,11 +97,12 @@
 
 /* Attribute search APIs */
 extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
+extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
 extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode,
-					     insn_byte_t last_pfx,
+					     int lpfx_id,
 					     insn_attr_t esc_attr);
 extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
-					    insn_byte_t last_pfx,
+					    int lpfx_id,
 					    insn_attr_t esc_attr);
 extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode,
 					  insn_byte_t vex_m,
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 74df3f1eddfd..48eb30a86062 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -96,12 +96,6 @@ struct insn {
 #define X86_VEX_P(vex)	((vex) & 0x03)	/* VEX3 Byte2, VEX2 Byte1 */
 #define X86_VEX_M_MAX	0x1f		/* VEX3.M Maximum value */
 
-/* The last prefix is needed for two-byte and three-byte opcodes */
-static inline insn_byte_t insn_last_prefix(struct insn *insn)
-{
-	return insn->prefixes.bytes[3];
-}
-
 extern void insn_init(struct insn *insn, const void *kaddr, int x86_64);
 extern void insn_get_prefixes(struct insn *insn);
 extern void insn_get_opcode(struct insn *insn);
@@ -160,6 +154,18 @@ static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
 	return X86_VEX_P(insn->vex_prefix.bytes[2]);
 }
 
+/* Get the last prefix id from last prefix or VEX prefix */
+static inline int insn_last_prefix_id(struct insn *insn)
+{
+	if (insn_is_avx(insn))
+		return insn_vex_p_bits(insn);	/* VEX_p is a SIMD prefix id */
+
+	if (insn->prefixes.bytes[3])
+		return inat_get_last_prefix_id(insn->prefixes.bytes[3]);
+
+	return 0;
+}
+
 /* Offset of each field from kaddr */
 static inline int insn_offset_rex_prefix(struct insn *insn)
 {
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index a32b18ce6ead..3a16c1483b45 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -9,12 +9,12 @@
 
 #define JUMP_LABEL_NOP_SIZE 5
 
-#define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
+#define STATIC_KEY_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
 
-static __always_inline bool arch_static_branch(struct jump_label_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key)
 {
 	asm goto("1:"
-		JUMP_LABEL_INITIAL_NOP
+		STATIC_KEY_INITIAL_NOP
 		".pushsection __jump_table, \"aw\" \n\t"
 		_ASM_ALIGN "\n\t"
 		_ASM_PTR "1b, %l[l_yes], %c0 \n\t"
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index a6962d9161a0..ccb805966f68 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -56,6 +56,13 @@
 #define MSR_OFFCORE_RSP_0		0x000001a6
 #define MSR_OFFCORE_RSP_1		0x000001a7
 
+#define MSR_LBR_SELECT			0x000001c8
+#define MSR_LBR_TOS			0x000001c9
+#define MSR_LBR_NHM_FROM		0x00000680
+#define MSR_LBR_NHM_TO			0x000006c0
+#define MSR_LBR_CORE_FROM		0x00000040
+#define MSR_LBR_CORE_TO			0x00000060
+
 #define MSR_IA32_PEBS_ENABLE		0x000003f1
 #define MSR_IA32_DS_AREA		0x00000600
 #define MSR_IA32_PERF_CAPABILITIES	0x00000345
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index a7d2db9a74fb..c0180fd372d2 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -230,9 +230,9 @@ static inline unsigned long long paravirt_sched_clock(void)
 	return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
 }
 
-struct jump_label_key;
-extern struct jump_label_key paravirt_steal_enabled;
-extern struct jump_label_key paravirt_steal_rq_enabled;
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
 
 static inline u64 paravirt_steal_clock(int cpu)
 {
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 461ce432b1c2..e8fb2c7a5f4f 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -188,8 +188,6 @@ extern u32 get_ibs_caps(void);
 #ifdef CONFIG_PERF_EVENTS
 extern void perf_events_lapic_init(void);
 
-#define PERF_EVENT_INDEX_OFFSET		0
-
 /*
  * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
  * This flag is otherwise unused and ABI specified to be 0, so nobody should
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5369059c07a9..532d2e090e6f 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -69,6 +69,7 @@ obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
 obj-$(CONFIG_KEXEC)		+= relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_$(BITS).o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
+obj-$(CONFIG_OPTPROBES)		+= kprobes-opt.o
 obj-$(CONFIG_MODULES)		+= module.o
 obj-$(CONFIG_DOUBLEFAULT)	+= doublefault_32.o
 obj-$(CONFIG_KGDB)		+= kgdb.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f4773f4aae35..0a44b90602b0 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -5,6 +5,7 @@
 #include <linux/mm.h>
 
 #include <linux/io.h>
+#include <linux/sched.h>
 #include <asm/processor.h>
 #include <asm/apic.h>
 #include <asm/cpu.h>
@@ -456,6 +457,8 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
 	if (c->x86_power & (1 << 8)) {
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+		if (!check_tsc_unstable())
+			sched_clock_stable = 1;
 	}
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5adce1040b11..0a18d16cb58d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -24,6 +24,7 @@
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/cpu.h> 25#include <linux/cpu.h>
26#include <linux/bitops.h> 26#include <linux/bitops.h>
27#include <linux/device.h>
27 28
28#include <asm/apic.h> 29#include <asm/apic.h>
29#include <asm/stacktrace.h> 30#include <asm/stacktrace.h>
@@ -31,6 +32,7 @@
31#include <asm/compat.h> 32#include <asm/compat.h>
32#include <asm/smp.h> 33#include <asm/smp.h>
33#include <asm/alternative.h> 34#include <asm/alternative.h>
35#include <asm/timer.h>
34 36
35#include "perf_event.h" 37#include "perf_event.h"
36 38
@@ -351,6 +353,36 @@ int x86_setup_perfctr(struct perf_event *event)
351 return 0; 353 return 0;
352} 354}
353 355
356/*
357 * check that branch_sample_type is compatible with
358 * settings needed for precise_ip > 1 which implies
359 * using the LBR to capture ALL taken branches at the
360 * priv levels of the measurement
361 */
362static inline int precise_br_compat(struct perf_event *event)
363{
364 u64 m = event->attr.branch_sample_type;
365 u64 b = 0;
366
367 /* must capture all branches */
368 if (!(m & PERF_SAMPLE_BRANCH_ANY))
369 return 0;
370
371 m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER;
372
373 if (!event->attr.exclude_user)
374 b |= PERF_SAMPLE_BRANCH_USER;
375
376 if (!event->attr.exclude_kernel)
377 b |= PERF_SAMPLE_BRANCH_KERNEL;
378
379 /*
380 * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86
381 */
382
383 return m == b;
384}
385
354int x86_pmu_hw_config(struct perf_event *event) 386int x86_pmu_hw_config(struct perf_event *event)
355{ 387{
356 if (event->attr.precise_ip) { 388 if (event->attr.precise_ip) {
@@ -367,6 +399,36 @@ int x86_pmu_hw_config(struct perf_event *event)
367 399
368 if (event->attr.precise_ip > precise) 400 if (event->attr.precise_ip > precise)
369 return -EOPNOTSUPP; 401 return -EOPNOTSUPP;
402 /*
403 * check that PEBS LBR correction does not conflict with
404 * whatever the user is asking with attr->branch_sample_type
405 */
406 if (event->attr.precise_ip > 1) {
407 u64 *br_type = &event->attr.branch_sample_type;
408
409 if (has_branch_stack(event)) {
410 if (!precise_br_compat(event))
411 return -EOPNOTSUPP;
412
413 /* branch_sample_type is compatible */
414
415 } else {
416 /*
417 * user did not specify branch_sample_type
418 *
419 * For PEBS fixups, we capture all
420 * the branches at the priv level of the
421 * event.
422 */
423 *br_type = PERF_SAMPLE_BRANCH_ANY;
424
425 if (!event->attr.exclude_user)
426 *br_type |= PERF_SAMPLE_BRANCH_USER;
427
428 if (!event->attr.exclude_kernel)
429 *br_type |= PERF_SAMPLE_BRANCH_KERNEL;
430 }
431 }
370 } 432 }
371 433
372 /* 434 /*
@@ -424,6 +486,10 @@ static int __x86_pmu_event_init(struct perf_event *event)
424 /* mark unused */ 486 /* mark unused */
425 event->hw.extra_reg.idx = EXTRA_REG_NONE; 487 event->hw.extra_reg.idx = EXTRA_REG_NONE;
426 488
489 /* mark not used */
490 event->hw.extra_reg.idx = EXTRA_REG_NONE;
491 event->hw.branch_reg.idx = EXTRA_REG_NONE;
492
427 return x86_pmu.hw_config(event); 493 return x86_pmu.hw_config(event);
428} 494}
429 495
@@ -1210,6 +1276,8 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 		break;
 
 	case CPU_STARTING:
+		if (x86_pmu.attr_rdpmc)
+			set_in_cr4(X86_CR4_PCE);
 		if (x86_pmu.cpu_starting)
 			x86_pmu.cpu_starting(cpu);
 		break;
@@ -1319,6 +1387,8 @@ static int __init init_hw_perf_events(void)
 		}
 	}
 
+	x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
+
 	pr_info("... version:            %d\n", x86_pmu.version);
 	pr_info("... bit width:          %d\n", x86_pmu.cntval_bits);
 	pr_info("... generic registers:  %d\n", x86_pmu.num_counters);
@@ -1542,23 +1612,106 @@ static int x86_pmu_event_init(struct perf_event *event)
 	return err;
 }
 
+static int x86_pmu_event_idx(struct perf_event *event)
+{
+	int idx = event->hw.idx;
+
+	if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) {
+		idx -= X86_PMC_IDX_FIXED;
+		idx |= 1 << 30;
+	}
+
+	return idx + 1;
+}
+
+static ssize_t get_attr_rdpmc(struct device *cdev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
+}
+
+static void change_rdpmc(void *info)
+{
+	bool enable = !!(unsigned long)info;
+
+	if (enable)
+		set_in_cr4(X86_CR4_PCE);
+	else
+		clear_in_cr4(X86_CR4_PCE);
+}
+
+static ssize_t set_attr_rdpmc(struct device *cdev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t count)
+{
+	unsigned long val = simple_strtoul(buf, NULL, 0);
+
+	if (!!val != !!x86_pmu.attr_rdpmc) {
+		x86_pmu.attr_rdpmc = !!val;
+		smp_call_function(change_rdpmc, (void *)val, 1);
+	}
+
+	return count;
+}
+
+static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc);
+
+static struct attribute *x86_pmu_attrs[] = {
+	&dev_attr_rdpmc.attr,
+	NULL,
+};
+
+static struct attribute_group x86_pmu_attr_group = {
+	.attrs = x86_pmu_attrs,
+};
+
+static const struct attribute_group *x86_pmu_attr_groups[] = {
+	&x86_pmu_attr_group,
+	NULL,
+};
+
+static void x86_pmu_flush_branch_stack(void)
+{
+	if (x86_pmu.flush_branch_stack)
+		x86_pmu.flush_branch_stack();
+}
+
 static struct pmu pmu = {
 	.pmu_enable	= x86_pmu_enable,
 	.pmu_disable	= x86_pmu_disable,
+
+	.attr_groups	= x86_pmu_attr_groups,
 
 	.event_init	= x86_pmu_event_init,
 
 	.add		= x86_pmu_add,
 	.del		= x86_pmu_del,
 	.start		= x86_pmu_start,
 	.stop		= x86_pmu_stop,
 	.read		= x86_pmu_read,
 
 	.start_txn	= x86_pmu_start_txn,
	.cancel_txn	= x86_pmu_cancel_txn,
 	.commit_txn	= x86_pmu_commit_txn,
+
+	.event_idx		= x86_pmu_event_idx,
+	.flush_branch_stack	= x86_pmu_flush_branch_stack,
 };
 
+void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
+{
+	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+		return;
+
+	if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
+		return;
+
+	userpg->time_mult = this_cpu_read(cyc2ns);
+	userpg->time_shift = CYC2NS_SCALE_FACTOR;
+	userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
+}
+
 /*
  * callchain support
  */
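The new rdpmc sysfs attribute and x86_pmu_event_idx() are what make the
user-space self-profiling mentioned in the changelog work. As a rough
sketch (an illustration, not the patch's code), a self-monitoring task
with an mmap'ed event could read its counter without a syscall like this,
assuming x86 and a kernel exposing the new index/offset mmap fields:

	#include <linux/perf_event.h>
	#include <stdint.h>

	static inline uint64_t rdpmc(uint32_t counter)
	{
		uint32_t low, high;

		asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
		return low | ((uint64_t)high << 32);
	}

	/* pc points at the mmap'ed first page of the perf event fd */
	static uint64_t read_self_count(volatile struct perf_event_mmap_page *pc)
	{
		uint32_t seq, idx;
		uint64_t count;

		do {
			seq = pc->lock;
			asm volatile("" ::: "memory");
			idx = pc->index;	/* x86_pmu_event_idx() result */
			count = pc->offset;
			if (idx)		/* 0 means: no rdpmc access */
				count += rdpmc(idx - 1);
			asm volatile("" ::: "memory");
		} while (pc->lock != seq);	/* retry if kernel updated it */

		return count;
	}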
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index c30c807ddc72..8484e77c211e 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -33,6 +33,7 @@ enum extra_reg_type {
 
 	EXTRA_REG_RSP_0 = 0,	/* offcore_response_0 */
 	EXTRA_REG_RSP_1 = 1,	/* offcore_response_1 */
+	EXTRA_REG_LBR   = 2,	/* lbr_select */
 
 	EXTRA_REG_MAX		/* number of entries needed */
 };
@@ -130,6 +131,8 @@ struct cpu_hw_events {
 	void				*lbr_context;
 	struct perf_branch_stack	lbr_stack;
 	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
+	struct er_account		*lbr_sel;
+	u64				br_sel;
 
 	/*
 	 * Intel host/guest exclude bits
@@ -268,6 +271,29 @@ struct x86_pmu_quirk {
 	void (*func)(void);
 };
 
+union x86_pmu_config {
+	struct {
+		u64 event:8,
+		    umask:8,
+		    usr:1,
+		    os:1,
+		    edge:1,
+		    pc:1,
+		    interrupt:1,
+		    __reserved1:1,
+		    en:1,
+		    inv:1,
+		    cmask:8,
+		    event2:4,
+		    __reserved2:4,
+		    go:1,
+		    ho:1;
+	} bits;
+	u64 value;
+};
+
+#define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value
+
 /*
  * struct x86_pmu - generic x86 pmu
  */
@@ -309,10 +335,19 @@ struct x86_pmu {
 	struct x86_pmu_quirk *quirks;
 	int		perfctr_second_write;
 
+	/*
+	 * sysfs attrs
+	 */
+	int		attr_rdpmc;
+
+	/*
+	 * CPU Hotplug hooks
+	 */
 	int		(*cpu_prepare)(int cpu);
 	void		(*cpu_starting)(int cpu);
 	void		(*cpu_dying)(int cpu);
 	void		(*cpu_dead)(int cpu);
+	void		(*flush_branch_stack)(void);
 
 	/*
 	 * Intel Arch Perfmon v2+
@@ -334,6 +369,8 @@ struct x86_pmu {
 	 */
 	unsigned long	lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
 	int		lbr_nr;			   /* hardware stack size */
+	u64		lbr_sel_mask;		   /* LBR_SELECT valid bits */
+	const int	*lbr_sel_map;		   /* lbr_select mappings */
 
 	/*
 	 * Extra registers for events
@@ -447,6 +484,15 @@ extern struct event_constraint emptyconstraint;
 
 extern struct event_constraint unconstrained;
 
+static inline bool kernel_ip(unsigned long ip)
+{
+#ifdef CONFIG_X86_32
+	return ip > PAGE_OFFSET;
+#else
+	return (long)ip < 0;
+#endif
+}
+
 #ifdef CONFIG_CPU_SUP_AMD
 
 int amd_pmu_init(void);
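kernel_ip(), now shared here instead of living in the PEBS code, relies
on the kernel/user address split: on 64-bit, kernel text sits in the
upper half of the canonical address space, so the top bit (the sign bit
of a signed long) is set; on 32-bit, anything above PAGE_OFFSET is
kernel. A tiny standalone illustration of the 64-bit case:

	#include <stdbool.h>
	#include <stdio.h>

	static bool kernel_ip64(unsigned long ip)
	{
		return (long)ip < 0;	/* top bit set => kernel address */
	}

	int main(void)
	{
		printf("%d\n", kernel_ip64(0xffffffff81000000UL)); /* 1: kernel */
		printf("%d\n", kernel_ip64(0x0000000000400000UL)); /* 0: user */
		return 0;
	}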
@@ -527,6 +573,10 @@ void intel_pmu_lbr_init_nhm(void);
 
 void intel_pmu_lbr_init_atom(void);
 
+void intel_pmu_lbr_init_snb(void);
+
+int intel_pmu_setup_lbr_filter(struct perf_event *event);
+
 int p4_pmu_init(void);
 
 int p6_pmu_init(void);
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 67250a52430b..dd002faff7a6 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -139,6 +139,9 @@ static int amd_pmu_hw_config(struct perf_event *event)
 	if (ret)
 		return ret;
 
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	if (event->attr.exclude_host && event->attr.exclude_guest)
 		/*
 		 * When HO == GO == 1 the hardware treats that as GO == HO == 0
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 61d4f79a550e..6a84e7f28f05 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -728,6 +728,19 @@ static __initconst const u64 atom_hw_cache_event_ids
 	},
 };
 
+static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
+{
+	/* user explicitly requested branch sampling */
+	if (has_branch_stack(event))
+		return true;
+
+	/* implicit branch sampling to correct PEBS skid */
+	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
+		return true;
+
+	return false;
+}
+
 static void intel_pmu_disable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -882,6 +895,13 @@ static void intel_pmu_disable_event(struct perf_event *event)
 	cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
 	cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
 
+	/*
+	 * must be disabled before any actual event,
+	 * because any event may be combined with LBR
+	 */
+	if (intel_pmu_needs_lbr_smpl(event))
+		intel_pmu_lbr_disable(event);
+
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
 		intel_pmu_disable_fixed(hwc);
 		return;
@@ -936,6 +956,12 @@ static void intel_pmu_enable_event(struct perf_event *event)
 		intel_pmu_enable_bts(hwc->config);
 		return;
 	}
+	/*
+	 * must be enabled before any actual event,
+	 * because any event may be combined with LBR
+	 */
+	if (intel_pmu_needs_lbr_smpl(event))
+		intel_pmu_lbr_enable(event);
 
 	if (event->attr.exclude_host)
 		cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
@@ -1058,6 +1084,9 @@ again:
 
 		data.period = event->hw.last_period;
 
+		if (has_branch_stack(event))
+			data.br_stack = &cpuc->lbr_stack;
+
 		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
 	}
@@ -1124,17 +1153,17 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
  */
 static struct event_constraint *
 __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
-				   struct perf_event *event)
+				   struct perf_event *event,
+				   struct hw_perf_event_extra *reg)
 {
 	struct event_constraint *c = &emptyconstraint;
-	struct hw_perf_event_extra *reg = &event->hw.extra_reg;
 	struct er_account *era;
 	unsigned long flags;
 	int orig_idx = reg->idx;
 
 	/* already allocated shared msr */
 	if (reg->alloc)
-		return &unconstrained;
+		return NULL; /* call x86_get_event_constraints() */
 
 again:
 	era = &cpuc->shared_regs->regs[reg->idx];
@@ -1157,14 +1186,10 @@ again:
 		reg->alloc = 1;
 
 		/*
-		 * All events using extra_reg are unconstrained.
-		 * Avoids calling x86_get_event_constraints()
-		 *
-		 * Must revisit if extra_reg controlling events
-		 * ever have constraints. Worst case we go through
-		 * the regular event constraint table.
+		 * need to call x86_get_event_constraints()
+		 * to check if the associated event has constraints
 		 */
-		c = &unconstrained;
+		c = NULL;
 	} else if (intel_try_alt_er(event, orig_idx)) {
 		raw_spin_unlock_irqrestore(&era->lock, flags);
 		goto again;
@@ -1201,11 +1226,23 @@ static struct event_constraint *
 intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
 			      struct perf_event *event)
 {
-	struct event_constraint *c = NULL;
-
-	if (event->hw.extra_reg.idx != EXTRA_REG_NONE)
-		c = __intel_shared_reg_get_constraints(cpuc, event);
-
+	struct event_constraint *c = NULL, *d;
+	struct hw_perf_event_extra *xreg, *breg;
+
+	xreg = &event->hw.extra_reg;
+	if (xreg->idx != EXTRA_REG_NONE) {
+		c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
+		if (c == &emptyconstraint)
+			return c;
+	}
+	breg = &event->hw.branch_reg;
+	if (breg->idx != EXTRA_REG_NONE) {
+		d = __intel_shared_reg_get_constraints(cpuc, event, breg);
+		if (d == &emptyconstraint) {
+			__intel_shared_reg_put_constraints(cpuc, xreg);
+			c = d;
+		}
+	}
 	return c;
 }
 
@@ -1253,6 +1290,10 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
 	reg = &event->hw.extra_reg;
 	if (reg->idx != EXTRA_REG_NONE)
 		__intel_shared_reg_put_constraints(cpuc, reg);
+
+	reg = &event->hw.branch_reg;
+	if (reg->idx != EXTRA_REG_NONE)
+		__intel_shared_reg_put_constraints(cpuc, reg);
 }
 
 static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
@@ -1288,12 +1329,19 @@ static int intel_pmu_hw_config(struct perf_event *event)
 	 *
 	 * Thereby we gain a PEBS capable cycle counter.
	 */
-		u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */
+		u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
+
 
 		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
 		event->hw.config = alt_config;
 	}
 
+	if (intel_pmu_needs_lbr_smpl(event)) {
+		ret = intel_pmu_setup_lbr_filter(event);
+		if (ret)
+			return ret;
+	}
+
 	if (event->attr.type != PERF_TYPE_RAW)
 		return 0;
 
@@ -1432,7 +1480,7 @@ static int intel_pmu_cpu_prepare(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 
-	if (!x86_pmu.extra_regs)
+	if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map))
 		return NOTIFY_OK;
 
 	cpuc->shared_regs = allocate_shared_regs(cpu);
@@ -1454,22 +1502,28 @@ static void intel_pmu_cpu_starting(int cpu)
 	 */
 	intel_pmu_lbr_reset();
 
-	if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING))
+	cpuc->lbr_sel = NULL;
+
+	if (!cpuc->shared_regs)
 		return;
 
-	for_each_cpu(i, topology_thread_cpumask(cpu)) {
-		struct intel_shared_regs *pc;
+	if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) {
+		for_each_cpu(i, topology_thread_cpumask(cpu)) {
+			struct intel_shared_regs *pc;
 
 			pc = per_cpu(cpu_hw_events, i).shared_regs;
 			if (pc && pc->core_id == core_id) {
 				cpuc->kfree_on_online = cpuc->shared_regs;
 				cpuc->shared_regs = pc;
 				break;
+			}
 		}
+		cpuc->shared_regs->core_id = core_id;
+		cpuc->shared_regs->refcnt++;
 	}
 
-	cpuc->shared_regs->core_id = core_id;
-	cpuc->shared_regs->refcnt++;
+	if (x86_pmu.lbr_sel_map)
+		cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
 }
 
 static void intel_pmu_cpu_dying(int cpu)
@@ -1487,6 +1541,18 @@ static void intel_pmu_cpu_dying(int cpu)
 	fini_debug_store_on_cpu(cpu);
 }
 
+static void intel_pmu_flush_branch_stack(void)
+{
+	/*
+	 * Intel LBR does not tag entries with the PID of the current
+	 * task, so we need to flush it on context switch.
+	 * For now, we simply reset it.
+	 */
+	if (x86_pmu.lbr_nr)
+		intel_pmu_lbr_reset();
+}
+
 static __initconst const struct x86_pmu intel_pmu = {
 	.name			= "Intel",
 	.handle_irq		= intel_pmu_handle_irq,
@@ -1514,6 +1580,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.cpu_starting		= intel_pmu_cpu_starting,
 	.cpu_dying		= intel_pmu_cpu_dying,
 	.guest_get_msrs		= intel_guest_get_msrs,
+	.flush_branch_stack	= intel_pmu_flush_branch_stack,
 };
 
 static __init void intel_clovertown_quirk(void)
@@ -1690,9 +1757,11 @@ __init int intel_pmu_init(void)
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
 
 		/* UOPS_ISSUED.STALLED_CYCLES */
-		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
 		/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
-		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+			X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
 
 		x86_add_quirk(intel_nehalem_quirk);
 
@@ -1727,9 +1796,11 @@ __init int intel_pmu_init(void)
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 
 		/* UOPS_ISSUED.STALLED_CYCLES */
-		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
 		/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
-		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+			X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
 
 		pr_cont("Westmere events, ");
 		break;
@@ -1740,7 +1811,7 @@ __init int intel_pmu_init(void)
 		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
-		intel_pmu_lbr_init_nhm();
+		intel_pmu_lbr_init_snb();
 
 		x86_pmu.event_constraints = intel_snb_event_constraints;
 		x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
@@ -1750,9 +1821,11 @@ __init int intel_pmu_init(void)
 		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
 
 		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
-		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
 		/* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles */
-		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1;
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+			X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
 
 		pr_cont("SandyBridge events, ");
 		break;
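With data.br_stack wired up in the PMI handler above (and in the PEBS
path below), samples that requested PERF_SAMPLE_BRANCH_STACK carry the
LBR contents out to userspace. A rough sketch of walking that portion of
a sample record, per the nr-plus-entries layout this series adds (a
hypothetical helper, assuming p already points at the branch-stack part):

	#include <stdint.h>
	#include <stdio.h>

	struct branch_entry {		/* mirrors struct perf_branch_entry */
		uint64_t from;
		uint64_t to;
		uint64_t mispred:1,	/* target mispredicted */
			 predicted:1,	/* target predicted */
			 reserved:62;
	};

	static void dump_branch_stack(const uint64_t *p)
	{
		uint64_t nr = *p++;
		const struct branch_entry *br = (const void *)p;

		for (uint64_t i = 0; i < nr; i++)
			printf("%#llx -> %#llx %s\n",
			       (unsigned long long)br[i].from,
			       (unsigned long long)br[i].to,
			       br[i].mispred ? "M" : "P");
	}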
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index d6bd49faa40c..7f64df19e7dd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -3,6 +3,7 @@
 #include <linux/slab.h>
 
 #include <asm/perf_event.h>
+#include <asm/insn.h>
 
 #include "perf_event.h"
 
@@ -439,9 +440,6 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
-
-	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
-		intel_pmu_lbr_enable(event);
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
@@ -454,9 +452,6 @@ void intel_pmu_pebs_disable(struct perf_event *event)
 	wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 
 	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
-
-	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
-		intel_pmu_lbr_disable(event);
 }
 
 void intel_pmu_pebs_enable_all(void)
@@ -475,17 +470,6 @@ void intel_pmu_pebs_disable_all(void)
 	wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
 }
 
-#include <asm/insn.h>
-
-static inline bool kernel_ip(unsigned long ip)
-{
-#ifdef CONFIG_X86_32
-	return ip > PAGE_OFFSET;
-#else
-	return (long)ip < 0;
-#endif
-}
-
 static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -572,6 +556,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	 * both formats and we don't use the other fields in this
 	 * routine.
	 */
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct pebs_record_core *pebs = __pebs;
 	struct perf_sample_data data;
 	struct pt_regs regs;
@@ -602,6 +587,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	else
 		regs.flags &= ~PERF_EFLAGS_EXACT;
 
+	if (has_branch_stack(event))
+		data.br_stack = &cpuc->lbr_stack;
+
 	if (perf_event_overflow(event, &data, &regs))
 		x86_pmu_stop(event, 0);
 }
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 47a7e63bfe54..520b4265fcd2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -3,6 +3,7 @@
 
 #include <asm/perf_event.h>
 #include <asm/msr.h>
+#include <asm/insn.h>
 
 #include "perf_event.h"
 
@@ -14,6 +15,100 @@ enum {
 };
 
 /*
+ * Intel LBR_SELECT bits
+ * Intel Vol3a, April 2011, Section 16.7 Table 16-10
+ *
+ * Hardware branch filter (not available on all CPUs)
+ */
+#define LBR_KERNEL_BIT		0 /* do not capture at ring0 */
+#define LBR_USER_BIT		1 /* do not capture at ring > 0 */
+#define LBR_JCC_BIT		2 /* do not capture conditional branches */
+#define LBR_REL_CALL_BIT	3 /* do not capture relative calls */
+#define LBR_IND_CALL_BIT	4 /* do not capture indirect calls */
+#define LBR_RETURN_BIT		5 /* do not capture near returns */
+#define LBR_IND_JMP_BIT		6 /* do not capture indirect jumps */
+#define LBR_REL_JMP_BIT		7 /* do not capture relative jumps */
+#define LBR_FAR_BIT		8 /* do not capture far branches */
+
+#define LBR_KERNEL	(1 << LBR_KERNEL_BIT)
+#define LBR_USER	(1 << LBR_USER_BIT)
+#define LBR_JCC		(1 << LBR_JCC_BIT)
+#define LBR_REL_CALL	(1 << LBR_REL_CALL_BIT)
+#define LBR_IND_CALL	(1 << LBR_IND_CALL_BIT)
+#define LBR_RETURN	(1 << LBR_RETURN_BIT)
+#define LBR_REL_JMP	(1 << LBR_REL_JMP_BIT)
+#define LBR_IND_JMP	(1 << LBR_IND_JMP_BIT)
+#define LBR_FAR		(1 << LBR_FAR_BIT)
+
+#define LBR_PLM		(LBR_KERNEL | LBR_USER)
+
+#define LBR_SEL_MASK	0x1ff	/* valid bits in LBR_SELECT */
+#define LBR_NOT_SUPP	-1	/* LBR filter not supported */
+#define LBR_IGN		0	/* ignored */
+
+#define LBR_ANY		 \
+	(LBR_JCC	|\
+	 LBR_REL_CALL	|\
+	 LBR_IND_CALL	|\
+	 LBR_RETURN	|\
+	 LBR_REL_JMP	|\
+	 LBR_IND_JMP	|\
+	 LBR_FAR)
+
+#define LBR_FROM_FLAG_MISPRED	(1ULL << 63)
+
+#define for_each_branch_sample_type(x) \
+	for ((x) = PERF_SAMPLE_BRANCH_USER; \
+	     (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
+
+/*
+ * x86 control flow change classification
+ * x86 control flow changes include branches, interrupts, traps, faults
+ */
+enum {
+	X86_BR_NONE     = 0,      /* unknown */
+
+	X86_BR_USER     = 1 << 0, /* branch target is user */
+	X86_BR_KERNEL   = 1 << 1, /* branch target is kernel */
+
+	X86_BR_CALL     = 1 << 2, /* call */
+	X86_BR_RET      = 1 << 3, /* return */
+	X86_BR_SYSCALL  = 1 << 4, /* syscall */
+	X86_BR_SYSRET   = 1 << 5, /* syscall return */
+	X86_BR_INT      = 1 << 6, /* sw interrupt */
+	X86_BR_IRET     = 1 << 7, /* return from interrupt */
+	X86_BR_JCC      = 1 << 8, /* conditional */
+	X86_BR_JMP      = 1 << 9, /* jump */
+	X86_BR_IRQ      = 1 << 10,/* hw interrupt or trap or fault */
+	X86_BR_IND_CALL = 1 << 11,/* indirect calls */
+};
+
+#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
+
+#define X86_BR_ANY       \
+	(X86_BR_CALL    |\
+	 X86_BR_RET     |\
+	 X86_BR_SYSCALL |\
+	 X86_BR_SYSRET  |\
+	 X86_BR_INT     |\
+	 X86_BR_IRET    |\
+	 X86_BR_JCC     |\
+	 X86_BR_JMP     |\
+	 X86_BR_IRQ     |\
+	 X86_BR_IND_CALL)
+
+#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
+
+#define X86_BR_ANY_CALL		 \
+	(X86_BR_CALL		|\
+	 X86_BR_IND_CALL	|\
+	 X86_BR_SYSCALL		|\
+	 X86_BR_IRQ		|\
+	 X86_BR_INT)
+
+static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
+
+/*
 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
 * otherwise it becomes near impossible to get a reliable stack.
 */
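Note that LBR_SELECT works in suppress mode: a set bit disables capture
of that branch class. As a small standalone sketch of the inversion done
later by intel_pmu_setup_hw_lbr_filter(), using the SNB mapping for
PERF_SAMPLE_BRANCH_ANY_CALL (constants copied from the defines above):

	#include <stdint.h>
	#include <stdio.h>

	#define LBR_REL_CALL	(1 << 3)
	#define LBR_IND_CALL	(1 << 4)
	#define LBR_FAR		(1 << 8)
	#define LBR_SEL_MASK	0x1ff

	int main(void)
	{
		uint64_t want = LBR_REL_CALL | LBR_IND_CALL | LBR_FAR;
		/* suppress every branch class we do NOT want */
		uint64_t lbr_select = ~want & LBR_SEL_MASK;

		printf("MSR_LBR_SELECT = %#llx\n",
		       (unsigned long long)lbr_select); /* prints 0xe7 */
		return 0;
	}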
@@ -21,6 +116,10 @@ enum {
 static void __intel_pmu_lbr_enable(void)
 {
 	u64 debugctl;
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	if (cpuc->lbr_sel)
+		wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config);
 
 	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
 	debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
@@ -76,11 +175,11 @@ void intel_pmu_lbr_enable(struct perf_event *event)
 	 * Reset the LBR stack if we changed task context to
 	 * avoid data leaks.
 	 */
-
 	if (event->ctx->task && cpuc->lbr_context != event->ctx) {
 		intel_pmu_lbr_reset();
 		cpuc->lbr_context = event->ctx;
 	}
+	cpuc->br_sel = event->hw.branch_reg.reg;
 
 	cpuc->lbr_users++;
 }
@@ -95,8 +194,11 @@ void intel_pmu_lbr_disable(struct perf_event *event)
 	cpuc->lbr_users--;
 	WARN_ON_ONCE(cpuc->lbr_users < 0);
 
-	if (cpuc->enabled && !cpuc->lbr_users)
+	if (cpuc->enabled && !cpuc->lbr_users) {
 		__intel_pmu_lbr_disable();
+		/* avoid stale pointer */
+		cpuc->lbr_context = NULL;
+	}
 }
 
 void intel_pmu_lbr_enable_all(void)
@@ -115,6 +217,9 @@ void intel_pmu_lbr_disable_all(void)
 		__intel_pmu_lbr_disable();
 }
 
+/*
+ * TOS = most recently recorded branch
+ */
 static inline u64 intel_pmu_lbr_tos(void)
 {
 	u64 tos;
@@ -142,15 +247,15 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
 
 		rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
 
 		cpuc->lbr_entries[i].from	= msr_lastbranch.from;
 		cpuc->lbr_entries[i].to		= msr_lastbranch.to;
-		cpuc->lbr_entries[i].flags	= 0;
+		cpuc->lbr_entries[i].mispred	= 0;
+		cpuc->lbr_entries[i].predicted	= 0;
+		cpuc->lbr_entries[i].reserved	= 0;
 	}
 	cpuc->lbr_stack.nr = i;
 }
 
-#define LBR_FROM_FLAG_MISPRED	(1ULL << 63)
-
 /*
  * Due to lack of segmentation in Linux the effective address (offset)
  * is the same as the linear address, allowing us to merge the LIP and EIP
@@ -165,19 +270,22 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 
 	for (i = 0; i < x86_pmu.lbr_nr; i++) {
 		unsigned long lbr_idx = (tos - i) & mask;
-		u64 from, to, flags = 0;
+		u64 from, to, mis = 0, pred = 0;
 
 		rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
 		rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);
 
 		if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
-			flags = !!(from & LBR_FROM_FLAG_MISPRED);
+			mis = !!(from & LBR_FROM_FLAG_MISPRED);
+			pred = !mis;
 			from = (u64)((((s64)from) << 1) >> 1);
 		}
 
 		cpuc->lbr_entries[i].from	= from;
 		cpuc->lbr_entries[i].to		= to;
-		cpuc->lbr_entries[i].flags	= flags;
+		cpuc->lbr_entries[i].mispred	= mis;
+		cpuc->lbr_entries[i].predicted	= pred;
+		cpuc->lbr_entries[i].reserved	= 0;
 	}
 	cpuc->lbr_stack.nr = i;
 }
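The EIP_FLAGS format stores the misprediction flag in bit 63 of the
"from" value, so the read path above strips it with a shift pair that
also sign-extends the canonical address back from bit 62. A standalone
illustration of that fixup:

	#include <stdint.h>
	#include <stdio.h>

	#define LBR_FROM_FLAG_MISPRED (1ULL << 63)

	int main(void)
	{
		/* predicted branch from 0xffffffff81000000: flag bit clear */
		uint64_t raw = 0x7fffffff81000000ULL;
		int mis = !!(raw & LBR_FROM_FLAG_MISPRED);
		/* drop bit 63, then copy bit 62 back into it */
		uint64_t from = (uint64_t)(((int64_t)raw << 1) >> 1);

		printf("mispred=%d from=%#llx\n", mis,
		       (unsigned long long)from); /* 0 0xffffffff81000000 */
		return 0;
	}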
@@ -193,28 +301,404 @@ void intel_pmu_lbr_read(void)
 		intel_pmu_lbr_read_32(cpuc);
 	else
 		intel_pmu_lbr_read_64(cpuc);
+
+	intel_pmu_lbr_filter(cpuc);
+}
+
+/*
+ * The SW filter is used:
+ * - in case there is no HW filter
+ * - in case the HW filter has errata or limitations
+ */
+static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
+{
+	u64 br_type = event->attr.branch_sample_type;
+	int mask = 0;
+
+	if (br_type & PERF_SAMPLE_BRANCH_USER)
+		mask |= X86_BR_USER;
+
+	if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
+		mask |= X86_BR_KERNEL;
+
+	/* we ignore BRANCH_HV here */
+
+	if (br_type & PERF_SAMPLE_BRANCH_ANY)
+		mask |= X86_BR_ANY;
+
+	if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
+		mask |= X86_BR_ANY_CALL;
+
+	if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+		mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
+
+	if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
+		mask |= X86_BR_IND_CALL;
+	/*
+	 * stash the actual user request into reg, it may
+	 * be used by fixup code for some CPUs
+	 */
+	event->hw.branch_reg.reg = mask;
+}
+
+/*
+ * Set up the HW LBR filter.
+ * Used only when available; it may not be enough to disambiguate
+ * all branches and may need the help of the SW filter.
+ */
+static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
+{
+	struct hw_perf_event_extra *reg;
+	u64 br_type = event->attr.branch_sample_type;
+	u64 mask = 0, m;
+	u64 v;
+
+	for_each_branch_sample_type(m) {
+		if (!(br_type & m))
+			continue;
+
+		v = x86_pmu.lbr_sel_map[m];
+		if (v == LBR_NOT_SUPP)
+			return -EOPNOTSUPP;
+
+		if (v != LBR_IGN)
+			mask |= v;
+	}
+	reg = &event->hw.branch_reg;
+	reg->idx = EXTRA_REG_LBR;
+
+	/* LBR_SELECT operates in suppress mode, so invert the mask */
+	reg->config = ~mask & x86_pmu.lbr_sel_mask;
+
+	return 0;
+}
+
+int intel_pmu_setup_lbr_filter(struct perf_event *event)
+{
+	int ret = 0;
+
+	/*
+	 * no LBR on this PMU
+	 */
+	if (!x86_pmu.lbr_nr)
+		return -EOPNOTSUPP;
+
+	/*
+	 * set up the SW LBR filter
+	 */
+	intel_pmu_setup_sw_lbr_filter(event);
+
+	/*
+	 * set up the HW LBR filter, if any
+	 */
+	if (x86_pmu.lbr_sel_map)
+		ret = intel_pmu_setup_hw_lbr_filter(event);
+
+	return ret;
 }
 
+/*
+ * Return the type of control flow change at address "from".
+ * The instruction is not necessarily a branch (in case of an interrupt).
+ *
+ * The branch type returned also includes the priv level of the
+ * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
+ *
+ * If a branch type is unknown OR the instruction cannot be
+ * decoded (e.g., text page not present), then X86_BR_NONE is
+ * returned.
+ */
+static int branch_type(unsigned long from, unsigned long to)
+{
+	struct insn insn;
+	void *addr;
+	int bytes, size = MAX_INSN_SIZE;
+	int ret = X86_BR_NONE;
+	int ext, to_plm, from_plm;
+	u8 buf[MAX_INSN_SIZE];
+	int is64 = 0;
+
+	to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
+	from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
+
+	/*
+	 * may be zero if the LBR did not fill up after a reset by the
+	 * time we get a PMU interrupt
+	 */
+	if (from == 0 || to == 0)
+		return X86_BR_NONE;
+
+	if (from_plm == X86_BR_USER) {
+		/*
+		 * can happen if measuring at the user level only
+		 * and we interrupt in a kernel thread, e.g., idle.
+		 */
+		if (!current->mm)
+			return X86_BR_NONE;
+
+		/* may fail if text not present */
+		bytes = copy_from_user_nmi(buf, (void __user *)from, size);
+		if (bytes != size)
+			return X86_BR_NONE;
+
+		addr = buf;
+	} else
+		addr = (void *)from;
+
+	/*
+	 * the decoder needs to know the ABI, especially
+	 * on 64-bit systems running 32-bit apps
+	 */
+#ifdef CONFIG_X86_64
+	is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
+#endif
+	insn_init(&insn, addr, is64);
+	insn_get_opcode(&insn);
+
+	switch (insn.opcode.bytes[0]) {
+	case 0xf:
+		switch (insn.opcode.bytes[1]) {
+		case 0x05: /* syscall */
+		case 0x34: /* sysenter */
+			ret = X86_BR_SYSCALL;
+			break;
+		case 0x07: /* sysret */
+		case 0x35: /* sysexit */
+			ret = X86_BR_SYSRET;
+			break;
+		case 0x80 ... 0x8f: /* conditional */
+			ret = X86_BR_JCC;
+			break;
+		default:
+			ret = X86_BR_NONE;
+		}
+		break;
+	case 0x70 ... 0x7f: /* conditional */
+		ret = X86_BR_JCC;
+		break;
+	case 0xc2: /* near ret */
+	case 0xc3: /* near ret */
+	case 0xca: /* far ret */
+	case 0xcb: /* far ret */
+		ret = X86_BR_RET;
+		break;
+	case 0xcf: /* iret */
+		ret = X86_BR_IRET;
+		break;
+	case 0xcc ... 0xce: /* int */
+		ret = X86_BR_INT;
+		break;
+	case 0xe8: /* call near rel */
+	case 0x9a: /* call far absolute */
+		ret = X86_BR_CALL;
+		break;
+	case 0xe0 ... 0xe3: /* loop jmp */
+		ret = X86_BR_JCC;
+		break;
+	case 0xe9 ... 0xeb: /* jmp */
+		ret = X86_BR_JMP;
+		break;
+	case 0xff: /* call near absolute, call far absolute ind */
+		insn_get_modrm(&insn);
+		ext = (insn.modrm.bytes[0] >> 3) & 0x7;
+		switch (ext) {
+		case 2: /* near ind call */
+		case 3: /* far ind call */
+			ret = X86_BR_IND_CALL;
+			break;
+		case 4:
+		case 5:
+			ret = X86_BR_JMP;
+			break;
+		}
+		break;
+	default:
+		ret = X86_BR_NONE;
+	}
+	/*
+	 * Interrupts, traps, faults (and thus ring transitions) can
+	 * occur on any instruction. Thus, to classify them correctly,
+	 * we need to first look at the from and to priv levels. If they
+	 * are different and to is in the kernel, then it indicates
+	 * a ring transition. If the from instruction is not a ring
+	 * transition instr (syscall, sysenter, int), then it means
+	 * it was an irq, trap or fault.
+	 *
+	 * We have no way of detecting kernel to kernel faults.
+	 */
+	if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
+	    && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
+		ret = X86_BR_IRQ;
+
+	/*
+	 * branch priv level determined by target, as
+	 * is done by HW when LBR_SELECT is implemented
+	 */
+	if (ret != X86_BR_NONE)
+		ret |= to_plm;
+
+	return ret;
+}
+
+/*
+ * Implement the actual branch filter based on user demand.
+ * Hardware may not exactly satisfy that request, thus
+ * we need to inspect opcodes. Mismatched branches are
+ * discarded. Therefore, the number of branches returned
+ * in a PERF_SAMPLE_BRANCH_STACK sample may vary.
+ */
+static void
+intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
+{
+	u64 from, to;
+	int br_sel = cpuc->br_sel;
+	int i, j, type;
+	bool compress = false;
+
+	/* if sampling all branches, then nothing to filter */
+	if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
+		return;
+
+	for (i = 0; i < cpuc->lbr_stack.nr; i++) {
+
+		from = cpuc->lbr_entries[i].from;
+		to = cpuc->lbr_entries[i].to;
+
+		type = branch_type(from, to);
+
+		/* if type does not correspond, then discard */
+		if (type == X86_BR_NONE || (br_sel & type) != type) {
+			cpuc->lbr_entries[i].from = 0;
+			compress = true;
+		}
+	}
+
+	if (!compress)
+		return;
+
+	/* remove all entries with from=0 */
+	for (i = 0; i < cpuc->lbr_stack.nr; ) {
+		if (!cpuc->lbr_entries[i].from) {
+			j = i;
+			while (++j < cpuc->lbr_stack.nr)
+				cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
+			cpuc->lbr_stack.nr--;
+			if (!cpuc->lbr_entries[i].from)
+				continue;
+		}
+		i++;
+	}
+}
+
+/*
+ * Map interface branch filters onto LBR filters
+ */
+static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
+	[PERF_SAMPLE_BRANCH_ANY]	= LBR_ANY,
+	[PERF_SAMPLE_BRANCH_USER]	= LBR_USER,
+	[PERF_SAMPLE_BRANCH_KERNEL]	= LBR_KERNEL,
+	[PERF_SAMPLE_BRANCH_HV]		= LBR_IGN,
+	[PERF_SAMPLE_BRANCH_ANY_RETURN]	= LBR_RETURN | LBR_REL_JMP
+					| LBR_IND_JMP | LBR_FAR,
+	/*
+	 * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
+	 */
+	[PERF_SAMPLE_BRANCH_ANY_CALL] =
+	 LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
+	/*
+	 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
+	 */
+	[PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP,
+};
+
+static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
+	[PERF_SAMPLE_BRANCH_ANY]	= LBR_ANY,
+	[PERF_SAMPLE_BRANCH_USER]	= LBR_USER,
+	[PERF_SAMPLE_BRANCH_KERNEL]	= LBR_KERNEL,
+	[PERF_SAMPLE_BRANCH_HV]		= LBR_IGN,
+	[PERF_SAMPLE_BRANCH_ANY_RETURN]	= LBR_RETURN | LBR_FAR,
+	[PERF_SAMPLE_BRANCH_ANY_CALL]	= LBR_REL_CALL | LBR_IND_CALL
+					| LBR_FAR,
+	[PERF_SAMPLE_BRANCH_IND_CALL]	= LBR_IND_CALL,
+};
+
+/* core */
 void intel_pmu_lbr_init_core(void)
 {
 	x86_pmu.lbr_nr     = 4;
-	x86_pmu.lbr_tos    = 0x01c9;
-	x86_pmu.lbr_from   = 0x40;
-	x86_pmu.lbr_to     = 0x60;
+	x86_pmu.lbr_tos    = MSR_LBR_TOS;
+	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
+	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
+
+	/*
+	 * SW branch filter usage:
+	 * - compensate for lack of HW filter
+	 */
+	pr_cont("4-deep LBR, ");
 }
 
+/* nehalem/westmere */
 void intel_pmu_lbr_init_nhm(void)
 {
 	x86_pmu.lbr_nr     = 16;
-	x86_pmu.lbr_tos    = 0x01c9;
-	x86_pmu.lbr_from   = 0x680;
-	x86_pmu.lbr_to     = 0x6c0;
+	x86_pmu.lbr_tos    = MSR_LBR_TOS;
+	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
+	x86_pmu.lbr_to     = MSR_LBR_NHM_TO;
+
+	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+	x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
+
+	/*
+	 * SW branch filter usage:
+	 * - workaround LBR_SEL errata (see above)
+	 * - support syscall, sysret capture.
+	 *   That requires LBR_FAR, but that means far
+	 *   jmps need to be filtered out
+	 */
+	pr_cont("16-deep LBR, ");
+}
+
+/* sandy bridge */
+void intel_pmu_lbr_init_snb(void)
+{
+	x86_pmu.lbr_nr   = 16;
+	x86_pmu.lbr_tos  = MSR_LBR_TOS;
+	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+	x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
+
+	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
+
+	/*
+	 * SW branch filter usage:
+	 * - support syscall, sysret capture.
+	 *   That requires LBR_FAR, but that means far
+	 *   jmps need to be filtered out
+	 */
+	pr_cont("16-deep LBR, ");
 }
 
+/* atom */
 void intel_pmu_lbr_init_atom(void)
 {
+	/*
+	 * only models starting at stepping 10 seem to have an
+	 * operational LBR which can freeze on PMU interrupt
+	 */
+	if (boot_cpu_data.x86_mask < 10) {
+		pr_cont("LBR disabled due to erratum");
+		return;
+	}
+
 	x86_pmu.lbr_nr	   = 8;
-	x86_pmu.lbr_tos    = 0x01c9;
-	x86_pmu.lbr_from   = 0x40;
-	x86_pmu.lbr_to     = 0x60;
+	x86_pmu.lbr_tos    = MSR_LBR_TOS;
+	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
+	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
+
+	/*
+	 * SW branch filter usage:
+	 * - compensate for lack of HW filter
+	 */
+	pr_cont("8-deep LBR, ");
 }
diff --git a/arch/x86/kernel/kprobes-common.h b/arch/x86/kernel/kprobes-common.h
new file mode 100644
index 000000000000..3230b68ef29a
--- /dev/null
+++ b/arch/x86/kernel/kprobes-common.h
@@ -0,0 +1,102 @@
1#ifndef __X86_KERNEL_KPROBES_COMMON_H
2#define __X86_KERNEL_KPROBES_COMMON_H
3
4/* Kprobes and Optprobes common header */
5
6#ifdef CONFIG_X86_64
7#define SAVE_REGS_STRING \
8 /* Skip cs, ip, orig_ax. */ \
9 " subq $24, %rsp\n" \
10 " pushq %rdi\n" \
11 " pushq %rsi\n" \
12 " pushq %rdx\n" \
13 " pushq %rcx\n" \
14 " pushq %rax\n" \
15 " pushq %r8\n" \
16 " pushq %r9\n" \
17 " pushq %r10\n" \
18 " pushq %r11\n" \
19 " pushq %rbx\n" \
20 " pushq %rbp\n" \
21 " pushq %r12\n" \
22 " pushq %r13\n" \
23 " pushq %r14\n" \
24 " pushq %r15\n"
25#define RESTORE_REGS_STRING \
26 " popq %r15\n" \
27 " popq %r14\n" \
28 " popq %r13\n" \
29 " popq %r12\n" \
30 " popq %rbp\n" \
31 " popq %rbx\n" \
32 " popq %r11\n" \
33 " popq %r10\n" \
34 " popq %r9\n" \
35 " popq %r8\n" \
36 " popq %rax\n" \
37 " popq %rcx\n" \
38 " popq %rdx\n" \
39 " popq %rsi\n" \
40 " popq %rdi\n" \
41 /* Skip orig_ax, ip, cs */ \
42 " addq $24, %rsp\n"
43#else
44#define SAVE_REGS_STRING \
45 /* Skip cs, ip, orig_ax and gs. */ \
46 " subl $16, %esp\n" \
47 " pushl %fs\n" \
48 " pushl %es\n" \
49 " pushl %ds\n" \
50 " pushl %eax\n" \
51 " pushl %ebp\n" \
52 " pushl %edi\n" \
53 " pushl %esi\n" \
54 " pushl %edx\n" \
55 " pushl %ecx\n" \
56 " pushl %ebx\n"
57#define RESTORE_REGS_STRING \
58 " popl %ebx\n" \
59 " popl %ecx\n" \
60 " popl %edx\n" \
61 " popl %esi\n" \
62 " popl %edi\n" \
63 " popl %ebp\n" \
64 " popl %eax\n" \
65 /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here */\
66 " addl $24, %esp\n"
67#endif
68
69/* Check whether the instruction can be boosted */
70extern int can_boost(kprobe_opcode_t *instruction);
71/* Recover the instruction if the given address is probed */
72extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
73 unsigned long addr);
74/*
75 * Copy an instruction and adjust the displacement if the instruction
76 * uses the %rip-relative addressing mode.
77 */
78extern int __copy_instruction(u8 *dest, u8 *src);
79
80/* Generate a relative-jump/call instruction */
81extern void synthesize_reljump(void *from, void *to);
82extern void synthesize_relcall(void *from, void *to);
83
84#ifdef CONFIG_OPTPROBES
85extern int arch_init_optprobes(void);
86extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter);
87extern unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr);
88#else /* !CONFIG_OPTPROBES */
89static inline int arch_init_optprobes(void)
90{
91 return 0;
92}
93static inline int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
94{
95 return 0;
96}
97static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
98{
99 return addr;
100}
101#endif
102#endif
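The synthesize_reljump()/synthesize_relcall() helpers declared above emit
5-byte x86 instructions. As an illustration of the encoding they rely on
(a hedged sketch, not the kernel's implementation; the buf parameter is
an invention of this example), the standard e9 rel32 form looks like:

	#include <stdint.h>
	#include <string.h>

	/* build "jmp rel32" (opcode 0xe9) at 'from', targeting 'to' */
	static void synth_reljump(void *from, void *to, uint8_t buf[5])
	{
		/* rel32 is relative to the end of the 5-byte instruction */
		int32_t rel = (int32_t)((intptr_t)to - ((intptr_t)from + 5));

		buf[0] = 0xe9;
		memcpy(buf + 1, &rel, sizeof(rel));
	}

A relative call is the same shape with opcode 0xe8, which is why both
helpers can share one synthesizer parameterized by opcode.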
diff --git a/arch/x86/kernel/kprobes-opt.c b/arch/x86/kernel/kprobes-opt.c
new file mode 100644
index 000000000000..c5e410eed403
--- /dev/null
+++ b/arch/x86/kernel/kprobes-opt.c
@@ -0,0 +1,512 @@
1/*
2 * Kernel Probes Jump Optimization (Optprobes)
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2002, 2004
19 * Copyright (C) Hitachi Ltd., 2012
20 */
21#include <linux/kprobes.h>
22#include <linux/ptrace.h>
23#include <linux/string.h>
24#include <linux/slab.h>
25#include <linux/hardirq.h>
26#include <linux/preempt.h>
27#include <linux/module.h>
28#include <linux/kdebug.h>
29#include <linux/kallsyms.h>
30#include <linux/ftrace.h>
31
32#include <asm/cacheflush.h>
33#include <asm/desc.h>
34#include <asm/pgtable.h>
35#include <asm/uaccess.h>
36#include <asm/alternative.h>
37#include <asm/insn.h>
38#include <asm/debugreg.h>
39
40#include "kprobes-common.h"
41
42unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
43{
44 struct optimized_kprobe *op;
45 struct kprobe *kp;
46 long offs;
47 int i;
48
49 for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
50 kp = get_kprobe((void *)addr - i);
51 /* This function only handles jump-optimized kprobes */
52 if (kp && kprobe_optimized(kp)) {
53 op = container_of(kp, struct optimized_kprobe, kp);
54 /* If op->list is not empty, op is being optimized */
55 if (list_empty(&op->list))
56 goto found;
57 }
58 }
59
60 return addr;
61found:
62 /*
63 * If the kprobe can be optimized, the original bytes may have been
64 * overwritten by the jump destination address. In this case, they
65 * must be recovered from the op->optinsn.copied_insn buffer.
66 */
67 memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
68 if (addr == (unsigned long)kp->addr) {
69 buf[0] = kp->opcode;
70 memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
71 } else {
72 offs = addr - (unsigned long)kp->addr - 1;
73 memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
74 }
75
76 return (unsigned long)buf;
77}
78
79/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
80static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
81{
82#ifdef CONFIG_X86_64
83 *addr++ = 0x48;
84 *addr++ = 0xbf;
85#else
86 *addr++ = 0xb8;
87#endif
88 *(unsigned long *)addr = val;
89}
90
91static void __used __kprobes kprobes_optinsn_template_holder(void)
92{
93 asm volatile (
94 ".global optprobe_template_entry\n"
95 "optprobe_template_entry:\n"
96#ifdef CONFIG_X86_64
97 /* We don't bother saving the ss register */
98 " pushq %rsp\n"
99 " pushfq\n"
100 SAVE_REGS_STRING
101 " movq %rsp, %rsi\n"
102 ".global optprobe_template_val\n"
103 "optprobe_template_val:\n"
104 ASM_NOP5
105 ASM_NOP5
106 ".global optprobe_template_call\n"
107 "optprobe_template_call:\n"
108 ASM_NOP5
109 /* Move flags to rsp */
110 " movq 144(%rsp), %rdx\n"
111 " movq %rdx, 152(%rsp)\n"
112 RESTORE_REGS_STRING
113 /* Skip flags entry */
114 " addq $8, %rsp\n"
115 " popfq\n"
116#else /* CONFIG_X86_32 */
117 " pushf\n"
118 SAVE_REGS_STRING
119 " movl %esp, %edx\n"
120 ".global optprobe_template_val\n"
121 "optprobe_template_val:\n"
122 ASM_NOP5
123 ".global optprobe_template_call\n"
124 "optprobe_template_call:\n"
125 ASM_NOP5
126 RESTORE_REGS_STRING
127 " addl $4, %esp\n" /* skip cs */
128 " popf\n"
129#endif
130 ".global optprobe_template_end\n"
131 "optprobe_template_end:\n");
132}
133
134#define TMPL_MOVE_IDX \
135 ((long)&optprobe_template_val - (long)&optprobe_template_entry)
136#define TMPL_CALL_IDX \
137 ((long)&optprobe_template_call - (long)&optprobe_template_entry)
138#define TMPL_END_IDX \
139 ((long)&optprobe_template_end - (long)&optprobe_template_entry)
140
141#define INT3_SIZE sizeof(kprobe_opcode_t)
142
143/* Optimized kprobe callback function: called from optinsn */
144static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
145{
146 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
147 unsigned long flags;
148
149 /* This is possible if op is under delayed unoptimization */
150 if (kprobe_disabled(&op->kp))
151 return;
152
153 local_irq_save(flags);
154 if (kprobe_running()) {
155 kprobes_inc_nmissed_count(&op->kp);
156 } else {
157 /* Save skipped registers */
158#ifdef CONFIG_X86_64
159 regs->cs = __KERNEL_CS;
160#else
161 regs->cs = __KERNEL_CS | get_kernel_rpl();
162 regs->gs = 0;
163#endif
164 regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
165 regs->orig_ax = ~0UL;
166
167 __this_cpu_write(current_kprobe, &op->kp);
168 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
169 opt_pre_handler(&op->kp, regs);
170 __this_cpu_write(current_kprobe, NULL);
171 }
172 local_irq_restore(flags);
173}
174
175static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
176{
177 int len = 0, ret;
178
179 while (len < RELATIVEJUMP_SIZE) {
180 ret = __copy_instruction(dest + len, src + len);
181 if (!ret || !can_boost(dest + len))
182 return -EINVAL;
183 len += ret;
184 }
185 /* Check whether the address range is reserved */
186 if (ftrace_text_reserved(src, src + len - 1) ||
187 alternatives_text_reserved(src, src + len - 1) ||
188 jump_label_text_reserved(src, src + len - 1))
189 return -EBUSY;
190
191 return len;
192}
193
194/* Check whether insn is an indirect jump */
195static int __kprobes insn_is_indirect_jump(struct insn *insn)
196{
197 return ((insn->opcode.bytes[0] == 0xff &&
198 (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
199 insn->opcode.bytes[0] == 0xea); /* Segment based jump */
200}
201
202/* Check whether insn jumps into the specified address range */
203static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
204{
205 unsigned long target = 0;
206
207 switch (insn->opcode.bytes[0]) {
208 case 0xe0: /* loopne */
209 case 0xe1: /* loope */
210 case 0xe2: /* loop */
211 case 0xe3: /* jcxz */
212 case 0xe9: /* near relative jump */
213 case 0xeb: /* short relative jump */
214 break;
215 case 0x0f:
216 if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
217 break;
218 return 0;
219 default:
220 if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
221 break;
222 return 0;
223 }
224 target = (unsigned long)insn->next_byte + insn->immediate.value;
225
226 return (start <= target && target <= start + len);
227}
228
229/* Decode the whole function to ensure no instruction jumps into the target */
230static int __kprobes can_optimize(unsigned long paddr)
231{
232 unsigned long addr, size = 0, offset = 0;
233 struct insn insn;
234 kprobe_opcode_t buf[MAX_INSN_SIZE];
235
236 /* Lookup symbol including addr */
237 if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
238 return 0;
239
240 /*
241 * Do not optimize in the entry code due to the unstable
242 * stack handling.
243 */
244 if ((paddr >= (unsigned long)__entry_text_start) &&
245 (paddr < (unsigned long)__entry_text_end))
246 return 0;
247
248 /* Check there is enough space for a relative jump. */
249 if (size - offset < RELATIVEJUMP_SIZE)
250 return 0;
251
252 /* Decode instructions */
253 addr = paddr - offset;
254 while (addr < paddr - offset + size) { /* Decode until function end */
255 if (search_exception_tables(addr))
256 /*
257 * Since some fixup code jumps into this function,
258 * we can't optimize any kprobe in this function.
259 */
260 return 0;
261 kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, addr));
262 insn_get_length(&insn);
263 /* Another subsystem puts a breakpoint */
264 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
265 return 0;
266 /* Recover address */
267 insn.kaddr = (void *)addr;
268 insn.next_byte = (void *)(addr + insn.length);
269 /* Check that no instruction jumps into the target */
270 if (insn_is_indirect_jump(&insn) ||
271 insn_jump_into_range(&insn, paddr + INT3_SIZE,
272 RELATIVE_ADDR_SIZE))
273 return 0;
274 addr += insn.length;
275 }
276
277 return 1;
278}
279
280/* Check whether the optimized_kprobe can actually be optimized. */
281int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
282{
283 int i;
284 struct kprobe *p;
285
286 for (i = 1; i < op->optinsn.size; i++) {
287 p = get_kprobe(op->kp.addr + i);
288 if (p && !kprobe_disabled(p))
289 return -EEXIST;
290 }
291
292 return 0;
293}
294
295/* Check whether addr is within the optimized instructions. */
296int __kprobes
297arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr)
298{
299 return ((unsigned long)op->kp.addr <= addr &&
300 (unsigned long)op->kp.addr + op->optinsn.size > addr);
301}
302
303/* Free optimized instruction slot */
304static __kprobes
305void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
306{
307 if (op->optinsn.insn) {
308 free_optinsn_slot(op->optinsn.insn, dirty);
309 op->optinsn.insn = NULL;
310 op->optinsn.size = 0;
311 }
312}
313
314void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
315{
316 __arch_remove_optimized_kprobe(op, 1);
317}
318
319/*
320 * Copy the instructions that the jump will replace.
321 * Target instructions MUST be relocatable (checked inside).
322 * This is called when a new aggr(opt)probe is allocated or reused.
323 */
324int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
325{
326 u8 *buf;
327 int ret;
328 long rel;
329
330 if (!can_optimize((unsigned long)op->kp.addr))
331 return -EILSEQ;
332
333 op->optinsn.insn = get_optinsn_slot();
334 if (!op->optinsn.insn)
335 return -ENOMEM;
336
337 /*
338 * Verify that the address gap is within the 2GB range, because
339 * this uses a relative jump.
340 */
341 rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
342 if (abs(rel) > 0x7fffffff)
343 return -ERANGE;
344
345 buf = (u8 *)op->optinsn.insn;
346
347 /* Copy instructions into the out-of-line buffer */
348 ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
349 if (ret < 0) {
350 __arch_remove_optimized_kprobe(op, 0);
351 return ret;
352 }
353 op->optinsn.size = ret;
354
355 /* Copy arch-dep-instance from template */
356 memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
357
358 /* Set probe information */
359 synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
360
361 /* Set probe function call */
362 synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
363
364 /* Set returning jmp instruction at the tail of out-of-line buffer */
365 synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
366 (u8 *)op->kp.addr + op->optinsn.size);
367
368 flush_icache_range((unsigned long) buf,
369 (unsigned long) buf + TMPL_END_IDX +
370 op->optinsn.size + RELATIVEJUMP_SIZE);
371 return 0;
372}
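
/*
 * At this point the out-of-line buffer assembled above is laid out as:
 *
 *   buf + 0 .. TMPL_END_IDX            template: save regs, set arg1 = op,
 *                                      call optimized_callback, restore regs
 *   buf + TMPL_END_IDX                 relocated copy of the probed
 *                                      instructions (op->optinsn.size bytes)
 *   buf + TMPL_END_IDX + optinsn.size  jmp back to kp.addr + optinsn.size
 */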
373
374#define MAX_OPTIMIZE_PROBES 256
375static struct text_poke_param *jump_poke_params;
376static struct jump_poke_buffer {
377 u8 buf[RELATIVEJUMP_SIZE];
378} *jump_poke_bufs;
379
380static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
381 u8 *insn_buf,
382 struct optimized_kprobe *op)
383{
384 s32 rel = (s32)((long)op->optinsn.insn -
385 ((long)op->kp.addr + RELATIVEJUMP_SIZE));
386
387 /* Back up the instructions that the jump's address bytes will replace */
388 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
389 RELATIVE_ADDR_SIZE);
390
391 insn_buf[0] = RELATIVEJUMP_OPCODE;
392 *(s32 *)(&insn_buf[1]) = rel;
393
394 tprm->addr = op->kp.addr;
395 tprm->opcode = insn_buf;
396 tprm->len = RELATIVEJUMP_SIZE;
397}
398
399/*
400 * Replace breakpoints (int3) with relative jumps.
401 * The caller must hold kprobe_mutex and text_mutex.
402 */
403void __kprobes arch_optimize_kprobes(struct list_head *oplist)
404{
405 struct optimized_kprobe *op, *tmp;
406 int c = 0;
407
408 list_for_each_entry_safe(op, tmp, oplist, list) {
409 WARN_ON(kprobe_disabled(&op->kp));
410 /* Setup param */
411 setup_optimize_kprobe(&jump_poke_params[c],
412 jump_poke_bufs[c].buf, op);
413 list_del_init(&op->list);
414 if (++c >= MAX_OPTIMIZE_PROBES)
415 break;
416 }
417
418 /*
419 * text_poke_smp doesn't support modifying code in NMI/MCE handlers.
420 * However, since kprobes itself also doesn't support probing
421 * NMI/MCE code, this is not a problem.
422 */
423 text_poke_smp_batch(jump_poke_params, c);
424}
425
426static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
427 u8 *insn_buf,
428 struct optimized_kprobe *op)
429{
430 /* Set int3 to first byte for kprobes */
431 insn_buf[0] = BREAKPOINT_INSTRUCTION;
432 memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
433
434 tprm->addr = op->kp.addr;
435 tprm->opcode = insn_buf;
436 tprm->len = RELATIVEJUMP_SIZE;
437}
438
439/*
440 * Recover original instructions and breakpoints from relative jumps.
441 * The caller must hold kprobe_mutex.
442 */
443extern void arch_unoptimize_kprobes(struct list_head *oplist,
444 struct list_head *done_list)
445{
446 struct optimized_kprobe *op, *tmp;
447 int c = 0;
448
449 list_for_each_entry_safe(op, tmp, oplist, list) {
450 /* Setup param */
451 setup_unoptimize_kprobe(&jump_poke_params[c],
452 jump_poke_bufs[c].buf, op);
453 list_move(&op->list, done_list);
454 if (++c >= MAX_OPTIMIZE_PROBES)
455 break;
456 }
457
458 /*
459 * text_poke_smp doesn't support modifying code in NMI/MCE handlers.
460 * However, since kprobes itself also doesn't support probing
461 * NMI/MCE code, this is not a problem.
462 */
463 text_poke_smp_batch(jump_poke_params, c);
464}
465
466/* Replace a relative jump with a breakpoint (int3). */
467void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
468{
469 u8 buf[RELATIVEJUMP_SIZE];
470
471 /* Set int3 to first byte for kprobes */
472 buf[0] = BREAKPOINT_INSTRUCTION;
473 memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
474 text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
475}
476
477int __kprobes
478setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
479{
480 struct optimized_kprobe *op;
481
482 if (p->flags & KPROBE_FLAG_OPTIMIZED) {
483 /* This kprobe is really able to run optimized path. */
484 op = container_of(p, struct optimized_kprobe, kp);
485 /* Detour through copied instructions */
486 regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
487 if (!reenter)
488 reset_current_kprobe();
489 preempt_enable_no_resched();
490 return 1;
491 }
492 return 0;
493}
494
495int __kprobes arch_init_optprobes(void)
496{
497 /* Allocate code buffer and parameter array */
498 jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
499 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
500 if (!jump_poke_bufs)
501 return -ENOMEM;
502
503 jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
504 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
505 if (!jump_poke_params) {
506 kfree(jump_poke_bufs);
507 jump_poke_bufs = NULL;
508 return -ENOMEM;
509 }
510
511 return 0;
512}
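
The displacement arithmetic used by setup_optimize_kprobe() above is the
standard x86 near-jump encoding: the 32-bit operand is measured from the
first byte after the 5-byte jmp. A minimal stand-alone sketch of the same
synthesis (emit_reljump is a made-up name, and 0xe9 is assumed to be what
RELATIVEJUMP_OPCODE expands to):

    #include <stdint.h>
    #include <string.h>

    /* Synthesize "jmp rel32" at 'from', targeting 'to'. */
    static void emit_reljump(uint8_t *from, const uint8_t *to)
    {
            /* rel32 is relative to the instruction following the jmp */
            int32_t rel = (int32_t)(to - (from + 5));

            from[0] = 0xe9;                /* near relative jmp opcode */
            memcpy(from + 1, &rel, 4);     /* little-endian displacement */
    }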
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 7da647d8b64c..e213fc8408d2 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -30,16 +30,15 @@
30 * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi 30 * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
31 * <prasanna@in.ibm.com> added function-return probes. 31 * <prasanna@in.ibm.com> added function-return probes.
32 * 2005-May Rusty Lynch <rusty.lynch@intel.com> 32 * 2005-May Rusty Lynch <rusty.lynch@intel.com>
33 * Added function return probes functionality 33 * Added function return probes functionality
34 * 2006-Feb Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added 34 * 2006-Feb Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added
35 * kprobe-booster and kretprobe-booster for i386. 35 * kprobe-booster and kretprobe-booster for i386.
36 * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster 36 * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster
37 * and kretprobe-booster for x86-64 37 * and kretprobe-booster for x86-64
38 * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven 38 * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven
39 * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com> 39 * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com>
40 * unified x86 kprobes code. 40 * unified x86 kprobes code.
41 */ 41 */
42
43#include <linux/kprobes.h> 42#include <linux/kprobes.h>
44#include <linux/ptrace.h> 43#include <linux/ptrace.h>
45#include <linux/string.h> 44#include <linux/string.h>
@@ -59,6 +58,8 @@
59#include <asm/insn.h> 58#include <asm/insn.h>
60#include <asm/debugreg.h> 59#include <asm/debugreg.h>
61 60
61#include "kprobes-common.h"
62
62void jprobe_return_end(void); 63void jprobe_return_end(void);
63 64
64DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; 65DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
@@ -108,6 +109,7 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = {
108 doesn't switch kernel stack.*/ 109 doesn't switch kernel stack.*/
109 {NULL, NULL} /* Terminator */ 110 {NULL, NULL} /* Terminator */
110}; 111};
112
111const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); 113const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
112 114
113static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) 115static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
@@ -123,11 +125,17 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
123} 125}
124 126
125/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ 127/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
126static void __kprobes synthesize_reljump(void *from, void *to) 128void __kprobes synthesize_reljump(void *from, void *to)
127{ 129{
128 __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); 130 __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE);
129} 131}
130 132
133/* Insert a call instruction at address 'from', which calls address 'to'.*/
134void __kprobes synthesize_relcall(void *from, void *to)
135{
136 __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
137}
138
131/* 139/*
132 * Skip the prefixes of the instruction. 140 * Skip the prefixes of the instruction.
133 */ 141 */
@@ -151,7 +159,7 @@ static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn)
151 * Returns non-zero if opcode is boostable. 159 * Returns non-zero if opcode is boostable.
152 * RIP relative instructions are adjusted at copying time in 64 bits mode 160 * RIP relative instructions are adjusted at copying time in 64 bits mode
153 */ 161 */
154static int __kprobes can_boost(kprobe_opcode_t *opcodes) 162int __kprobes can_boost(kprobe_opcode_t *opcodes)
155{ 163{
156 kprobe_opcode_t opcode; 164 kprobe_opcode_t opcode;
157 kprobe_opcode_t *orig_opcodes = opcodes; 165 kprobe_opcode_t *orig_opcodes = opcodes;
@@ -207,13 +215,15 @@ retry:
207 } 215 }
208} 216}
209 217
210/* Recover the probed instruction at addr for further analysis. */ 218static unsigned long
211static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) 219__recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
212{ 220{
213 struct kprobe *kp; 221 struct kprobe *kp;
222
214 kp = get_kprobe((void *)addr); 223 kp = get_kprobe((void *)addr);
224 /* There is no probe; return the original address */
215 if (!kp) 225 if (!kp)
216 return -EINVAL; 226 return addr;
217 227
218 /* 228 /*
219 * Basically, kp->ainsn.insn has an original instruction. 229 * Basically, kp->ainsn.insn has an original instruction.
@@ -230,14 +240,29 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
230 */ 240 */
231 memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); 241 memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
232 buf[0] = kp->opcode; 242 buf[0] = kp->opcode;
233 return 0; 243 return (unsigned long)buf;
244}
245
246/*
247 * Recover the probed instruction at addr for further analysis.
248 * The caller must lock kprobes via kprobe_mutex, or disable preemption,
249 * to prevent the referenced kprobes from being released.
250 */
251unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
252{
253 unsigned long __addr;
254
255 __addr = __recover_optprobed_insn(buf, addr);
256 if (__addr != addr)
257 return __addr;
258
259 return __recover_probed_insn(buf, addr);
234} 260}
235 261
236/* Check if paddr is at an instruction boundary */ 262/* Check if paddr is at an instruction boundary */
237static int __kprobes can_probe(unsigned long paddr) 263static int __kprobes can_probe(unsigned long paddr)
238{ 264{
239 int ret; 265 unsigned long addr, __addr, offset = 0;
240 unsigned long addr, offset = 0;
241 struct insn insn; 266 struct insn insn;
242 kprobe_opcode_t buf[MAX_INSN_SIZE]; 267 kprobe_opcode_t buf[MAX_INSN_SIZE];
243 268
@@ -247,26 +272,24 @@ static int __kprobes can_probe(unsigned long paddr)
247 /* Decode instructions */ 272 /* Decode instructions */
248 addr = paddr - offset; 273 addr = paddr - offset;
249 while (addr < paddr) { 274 while (addr < paddr) {
250 kernel_insn_init(&insn, (void *)addr);
251 insn_get_opcode(&insn);
252
253 /* 275 /*
254 * Check if the instruction has been modified by another 276 * Check if the instruction has been modified by another
255 * kprobe, in which case we replace the breakpoint by the 277 * kprobe, in which case we replace the breakpoint by the
256 * original instruction in our buffer. 278 * original instruction in our buffer.
279 * Also, jump optimization will change the breakpoint to a
280 * relative jump. Since a relative jump is itself a normal
281 * instruction, we simply continue if no kprobe is registered there.
257 */ 282 */
258 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { 283 __addr = recover_probed_instruction(buf, addr);
259 ret = recover_probed_instruction(buf, addr); 284 kernel_insn_init(&insn, (void *)__addr);
260 if (ret)
261 /*
262 * Another debugging subsystem might insert
263 * this breakpoint. In that case, we can't
264 * recover it.
265 */
266 return 0;
267 kernel_insn_init(&insn, buf);
268 }
269 insn_get_length(&insn); 285 insn_get_length(&insn);
286
287 /*
288 * Another debugging subsystem might insert this breakpoint.
289 * In that case, we can't recover it.
290 */
291 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
292 return 0;
270 addr += insn.length; 293 addr += insn.length;
271 } 294 }
272 295
@@ -299,24 +322,16 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
299 * If not, return null. 322 * If not, return null.
300 * Only applicable to 64-bit x86. 323 * Only applicable to 64-bit x86.
301 */ 324 */
302static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) 325int __kprobes __copy_instruction(u8 *dest, u8 *src)
303{ 326{
304 struct insn insn; 327 struct insn insn;
305 int ret;
306 kprobe_opcode_t buf[MAX_INSN_SIZE]; 328 kprobe_opcode_t buf[MAX_INSN_SIZE];
307 329
308 kernel_insn_init(&insn, src); 330 kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, (unsigned long)src));
309 if (recover) {
310 insn_get_opcode(&insn);
311 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
312 ret = recover_probed_instruction(buf,
313 (unsigned long)src);
314 if (ret)
315 return 0;
316 kernel_insn_init(&insn, buf);
317 }
318 }
319 insn_get_length(&insn); 331 insn_get_length(&insn);
332 /* Another subsystem has put a breakpoint; recovery failed */
333 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
334 return 0;
320 memcpy(dest, insn.kaddr, insn.length); 335 memcpy(dest, insn.kaddr, insn.length);
321 336
322#ifdef CONFIG_X86_64 337#ifdef CONFIG_X86_64
@@ -337,8 +352,7 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
337 * extension of the original signed 32-bit displacement would 352 * extension of the original signed 32-bit displacement would
338 * have given. 353 * have given.
339 */ 354 */
340 newdisp = (u8 *) src + (s64) insn.displacement.value - 355 newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest;
341 (u8 *) dest;
342 BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ 356 BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */
343 disp = (u8 *) dest + insn_offset_displacement(&insn); 357 disp = (u8 *) dest + insn_offset_displacement(&insn);
344 *(s32 *) disp = (s32) newdisp; 358 *(s32 *) disp = (s32) newdisp;
@@ -349,18 +363,20 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
349 363
350static void __kprobes arch_copy_kprobe(struct kprobe *p) 364static void __kprobes arch_copy_kprobe(struct kprobe *p)
351{ 365{
366 /* Copy the instruction, recovering it if another optprobe has modified it. */
367 __copy_instruction(p->ainsn.insn, p->addr);
368
352 /* 369 /*
353 * Copy an instruction without recovering int3, because it will be 370 * __copy_instruction can modify the displacement of the instruction,
354 * put by another subsystem. 371 * but it doesn't affect the boostable check.
355 */ 372 */
356 __copy_instruction(p->ainsn.insn, p->addr, 0); 373 if (can_boost(p->ainsn.insn))
357
358 if (can_boost(p->addr))
359 p->ainsn.boostable = 0; 374 p->ainsn.boostable = 0;
360 else 375 else
361 p->ainsn.boostable = -1; 376 p->ainsn.boostable = -1;
362 377
363 p->opcode = *p->addr; 378 /* Also, displacement change doesn't affect the first byte */
379 p->opcode = p->ainsn.insn[0];
364} 380}
365 381
366int __kprobes arch_prepare_kprobe(struct kprobe *p) 382int __kprobes arch_prepare_kprobe(struct kprobe *p)
@@ -442,8 +458,8 @@ static void __kprobes restore_btf(void)
442 } 458 }
443} 459}
444 460
445void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 461void __kprobes
446 struct pt_regs *regs) 462arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
447{ 463{
448 unsigned long *sara = stack_addr(regs); 464 unsigned long *sara = stack_addr(regs);
449 465
@@ -453,16 +469,8 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
453 *sara = (unsigned long) &kretprobe_trampoline; 469 *sara = (unsigned long) &kretprobe_trampoline;
454} 470}
455 471
456#ifdef CONFIG_OPTPROBES 472static void __kprobes
457static int __kprobes setup_detour_execution(struct kprobe *p, 473setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter)
458 struct pt_regs *regs,
459 int reenter);
460#else
461#define setup_detour_execution(p, regs, reenter) (0)
462#endif
463
464static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
465 struct kprobe_ctlblk *kcb, int reenter)
466{ 474{
467 if (setup_detour_execution(p, regs, reenter)) 475 if (setup_detour_execution(p, regs, reenter))
468 return; 476 return;
@@ -504,8 +512,8 @@ static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
504 * within the handler. We save the original kprobes variables and just single 512 * within the handler. We save the original kprobes variables and just single
505 * step on the instruction of the new probe without calling any user handlers. 513 * step on the instruction of the new probe without calling any user handlers.
506 */ 514 */
507static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, 515static int __kprobes
508 struct kprobe_ctlblk *kcb) 516reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
509{ 517{
510 switch (kcb->kprobe_status) { 518 switch (kcb->kprobe_status) {
511 case KPROBE_HIT_SSDONE: 519 case KPROBE_HIT_SSDONE:
@@ -600,69 +608,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
600 return 0; 608 return 0;
601} 609}
602 610
603#ifdef CONFIG_X86_64
604#define SAVE_REGS_STRING \
605 /* Skip cs, ip, orig_ax. */ \
606 " subq $24, %rsp\n" \
607 " pushq %rdi\n" \
608 " pushq %rsi\n" \
609 " pushq %rdx\n" \
610 " pushq %rcx\n" \
611 " pushq %rax\n" \
612 " pushq %r8\n" \
613 " pushq %r9\n" \
614 " pushq %r10\n" \
615 " pushq %r11\n" \
616 " pushq %rbx\n" \
617 " pushq %rbp\n" \
618 " pushq %r12\n" \
619 " pushq %r13\n" \
620 " pushq %r14\n" \
621 " pushq %r15\n"
622#define RESTORE_REGS_STRING \
623 " popq %r15\n" \
624 " popq %r14\n" \
625 " popq %r13\n" \
626 " popq %r12\n" \
627 " popq %rbp\n" \
628 " popq %rbx\n" \
629 " popq %r11\n" \
630 " popq %r10\n" \
631 " popq %r9\n" \
632 " popq %r8\n" \
633 " popq %rax\n" \
634 " popq %rcx\n" \
635 " popq %rdx\n" \
636 " popq %rsi\n" \
637 " popq %rdi\n" \
638 /* Skip orig_ax, ip, cs */ \
639 " addq $24, %rsp\n"
640#else
641#define SAVE_REGS_STRING \
642 /* Skip cs, ip, orig_ax and gs. */ \
643 " subl $16, %esp\n" \
644 " pushl %fs\n" \
645 " pushl %es\n" \
646 " pushl %ds\n" \
647 " pushl %eax\n" \
648 " pushl %ebp\n" \
649 " pushl %edi\n" \
650 " pushl %esi\n" \
651 " pushl %edx\n" \
652 " pushl %ecx\n" \
653 " pushl %ebx\n"
654#define RESTORE_REGS_STRING \
655 " popl %ebx\n" \
656 " popl %ecx\n" \
657 " popl %edx\n" \
658 " popl %esi\n" \
659 " popl %edi\n" \
660 " popl %ebp\n" \
661 " popl %eax\n" \
662 /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\
663 " addl $24, %esp\n"
664#endif
665
666/* 611/*
667 * When a retprobed function returns, this code saves registers and 612 * When a retprobed function returns, this code saves registers and
668 * calls trampoline_handler(), which calls the kretprobe's handler. 613 * calls trampoline_handler(), which calls the kretprobe's handler.
@@ -816,8 +761,8 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
816 * jump instruction after the copied instruction, that jumps to the next 761 * jump instruction after the copied instruction, that jumps to the next
817 * instruction after the probepoint. 762 * instruction after the probepoint.
818 */ 763 */
819static void __kprobes resume_execution(struct kprobe *p, 764static void __kprobes
820 struct pt_regs *regs, struct kprobe_ctlblk *kcb) 765resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
821{ 766{
822 unsigned long *tos = stack_addr(regs); 767 unsigned long *tos = stack_addr(regs);
823 unsigned long copy_ip = (unsigned long)p->ainsn.insn; 768 unsigned long copy_ip = (unsigned long)p->ainsn.insn;
@@ -996,8 +941,8 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
996/* 941/*
997 * Wrapper routine for handling exceptions. 942 * Wrapper routine for handling exceptions.
998 */ 943 */
999int __kprobes kprobe_exceptions_notify(struct notifier_block *self, 944int __kprobes
1000 unsigned long val, void *data) 945kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data)
1001{ 946{
1002 struct die_args *args = data; 947 struct die_args *args = data;
1003 int ret = NOTIFY_DONE; 948 int ret = NOTIFY_DONE;
@@ -1107,466 +1052,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
1107 return 0; 1052 return 0;
1108} 1053}
1109 1054
1110
1111#ifdef CONFIG_OPTPROBES
1112
1113/* Insert a call instruction at address 'from', which calls address 'to'.*/
1114static void __kprobes synthesize_relcall(void *from, void *to)
1115{
1116 __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
1117}
1118
1119/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
1120static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
1121 unsigned long val)
1122{
1123#ifdef CONFIG_X86_64
1124 *addr++ = 0x48;
1125 *addr++ = 0xbf;
1126#else
1127 *addr++ = 0xb8;
1128#endif
1129 *(unsigned long *)addr = val;
1130}
1131
1132static void __used __kprobes kprobes_optinsn_template_holder(void)
1133{
1134 asm volatile (
1135 ".global optprobe_template_entry\n"
1136 "optprobe_template_entry: \n"
1137#ifdef CONFIG_X86_64
1138 /* We don't bother saving the ss register */
1139 " pushq %rsp\n"
1140 " pushfq\n"
1141 SAVE_REGS_STRING
1142 " movq %rsp, %rsi\n"
1143 ".global optprobe_template_val\n"
1144 "optprobe_template_val: \n"
1145 ASM_NOP5
1146 ASM_NOP5
1147 ".global optprobe_template_call\n"
1148 "optprobe_template_call: \n"
1149 ASM_NOP5
1150 /* Move flags to rsp */
1151 " movq 144(%rsp), %rdx\n"
1152 " movq %rdx, 152(%rsp)\n"
1153 RESTORE_REGS_STRING
1154 /* Skip flags entry */
1155 " addq $8, %rsp\n"
1156 " popfq\n"
1157#else /* CONFIG_X86_32 */
1158 " pushf\n"
1159 SAVE_REGS_STRING
1160 " movl %esp, %edx\n"
1161 ".global optprobe_template_val\n"
1162 "optprobe_template_val: \n"
1163 ASM_NOP5
1164 ".global optprobe_template_call\n"
1165 "optprobe_template_call: \n"
1166 ASM_NOP5
1167 RESTORE_REGS_STRING
1168 " addl $4, %esp\n" /* skip cs */
1169 " popf\n"
1170#endif
1171 ".global optprobe_template_end\n"
1172 "optprobe_template_end: \n");
1173}
1174
1175#define TMPL_MOVE_IDX \
1176 ((long)&optprobe_template_val - (long)&optprobe_template_entry)
1177#define TMPL_CALL_IDX \
1178 ((long)&optprobe_template_call - (long)&optprobe_template_entry)
1179#define TMPL_END_IDX \
1180 ((long)&optprobe_template_end - (long)&optprobe_template_entry)
1181
1182#define INT3_SIZE sizeof(kprobe_opcode_t)
1183
1184/* Optimized kprobe call back function: called from optinsn */
1185static void __kprobes optimized_callback(struct optimized_kprobe *op,
1186 struct pt_regs *regs)
1187{
1188 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
1189 unsigned long flags;
1190
1191 /* This is possible if op is under delayed unoptimizing */
1192 if (kprobe_disabled(&op->kp))
1193 return;
1194
1195 local_irq_save(flags);
1196 if (kprobe_running()) {
1197 kprobes_inc_nmissed_count(&op->kp);
1198 } else {
1199 /* Save skipped registers */
1200#ifdef CONFIG_X86_64
1201 regs->cs = __KERNEL_CS;
1202#else
1203 regs->cs = __KERNEL_CS | get_kernel_rpl();
1204 regs->gs = 0;
1205#endif
1206 regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
1207 regs->orig_ax = ~0UL;
1208
1209 __this_cpu_write(current_kprobe, &op->kp);
1210 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
1211 opt_pre_handler(&op->kp, regs);
1212 __this_cpu_write(current_kprobe, NULL);
1213 }
1214 local_irq_restore(flags);
1215}
1216
1217static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
1218{
1219 int len = 0, ret;
1220
1221 while (len < RELATIVEJUMP_SIZE) {
1222 ret = __copy_instruction(dest + len, src + len, 1);
1223 if (!ret || !can_boost(dest + len))
1224 return -EINVAL;
1225 len += ret;
1226 }
1227 /* Check whether the address range is reserved */
1228 if (ftrace_text_reserved(src, src + len - 1) ||
1229 alternatives_text_reserved(src, src + len - 1) ||
1230 jump_label_text_reserved(src, src + len - 1))
1231 return -EBUSY;
1232
1233 return len;
1234}
1235
1236/* Check whether insn is indirect jump */
1237static int __kprobes insn_is_indirect_jump(struct insn *insn)
1238{
1239 return ((insn->opcode.bytes[0] == 0xff &&
1240 (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
1241 insn->opcode.bytes[0] == 0xea); /* Segment based jump */
1242}
1243
1244/* Check whether insn jumps into specified address range */
1245static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
1246{
1247 unsigned long target = 0;
1248
1249 switch (insn->opcode.bytes[0]) {
1250 case 0xe0: /* loopne */
1251 case 0xe1: /* loope */
1252 case 0xe2: /* loop */
1253 case 0xe3: /* jcxz */
1254 case 0xe9: /* near relative jump */
1255 case 0xeb: /* short relative jump */
1256 break;
1257 case 0x0f:
1258 if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
1259 break;
1260 return 0;
1261 default:
1262 if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
1263 break;
1264 return 0;
1265 }
1266 target = (unsigned long)insn->next_byte + insn->immediate.value;
1267
1268 return (start <= target && target <= start + len);
1269}
1270
1271/* Decode whole function to ensure any instructions don't jump into target */
1272static int __kprobes can_optimize(unsigned long paddr)
1273{
1274 int ret;
1275 unsigned long addr, size = 0, offset = 0;
1276 struct insn insn;
1277 kprobe_opcode_t buf[MAX_INSN_SIZE];
1278
1279 /* Lookup symbol including addr */
1280 if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
1281 return 0;
1282
1283 /*
1284 * Do not optimize in the entry code due to the unstable
1285 * stack handling.
1286 */
1287 if ((paddr >= (unsigned long )__entry_text_start) &&
1288 (paddr < (unsigned long )__entry_text_end))
1289 return 0;
1290
1291 /* Check there is enough space for a relative jump. */
1292 if (size - offset < RELATIVEJUMP_SIZE)
1293 return 0;
1294
1295 /* Decode instructions */
1296 addr = paddr - offset;
1297 while (addr < paddr - offset + size) { /* Decode until function end */
1298 if (search_exception_tables(addr))
1299 /*
1300 * Since some fixup code will jumps into this function,
1301 * we can't optimize kprobe in this function.
1302 */
1303 return 0;
1304 kernel_insn_init(&insn, (void *)addr);
1305 insn_get_opcode(&insn);
1306 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
1307 ret = recover_probed_instruction(buf, addr);
1308 if (ret)
1309 return 0;
1310 kernel_insn_init(&insn, buf);
1311 }
1312 insn_get_length(&insn);
1313 /* Recover address */
1314 insn.kaddr = (void *)addr;
1315 insn.next_byte = (void *)(addr + insn.length);
1316 /* Check any instructions don't jump into target */
1317 if (insn_is_indirect_jump(&insn) ||
1318 insn_jump_into_range(&insn, paddr + INT3_SIZE,
1319 RELATIVE_ADDR_SIZE))
1320 return 0;
1321 addr += insn.length;
1322 }
1323
1324 return 1;
1325}
1326
1327/* Check optimized_kprobe can actually be optimized. */
1328int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
1329{
1330 int i;
1331 struct kprobe *p;
1332
1333 for (i = 1; i < op->optinsn.size; i++) {
1334 p = get_kprobe(op->kp.addr + i);
1335 if (p && !kprobe_disabled(p))
1336 return -EEXIST;
1337 }
1338
1339 return 0;
1340}
1341
1342/* Check the addr is within the optimized instructions. */
1343int __kprobes arch_within_optimized_kprobe(struct optimized_kprobe *op,
1344 unsigned long addr)
1345{
1346 return ((unsigned long)op->kp.addr <= addr &&
1347 (unsigned long)op->kp.addr + op->optinsn.size > addr);
1348}
1349
1350/* Free optimized instruction slot */
1351static __kprobes
1352void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
1353{
1354 if (op->optinsn.insn) {
1355 free_optinsn_slot(op->optinsn.insn, dirty);
1356 op->optinsn.insn = NULL;
1357 op->optinsn.size = 0;
1358 }
1359}
1360
1361void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
1362{
1363 __arch_remove_optimized_kprobe(op, 1);
1364}
1365
1366/*
1367 * Copy replacing target instructions
1368 * Target instructions MUST be relocatable (checked inside)
1369 */
1370int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
1371{
1372 u8 *buf;
1373 int ret;
1374 long rel;
1375
1376 if (!can_optimize((unsigned long)op->kp.addr))
1377 return -EILSEQ;
1378
1379 op->optinsn.insn = get_optinsn_slot();
1380 if (!op->optinsn.insn)
1381 return -ENOMEM;
1382
1383 /*
1384 * Verify if the address gap is in 2GB range, because this uses
1385 * a relative jump.
1386 */
1387 rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
1388 if (abs(rel) > 0x7fffffff)
1389 return -ERANGE;
1390
1391 buf = (u8 *)op->optinsn.insn;
1392
1393 /* Copy instructions into the out-of-line buffer */
1394 ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
1395 if (ret < 0) {
1396 __arch_remove_optimized_kprobe(op, 0);
1397 return ret;
1398 }
1399 op->optinsn.size = ret;
1400
1401 /* Copy arch-dep-instance from template */
1402 memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
1403
1404 /* Set probe information */
1405 synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
1406
1407 /* Set probe function call */
1408 synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
1409
1410 /* Set returning jmp instruction at the tail of out-of-line buffer */
1411 synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
1412 (u8 *)op->kp.addr + op->optinsn.size);
1413
1414 flush_icache_range((unsigned long) buf,
1415 (unsigned long) buf + TMPL_END_IDX +
1416 op->optinsn.size + RELATIVEJUMP_SIZE);
1417 return 0;
1418}
1419
1420#define MAX_OPTIMIZE_PROBES 256
1421static struct text_poke_param *jump_poke_params;
1422static struct jump_poke_buffer {
1423 u8 buf[RELATIVEJUMP_SIZE];
1424} *jump_poke_bufs;
1425
1426static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
1427 u8 *insn_buf,
1428 struct optimized_kprobe *op)
1429{
1430 s32 rel = (s32)((long)op->optinsn.insn -
1431 ((long)op->kp.addr + RELATIVEJUMP_SIZE));
1432
1433 /* Backup instructions which will be replaced by jump address */
1434 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
1435 RELATIVE_ADDR_SIZE);
1436
1437 insn_buf[0] = RELATIVEJUMP_OPCODE;
1438 *(s32 *)(&insn_buf[1]) = rel;
1439
1440 tprm->addr = op->kp.addr;
1441 tprm->opcode = insn_buf;
1442 tprm->len = RELATIVEJUMP_SIZE;
1443}
1444
1445/*
1446 * Replace breakpoints (int3) with relative jumps.
1447 * Caller must call with locking kprobe_mutex and text_mutex.
1448 */
1449void __kprobes arch_optimize_kprobes(struct list_head *oplist)
1450{
1451 struct optimized_kprobe *op, *tmp;
1452 int c = 0;
1453
1454 list_for_each_entry_safe(op, tmp, oplist, list) {
1455 WARN_ON(kprobe_disabled(&op->kp));
1456 /* Setup param */
1457 setup_optimize_kprobe(&jump_poke_params[c],
1458 jump_poke_bufs[c].buf, op);
1459 list_del_init(&op->list);
1460 if (++c >= MAX_OPTIMIZE_PROBES)
1461 break;
1462 }
1463
1464 /*
1465 * text_poke_smp doesn't support NMI/MCE code modifying.
1466 * However, since kprobes itself also doesn't support NMI/MCE
1467 * code probing, it's not a problem.
1468 */
1469 text_poke_smp_batch(jump_poke_params, c);
1470}
1471
1472static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
1473 u8 *insn_buf,
1474 struct optimized_kprobe *op)
1475{
1476 /* Set int3 to first byte for kprobes */
1477 insn_buf[0] = BREAKPOINT_INSTRUCTION;
1478 memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
1479
1480 tprm->addr = op->kp.addr;
1481 tprm->opcode = insn_buf;
1482 tprm->len = RELATIVEJUMP_SIZE;
1483}
1484
1485/*
1486 * Recover original instructions and breakpoints from relative jumps.
1487 * Caller must call with locking kprobe_mutex.
1488 */
1489extern void arch_unoptimize_kprobes(struct list_head *oplist,
1490 struct list_head *done_list)
1491{
1492 struct optimized_kprobe *op, *tmp;
1493 int c = 0;
1494
1495 list_for_each_entry_safe(op, tmp, oplist, list) {
1496 /* Setup param */
1497 setup_unoptimize_kprobe(&jump_poke_params[c],
1498 jump_poke_bufs[c].buf, op);
1499 list_move(&op->list, done_list);
1500 if (++c >= MAX_OPTIMIZE_PROBES)
1501 break;
1502 }
1503
1504 /*
1505 * text_poke_smp doesn't support NMI/MCE code modifying.
1506 * However, since kprobes itself also doesn't support NMI/MCE
1507 * code probing, it's not a problem.
1508 */
1509 text_poke_smp_batch(jump_poke_params, c);
1510}
1511
1512/* Replace a relative jump with a breakpoint (int3). */
1513void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
1514{
1515 u8 buf[RELATIVEJUMP_SIZE];
1516
1517 /* Set int3 to first byte for kprobes */
1518 buf[0] = BREAKPOINT_INSTRUCTION;
1519 memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
1520 text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
1521}
1522
1523static int __kprobes setup_detour_execution(struct kprobe *p,
1524 struct pt_regs *regs,
1525 int reenter)
1526{
1527 struct optimized_kprobe *op;
1528
1529 if (p->flags & KPROBE_FLAG_OPTIMIZED) {
1530 /* This kprobe is really able to run optimized path. */
1531 op = container_of(p, struct optimized_kprobe, kp);
1532 /* Detour through copied instructions */
1533 regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
1534 if (!reenter)
1535 reset_current_kprobe();
1536 preempt_enable_no_resched();
1537 return 1;
1538 }
1539 return 0;
1540}
1541
1542static int __kprobes init_poke_params(void)
1543{
1544 /* Allocate code buffer and parameter array */
1545 jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
1546 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
1547 if (!jump_poke_bufs)
1548 return -ENOMEM;
1549
1550 jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
1551 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
1552 if (!jump_poke_params) {
1553 kfree(jump_poke_bufs);
1554 jump_poke_bufs = NULL;
1555 return -ENOMEM;
1556 }
1557
1558 return 0;
1559}
1560#else /* !CONFIG_OPTPROBES */
1561static int __kprobes init_poke_params(void)
1562{
1563 return 0;
1564}
1565#endif
1566
1567int __init arch_init_kprobes(void) 1055int __init arch_init_kprobes(void)
1568{ 1056{
1569 return init_poke_params(); 1057 return arch_init_optprobes();
1570} 1058}
1571 1059
1572int __kprobes arch_trampoline_kprobe(struct kprobe *p) 1060int __kprobes arch_trampoline_kprobe(struct kprobe *p)
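
One detail of the reworked __copy_instruction() is worth spelling out: a
RIP-relative operand encodes "address of the next instruction plus disp", so
an instruction copied from src to dest keeps its absolute target only if the
displacement is rewritten as src + disp - dest, which is exactly the newdisp
computation in the hunk above. A stand-alone sketch of that arithmetic
(relocate_riprel_disp is a made-up name):

    #include <stdint.h>

    /*
     * Recompute a RIP-relative displacement for an instruction copied
     * from 'src' to 'dest'. Keeping the absolute target fixed requires
     *   (dest + len) + newdisp == (src + len) + disp
     * which is independent of the instruction length 'len'.
     */
    static int32_t relocate_riprel_disp(uintptr_t src, uintptr_t dest,
                                        int32_t disp)
    {
            int64_t newdisp = (int64_t)src + disp - (int64_t)dest;
            /* the kernel BUG_ON()s when this overflows 32 bits */
            return (int32_t)newdisp;
    }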
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index f0c6fd6f176b..694d801bf606 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -438,9 +438,9 @@ void __init kvm_guest_init(void)
438static __init int activate_jump_labels(void) 438static __init int activate_jump_labels(void)
439{ 439{
440 if (has_steal_clock) { 440 if (has_steal_clock) {
441 jump_label_inc(&paravirt_steal_enabled); 441 static_key_slow_inc(&paravirt_steal_enabled);
442 if (steal_acc) 442 if (steal_acc)
443 jump_label_inc(&paravirt_steal_rq_enabled); 443 static_key_slow_inc(&paravirt_steal_rq_enabled);
444 } 444 }
445 445
446 return 0; 446 return 0;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index d90272e6bc40..ada2f99388dd 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -202,8 +202,8 @@ static void native_flush_tlb_single(unsigned long addr)
202 __native_flush_tlb_single(addr); 202 __native_flush_tlb_single(addr);
203} 203}
204 204
205struct jump_label_key paravirt_steal_enabled; 205struct static_key paravirt_steal_enabled;
206struct jump_label_key paravirt_steal_rq_enabled; 206struct static_key paravirt_steal_rq_enabled;
207 207
208static u64 native_steal_clock(int cpu) 208static u64 native_steal_clock(int cpu)
209{ 209{
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 15763af7bfe3..44eefde92109 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -377,8 +377,8 @@ static inline int hlt_use_halt(void)
377void default_idle(void) 377void default_idle(void)
378{ 378{
379 if (hlt_use_halt()) { 379 if (hlt_use_halt()) {
380 trace_power_start(POWER_CSTATE, 1, smp_processor_id()); 380 trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
381 trace_cpu_idle(1, smp_processor_id()); 381 trace_cpu_idle_rcuidle(1, smp_processor_id());
382 current_thread_info()->status &= ~TS_POLLING; 382 current_thread_info()->status &= ~TS_POLLING;
383 /* 383 /*
384 * TS_POLLING-cleared state must be visible before we 384 * TS_POLLING-cleared state must be visible before we
@@ -391,8 +391,8 @@ void default_idle(void)
391 else 391 else
392 local_irq_enable(); 392 local_irq_enable();
393 current_thread_info()->status |= TS_POLLING; 393 current_thread_info()->status |= TS_POLLING;
394 trace_power_end(smp_processor_id()); 394 trace_power_end_rcuidle(smp_processor_id());
395 trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); 395 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
396 } else { 396 } else {
397 local_irq_enable(); 397 local_irq_enable();
398 /* loop is done by the caller */ 398 /* loop is done by the caller */
@@ -450,8 +450,8 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
450static void mwait_idle(void) 450static void mwait_idle(void)
451{ 451{
452 if (!need_resched()) { 452 if (!need_resched()) {
453 trace_power_start(POWER_CSTATE, 1, smp_processor_id()); 453 trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
454 trace_cpu_idle(1, smp_processor_id()); 454 trace_cpu_idle_rcuidle(1, smp_processor_id());
455 if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) 455 if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
456 clflush((void *)&current_thread_info()->flags); 456 clflush((void *)&current_thread_info()->flags);
457 457
@@ -461,8 +461,8 @@ static void mwait_idle(void)
461 __sti_mwait(0, 0); 461 __sti_mwait(0, 0);
462 else 462 else
463 local_irq_enable(); 463 local_irq_enable();
464 trace_power_end(smp_processor_id()); 464 trace_power_end_rcuidle(smp_processor_id());
465 trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); 465 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
466 } else 466 } else
467 local_irq_enable(); 467 local_irq_enable();
468} 468}
@@ -474,13 +474,13 @@ static void mwait_idle(void)
474 */ 474 */
475static void poll_idle(void) 475static void poll_idle(void)
476{ 476{
477 trace_power_start(POWER_CSTATE, 0, smp_processor_id()); 477 trace_power_start_rcuidle(POWER_CSTATE, 0, smp_processor_id());
478 trace_cpu_idle(0, smp_processor_id()); 478 trace_cpu_idle_rcuidle(0, smp_processor_id());
479 local_irq_enable(); 479 local_irq_enable();
480 while (!need_resched()) 480 while (!need_resched())
481 cpu_relax(); 481 cpu_relax();
482 trace_power_end(smp_processor_id()); 482 trace_power_end_rcuidle(smp_processor_id());
483 trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); 483 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
484} 484}
485 485
486/* 486/*
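
The trace_*_rcuidle() conversions here and in cpuidle.c below address one
problem: these tracepoints fire from the idle loop, after the CPU has told
RCU it is quiescent, where the tracepoint's RCU-protected callback list must
not be walked. Roughly, and only as a hedged sketch (the real wrapper is
generated by tracepoint.h and is not shown in this patch), a _rcuidle
variant behaves like:

    /* sketch only: make RCU watch this CPU around the probe call */
    static inline void trace_power_end_rcuidle(int cpu)
    {
            rcu_idle_exit();       /* leave RCU-idle mode */
            trace_power_end(cpu);  /* the ordinary tracepoint */
            rcu_idle_enter();      /* re-enter RCU-idle mode */
    }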
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index fe15dcc07a6b..ea7b4fd34676 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -234,7 +234,7 @@ static void audit_vcpu_spte(struct kvm_vcpu *vcpu)
234} 234}
235 235
236static bool mmu_audit; 236static bool mmu_audit;
237static struct jump_label_key mmu_audit_key; 237static struct static_key mmu_audit_key;
238 238
239static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) 239static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
240{ 240{
@@ -250,7 +250,7 @@ static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
250 250
251static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) 251static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
252{ 252{
253 if (static_branch((&mmu_audit_key))) 253 if (static_key_false((&mmu_audit_key)))
254 __kvm_mmu_audit(vcpu, point); 254 __kvm_mmu_audit(vcpu, point);
255} 255}
256 256
@@ -259,7 +259,7 @@ static void mmu_audit_enable(void)
259 if (mmu_audit) 259 if (mmu_audit)
260 return; 260 return;
261 261
262 jump_label_inc(&mmu_audit_key); 262 static_key_slow_inc(&mmu_audit_key);
263 mmu_audit = true; 263 mmu_audit = true;
264} 264}
265 265
@@ -268,7 +268,7 @@ static void mmu_audit_disable(void)
268 if (!mmu_audit) 268 if (!mmu_audit)
269 return; 269 return;
270 270
271 jump_label_dec(&mmu_audit_key); 271 static_key_slow_dec(&mmu_audit_key);
272 mmu_audit = false; 272 mmu_audit = false;
273} 273}
274 274
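
The jump-label call-site hunks in this series (kvm.c, paravirt.c and
mmu_audit.c above) all apply the same mechanical rename to the new
static-key API:

    struct jump_label_key    ->  struct static_key
    static_branch(&key)      ->  static_key_false(&key)
    jump_label_inc(&key)     ->  static_key_slow_inc(&key)
    jump_label_dec(&key)     ->  static_key_slow_dec(&key)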
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c
index 88ad5fbda6e1..c1f01a8e9f65 100644
--- a/arch/x86/lib/inat.c
+++ b/arch/x86/lib/inat.c
@@ -29,46 +29,46 @@ insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode)
29 return inat_primary_table[opcode]; 29 return inat_primary_table[opcode];
30} 30}
31 31
32insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx, 32int inat_get_last_prefix_id(insn_byte_t last_pfx)
33{
34 insn_attr_t lpfx_attr;
35
36 lpfx_attr = inat_get_opcode_attribute(last_pfx);
37 return inat_last_prefix_id(lpfx_attr);
38}
39
40insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id,
33 insn_attr_t esc_attr) 41 insn_attr_t esc_attr)
34{ 42{
35 const insn_attr_t *table; 43 const insn_attr_t *table;
36 insn_attr_t lpfx_attr; 44 int n;
37 int n, m = 0;
38 45
39 n = inat_escape_id(esc_attr); 46 n = inat_escape_id(esc_attr);
40 if (last_pfx) { 47
41 lpfx_attr = inat_get_opcode_attribute(last_pfx);
42 m = inat_last_prefix_id(lpfx_attr);
43 }
44 table = inat_escape_tables[n][0]; 48 table = inat_escape_tables[n][0];
45 if (!table) 49 if (!table)
46 return 0; 50 return 0;
47 if (inat_has_variant(table[opcode]) && m) { 51 if (inat_has_variant(table[opcode]) && lpfx_id) {
48 table = inat_escape_tables[n][m]; 52 table = inat_escape_tables[n][lpfx_id];
49 if (!table) 53 if (!table)
50 return 0; 54 return 0;
51 } 55 }
52 return table[opcode]; 56 return table[opcode];
53} 57}
54 58
55insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, 59insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id,
56 insn_attr_t grp_attr) 60 insn_attr_t grp_attr)
57{ 61{
58 const insn_attr_t *table; 62 const insn_attr_t *table;
59 insn_attr_t lpfx_attr; 63 int n;
60 int n, m = 0;
61 64
62 n = inat_group_id(grp_attr); 65 n = inat_group_id(grp_attr);
63 if (last_pfx) { 66
64 lpfx_attr = inat_get_opcode_attribute(last_pfx);
65 m = inat_last_prefix_id(lpfx_attr);
66 }
67 table = inat_group_tables[n][0]; 67 table = inat_group_tables[n][0];
68 if (!table) 68 if (!table)
69 return inat_group_common_attribute(grp_attr); 69 return inat_group_common_attribute(grp_attr);
70 if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) { 70 if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) {
71 table = inat_group_tables[n][m]; 71 table = inat_group_tables[n][lpfx_id];
72 if (!table) 72 if (!table)
73 return inat_group_common_attribute(grp_attr); 73 return inat_group_common_attribute(grp_attr);
74 } 74 }
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 5a1f9f3e3fbb..25feb1ae71c5 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -185,7 +185,8 @@ err_out:
185void insn_get_opcode(struct insn *insn) 185void insn_get_opcode(struct insn *insn)
186{ 186{
187 struct insn_field *opcode = &insn->opcode; 187 struct insn_field *opcode = &insn->opcode;
188 insn_byte_t op, pfx; 188 insn_byte_t op;
189 int pfx_id;
189 if (opcode->got) 190 if (opcode->got)
190 return; 191 return;
191 if (!insn->prefixes.got) 192 if (!insn->prefixes.got)
@@ -212,8 +213,8 @@ void insn_get_opcode(struct insn *insn)
212 /* Get escaped opcode */ 213 /* Get escaped opcode */
213 op = get_next(insn_byte_t, insn); 214 op = get_next(insn_byte_t, insn);
214 opcode->bytes[opcode->nbytes++] = op; 215 opcode->bytes[opcode->nbytes++] = op;
215 pfx = insn_last_prefix(insn); 216 pfx_id = insn_last_prefix_id(insn);
216 insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); 217 insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
217 } 218 }
218 if (inat_must_vex(insn->attr)) 219 if (inat_must_vex(insn->attr))
219 insn->attr = 0; /* This instruction is bad */ 220 insn->attr = 0; /* This instruction is bad */
@@ -235,7 +236,7 @@ err_out:
235void insn_get_modrm(struct insn *insn) 236void insn_get_modrm(struct insn *insn)
236{ 237{
237 struct insn_field *modrm = &insn->modrm; 238 struct insn_field *modrm = &insn->modrm;
238 insn_byte_t pfx, mod; 239 insn_byte_t pfx_id, mod;
239 if (modrm->got) 240 if (modrm->got)
240 return; 241 return;
241 if (!insn->opcode.got) 242 if (!insn->opcode.got)
@@ -246,8 +247,8 @@ void insn_get_modrm(struct insn *insn)
246 modrm->value = mod; 247 modrm->value = mod;
247 modrm->nbytes = 1; 248 modrm->nbytes = 1;
248 if (inat_is_group(insn->attr)) { 249 if (inat_is_group(insn->attr)) {
249 pfx = insn_last_prefix(insn); 250 pfx_id = insn_last_prefix_id(insn);
250 insn->attr = inat_get_group_attribute(mod, pfx, 251 insn->attr = inat_get_group_attribute(mod, pfx_id,
251 insn->attr); 252 insn->attr);
252 if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) 253 if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
253 insn->attr = 0; /* This is bad */ 254 insn->attr = 0; /* This is bad */
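
The inat/insn changes above split the last-prefix lookup out of the
attribute getters: callers now derive the prefix id once and pass a plain
int to both table lookups, instead of handing each getter the raw prefix
byte and having it re-derive the id. The resulting decoder pattern, as used
by insn_get_opcode() and insn_get_modrm():

    /* sketch of the new call pattern (see the insn.c hunks above) */
    int lpfx_id = insn_last_prefix_id(insn);   /* 0 when there is no prefix */

    insn->attr = inat_get_escape_attribute(op, lpfx_id, insn->attr);
    /* ...and, for grouped opcodes: */
    insn->attr = inat_get_group_attribute(mod, lpfx_id, insn->attr);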
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 59f4261c753a..6588f43017bd 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -94,13 +94,13 @@ int cpuidle_idle_call(void)
94 94
95 target_state = &drv->states[next_state]; 95 target_state = &drv->states[next_state];
96 96
97 trace_power_start(POWER_CSTATE, next_state, dev->cpu); 97 trace_power_start_rcuidle(POWER_CSTATE, next_state, dev->cpu);
98 trace_cpu_idle(next_state, dev->cpu); 98 trace_cpu_idle_rcuidle(next_state, dev->cpu);
99 99
100 entered_state = target_state->enter(dev, drv, next_state); 100 entered_state = target_state->enter(dev, drv, next_state);
101 101
102 trace_power_end(dev->cpu); 102 trace_power_end_rcuidle(dev->cpu);
103 trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu); 103 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
104 104
105 if (entered_state >= 0) { 105 if (entered_state >= 0) {
106 /* Update cpuidle counters */ 106 /* Update cpuidle counters */
diff --git a/fs/exec.c b/fs/exec.c
index 153dee14fe55..b0695a9900ef 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -63,6 +63,8 @@
63#include <trace/events/task.h> 63#include <trace/events/task.h>
64#include "internal.h" 64#include "internal.h"
65 65
66#include <trace/events/sched.h>
67
66int core_uses_pid; 68int core_uses_pid;
67char core_pattern[CORENAME_MAX_SIZE] = "core"; 69char core_pattern[CORENAME_MAX_SIZE] = "core";
68unsigned int core_pipe_limit; 70unsigned int core_pipe_limit;
@@ -1402,9 +1404,10 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1402 */ 1404 */
1403 bprm->recursion_depth = depth; 1405 bprm->recursion_depth = depth;
1404 if (retval >= 0) { 1406 if (retval >= 0) {
1405 if (depth == 0) 1407 if (depth == 0) {
1406 ptrace_event(PTRACE_EVENT_EXEC, 1408 trace_sched_process_exec(current, old_pid, bprm);
1407 old_pid); 1409 ptrace_event(PTRACE_EVENT_EXEC, old_pid);
1410 }
1408 put_binfmt(fmt); 1411 put_binfmt(fmt);
1409 allow_write_access(bprm->file); 1412 allow_write_access(bprm->file);
1410 if (bprm->file) 1413 if (bprm->file)
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 028e26f0bf08..72a6cabb4d5b 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -31,16 +31,33 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
31 31
32typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); 32typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip);
33 33
34/*
35 * FTRACE_OPS_FL_* bits denote the state of ftrace_ops struct and are
36 * set in the flags member.
37 *
38 * ENABLED - set/unset when ftrace_ops is registered/unregistered
39 * GLOBAL - set manually by ftrace_ops user to denote the ftrace_ops
40 * is part of the global tracers sharing the same filter
41 * via set_ftrace_* debugfs files.
42 * DYNAMIC - set when ftrace_ops is registered to denote dynamically
43 * allocated ftrace_ops which need special care
44 * CONTROL - set manually by ftrace_ops user to denote the ftrace_ops
45 * can be controlled by the following calls:
46 * ftrace_function_local_enable
47 * ftrace_function_local_disable
48 */
34enum { 49enum {
35 FTRACE_OPS_FL_ENABLED = 1 << 0, 50 FTRACE_OPS_FL_ENABLED = 1 << 0,
36 FTRACE_OPS_FL_GLOBAL = 1 << 1, 51 FTRACE_OPS_FL_GLOBAL = 1 << 1,
37 FTRACE_OPS_FL_DYNAMIC = 1 << 2, 52 FTRACE_OPS_FL_DYNAMIC = 1 << 2,
53 FTRACE_OPS_FL_CONTROL = 1 << 3,
38}; 54};
39 55
40struct ftrace_ops { 56struct ftrace_ops {
41 ftrace_func_t func; 57 ftrace_func_t func;
42 struct ftrace_ops *next; 58 struct ftrace_ops *next;
43 unsigned long flags; 59 unsigned long flags;
60 int __percpu *disabled;
44#ifdef CONFIG_DYNAMIC_FTRACE 61#ifdef CONFIG_DYNAMIC_FTRACE
45 struct ftrace_hash *notrace_hash; 62 struct ftrace_hash *notrace_hash;
46 struct ftrace_hash *filter_hash; 63 struct ftrace_hash *filter_hash;
@@ -97,6 +114,55 @@ int register_ftrace_function(struct ftrace_ops *ops);
97int unregister_ftrace_function(struct ftrace_ops *ops); 114int unregister_ftrace_function(struct ftrace_ops *ops);
98void clear_ftrace_function(void); 115void clear_ftrace_function(void);
99 116
117/**
118 * ftrace_function_local_enable - enable controlled ftrace_ops on current cpu
119 *
120 * This function enables tracing on current cpu by decreasing
121 * the per cpu control variable.
122 * It must be called with preemption disabled and only on ftrace_ops
123 * registered with FTRACE_OPS_FL_CONTROL. If called without preemption
124 * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled.
125 */
126static inline void ftrace_function_local_enable(struct ftrace_ops *ops)
127{
128 if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL)))
129 return;
130
131 (*this_cpu_ptr(ops->disabled))--;
132}
133
134/**
135 * ftrace_function_local_disable - disable controlled ftrace_ops on current cpu
136 *
137 * This function disables tracing on current cpu by increasing
138 * the per cpu control variable.
139 * It must be called with preemption disabled and only on ftrace_ops
140 * registered with FTRACE_OPS_FL_CONTROL. If called without preemption
141 * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled.
142 */
143static inline void ftrace_function_local_disable(struct ftrace_ops *ops)
144{
145 if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL)))
146 return;
147
148 (*this_cpu_ptr(ops->disabled))++;
149}
150
151/**
152 * ftrace_function_local_disabled - returns ftrace_ops disabled value
153 * on current cpu
154 *
155 * This function returns value of ftrace_ops::disabled on current cpu.
156 * It must be called with preemption disabled and only on ftrace_ops
157 * registered with FTRACE_OPS_FL_CONTROL. If called without preemption
158 * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled.
159 */
160static inline int ftrace_function_local_disabled(struct ftrace_ops *ops)
161{
162 WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL));
163 return *this_cpu_ptr(ops->disabled);
164}
165
100extern void ftrace_stub(unsigned long a0, unsigned long a1); 166extern void ftrace_stub(unsigned long a0, unsigned long a1);
101 167
102#else /* !CONFIG_FUNCTION_TRACER */ 168#else /* !CONFIG_FUNCTION_TRACER */
@@ -178,12 +244,13 @@ struct dyn_ftrace {
178}; 244};
179 245
180int ftrace_force_update(void); 246int ftrace_force_update(void);
181void ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, 247int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf,
182 int len, int reset); 248 int len, int reset);
183void ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, 249int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
184 int len, int reset); 250 int len, int reset);
185void ftrace_set_global_filter(unsigned char *buf, int len, int reset); 251void ftrace_set_global_filter(unsigned char *buf, int len, int reset);
186void ftrace_set_global_notrace(unsigned char *buf, int len, int reset); 252void ftrace_set_global_notrace(unsigned char *buf, int len, int reset);
253void ftrace_free_filter(struct ftrace_ops *ops);
187 254
188int register_ftrace_command(struct ftrace_func_command *cmd); 255int register_ftrace_command(struct ftrace_func_command *cmd);
189int unregister_ftrace_command(struct ftrace_func_command *cmd); 256int unregister_ftrace_command(struct ftrace_func_command *cmd);
@@ -314,9 +381,6 @@ extern void ftrace_enable_daemon(void);
314#else 381#else
315static inline int skip_trace(unsigned long ip) { return 0; } 382static inline int skip_trace(unsigned long ip) { return 0; }
316static inline int ftrace_force_update(void) { return 0; } 383static inline int ftrace_force_update(void) { return 0; }
317static inline void ftrace_set_filter(unsigned char *buf, int len, int reset)
318{
319}
320static inline void ftrace_disable_daemon(void) { } 384static inline void ftrace_disable_daemon(void) { }
321static inline void ftrace_enable_daemon(void) { } 385static inline void ftrace_enable_daemon(void) { }
322static inline void ftrace_release_mod(struct module *mod) {} 386static inline void ftrace_release_mod(struct module *mod) {}
@@ -340,6 +404,9 @@ static inline int ftrace_text_reserved(void *start, void *end)
340 */ 404 */
341#define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; }) 405#define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; })
342#define ftrace_set_early_filter(ops, buf, enable) do { } while (0) 406#define ftrace_set_early_filter(ops, buf, enable) do { } while (0)
407#define ftrace_set_filter(ops, buf, len, reset) ({ -ENODEV; })
408#define ftrace_set_notrace(ops, buf, len, reset) ({ -ENODEV; })
409#define ftrace_free_filter(ops) do { } while (0)
343 410
344static inline ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf, 411static inline ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
345 size_t cnt, loff_t *ppos) { return -ENODEV; } 412 size_t cnt, loff_t *ppos) { return -ENODEV; }
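
A hedged usage sketch of the new 'controlled' ftrace_ops interface (the
callback body and registration below are hypothetical; the
preemption-disabled rule comes from the helper comments above):

    static void my_trace_func(unsigned long ip, unsigned long parent_ip)
    {
            /* called for every traced function while not disabled */
    }

    static struct ftrace_ops my_ops = {
            .func  = my_trace_func,
            .flags = FTRACE_OPS_FL_CONTROL,
    };

    /* ...after register_ftrace_function(&my_ops) succeeds: */
    preempt_disable();
    ftrace_function_local_disable(&my_ops);  /* mute tracing on this cpu */
    /* per-cpu work that must not recurse into the tracer */
    ftrace_function_local_enable(&my_ops);   /* unmute */
    preempt_enable();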
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index c3da42dd22ba..dd478fc8f9f5 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -146,6 +146,10 @@ enum trace_reg {
146 TRACE_REG_UNREGISTER, 146 TRACE_REG_UNREGISTER,
147 TRACE_REG_PERF_REGISTER, 147 TRACE_REG_PERF_REGISTER,
148 TRACE_REG_PERF_UNREGISTER, 148 TRACE_REG_PERF_UNREGISTER,
149 TRACE_REG_PERF_OPEN,
150 TRACE_REG_PERF_CLOSE,
151 TRACE_REG_PERF_ADD,
152 TRACE_REG_PERF_DEL,
149}; 153};
150 154
151struct ftrace_event_call; 155struct ftrace_event_call;
@@ -157,7 +161,7 @@ struct ftrace_event_class {
157 void *perf_probe; 161 void *perf_probe;
158#endif 162#endif
159 int (*reg)(struct ftrace_event_call *event, 163 int (*reg)(struct ftrace_event_call *event,
160 enum trace_reg type); 164 enum trace_reg type, void *data);
161 int (*define_fields)(struct ftrace_event_call *); 165 int (*define_fields)(struct ftrace_event_call *);
162 struct list_head *(*get_fields)(struct ftrace_event_call *); 166 struct list_head *(*get_fields)(struct ftrace_event_call *);
163 struct list_head fields; 167 struct list_head fields;
@@ -165,7 +169,7 @@ struct ftrace_event_class {
165}; 169};
166 170
167extern int ftrace_event_reg(struct ftrace_event_call *event, 171extern int ftrace_event_reg(struct ftrace_event_call *event,
168 enum trace_reg type); 172 enum trace_reg type, void *data);
169 173
170enum { 174enum {
171 TRACE_EVENT_FL_ENABLED_BIT, 175 TRACE_EVENT_FL_ENABLED_BIT,
@@ -241,6 +245,7 @@ enum {
241 FILTER_STATIC_STRING, 245 FILTER_STATIC_STRING,
242 FILTER_DYN_STRING, 246 FILTER_DYN_STRING,
243 FILTER_PTR_STRING, 247 FILTER_PTR_STRING,
248 FILTER_TRACE_FN,
244}; 249};
245 250
246#define EVENT_STORAGE_SIZE 128 251#define EVENT_STORAGE_SIZE 128
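
Event classes implementing ->reg() now receive a type-specific void *data
and may see four new perf lifetime requests. A hedged sketch of a handler
(my_event_reg is hypothetical, and the assumption that data carries the
perf_event for the new requests is inferred rather than shown in this hunk):

    static int my_event_reg(struct ftrace_event_call *event,
                            enum trace_reg type, void *data)
    {
            switch (type) {
            case TRACE_REG_REGISTER:
            case TRACE_REG_UNREGISTER:
                    /* ftrace enable/disable, unchanged */
                    return 0;
            case TRACE_REG_PERF_REGISTER:
            case TRACE_REG_PERF_UNREGISTER:
                    return 0;
            case TRACE_REG_PERF_OPEN:
            case TRACE_REG_PERF_CLOSE:
            case TRACE_REG_PERF_ADD:
            case TRACE_REG_PERF_DEL:
                    /* 'data' presumably points at the perf_event here */
                    return 0;
            }
            return 0;
    }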
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index a64b00e286f5..3f830e005118 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -20,7 +20,6 @@
20#include <linux/atomic.h> 20#include <linux/atomic.h>
21#include <asm/ptrace.h> 21#include <asm/ptrace.h>
22#include <asm/system.h> 22#include <asm/system.h>
23#include <trace/events/irq.h>
24 23
25/* 24/*
26 * These correspond to the IORESOURCE_IRQ_* defines in 25 * These correspond to the IORESOURCE_IRQ_* defines in
@@ -456,11 +455,7 @@ asmlinkage void do_softirq(void);
456asmlinkage void __do_softirq(void); 455asmlinkage void __do_softirq(void);
457extern void open_softirq(int nr, void (*action)(struct softirq_action *)); 456extern void open_softirq(int nr, void (*action)(struct softirq_action *));
458extern void softirq_init(void); 457extern void softirq_init(void);
459static inline void __raise_softirq_irqoff(unsigned int nr) 458extern void __raise_softirq_irqoff(unsigned int nr);
460{
461 trace_softirq_raise(nr);
462 or_softirq_pending(1UL << nr);
463}
464 459
465extern void raise_softirq_irqoff(unsigned int nr); 460extern void raise_softirq_irqoff(unsigned int nr);
466extern void raise_softirq(unsigned int nr); 461extern void raise_softirq(unsigned int nr);
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 5ce8b140428f..c513a40510f5 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -1,22 +1,69 @@
1#ifndef _LINUX_JUMP_LABEL_H 1#ifndef _LINUX_JUMP_LABEL_H
2#define _LINUX_JUMP_LABEL_H 2#define _LINUX_JUMP_LABEL_H
3 3
4/*
5 * Jump label support
6 *
7 * Copyright (C) 2009-2012 Jason Baron <jbaron@redhat.com>
8 * Copyright (C) 2011-2012 Peter Zijlstra <pzijlstr@redhat.com>
9 *
10 * Jump labels provide an interface to generate dynamic branches using
 11 * self-modifying code. Assuming toolchain and architecture support, the result
 12 * of an "if (static_key_false(&key))" statement is an unconditional branch (which
 13 * defaults to false - and the true block is placed out of line).
14 *
15 * However at runtime we can change the branch target using
16 * static_key_slow_{inc,dec}(). These function as a 'reference' count on the key
17 * object and for as long as there are references all branches referring to
18 * that particular key will point to the (out of line) true block.
19 *
 20 * Since this relies on modifying code, the static_key_slow_{inc,dec}() functions
21 * must be considered absolute slow paths (machine wide synchronization etc.).
22 * OTOH, since the affected branches are unconditional their runtime overhead
23 * will be absolutely minimal, esp. in the default (off) case where the total
24 * effect is a single NOP of appropriate size. The on case will patch in a jump
25 * to the out-of-line block.
26 *
27 * When the control is directly exposed to userspace it is prudent to delay the
28 * decrement to avoid high frequency code modifications which can (and do)
29 * cause significant performance degradation. Struct static_key_deferred and
30 * static_key_slow_dec_deferred() provide for this.
31 *
 32 * Lacking toolchain and/or architecture support, it falls back to a simple
33 * conditional branch.
34 *
35 * struct static_key my_key = STATIC_KEY_INIT_TRUE;
36 *
37 * if (static_key_true(&my_key)) {
38 * }
39 *
 40 * will result in the true case being in-line and will start the key with a
 41 * single reference. Mixing static_key_true() and static_key_false() on the same key is not
42 * allowed.
43 *
44 * Not initializing the key (static data is initialized to 0s anyway) is the
45 * same as using STATIC_KEY_INIT_FALSE and static_key_false() is
 46 * equivalent to static_branch().
47 *
 48 */
49
4#include <linux/types.h> 50#include <linux/types.h>
5#include <linux/compiler.h> 51#include <linux/compiler.h>
6#include <linux/workqueue.h> 52#include <linux/workqueue.h>
7 53
8#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) 54#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
9 55
10struct jump_label_key { 56struct static_key {
11 atomic_t enabled; 57 atomic_t enabled;
 58/* Set lsb bit to 1 if branch is default true, 0 otherwise */
12 struct jump_entry *entries; 59 struct jump_entry *entries;
13#ifdef CONFIG_MODULES 60#ifdef CONFIG_MODULES
14 struct jump_label_mod *next; 61 struct static_key_mod *next;
15#endif 62#endif
16}; 63};
17 64
18struct jump_label_key_deferred { 65struct static_key_deferred {
19 struct jump_label_key key; 66 struct static_key key;
20 unsigned long timeout; 67 unsigned long timeout;
21 struct delayed_work work; 68 struct delayed_work work;
22}; 69};
@@ -34,13 +81,34 @@ struct module;
34 81
35#ifdef HAVE_JUMP_LABEL 82#ifdef HAVE_JUMP_LABEL
36 83
37#ifdef CONFIG_MODULES 84#define JUMP_LABEL_TRUE_BRANCH 1UL
38#define JUMP_LABEL_INIT {ATOMIC_INIT(0), NULL, NULL} 85
39#else 86static
40#define JUMP_LABEL_INIT {ATOMIC_INIT(0), NULL} 87inline struct jump_entry *jump_label_get_entries(struct static_key *key)
41#endif 88{
89 return (struct jump_entry *)((unsigned long)key->entries
90 & ~JUMP_LABEL_TRUE_BRANCH);
91}
42 92
43static __always_inline bool static_branch(struct jump_label_key *key) 93static inline bool jump_label_get_branch_default(struct static_key *key)
94{
95 if ((unsigned long)key->entries & JUMP_LABEL_TRUE_BRANCH)
96 return true;
97 return false;
98}
99
100static __always_inline bool static_key_false(struct static_key *key)
101{
102 return arch_static_branch(key);
103}
104
105static __always_inline bool static_key_true(struct static_key *key)
106{
107 return !static_key_false(key);
108}
109
 110/* Deprecated. Please use 'static_key_false()' instead. */
111static __always_inline bool static_branch(struct static_key *key)
44{ 112{
45 return arch_static_branch(key); 113 return arch_static_branch(key);
46} 114}
@@ -56,21 +124,23 @@ extern void arch_jump_label_transform(struct jump_entry *entry,
56extern void arch_jump_label_transform_static(struct jump_entry *entry, 124extern void arch_jump_label_transform_static(struct jump_entry *entry,
57 enum jump_label_type type); 125 enum jump_label_type type);
58extern int jump_label_text_reserved(void *start, void *end); 126extern int jump_label_text_reserved(void *start, void *end);
59extern void jump_label_inc(struct jump_label_key *key); 127extern void static_key_slow_inc(struct static_key *key);
60extern void jump_label_dec(struct jump_label_key *key); 128extern void static_key_slow_dec(struct static_key *key);
61extern void jump_label_dec_deferred(struct jump_label_key_deferred *key); 129extern void static_key_slow_dec_deferred(struct static_key_deferred *key);
62extern bool jump_label_enabled(struct jump_label_key *key);
63extern void jump_label_apply_nops(struct module *mod); 130extern void jump_label_apply_nops(struct module *mod);
64extern void jump_label_rate_limit(struct jump_label_key_deferred *key, 131extern void
65 unsigned long rl); 132jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl);
133
134#define STATIC_KEY_INIT_TRUE ((struct static_key) \
135 { .enabled = ATOMIC_INIT(1), .entries = (void *)1 })
136#define STATIC_KEY_INIT_FALSE ((struct static_key) \
137 { .enabled = ATOMIC_INIT(0), .entries = (void *)0 })
66 138
67#else /* !HAVE_JUMP_LABEL */ 139#else /* !HAVE_JUMP_LABEL */
68 140
69#include <linux/atomic.h> 141#include <linux/atomic.h>
70 142
71#define JUMP_LABEL_INIT {ATOMIC_INIT(0)} 143struct static_key {
72
73struct jump_label_key {
74 atomic_t enabled; 144 atomic_t enabled;
75}; 145};
76 146
@@ -78,30 +148,45 @@ static __always_inline void jump_label_init(void)
78{ 148{
79} 149}
80 150
81struct jump_label_key_deferred { 151struct static_key_deferred {
82 struct jump_label_key key; 152 struct static_key key;
83}; 153};
84 154
85static __always_inline bool static_branch(struct jump_label_key *key) 155static __always_inline bool static_key_false(struct static_key *key)
156{
 157	if (unlikely(atomic_read(&key->enabled) > 0))
158 return true;
159 return false;
160}
161
162static __always_inline bool static_key_true(struct static_key *key)
86{ 163{
 87	if (unlikely(atomic_read(&key->enabled)))	 164	if (likely(atomic_read(&key->enabled) > 0))
88 return true; 165 return true;
89 return false; 166 return false;
90} 167}
91 168
 92static inline void jump_label_inc(struct jump_label_key *key)	 169/* Deprecated. Please use 'static_key_false()' instead. */
170static __always_inline bool static_branch(struct static_key *key)
171{
 172	if (unlikely(atomic_read(&key->enabled) > 0))
173 return true;
174 return false;
175}
176
177static inline void static_key_slow_inc(struct static_key *key)
93{ 178{
94 atomic_inc(&key->enabled); 179 atomic_inc(&key->enabled);
95} 180}
96 181
97static inline void jump_label_dec(struct jump_label_key *key) 182static inline void static_key_slow_dec(struct static_key *key)
98{ 183{
99 atomic_dec(&key->enabled); 184 atomic_dec(&key->enabled);
100} 185}
101 186
102static inline void jump_label_dec_deferred(struct jump_label_key_deferred *key) 187static inline void static_key_slow_dec_deferred(struct static_key_deferred *key)
103{ 188{
104 jump_label_dec(&key->key); 189 static_key_slow_dec(&key->key);
105} 190}
106 191
107static inline int jump_label_text_reserved(void *start, void *end) 192static inline int jump_label_text_reserved(void *start, void *end)
@@ -112,23 +197,30 @@ static inline int jump_label_text_reserved(void *start, void *end)
112static inline void jump_label_lock(void) {} 197static inline void jump_label_lock(void) {}
113static inline void jump_label_unlock(void) {} 198static inline void jump_label_unlock(void) {}
114 199
115static inline bool jump_label_enabled(struct jump_label_key *key)
116{
117 return !!atomic_read(&key->enabled);
118}
119
120static inline int jump_label_apply_nops(struct module *mod) 200static inline int jump_label_apply_nops(struct module *mod)
121{ 201{
122 return 0; 202 return 0;
123} 203}
124 204
125static inline void jump_label_rate_limit(struct jump_label_key_deferred *key, 205static inline void
206jump_label_rate_limit(struct static_key_deferred *key,
126 unsigned long rl) 207 unsigned long rl)
127{ 208{
128} 209}
210
211#define STATIC_KEY_INIT_TRUE ((struct static_key) \
212 { .enabled = ATOMIC_INIT(1) })
213#define STATIC_KEY_INIT_FALSE ((struct static_key) \
214 { .enabled = ATOMIC_INIT(0) })
215
129#endif /* HAVE_JUMP_LABEL */ 216#endif /* HAVE_JUMP_LABEL */
130 217
131#define jump_label_key_enabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(1), }) 218#define STATIC_KEY_INIT STATIC_KEY_INIT_FALSE
132#define jump_label_key_disabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(0), }) 219#define jump_label_enabled static_key_enabled
220
221static inline bool static_key_enabled(struct static_key *key)
222{
223 return (atomic_read(&key->enabled) > 0);
224}
133 225
134#endif /* _LINUX_JUMP_LABEL_H */ 226#endif /* _LINUX_JUMP_LABEL_H */
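Fleshing out the header comment's STATIC_KEY_INIT_TRUE fragment into a complete sketch (the *_work() helpers are hypothetical):

    static struct static_key my_feature = STATIC_KEY_INIT_TRUE;

    static void do_common_work(void) { }
    static void do_rare_work(void) { }

    static void hot_path(void)
    {
            if (static_key_true(&my_feature))
                    do_common_work();       /* in-line; a NOP in the branch slot */
            else
                    do_rare_work();         /* taken once the count drops to 0 */
    }

static_key_slow_dec(&my_feature) flips every such branch via live code patching; where userspace drives the count, a struct static_key_deferred plus jump_label_rate_limit(&dkey, HZ) batches those expensive transitions.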
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0eac07c95255..7dfaae7846ab 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -214,8 +214,8 @@ enum {
214#include <linux/skbuff.h> 214#include <linux/skbuff.h>
215 215
216#ifdef CONFIG_RPS 216#ifdef CONFIG_RPS
217#include <linux/jump_label.h> 217#include <linux/static_key.h>
218extern struct jump_label_key rps_needed; 218extern struct static_key rps_needed;
219#endif 219#endif
220 220
221struct neighbour; 221struct neighbour;
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index b809265607d0..29734be334c1 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -163,13 +163,13 @@ extern struct ctl_path nf_net_ipv4_netfilter_sysctl_path[];
163extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; 163extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
164 164
165#if defined(CONFIG_JUMP_LABEL) 165#if defined(CONFIG_JUMP_LABEL)
166#include <linux/jump_label.h> 166#include <linux/static_key.h>
167extern struct jump_label_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; 167extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
168static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) 168static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
169{ 169{
170 if (__builtin_constant_p(pf) && 170 if (__builtin_constant_p(pf) &&
171 __builtin_constant_p(hook)) 171 __builtin_constant_p(hook))
172 return static_branch(&nf_hooks_needed[pf][hook]); 172 return static_key_false(&nf_hooks_needed[pf][hook]);
173 173
174 return !list_empty(&nf_hooks[pf][hook]); 174 return !list_empty(&nf_hooks[pf][hook]);
175} 175}
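Note the constraint encoded here: nf_hooks_active() only reaches the static key when both arguments are compile-time constants, since each (pf, hook) pair owns its own key; non-constant callers keep the plain list check. A constant-argument caller might read (run_hooks() is made up for the sketch):

    if (nf_hooks_active(NFPROTO_IPV4, NF_INET_LOCAL_OUT))
            verdict = run_hooks(skb);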
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index abb2776be1ba..bd9f55a5958d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -129,11 +129,40 @@ enum perf_event_sample_format {
129 PERF_SAMPLE_PERIOD = 1U << 8, 129 PERF_SAMPLE_PERIOD = 1U << 8,
130 PERF_SAMPLE_STREAM_ID = 1U << 9, 130 PERF_SAMPLE_STREAM_ID = 1U << 9,
131 PERF_SAMPLE_RAW = 1U << 10, 131 PERF_SAMPLE_RAW = 1U << 10,
132 PERF_SAMPLE_BRANCH_STACK = 1U << 11,
132 133
133 PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ 134 PERF_SAMPLE_MAX = 1U << 12, /* non-ABI */
134}; 135};
135 136
136/* 137/*
 138 * values to program into branch_sample_type when PERF_SAMPLE_BRANCH_STACK is set
139 *
140 * If the user does not pass priv level information via branch_sample_type,
141 * the kernel uses the event's priv level. Branch and event priv levels do
142 * not have to match. Branch priv level is checked for permissions.
143 *
144 * The branch types can be combined, however BRANCH_ANY covers all types
145 * of branches and therefore it supersedes all the other types.
146 */
147enum perf_branch_sample_type {
148 PERF_SAMPLE_BRANCH_USER = 1U << 0, /* user branches */
149 PERF_SAMPLE_BRANCH_KERNEL = 1U << 1, /* kernel branches */
150 PERF_SAMPLE_BRANCH_HV = 1U << 2, /* hypervisor branches */
151
152 PERF_SAMPLE_BRANCH_ANY = 1U << 3, /* any branch types */
153 PERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4, /* any call branch */
154 PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5, /* any return branch */
155 PERF_SAMPLE_BRANCH_IND_CALL = 1U << 6, /* indirect calls */
156
157 PERF_SAMPLE_BRANCH_MAX = 1U << 7, /* non-ABI */
158};
159
160#define PERF_SAMPLE_BRANCH_PLM_ALL \
161 (PERF_SAMPLE_BRANCH_USER|\
162 PERF_SAMPLE_BRANCH_KERNEL|\
163 PERF_SAMPLE_BRANCH_HV)
164
165/*
137 * The format of the data returned by read() on a perf event fd, 166 * The format of the data returned by read() on a perf event fd,
138 * as specified by attr.read_format: 167 * as specified by attr.read_format:
139 * 168 *
@@ -163,6 +192,8 @@ enum perf_event_read_format {
163}; 192};
164 193
165#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ 194#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
195#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */
196#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */
166 197
167/* 198/*
168 * Hardware event_id to monitor via a performance monitoring event: 199 * Hardware event_id to monitor via a performance monitoring event:
@@ -240,6 +271,7 @@ struct perf_event_attr {
240 __u64 bp_len; 271 __u64 bp_len;
241 __u64 config2; /* extension of config1 */ 272 __u64 config2; /* extension of config1 */
242 }; 273 };
274 __u64 branch_sample_type; /* enum branch_sample_type */
243}; 275};
244 276
245/* 277/*
@@ -291,12 +323,14 @@ struct perf_event_mmap_page {
291 __s64 offset; /* add to hardware event value */ 323 __s64 offset; /* add to hardware event value */
292 __u64 time_enabled; /* time event active */ 324 __u64 time_enabled; /* time event active */
293 __u64 time_running; /* time event on cpu */ 325 __u64 time_running; /* time event on cpu */
326 __u32 time_mult, time_shift;
327 __u64 time_offset;
294 328
295 /* 329 /*
296 * Hole for extension of the self monitor capabilities 330 * Hole for extension of the self monitor capabilities
297 */ 331 */
298 332
299 __u64 __reserved[123]; /* align to 1k */ 333 __u64 __reserved[121]; /* align to 1k */
300 334
301 /* 335 /*
302 * Control data for the mmap() data buffer. 336 * Control data for the mmap() data buffer.
@@ -456,6 +490,8 @@ enum perf_event_type {
456 * 490 *
457 * { u32 size; 491 * { u32 size;
458 * char data[size];}&& PERF_SAMPLE_RAW 492 * char data[size];}&& PERF_SAMPLE_RAW
493 *
494 * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
459 * }; 495 * };
460 */ 496 */
461 PERF_RECORD_SAMPLE = 9, 497 PERF_RECORD_SAMPLE = 9,
@@ -512,7 +548,7 @@ struct perf_guest_info_callbacks {
512#include <linux/ftrace.h> 548#include <linux/ftrace.h>
513#include <linux/cpu.h> 549#include <linux/cpu.h>
514#include <linux/irq_work.h> 550#include <linux/irq_work.h>
515#include <linux/jump_label.h> 551#include <linux/static_key.h>
516#include <linux/atomic.h> 552#include <linux/atomic.h>
517#include <asm/local.h> 553#include <asm/local.h>
518 554
@@ -528,12 +564,34 @@ struct perf_raw_record {
528 void *data; 564 void *data;
529}; 565};
530 566
567/*
568 * single taken branch record layout:
569 *
570 * from: source instruction (may not always be a branch insn)
571 * to: branch target
572 * mispred: branch target was mispredicted
573 * predicted: branch target was predicted
574 *
575 * support for mispred, predicted is optional. In case it
576 * is not supported mispred = predicted = 0.
577 */
531struct perf_branch_entry { 578struct perf_branch_entry {
532 __u64 from; 579 __u64 from;
533 __u64 to; 580 __u64 to;
534 __u64 flags; 581 __u64 mispred:1, /* target mispredicted */
582 predicted:1,/* target predicted */
583 reserved:62;
535}; 584};
536 585
586/*
587 * branch stack layout:
588 * nr: number of taken branches stored in entries[]
589 *
 590 * Note that nr can vary from sample to sample.
 591 * Branches (to, from) are stored from most recent
592 * to least recent, i.e., entries[0] contains the most
593 * recent branch.
594 */
537struct perf_branch_stack { 595struct perf_branch_stack {
538 __u64 nr; 596 __u64 nr;
539 struct perf_branch_entry entries[0]; 597 struct perf_branch_entry entries[0];
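A consumer walking a stack delivered in this layout, most recent entry first, might look like (a sketch; dump_branches() is hypothetical):

    static void dump_branches(const struct perf_branch_stack *bs)
    {
            u64 i;

            for (i = 0; i < bs->nr; i++)
                    pr_info("from=%llx to=%llx mispred=%d\n",
                            (unsigned long long)bs->entries[i].from,
                            (unsigned long long)bs->entries[i].to,
                            (int)bs->entries[i].mispred);
    }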
@@ -564,7 +622,9 @@ struct hw_perf_event {
564 unsigned long event_base; 622 unsigned long event_base;
565 int idx; 623 int idx;
566 int last_cpu; 624 int last_cpu;
625
567 struct hw_perf_event_extra extra_reg; 626 struct hw_perf_event_extra extra_reg;
627 struct hw_perf_event_extra branch_reg;
568 }; 628 };
569 struct { /* software */ 629 struct { /* software */
570 struct hrtimer hrtimer; 630 struct hrtimer hrtimer;
@@ -616,6 +676,7 @@ struct pmu {
616 struct list_head entry; 676 struct list_head entry;
617 677
618 struct device *dev; 678 struct device *dev;
679 const struct attribute_group **attr_groups;
619 char *name; 680 char *name;
620 int type; 681 int type;
621 682
@@ -681,6 +742,17 @@ struct pmu {
681 * for each successful ->add() during the transaction. 742 * for each successful ->add() during the transaction.
682 */ 743 */
683 void (*cancel_txn) (struct pmu *pmu); /* optional */ 744 void (*cancel_txn) (struct pmu *pmu); /* optional */
745
746 /*
 747	 * Will return the value for perf_event_mmap_page::index for this event;
 748	 * if no implementation is provided, it will default to: event->hw.idx + 1.
 749	 */
 750	int (*event_idx)		(struct perf_event *event); /* optional */
751
752 /*
753 * flush branch stack on context-switches (needed in cpu-wide mode)
754 */
755 void (*flush_branch_stack) (void);
684}; 756};
685 757
686/** 758/**
@@ -850,6 +922,9 @@ struct perf_event {
850#ifdef CONFIG_EVENT_TRACING 922#ifdef CONFIG_EVENT_TRACING
851 struct ftrace_event_call *tp_event; 923 struct ftrace_event_call *tp_event;
852 struct event_filter *filter; 924 struct event_filter *filter;
925#ifdef CONFIG_FUNCTION_TRACER
926 struct ftrace_ops ftrace_ops;
927#endif
853#endif 928#endif
854 929
855#ifdef CONFIG_CGROUP_PERF 930#ifdef CONFIG_CGROUP_PERF
@@ -911,7 +986,8 @@ struct perf_event_context {
911 u64 parent_gen; 986 u64 parent_gen;
912 u64 generation; 987 u64 generation;
913 int pin_count; 988 int pin_count;
914 int nr_cgroups; /* cgroup events present */ 989 int nr_cgroups; /* cgroup evts */
990 int nr_branch_stack; /* branch_stack evt */
915 struct rcu_head rcu_head; 991 struct rcu_head rcu_head;
916}; 992};
917 993
@@ -976,6 +1052,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr,
976extern u64 perf_event_read_value(struct perf_event *event, 1052extern u64 perf_event_read_value(struct perf_event *event,
977 u64 *enabled, u64 *running); 1053 u64 *enabled, u64 *running);
978 1054
1055
979struct perf_sample_data { 1056struct perf_sample_data {
980 u64 type; 1057 u64 type;
981 1058
@@ -995,12 +1072,14 @@ struct perf_sample_data {
995 u64 period; 1072 u64 period;
996 struct perf_callchain_entry *callchain; 1073 struct perf_callchain_entry *callchain;
997 struct perf_raw_record *raw; 1074 struct perf_raw_record *raw;
1075 struct perf_branch_stack *br_stack;
998}; 1076};
999 1077
1000static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr) 1078static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr)
1001{ 1079{
1002 data->addr = addr; 1080 data->addr = addr;
1003 data->raw = NULL; 1081 data->raw = NULL;
1082 data->br_stack = NULL;
1004} 1083}
1005 1084
1006extern void perf_output_sample(struct perf_output_handle *handle, 1085extern void perf_output_sample(struct perf_output_handle *handle,
@@ -1029,7 +1108,7 @@ static inline int is_software_event(struct perf_event *event)
1029 return event->pmu->task_ctx_nr == perf_sw_context; 1108 return event->pmu->task_ctx_nr == perf_sw_context;
1030} 1109}
1031 1110
1032extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; 1111extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
1033 1112
1034extern void __perf_sw_event(u32, u64, struct pt_regs *, u64); 1113extern void __perf_sw_event(u32, u64, struct pt_regs *, u64);
1035 1114
@@ -1057,7 +1136,7 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
1057{ 1136{
1058 struct pt_regs hot_regs; 1137 struct pt_regs hot_regs;
1059 1138
1060 if (static_branch(&perf_swevent_enabled[event_id])) { 1139 if (static_key_false(&perf_swevent_enabled[event_id])) {
1061 if (!regs) { 1140 if (!regs) {
1062 perf_fetch_caller_regs(&hot_regs); 1141 perf_fetch_caller_regs(&hot_regs);
1063 regs = &hot_regs; 1142 regs = &hot_regs;
@@ -1066,12 +1145,12 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
1066 } 1145 }
1067} 1146}
1068 1147
1069extern struct jump_label_key_deferred perf_sched_events; 1148extern struct static_key_deferred perf_sched_events;
1070 1149
1071static inline void perf_event_task_sched_in(struct task_struct *prev, 1150static inline void perf_event_task_sched_in(struct task_struct *prev,
1072 struct task_struct *task) 1151 struct task_struct *task)
1073{ 1152{
1074 if (static_branch(&perf_sched_events.key)) 1153 if (static_key_false(&perf_sched_events.key))
1075 __perf_event_task_sched_in(prev, task); 1154 __perf_event_task_sched_in(prev, task);
1076} 1155}
1077 1156
@@ -1080,7 +1159,7 @@ static inline void perf_event_task_sched_out(struct task_struct *prev,
1080{ 1159{
1081 perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0); 1160 perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0);
1082 1161
1083 if (static_branch(&perf_sched_events.key)) 1162 if (static_key_false(&perf_sched_events.key))
1084 __perf_event_task_sched_out(prev, next); 1163 __perf_event_task_sched_out(prev, next);
1085} 1164}
1086 1165
@@ -1139,6 +1218,11 @@ extern void perf_bp_event(struct perf_event *event, void *data);
1139# define perf_instruction_pointer(regs) instruction_pointer(regs) 1218# define perf_instruction_pointer(regs) instruction_pointer(regs)
1140#endif 1219#endif
1141 1220
1221static inline bool has_branch_stack(struct perf_event *event)
1222{
1223 return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
1224}
1225
1142extern int perf_output_begin(struct perf_output_handle *handle, 1226extern int perf_output_begin(struct perf_output_handle *handle,
1143 struct perf_event *event, unsigned int size); 1227 struct perf_event *event, unsigned int size);
1144extern void perf_output_end(struct perf_output_handle *handle); 1228extern void perf_output_end(struct perf_output_handle *handle);
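Tying the new fields together from the user side, here is a sketch of an attr requesting user-level call-branch sampling; only the constants come from the header above, the wrapper function is an assumption:

    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static struct perf_event_attr attr = {
            .type               = PERF_TYPE_HARDWARE,
            .config             = PERF_COUNT_HW_CPU_CYCLES,
            .size               = PERF_ATTR_SIZE_VER2,  /* covers branch_sample_type */
            .sample_period      = 100000,
            .sample_type        = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK,
            .branch_sample_type = PERF_SAMPLE_BRANCH_ANY_CALL |
                                  PERF_SAMPLE_BRANCH_USER,
            .exclude_kernel     = 1,
    };

    int open_branch_event(void)
    {
            return syscall(__NR_perf_event_open, &attr,
                           0 /* self */, -1 /* any cpu */, -1, 0);
    }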
diff --git a/include/linux/static_key.h b/include/linux/static_key.h
new file mode 100644
index 000000000000..27bd3f8a0857
--- /dev/null
+++ b/include/linux/static_key.h
@@ -0,0 +1 @@
#include <linux/jump_label.h>
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index df0a779c1bbd..bd96ecd0e05c 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -17,7 +17,7 @@
17#include <linux/errno.h> 17#include <linux/errno.h>
18#include <linux/types.h> 18#include <linux/types.h>
19#include <linux/rcupdate.h> 19#include <linux/rcupdate.h>
20#include <linux/jump_label.h> 20#include <linux/static_key.h>
21 21
22struct module; 22struct module;
23struct tracepoint; 23struct tracepoint;
@@ -29,7 +29,7 @@ struct tracepoint_func {
29 29
30struct tracepoint { 30struct tracepoint {
31 const char *name; /* Tracepoint name */ 31 const char *name; /* Tracepoint name */
32 struct jump_label_key key; 32 struct static_key key;
33 void (*regfunc)(void); 33 void (*regfunc)(void);
34 void (*unregfunc)(void); 34 void (*unregfunc)(void);
35 struct tracepoint_func __rcu *funcs; 35 struct tracepoint_func __rcu *funcs;
@@ -114,7 +114,7 @@ static inline void tracepoint_synchronize_unregister(void)
114 * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just 114 * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just
115 * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto". 115 * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto".
116 */ 116 */
117#define __DO_TRACE(tp, proto, args, cond) \ 117#define __DO_TRACE(tp, proto, args, cond, prercu, postrcu) \
118 do { \ 118 do { \
119 struct tracepoint_func *it_func_ptr; \ 119 struct tracepoint_func *it_func_ptr; \
120 void *it_func; \ 120 void *it_func; \
@@ -122,6 +122,7 @@ static inline void tracepoint_synchronize_unregister(void)
122 \ 122 \
123 if (!(cond)) \ 123 if (!(cond)) \
124 return; \ 124 return; \
125 prercu; \
125 rcu_read_lock_sched_notrace(); \ 126 rcu_read_lock_sched_notrace(); \
126 it_func_ptr = rcu_dereference_sched((tp)->funcs); \ 127 it_func_ptr = rcu_dereference_sched((tp)->funcs); \
127 if (it_func_ptr) { \ 128 if (it_func_ptr) { \
@@ -132,6 +133,7 @@ static inline void tracepoint_synchronize_unregister(void)
132 } while ((++it_func_ptr)->func); \ 133 } while ((++it_func_ptr)->func); \
133 } \ 134 } \
134 rcu_read_unlock_sched_notrace(); \ 135 rcu_read_unlock_sched_notrace(); \
136 postrcu; \
135 } while (0) 137 } while (0)
136 138
137/* 139/*
@@ -139,15 +141,25 @@ static inline void tracepoint_synchronize_unregister(void)
139 * not add unwanted padding between the beginning of the section and the 141 * not add unwanted padding between the beginning of the section and the
140 * structure. Force alignment to the same alignment as the section start. 142 * structure. Force alignment to the same alignment as the section start.
141 */ 143 */
142#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ 144#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
143 extern struct tracepoint __tracepoint_##name; \ 145 extern struct tracepoint __tracepoint_##name; \
144 static inline void trace_##name(proto) \ 146 static inline void trace_##name(proto) \
145 { \ 147 { \
148 if (static_key_false(&__tracepoint_##name.key)) \
149 __DO_TRACE(&__tracepoint_##name, \
150 TP_PROTO(data_proto), \
151 TP_ARGS(data_args), \
152 TP_CONDITION(cond),,); \
153 } \
154 static inline void trace_##name##_rcuidle(proto) \
155 { \
146 if (static_branch(&__tracepoint_##name.key)) \ 156 if (static_branch(&__tracepoint_##name.key)) \
147 __DO_TRACE(&__tracepoint_##name, \ 157 __DO_TRACE(&__tracepoint_##name, \
148 TP_PROTO(data_proto), \ 158 TP_PROTO(data_proto), \
149 TP_ARGS(data_args), \ 159 TP_ARGS(data_args), \
150 TP_CONDITION(cond)); \ 160 TP_CONDITION(cond), \
161 rcu_idle_exit(), \
162 rcu_idle_enter()); \
151 } \ 163 } \
152 static inline int \ 164 static inline int \
153 register_trace_##name(void (*probe)(data_proto), void *data) \ 165 register_trace_##name(void (*probe)(data_proto), void *data) \
@@ -176,7 +188,7 @@ static inline void tracepoint_synchronize_unregister(void)
176 __attribute__((section("__tracepoints_strings"))) = #name; \ 188 __attribute__((section("__tracepoints_strings"))) = #name; \
177 struct tracepoint __tracepoint_##name \ 189 struct tracepoint __tracepoint_##name \
178 __attribute__((section("__tracepoints"))) = \ 190 __attribute__((section("__tracepoints"))) = \
179 { __tpstrtab_##name, JUMP_LABEL_INIT, reg, unreg, NULL };\ 191 { __tpstrtab_##name, STATIC_KEY_INIT_FALSE, reg, unreg, NULL };\
180 static struct tracepoint * const __tracepoint_ptr_##name __used \ 192 static struct tracepoint * const __tracepoint_ptr_##name __used \
181 __attribute__((section("__tracepoints_ptrs"))) = \ 193 __attribute__((section("__tracepoints_ptrs"))) = \
182 &__tracepoint_##name; 194 &__tracepoint_##name;
@@ -190,9 +202,11 @@ static inline void tracepoint_synchronize_unregister(void)
190 EXPORT_SYMBOL(__tracepoint_##name) 202 EXPORT_SYMBOL(__tracepoint_##name)
191 203
192#else /* !CONFIG_TRACEPOINTS */ 204#else /* !CONFIG_TRACEPOINTS */
193#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ 205#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
194 static inline void trace_##name(proto) \ 206 static inline void trace_##name(proto) \
195 { } \ 207 { } \
208 static inline void trace_##name##_rcuidle(proto) \
209 { } \
196 static inline int \ 210 static inline int \
197 register_trace_##name(void (*probe)(data_proto), \ 211 register_trace_##name(void (*probe)(data_proto), \
198 void *data) \ 212 void *data) \
diff --git a/include/net/sock.h b/include/net/sock.h
index 91c1c8baf020..dcde2d9268cd 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -55,7 +55,7 @@
55#include <linux/uaccess.h> 55#include <linux/uaccess.h>
56#include <linux/memcontrol.h> 56#include <linux/memcontrol.h>
57#include <linux/res_counter.h> 57#include <linux/res_counter.h>
58#include <linux/jump_label.h> 58#include <linux/static_key.h>
59 59
60#include <linux/filter.h> 60#include <linux/filter.h>
61#include <linux/rculist_nulls.h> 61#include <linux/rculist_nulls.h>
@@ -924,13 +924,13 @@ inline void sk_refcnt_debug_release(const struct sock *sk)
924#endif /* SOCK_REFCNT_DEBUG */ 924#endif /* SOCK_REFCNT_DEBUG */
925 925
926#if defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) && defined(CONFIG_NET) 926#if defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) && defined(CONFIG_NET)
927extern struct jump_label_key memcg_socket_limit_enabled; 927extern struct static_key memcg_socket_limit_enabled;
928static inline struct cg_proto *parent_cg_proto(struct proto *proto, 928static inline struct cg_proto *parent_cg_proto(struct proto *proto,
929 struct cg_proto *cg_proto) 929 struct cg_proto *cg_proto)
930{ 930{
931 return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg)); 931 return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg));
932} 932}
933#define mem_cgroup_sockets_enabled static_branch(&memcg_socket_limit_enabled) 933#define mem_cgroup_sockets_enabled static_key_false(&memcg_socket_limit_enabled)
934#else 934#else
935#define mem_cgroup_sockets_enabled 0 935#define mem_cgroup_sockets_enabled 0
936static inline struct cg_proto *parent_cg_proto(struct proto *proto, 936static inline struct cg_proto *parent_cg_proto(struct proto *proto,
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index 1bcc2a8c00e2..14b38940062b 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -151,6 +151,8 @@ enum {
151 events get removed */ 151 events get removed */
152static inline void trace_power_start(u64 type, u64 state, u64 cpuid) {}; 152static inline void trace_power_start(u64 type, u64 state, u64 cpuid) {};
153static inline void trace_power_end(u64 cpuid) {}; 153static inline void trace_power_end(u64 cpuid) {};
154static inline void trace_power_start_rcuidle(u64 type, u64 state, u64 cpuid) {};
155static inline void trace_power_end_rcuidle(u64 cpuid) {};
154static inline void trace_power_frequency(u64 type, u64 state, u64 cpuid) {}; 156static inline void trace_power_frequency(u64 type, u64 state, u64 cpuid) {};
155#endif /* _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED */ 157#endif /* _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED */
156 158
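The _rcuidle variants exist for call sites where RCU is not watching: the generated tracepoint brackets the probe with rcu_idle_exit()/rcu_idle_enter(). A sketch modeled on cpuidle-style code (enter_idle_state() is hypothetical; POWER_CSTATE is from the deprecated power enum):

    static void enter_idle_state(int cpu, u64 state)
    {
            trace_power_start_rcuidle(POWER_CSTATE, state, cpu);
            /* architecture idle entry happens here, with RCU not watching */
            trace_power_end_rcuidle(cpu);
    }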
diff --git a/include/trace/events/printk.h b/include/trace/events/printk.h
new file mode 100644
index 000000000000..94ec79cc011a
--- /dev/null
+++ b/include/trace/events/printk.h
@@ -0,0 +1,41 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM printk
3
4#if !defined(_TRACE_PRINTK_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_PRINTK_H
6
7#include <linux/tracepoint.h>
8
9TRACE_EVENT_CONDITION(console,
10 TP_PROTO(const char *log_buf, unsigned start, unsigned end,
11 unsigned log_buf_len),
12
13 TP_ARGS(log_buf, start, end, log_buf_len),
14
15 TP_CONDITION(start != end),
16
17 TP_STRUCT__entry(
18 __dynamic_array(char, msg, end - start + 1)
19 ),
20
21 TP_fast_assign(
22 if ((start & (log_buf_len - 1)) > (end & (log_buf_len - 1))) {
23 memcpy(__get_dynamic_array(msg),
24 log_buf + (start & (log_buf_len - 1)),
25 log_buf_len - (start & (log_buf_len - 1)));
26 memcpy((char *)__get_dynamic_array(msg) +
27 log_buf_len - (start & (log_buf_len - 1)),
28 log_buf, end & (log_buf_len - 1));
29 } else
30 memcpy(__get_dynamic_array(msg),
31 log_buf + (start & (log_buf_len - 1)),
32 end - start);
33 ((char *)__get_dynamic_array(msg))[end - start] = 0;
34 ),
35
36 TP_printk("%s", __get_str(msg))
37);
38#endif /* _TRACE_PRINTK_H */
39
40/* This part must be outside protection */
41#include <trace/define_trace.h>
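The TP_fast_assign() above handles the wrap-around of the printk ring buffer; restated as a plain helper (a sketch, assuming log_buf_len is a power of two, as it is for printk):

    static void copy_log_range(char *dst, const char *log_buf,
                               unsigned start, unsigned end, unsigned log_buf_len)
    {
            unsigned mask = log_buf_len - 1;

            if ((start & mask) > (end & mask)) {    /* range wraps the buffer */
                    unsigned first = log_buf_len - (start & mask);

                    memcpy(dst, log_buf + (start & mask), first);
                    memcpy(dst + first, log_buf, end & mask);
            } else {
                    memcpy(dst, log_buf + (start & mask), end - start);
            }
            dst[end - start] = '\0';
    }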
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index e33ed1bfa113..fbc7b1ad929b 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -6,6 +6,7 @@
6 6
7#include <linux/sched.h> 7#include <linux/sched.h>
8#include <linux/tracepoint.h> 8#include <linux/tracepoint.h>
9#include <linux/binfmts.h>
9 10
10/* 11/*
11 * Tracepoint for calling kthread_stop, performed to end a kthread: 12 * Tracepoint for calling kthread_stop, performed to end a kthread:
@@ -276,6 +277,32 @@ TRACE_EVENT(sched_process_fork,
276); 277);
277 278
278/* 279/*
280 * Tracepoint for exec:
281 */
282TRACE_EVENT(sched_process_exec,
283
284 TP_PROTO(struct task_struct *p, pid_t old_pid,
285 struct linux_binprm *bprm),
286
287 TP_ARGS(p, old_pid, bprm),
288
289 TP_STRUCT__entry(
290 __string( filename, bprm->filename )
291 __field( pid_t, pid )
292 __field( pid_t, old_pid )
293 ),
294
295 TP_fast_assign(
296 __assign_str(filename, bprm->filename);
297 __entry->pid = p->pid;
 298		__entry->old_pid	= old_pid;
299 ),
300
301 TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename),
302 __entry->pid, __entry->old_pid)
303);
304
305/*
279 * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE 306 * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
280 * adding sched_stat support to SCHED_FIFO/RR would be welcome. 307 * adding sched_stat support to SCHED_FIFO/RR would be welcome.
281 */ 308 */
diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h
index 17df43464df0..39a8a430d90f 100644
--- a/include/trace/events/signal.h
+++ b/include/trace/events/signal.h
@@ -23,11 +23,23 @@
23 } \ 23 } \
24 } while (0) 24 } while (0)
25 25
26#ifndef TRACE_HEADER_MULTI_READ
27enum {
28 TRACE_SIGNAL_DELIVERED,
29 TRACE_SIGNAL_IGNORED,
30 TRACE_SIGNAL_ALREADY_PENDING,
31 TRACE_SIGNAL_OVERFLOW_FAIL,
32 TRACE_SIGNAL_LOSE_INFO,
33};
34#endif
35
26/** 36/**
27 * signal_generate - called when a signal is generated 37 * signal_generate - called when a signal is generated
28 * @sig: signal number 38 * @sig: signal number
29 * @info: pointer to struct siginfo 39 * @info: pointer to struct siginfo
30 * @task: pointer to struct task_struct 40 * @task: pointer to struct task_struct
41 * @group: shared or private
42 * @result: TRACE_SIGNAL_*
31 * 43 *
32 * Current process sends a 'sig' signal to 'task' process with 44 * Current process sends a 'sig' signal to 'task' process with
33 * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV, 45 * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV,
@@ -37,9 +49,10 @@
37 */ 49 */
38TRACE_EVENT(signal_generate, 50TRACE_EVENT(signal_generate,
39 51
40 TP_PROTO(int sig, struct siginfo *info, struct task_struct *task), 52 TP_PROTO(int sig, struct siginfo *info, struct task_struct *task,
53 int group, int result),
41 54
42 TP_ARGS(sig, info, task), 55 TP_ARGS(sig, info, task, group, result),
43 56
44 TP_STRUCT__entry( 57 TP_STRUCT__entry(
45 __field( int, sig ) 58 __field( int, sig )
@@ -47,6 +60,8 @@ TRACE_EVENT(signal_generate,
47 __field( int, code ) 60 __field( int, code )
48 __array( char, comm, TASK_COMM_LEN ) 61 __array( char, comm, TASK_COMM_LEN )
49 __field( pid_t, pid ) 62 __field( pid_t, pid )
63 __field( int, group )
64 __field( int, result )
50 ), 65 ),
51 66
52 TP_fast_assign( 67 TP_fast_assign(
@@ -54,11 +69,14 @@ TRACE_EVENT(signal_generate,
54 TP_STORE_SIGINFO(__entry, info); 69 TP_STORE_SIGINFO(__entry, info);
55 memcpy(__entry->comm, task->comm, TASK_COMM_LEN); 70 memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
56 __entry->pid = task->pid; 71 __entry->pid = task->pid;
72 __entry->group = group;
73 __entry->result = result;
57 ), 74 ),
58 75
59 TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d", 76 TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d grp=%d res=%d",
60 __entry->sig, __entry->errno, __entry->code, 77 __entry->sig, __entry->errno, __entry->code,
61 __entry->comm, __entry->pid) 78 __entry->comm, __entry->pid, __entry->group,
79 __entry->result)
62); 80);
63 81
64/** 82/**
@@ -101,65 +119,6 @@ TRACE_EVENT(signal_deliver,
101 __entry->sa_handler, __entry->sa_flags) 119 __entry->sa_handler, __entry->sa_flags)
102); 120);
103 121
104DECLARE_EVENT_CLASS(signal_queue_overflow,
105
106 TP_PROTO(int sig, int group, struct siginfo *info),
107
108 TP_ARGS(sig, group, info),
109
110 TP_STRUCT__entry(
111 __field( int, sig )
112 __field( int, group )
113 __field( int, errno )
114 __field( int, code )
115 ),
116
117 TP_fast_assign(
118 __entry->sig = sig;
119 __entry->group = group;
120 TP_STORE_SIGINFO(__entry, info);
121 ),
122
123 TP_printk("sig=%d group=%d errno=%d code=%d",
124 __entry->sig, __entry->group, __entry->errno, __entry->code)
125);
126
127/**
128 * signal_overflow_fail - called when signal queue is overflow
129 * @sig: signal number
130 * @group: signal to process group or not (bool)
131 * @info: pointer to struct siginfo
132 *
133 * Kernel fails to generate 'sig' signal with 'info' siginfo, because
134 * siginfo queue is overflow, and the signal is dropped.
135 * 'group' is not 0 if the signal will be sent to a process group.
136 * 'sig' is always one of RT signals.
137 */
138DEFINE_EVENT(signal_queue_overflow, signal_overflow_fail,
139
140 TP_PROTO(int sig, int group, struct siginfo *info),
141
142 TP_ARGS(sig, group, info)
143);
144
145/**
146 * signal_lose_info - called when siginfo is lost
147 * @sig: signal number
148 * @group: signal to process group or not (bool)
149 * @info: pointer to struct siginfo
150 *
151 * Kernel generates 'sig' signal but loses 'info' siginfo, because siginfo
152 * queue is overflow.
153 * 'group' is not 0 if the signal will be sent to a process group.
154 * 'sig' is always one of non-RT signals.
155 */
156DEFINE_EVENT(signal_queue_overflow, signal_lose_info,
157
158 TP_PROTO(int sig, int group, struct siginfo *info),
159
160 TP_ARGS(sig, group, info)
161);
162
163#endif /* _TRACE_SIGNAL_H */ 122#endif /* _TRACE_SIGNAL_H */
164 123
165/* This part must be outside protection */ 124/* This part must be outside protection */
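The dropped overflow events are subsumed by the new result field on signal_generate. A probe distinguishing delivered from dropped signals, as a hypothetical module-side sketch:

    static void probe_signal_generate(void *ignore, int sig, struct siginfo *info,
                                      struct task_struct *task, int group,
                                      int result)
    {
            if (result == TRACE_SIGNAL_OVERFLOW_FAIL ||
                result == TRACE_SIGNAL_LOSE_INFO)
                    pr_warn("sig %d to %s dropped or degraded\n", sig, task->comm);
    }

    /* hooked up via register_trace_signal_generate(probe_signal_generate, NULL) */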
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1b5c081d8b9f..c61234b1a988 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -118,6 +118,13 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
118 PERF_FLAG_FD_OUTPUT |\ 118 PERF_FLAG_FD_OUTPUT |\
119 PERF_FLAG_PID_CGROUP) 119 PERF_FLAG_PID_CGROUP)
120 120
121/*
122 * branch priv levels that need permission checks
123 */
124#define PERF_SAMPLE_BRANCH_PERM_PLM \
125 (PERF_SAMPLE_BRANCH_KERNEL |\
126 PERF_SAMPLE_BRANCH_HV)
127
121enum event_type_t { 128enum event_type_t {
122 EVENT_FLEXIBLE = 0x1, 129 EVENT_FLEXIBLE = 0x1,
123 EVENT_PINNED = 0x2, 130 EVENT_PINNED = 0x2,
@@ -128,8 +135,9 @@ enum event_type_t {
128 * perf_sched_events : >0 events exist 135 * perf_sched_events : >0 events exist
129 * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu 136 * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
130 */ 137 */
131struct jump_label_key_deferred perf_sched_events __read_mostly; 138struct static_key_deferred perf_sched_events __read_mostly;
132static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); 139static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
140static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
133 141
134static atomic_t nr_mmap_events __read_mostly; 142static atomic_t nr_mmap_events __read_mostly;
135static atomic_t nr_comm_events __read_mostly; 143static atomic_t nr_comm_events __read_mostly;
@@ -881,6 +889,9 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
881 if (is_cgroup_event(event)) 889 if (is_cgroup_event(event))
882 ctx->nr_cgroups++; 890 ctx->nr_cgroups++;
883 891
892 if (has_branch_stack(event))
893 ctx->nr_branch_stack++;
894
884 list_add_rcu(&event->event_entry, &ctx->event_list); 895 list_add_rcu(&event->event_entry, &ctx->event_list);
885 if (!ctx->nr_events) 896 if (!ctx->nr_events)
886 perf_pmu_rotate_start(ctx->pmu); 897 perf_pmu_rotate_start(ctx->pmu);
@@ -1020,6 +1031,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
1020 cpuctx->cgrp = NULL; 1031 cpuctx->cgrp = NULL;
1021 } 1032 }
1022 1033
1034 if (has_branch_stack(event))
1035 ctx->nr_branch_stack--;
1036
1023 ctx->nr_events--; 1037 ctx->nr_events--;
1024 if (event->attr.inherit_stat) 1038 if (event->attr.inherit_stat)
1025 ctx->nr_stat--; 1039 ctx->nr_stat--;
@@ -2195,6 +2209,66 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
2195} 2209}
2196 2210
2197/* 2211/*
 2212 * When sampling the branch stack in system-wide mode, it may be necessary
2213 * to flush the stack on context switch. This happens when the branch
2214 * stack does not tag its entries with the pid of the current task.
2215 * Otherwise it becomes impossible to associate a branch entry with a
2216 * task. This ambiguity is more likely to appear when the branch stack
2217 * supports priv level filtering and the user sets it to monitor only
2218 * at the user level (which could be a useful measurement in system-wide
2219 * mode). In that case, the risk is high of having a branch stack with
2220 * branch from multiple tasks. Flushing may mean dropping the existing
2221 * entries or stashing them somewhere in the PMU specific code layer.
2222 *
2223 * This function provides the context switch callback to the lower code
2224 * layer. It is invoked ONLY when there is at least one system-wide context
2225 * with at least one active event using taken branch sampling.
2226 */
2227static void perf_branch_stack_sched_in(struct task_struct *prev,
2228 struct task_struct *task)
2229{
2230 struct perf_cpu_context *cpuctx;
2231 struct pmu *pmu;
2232 unsigned long flags;
2233
2234 /* no need to flush branch stack if not changing task */
2235 if (prev == task)
2236 return;
2237
2238 local_irq_save(flags);
2239
2240 rcu_read_lock();
2241
2242 list_for_each_entry_rcu(pmu, &pmus, entry) {
2243 cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
2244
2245 /*
2246 * check if the context has at least one
2247 * event using PERF_SAMPLE_BRANCH_STACK
2248 */
2249 if (cpuctx->ctx.nr_branch_stack > 0
2250 && pmu->flush_branch_stack) {
2251
2252 pmu = cpuctx->ctx.pmu;
2253
2254 perf_ctx_lock(cpuctx, cpuctx->task_ctx);
2255
2256 perf_pmu_disable(pmu);
2257
2258 pmu->flush_branch_stack();
2259
2260 perf_pmu_enable(pmu);
2261
2262 perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
2263 }
2264 }
2265
2266 rcu_read_unlock();
2267
2268 local_irq_restore(flags);
2269}
2270
2271/*
2198 * Called from scheduler to add the events of the current task 2272 * Called from scheduler to add the events of the current task
2199 * with interrupts disabled. 2273 * with interrupts disabled.
2200 * 2274 *
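On the PMU side the contract is simple: discard (or stash) whatever branch state predates the switch-in. A hypothetical driver-side sketch (my_cpu_hw and lbr_entries are made up):

    struct my_cpu_hw { int lbr_entries; };
    static DEFINE_PER_CPU(struct my_cpu_hw, my_cpu_hw);

    static void my_pmu_flush_branch_stack(void)
    {
            struct my_cpu_hw *hw = &__get_cpu_var(my_cpu_hw);

            hw->lbr_entries = 0;    /* entries can no longer be attributed */
    }

    static struct pmu my_pmu = {
            .flush_branch_stack = my_pmu_flush_branch_stack,
            /* other callbacks elided */
    };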
@@ -2225,6 +2299,10 @@ void __perf_event_task_sched_in(struct task_struct *prev,
2225 */ 2299 */
2226 if (atomic_read(&__get_cpu_var(perf_cgroup_events))) 2300 if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
2227 perf_cgroup_sched_in(prev, task); 2301 perf_cgroup_sched_in(prev, task);
2302
2303 /* check for system-wide branch_stack events */
2304 if (atomic_read(&__get_cpu_var(perf_branch_stack_events)))
2305 perf_branch_stack_sched_in(prev, task);
2228} 2306}
2229 2307
2230static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) 2308static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
@@ -2778,7 +2856,7 @@ static void free_event(struct perf_event *event)
2778 2856
2779 if (!event->parent) { 2857 if (!event->parent) {
2780 if (event->attach_state & PERF_ATTACH_TASK) 2858 if (event->attach_state & PERF_ATTACH_TASK)
2781 jump_label_dec_deferred(&perf_sched_events); 2859 static_key_slow_dec_deferred(&perf_sched_events);
2782 if (event->attr.mmap || event->attr.mmap_data) 2860 if (event->attr.mmap || event->attr.mmap_data)
2783 atomic_dec(&nr_mmap_events); 2861 atomic_dec(&nr_mmap_events);
2784 if (event->attr.comm) 2862 if (event->attr.comm)
@@ -2789,7 +2867,15 @@ static void free_event(struct perf_event *event)
2789 put_callchain_buffers(); 2867 put_callchain_buffers();
2790 if (is_cgroup_event(event)) { 2868 if (is_cgroup_event(event)) {
2791 atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); 2869 atomic_dec(&per_cpu(perf_cgroup_events, event->cpu));
2792 jump_label_dec_deferred(&perf_sched_events); 2870 static_key_slow_dec_deferred(&perf_sched_events);
2871 }
2872
2873 if (has_branch_stack(event)) {
2874 static_key_slow_dec_deferred(&perf_sched_events);
2875 /* is system-wide event */
2876 if (!(event->attach_state & PERF_ATTACH_TASK))
2877 atomic_dec(&per_cpu(perf_branch_stack_events,
2878 event->cpu));
2793 } 2879 }
2794 } 2880 }
2795 2881
@@ -3238,10 +3324,6 @@ int perf_event_task_disable(void)
3238 return 0; 3324 return 0;
3239} 3325}
3240 3326
3241#ifndef PERF_EVENT_INDEX_OFFSET
3242# define PERF_EVENT_INDEX_OFFSET 0
3243#endif
3244
3245static int perf_event_index(struct perf_event *event) 3327static int perf_event_index(struct perf_event *event)
3246{ 3328{
3247 if (event->hw.state & PERF_HES_STOPPED) 3329 if (event->hw.state & PERF_HES_STOPPED)
@@ -3250,21 +3332,26 @@ static int perf_event_index(struct perf_event *event)
3250 if (event->state != PERF_EVENT_STATE_ACTIVE) 3332 if (event->state != PERF_EVENT_STATE_ACTIVE)
3251 return 0; 3333 return 0;
3252 3334
3253 return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET; 3335 return event->pmu->event_idx(event);
3254} 3336}
3255 3337
3256static void calc_timer_values(struct perf_event *event, 3338static void calc_timer_values(struct perf_event *event,
3339 u64 *now,
3257 u64 *enabled, 3340 u64 *enabled,
3258 u64 *running) 3341 u64 *running)
3259{ 3342{
3260 u64 now, ctx_time; 3343 u64 ctx_time;
3261 3344
3262 now = perf_clock(); 3345 *now = perf_clock();
3263 ctx_time = event->shadow_ctx_time + now; 3346 ctx_time = event->shadow_ctx_time + *now;
3264 *enabled = ctx_time - event->tstamp_enabled; 3347 *enabled = ctx_time - event->tstamp_enabled;
3265 *running = ctx_time - event->tstamp_running; 3348 *running = ctx_time - event->tstamp_running;
3266} 3349}
3267 3350
3351void __weak perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
3352{
3353}
3354
3268/* 3355/*
3269 * Callers need to ensure there can be no nesting of this function, otherwise 3356 * Callers need to ensure there can be no nesting of this function, otherwise
3270 * the seqlock logic goes bad. We can not serialize this because the arch 3357 * the seqlock logic goes bad. We can not serialize this because the arch
@@ -3274,7 +3361,7 @@ void perf_event_update_userpage(struct perf_event *event)
3274{ 3361{
3275 struct perf_event_mmap_page *userpg; 3362 struct perf_event_mmap_page *userpg;
3276 struct ring_buffer *rb; 3363 struct ring_buffer *rb;
3277 u64 enabled, running; 3364 u64 enabled, running, now;
3278 3365
3279 rcu_read_lock(); 3366 rcu_read_lock();
3280 /* 3367 /*
@@ -3286,7 +3373,7 @@ void perf_event_update_userpage(struct perf_event *event)
3286 * because of locking issue as we can be called in 3373 * because of locking issue as we can be called in
3287 * NMI context 3374 * NMI context
3288 */ 3375 */
3289 calc_timer_values(event, &enabled, &running); 3376 calc_timer_values(event, &now, &enabled, &running);
3290 rb = rcu_dereference(event->rb); 3377 rb = rcu_dereference(event->rb);
3291 if (!rb) 3378 if (!rb)
3292 goto unlock; 3379 goto unlock;
@@ -3302,7 +3389,7 @@ void perf_event_update_userpage(struct perf_event *event)
3302 barrier(); 3389 barrier();
3303 userpg->index = perf_event_index(event); 3390 userpg->index = perf_event_index(event);
3304 userpg->offset = perf_event_count(event); 3391 userpg->offset = perf_event_count(event);
3305 if (event->state == PERF_EVENT_STATE_ACTIVE) 3392 if (userpg->index)
3306 userpg->offset -= local64_read(&event->hw.prev_count); 3393 userpg->offset -= local64_read(&event->hw.prev_count);
3307 3394
3308 userpg->time_enabled = enabled + 3395 userpg->time_enabled = enabled +
@@ -3311,6 +3398,8 @@ void perf_event_update_userpage(struct perf_event *event)
3311 userpg->time_running = running + 3398 userpg->time_running = running +
3312 atomic64_read(&event->child_total_time_running); 3399 atomic64_read(&event->child_total_time_running);
3313 3400
3401 perf_update_user_clock(userpg, now);
3402
3314 barrier(); 3403 barrier();
3315 ++userpg->lock; 3404 ++userpg->lock;
3316 preempt_enable(); 3405 preempt_enable();
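The user-space counterpart of this update loop treats lock as a seqcount, index as the hardware counter selector (0 meaning not user-readable), and offset as the re-base for the raw value. A sketch where barrier() and rdpmc() stand in for the compiler barrier and the arch-specific counter read:

    static u64 self_read(struct perf_event_mmap_page *pc)
    {
            u32 seq, idx;
            u64 count;

            do {
                    seq = pc->lock;
                    barrier();
                    idx = pc->index;
                    count = pc->offset;
                    if (idx)                        /* 0: fall back to read() */
                            count += rdpmc(idx - 1);
                    barrier();
            } while (pc->lock != seq);

            return count;
    }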
@@ -3568,6 +3657,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
3568 event->mmap_user = get_current_user(); 3657 event->mmap_user = get_current_user();
3569 vma->vm_mm->pinned_vm += event->mmap_locked; 3658 vma->vm_mm->pinned_vm += event->mmap_locked;
3570 3659
3660 perf_event_update_userpage(event);
3661
3571unlock: 3662unlock:
3572 if (!ret) 3663 if (!ret)
3573 atomic_inc(&event->mmap_count); 3664 atomic_inc(&event->mmap_count);
@@ -3799,7 +3890,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
3799static void perf_output_read(struct perf_output_handle *handle, 3890static void perf_output_read(struct perf_output_handle *handle,
3800 struct perf_event *event) 3891 struct perf_event *event)
3801{ 3892{
3802 u64 enabled = 0, running = 0; 3893 u64 enabled = 0, running = 0, now;
3803 u64 read_format = event->attr.read_format; 3894 u64 read_format = event->attr.read_format;
3804 3895
3805 /* 3896 /*
@@ -3812,7 +3903,7 @@ static void perf_output_read(struct perf_output_handle *handle,
3812 * NMI context 3903 * NMI context
3813 */ 3904 */
3814 if (read_format & PERF_FORMAT_TOTAL_TIMES) 3905 if (read_format & PERF_FORMAT_TOTAL_TIMES)
3815 calc_timer_values(event, &enabled, &running); 3906 calc_timer_values(event, &now, &enabled, &running);
3816 3907
3817 if (event->attr.read_format & PERF_FORMAT_GROUP) 3908 if (event->attr.read_format & PERF_FORMAT_GROUP)
3818 perf_output_read_group(handle, event, enabled, running); 3909 perf_output_read_group(handle, event, enabled, running);
@@ -3902,6 +3993,24 @@ void perf_output_sample(struct perf_output_handle *handle,
3902 } 3993 }
3903 } 3994 }
3904 } 3995 }
3996
3997 if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
3998 if (data->br_stack) {
3999 size_t size;
4000
4001 size = data->br_stack->nr
4002 * sizeof(struct perf_branch_entry);
4003
4004 perf_output_put(handle, data->br_stack->nr);
4005 perf_output_copy(handle, data->br_stack->entries, size);
4006 } else {
4007 /*
4008 * we always store at least the value of nr
4009 */
4010 u64 nr = 0;
4011 perf_output_put(handle, nr);
4012 }
4013 }
3905} 4014}
3906 4015
3907void perf_prepare_sample(struct perf_event_header *header, 4016void perf_prepare_sample(struct perf_event_header *header,
@@ -3944,6 +4053,15 @@ void perf_prepare_sample(struct perf_event_header *header,
3944 WARN_ON_ONCE(size & (sizeof(u64)-1)); 4053 WARN_ON_ONCE(size & (sizeof(u64)-1));
3945 header->size += size; 4054 header->size += size;
3946 } 4055 }
4056
4057 if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
4058 int size = sizeof(u64); /* nr */
4059 if (data->br_stack) {
4060 size += data->br_stack->nr
4061 * sizeof(struct perf_branch_entry);
4062 }
4063 header->size += size;
4064 }
3947} 4065}
3948 4066
3949static void perf_event_output(struct perf_event *event, 4067static void perf_event_output(struct perf_event *event,
@@ -4986,7 +5104,7 @@ fail:
4986 return err; 5104 return err;
4987} 5105}
4988 5106
4989struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; 5107struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
4990 5108
4991static void sw_perf_event_destroy(struct perf_event *event) 5109static void sw_perf_event_destroy(struct perf_event *event)
4992{ 5110{
@@ -4994,7 +5112,7 @@ static void sw_perf_event_destroy(struct perf_event *event)
4994 5112
4995 WARN_ON(event->parent); 5113 WARN_ON(event->parent);
4996 5114
4997 jump_label_dec(&perf_swevent_enabled[event_id]); 5115 static_key_slow_dec(&perf_swevent_enabled[event_id]);
4998 swevent_hlist_put(event); 5116 swevent_hlist_put(event);
4999} 5117}
5000 5118
@@ -5005,6 +5123,12 @@ static int perf_swevent_init(struct perf_event *event)
5005 if (event->attr.type != PERF_TYPE_SOFTWARE) 5123 if (event->attr.type != PERF_TYPE_SOFTWARE)
5006 return -ENOENT; 5124 return -ENOENT;
5007 5125
5126 /*
5127 * no branch sampling for software events
5128 */
5129 if (has_branch_stack(event))
5130 return -EOPNOTSUPP;
5131
5008 switch (event_id) { 5132 switch (event_id) {
5009 case PERF_COUNT_SW_CPU_CLOCK: 5133 case PERF_COUNT_SW_CPU_CLOCK:
5010 case PERF_COUNT_SW_TASK_CLOCK: 5134 case PERF_COUNT_SW_TASK_CLOCK:
@@ -5024,13 +5148,18 @@ static int perf_swevent_init(struct perf_event *event)
5024 if (err) 5148 if (err)
5025 return err; 5149 return err;
5026 5150
5027 jump_label_inc(&perf_swevent_enabled[event_id]); 5151 static_key_slow_inc(&perf_swevent_enabled[event_id]);
5028 event->destroy = sw_perf_event_destroy; 5152 event->destroy = sw_perf_event_destroy;
5029 } 5153 }
5030 5154
5031 return 0; 5155 return 0;
5032} 5156}
5033 5157
5158static int perf_swevent_event_idx(struct perf_event *event)
5159{
5160 return 0;
5161}
5162
5034static struct pmu perf_swevent = { 5163static struct pmu perf_swevent = {
5035 .task_ctx_nr = perf_sw_context, 5164 .task_ctx_nr = perf_sw_context,
5036 5165
@@ -5040,6 +5169,8 @@ static struct pmu perf_swevent = {
5040 .start = perf_swevent_start, 5169 .start = perf_swevent_start,
5041 .stop = perf_swevent_stop, 5170 .stop = perf_swevent_stop,
5042 .read = perf_swevent_read, 5171 .read = perf_swevent_read,
5172
5173 .event_idx = perf_swevent_event_idx,
5043}; 5174};
5044 5175
5045#ifdef CONFIG_EVENT_TRACING 5176#ifdef CONFIG_EVENT_TRACING
@@ -5108,6 +5239,12 @@ static int perf_tp_event_init(struct perf_event *event)
5108 if (event->attr.type != PERF_TYPE_TRACEPOINT) 5239 if (event->attr.type != PERF_TYPE_TRACEPOINT)
5109 return -ENOENT; 5240 return -ENOENT;
5110 5241
5242 /*
5243 * no branch sampling for tracepoint events
5244 */
5245 if (has_branch_stack(event))
5246 return -EOPNOTSUPP;
5247
5111 err = perf_trace_init(event); 5248 err = perf_trace_init(event);
5112 if (err) 5249 if (err)
5113 return err; 5250 return err;
@@ -5126,6 +5263,8 @@ static struct pmu perf_tracepoint = {
5126 .start = perf_swevent_start, 5263 .start = perf_swevent_start,
5127 .stop = perf_swevent_stop, 5264 .stop = perf_swevent_stop,
5128 .read = perf_swevent_read, 5265 .read = perf_swevent_read,
5266
5267 .event_idx = perf_swevent_event_idx,
5129}; 5268};
5130 5269
5131static inline void perf_tp_register(void) 5270static inline void perf_tp_register(void)
@@ -5331,6 +5470,12 @@ static int cpu_clock_event_init(struct perf_event *event)
5331 if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) 5470 if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK)
5332 return -ENOENT; 5471 return -ENOENT;
5333 5472
5473 /*
5474 * no branch sampling for software events
5475 */
5476 if (has_branch_stack(event))
5477 return -EOPNOTSUPP;
5478
5334 perf_swevent_init_hrtimer(event); 5479 perf_swevent_init_hrtimer(event);
5335 5480
5336 return 0; 5481 return 0;
@@ -5345,6 +5490,8 @@ static struct pmu perf_cpu_clock = {
5345 .start = cpu_clock_event_start, 5490 .start = cpu_clock_event_start,
5346 .stop = cpu_clock_event_stop, 5491 .stop = cpu_clock_event_stop,
5347 .read = cpu_clock_event_read, 5492 .read = cpu_clock_event_read,
5493
5494 .event_idx = perf_swevent_event_idx,
5348}; 5495};
5349 5496
5350/* 5497/*
@@ -5403,6 +5550,12 @@ static int task_clock_event_init(struct perf_event *event)
5403 if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) 5550 if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK)
5404 return -ENOENT; 5551 return -ENOENT;
5405 5552
5553 /*
5554 * no branch sampling for software events
5555 */
5556 if (has_branch_stack(event))
5557 return -EOPNOTSUPP;
5558
5406 perf_swevent_init_hrtimer(event); 5559 perf_swevent_init_hrtimer(event);
5407 5560
5408 return 0; 5561 return 0;
@@ -5417,6 +5570,8 @@ static struct pmu perf_task_clock = {
5417 .start = task_clock_event_start, 5570 .start = task_clock_event_start,
5418 .stop = task_clock_event_stop, 5571 .stop = task_clock_event_stop,
5419 .read = task_clock_event_read, 5572 .read = task_clock_event_read,
5573
5574 .event_idx = perf_swevent_event_idx,
5420}; 5575};
5421 5576
5422static void perf_pmu_nop_void(struct pmu *pmu) 5577static void perf_pmu_nop_void(struct pmu *pmu)
@@ -5444,6 +5599,11 @@ static void perf_pmu_cancel_txn(struct pmu *pmu)
5444 perf_pmu_enable(pmu); 5599 perf_pmu_enable(pmu);
5445} 5600}
5446 5601
5602static int perf_event_idx_default(struct perf_event *event)
5603{
5604 return event->hw.idx + 1;
5605}
5606
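
[Editor's note: event_idx feeds perf's user-space self-monitoring path. The convention is that 0 means "no user-readable hardware counter" — which is what the software-event stubs above return — while hardware PMUs report their counter number plus one, encoded by this default. A hedged user-space sketch of the consuming side on x86; the wrapper and fallback below are illustrative, not from this patch:

        #include <stdint.h>

        static inline uint64_t rdpmc(uint32_t counter)
        {
                uint32_t lo, hi;

                asm volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (counter));
                return lo | ((uint64_t)hi << 32);
        }

        static int read_pmu_fast(uint32_t index, uint64_t *count)
        {
                if (index == 0)                 /* PMU declined: fall back to read(2) */
                        return -1;
                *count = rdpmc(index - 1);      /* index is counter number + 1 */
                return 0;
        }
]
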
5447/* 5607/*
5448 * Ensures all contexts with the same task_ctx_nr have the same 5608 * Ensures all contexts with the same task_ctx_nr have the same
5449 * pmu_cpu_context too. 5609 * pmu_cpu_context too.
@@ -5530,6 +5690,7 @@ static int pmu_dev_alloc(struct pmu *pmu)
5530 if (!pmu->dev) 5690 if (!pmu->dev)
5531 goto out; 5691 goto out;
5532 5692
5693 pmu->dev->groups = pmu->attr_groups;
5533 device_initialize(pmu->dev); 5694 device_initialize(pmu->dev);
5534 ret = dev_set_name(pmu->dev, "%s", pmu->name); 5695 ret = dev_set_name(pmu->dev, "%s", pmu->name);
5535 if (ret) 5696 if (ret)
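
[Editor's note: with pmu->dev->groups wired up before device_add(), a PMU can publish attribute groups under /sys/bus/event_source/devices/<name>/. A sketch of what a driver might hang there, assuming the PMU_FORMAT_ATTR() helper introduced elsewhere in this series; the "config:0-7" format spec and the my_* names are made-up examples:

        PMU_FORMAT_ATTR(event, "config:0-7");

        static struct attribute *my_format_attrs[] = {
                &format_attr_event.attr,
                NULL,
        };

        static struct attribute_group my_format_group = {
                .name  = "format",
                .attrs = my_format_attrs,
        };

        static const struct attribute_group *my_attr_groups[] = {
                &my_format_group,
                NULL,
        };
        /* then: my_pmu.attr_groups = my_attr_groups; before registration */
]
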
@@ -5633,6 +5794,9 @@ got_cpu_context:
5633 pmu->pmu_disable = perf_pmu_nop_void; 5794 pmu->pmu_disable = perf_pmu_nop_void;
5634 } 5795 }
5635 5796
5797 if (!pmu->event_idx)
5798 pmu->event_idx = perf_event_idx_default;
5799
5636 list_add_rcu(&pmu->entry, &pmus); 5800 list_add_rcu(&pmu->entry, &pmus);
5637 ret = 0; 5801 ret = 0;
5638unlock: 5802unlock:
@@ -5825,7 +5989,7 @@ done:
5825 5989
5826 if (!event->parent) { 5990 if (!event->parent) {
5827 if (event->attach_state & PERF_ATTACH_TASK) 5991 if (event->attach_state & PERF_ATTACH_TASK)
5828 jump_label_inc(&perf_sched_events.key); 5992 static_key_slow_inc(&perf_sched_events.key);
5829 if (event->attr.mmap || event->attr.mmap_data) 5993 if (event->attr.mmap || event->attr.mmap_data)
5830 atomic_inc(&nr_mmap_events); 5994 atomic_inc(&nr_mmap_events);
5831 if (event->attr.comm) 5995 if (event->attr.comm)
@@ -5839,6 +6003,12 @@ done:
5839 return ERR_PTR(err); 6003 return ERR_PTR(err);
5840 } 6004 }
5841 } 6005 }
6006 if (has_branch_stack(event)) {
6007 static_key_slow_inc(&perf_sched_events.key);
6008 if (!(event->attach_state & PERF_ATTACH_TASK))
6009 atomic_inc(&per_cpu(perf_branch_stack_events,
6010 event->cpu));
6011 }
5842 } 6012 }
5843 6013
5844 return event; 6014 return event;
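
[Editor's note: branch-stack events pin the context-switch callback via perf_sched_events — the hardware branch buffer must be flushed across task switches — and CPU-bound events are additionally counted per CPU. A hedged sketch of the per-cpu counter's consumer, modelled on the sched-in hook this series adds to the context-switch path (not shown in this hunk):

        /* in __perf_event_task_sched_in(), roughly: */
        if (atomic_read(&__get_cpu_var(perf_branch_stack_events)))
                perf_branch_stack_sched_in(prev, task);
]
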
@@ -5908,6 +6078,40 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
5908 if (attr->read_format & ~(PERF_FORMAT_MAX-1)) 6078 if (attr->read_format & ~(PERF_FORMAT_MAX-1))
5909 return -EINVAL; 6079 return -EINVAL;
5910 6080
6081 if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
6082 u64 mask = attr->branch_sample_type;
6083
6084 /* only using defined bits */
6085 if (mask & ~(PERF_SAMPLE_BRANCH_MAX-1))
6086 return -EINVAL;
6087
6088 /* at least one branch bit must be set */
6089 if (!(mask & ~PERF_SAMPLE_BRANCH_PLM_ALL))
6090 return -EINVAL;
6091
6092 /* kernel level capture: check permissions */
6093 if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM)
6094 && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
6095 return -EACCES;
6096
6097 /* propagate priv level, when not set for branch */
6098 if (!(mask & PERF_SAMPLE_BRANCH_PLM_ALL)) {
6099
6100 /* exclude_kernel checked on syscall entry */
6101 if (!attr->exclude_kernel)
6102 mask |= PERF_SAMPLE_BRANCH_KERNEL;
6103
6104 if (!attr->exclude_user)
6105 mask |= PERF_SAMPLE_BRANCH_USER;
6106
6107 if (!attr->exclude_hv)
6108 mask |= PERF_SAMPLE_BRANCH_HV;
6109 /*
6110 * adjust user setting (for HW filter setup)
6111 */
6112 attr->branch_sample_type = mask;
6113 }
6114 }
5911out: 6115out:
5912 return ret; 6116 return ret;
5913 6117
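
[Editor's note: the net effect of the propagation block above is that when the caller sets none of the PERF_SAMPLE_BRANCH_{USER,KERNEL,HV} bits, the branch filter inherits the event's own exclude_* settings. A worked example; the values are illustrative:

        struct perf_event_attr attr = {
                .sample_type        = PERF_SAMPLE_BRANCH_STACK,
                .branch_sample_type = PERF_SAMPLE_BRANCH_ANY,   /* no priv bits */
                .exclude_kernel     = 1,
                .exclude_user       = 0,
                .exclude_hv         = 1,
        };
        /* after perf_copy_attr():
         *      attr.branch_sample_type ==
         *              PERF_SAMPLE_BRANCH_ANY | PERF_SAMPLE_BRANCH_USER
         * i.e. only user-level branches are captured, matching the event. */
]
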
@@ -6063,7 +6267,7 @@ SYSCALL_DEFINE5(perf_event_open,
6063 * - that may need work on context switch 6267 * - that may need work on context switch
6064 */ 6268 */
6065 atomic_inc(&per_cpu(perf_cgroup_events, event->cpu)); 6269 atomic_inc(&per_cpu(perf_cgroup_events, event->cpu));
6066 jump_label_inc(&perf_sched_events.key); 6270 static_key_slow_inc(&perf_sched_events.key);
6067 } 6271 }
6068 6272
6069 /* 6273 /*
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index ee706ce44aa0..bb38c4d3ee12 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -581,6 +581,12 @@ static int hw_breakpoint_event_init(struct perf_event *bp)
581 if (bp->attr.type != PERF_TYPE_BREAKPOINT) 581 if (bp->attr.type != PERF_TYPE_BREAKPOINT)
582 return -ENOENT; 582 return -ENOENT;
583 583
584 /*
585 * no branch sampling for breakpoint events
586 */
587 if (has_branch_stack(bp))
588 return -EOPNOTSUPP;
589
584 err = register_perf_hw_breakpoint(bp); 590 err = register_perf_hw_breakpoint(bp);
585 if (err) 591 if (err)
586 return err; 592 return err;
@@ -613,6 +619,11 @@ static void hw_breakpoint_stop(struct perf_event *bp, int flags)
613 bp->hw.state = PERF_HES_STOPPED; 619 bp->hw.state = PERF_HES_STOPPED;
614} 620}
615 621
622static int hw_breakpoint_event_idx(struct perf_event *bp)
623{
624 return 0;
625}
626
616static struct pmu perf_breakpoint = { 627static struct pmu perf_breakpoint = {
617 .task_ctx_nr = perf_sw_context, /* could eventually get its own */ 628 .task_ctx_nr = perf_sw_context, /* could eventually get its own */
618 629
@@ -622,6 +633,8 @@ static struct pmu perf_breakpoint = {
622 .start = hw_breakpoint_start, 633 .start = hw_breakpoint_start,
623 .stop = hw_breakpoint_stop, 634 .stop = hw_breakpoint_stop,
624 .read = hw_breakpoint_pmu_read, 635 .read = hw_breakpoint_pmu_read,
636
637 .event_idx = hw_breakpoint_event_idx,
625}; 638};
626 639
627int __init init_hw_breakpoint(void) 640int __init init_hw_breakpoint(void)
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 25784d630a12..6080f6bc8c33 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -16,6 +16,8 @@
16#include <linux/interrupt.h> 16#include <linux/interrupt.h>
17#include <linux/kernel_stat.h> 17#include <linux/kernel_stat.h>
18 18
19#include <trace/events/irq.h>
20
19#include "internals.h" 21#include "internals.h"
20 22
21/** 23/**
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 01d3b70fc98a..43049192b5ec 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -12,7 +12,7 @@
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/sort.h> 13#include <linux/sort.h>
14#include <linux/err.h> 14#include <linux/err.h>
15#include <linux/jump_label.h> 15#include <linux/static_key.h>
16 16
17#ifdef HAVE_JUMP_LABEL 17#ifdef HAVE_JUMP_LABEL
18 18
@@ -29,11 +29,6 @@ void jump_label_unlock(void)
29 mutex_unlock(&jump_label_mutex); 29 mutex_unlock(&jump_label_mutex);
30} 30}
31 31
32bool jump_label_enabled(struct jump_label_key *key)
33{
34 return !!atomic_read(&key->enabled);
35}
36
37static int jump_label_cmp(const void *a, const void *b) 32static int jump_label_cmp(const void *a, const void *b)
38{ 33{
39 const struct jump_entry *jea = a; 34 const struct jump_entry *jea = a;
@@ -58,56 +53,66 @@ jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop)
58 sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL); 53 sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL);
59} 54}
60 55
61static void jump_label_update(struct jump_label_key *key, int enable); 56static void jump_label_update(struct static_key *key, int enable);
62 57
63void jump_label_inc(struct jump_label_key *key) 58void static_key_slow_inc(struct static_key *key)
64{ 59{
65 if (atomic_inc_not_zero(&key->enabled)) 60 if (atomic_inc_not_zero(&key->enabled))
66 return; 61 return;
67 62
68 jump_label_lock(); 63 jump_label_lock();
69 if (atomic_read(&key->enabled) == 0) 64 if (atomic_read(&key->enabled) == 0) {
70 jump_label_update(key, JUMP_LABEL_ENABLE); 65 if (!jump_label_get_branch_default(key))
66 jump_label_update(key, JUMP_LABEL_ENABLE);
67 else
68 jump_label_update(key, JUMP_LABEL_DISABLE);
69 }
71 atomic_inc(&key->enabled); 70 atomic_inc(&key->enabled);
72 jump_label_unlock(); 71 jump_label_unlock();
73} 72}
74EXPORT_SYMBOL_GPL(jump_label_inc); 73EXPORT_SYMBOL_GPL(static_key_slow_inc);
75 74
76static void __jump_label_dec(struct jump_label_key *key, 75static void __static_key_slow_dec(struct static_key *key,
77 unsigned long rate_limit, struct delayed_work *work) 76 unsigned long rate_limit, struct delayed_work *work)
78{ 77{
79 if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) 78 if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) {
79 WARN(atomic_read(&key->enabled) < 0,
80 "jump label: negative count!\n");
80 return; 81 return;
82 }
81 83
82 if (rate_limit) { 84 if (rate_limit) {
83 atomic_inc(&key->enabled); 85 atomic_inc(&key->enabled);
84 schedule_delayed_work(work, rate_limit); 86 schedule_delayed_work(work, rate_limit);
85 } else 87 } else {
86 jump_label_update(key, JUMP_LABEL_DISABLE); 88 if (!jump_label_get_branch_default(key))
87 89 jump_label_update(key, JUMP_LABEL_DISABLE);
90 else
91 jump_label_update(key, JUMP_LABEL_ENABLE);
92 }
88 jump_label_unlock(); 93 jump_label_unlock();
89} 94}
90EXPORT_SYMBOL_GPL(jump_label_dec);
91 95
92static void jump_label_update_timeout(struct work_struct *work) 96static void jump_label_update_timeout(struct work_struct *work)
93{ 97{
94 struct jump_label_key_deferred *key = 98 struct static_key_deferred *key =
95 container_of(work, struct jump_label_key_deferred, work.work); 99 container_of(work, struct static_key_deferred, work.work);
96 __jump_label_dec(&key->key, 0, NULL); 100 __static_key_slow_dec(&key->key, 0, NULL);
97} 101}
98 102
99void jump_label_dec(struct jump_label_key *key) 103void static_key_slow_dec(struct static_key *key)
100{ 104{
101 __jump_label_dec(key, 0, NULL); 105 __static_key_slow_dec(key, 0, NULL);
102} 106}
107EXPORT_SYMBOL_GPL(static_key_slow_dec);
103 108
104void jump_label_dec_deferred(struct jump_label_key_deferred *key) 109void static_key_slow_dec_deferred(struct static_key_deferred *key)
105{ 110{
106 __jump_label_dec(&key->key, key->timeout, &key->work); 111 __static_key_slow_dec(&key->key, key->timeout, &key->work);
107} 112}
113EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred);
108 114
109 115void jump_label_rate_limit(struct static_key_deferred *key,
110void jump_label_rate_limit(struct jump_label_key_deferred *key,
111 unsigned long rl) 116 unsigned long rl)
112{ 117{
113 key->timeout = rl; 118 key->timeout = rl;
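
[Editor's note: the inc/dec paths now honor a key's compiled-in default branch direction. A minimal usage sketch of the default-true variant, assuming the STATIC_KEY_INIT_TRUE initializer from this series:

        static struct static_key key = STATIC_KEY_INIT_TRUE;

        if (static_key_true(&key))      /* inline true branch by default */
                do_likely();
        else
                do_unlikely();

        /* On the 1 -> 0 transition, __static_key_slow_dec() patches the
         * sites; note it passes JUMP_LABEL_ENABLE for a default-true key,
         * i.e. "enable the jump" means "take the unlikely path" there. */
        static_key_slow_dec(&key);
]
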
@@ -150,7 +155,7 @@ void __weak __init_or_module arch_jump_label_transform_static(struct jump_entry
150 arch_jump_label_transform(entry, type); 155 arch_jump_label_transform(entry, type);
151} 156}
152 157
153static void __jump_label_update(struct jump_label_key *key, 158static void __jump_label_update(struct static_key *key,
154 struct jump_entry *entry, 159 struct jump_entry *entry,
155 struct jump_entry *stop, int enable) 160 struct jump_entry *stop, int enable)
156{ 161{
@@ -167,27 +172,40 @@ static void __jump_label_update(struct jump_label_key *key,
167 } 172 }
168} 173}
169 174
175static enum jump_label_type jump_label_type(struct static_key *key)
176{
177 bool true_branch = jump_label_get_branch_default(key);
178 bool state = static_key_enabled(key);
179
180 if ((!true_branch && state) || (true_branch && !state))
181 return JUMP_LABEL_ENABLE;
182
183 return JUMP_LABEL_DISABLE;
184}
185
170void __init jump_label_init(void) 186void __init jump_label_init(void)
171{ 187{
172 struct jump_entry *iter_start = __start___jump_table; 188 struct jump_entry *iter_start = __start___jump_table;
173 struct jump_entry *iter_stop = __stop___jump_table; 189 struct jump_entry *iter_stop = __stop___jump_table;
174 struct jump_label_key *key = NULL; 190 struct static_key *key = NULL;
175 struct jump_entry *iter; 191 struct jump_entry *iter;
176 192
177 jump_label_lock(); 193 jump_label_lock();
178 jump_label_sort_entries(iter_start, iter_stop); 194 jump_label_sort_entries(iter_start, iter_stop);
179 195
180 for (iter = iter_start; iter < iter_stop; iter++) { 196 for (iter = iter_start; iter < iter_stop; iter++) {
181 struct jump_label_key *iterk; 197 struct static_key *iterk;
182 198
183 iterk = (struct jump_label_key *)(unsigned long)iter->key; 199 iterk = (struct static_key *)(unsigned long)iter->key;
184 arch_jump_label_transform_static(iter, jump_label_enabled(iterk) ? 200 arch_jump_label_transform_static(iter, jump_label_type(iterk));
185 JUMP_LABEL_ENABLE : JUMP_LABEL_DISABLE);
186 if (iterk == key) 201 if (iterk == key)
187 continue; 202 continue;
188 203
189 key = iterk; 204 key = iterk;
190 key->entries = iter; 205 /*
206 * Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH.
207 */
208 *((unsigned long *)&key->entries) += (unsigned long)iter;
191#ifdef CONFIG_MODULES 209#ifdef CONFIG_MODULES
192 key->next = NULL; 210 key->next = NULL;
193#endif 211#endif
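
[Editor's note: the "+=" on key->entries above looks odd because the initializers stash the default branch direction in the pointer's low bit; adding iter fills in the address while preserving that flag. A sketch of the accessors this code relies on, assuming the bit-0 encoding from include/linux/jump_label.h in this series:

        #define JUMP_LABEL_TRUE_BRANCH 1UL

        static bool jump_label_get_branch_default(struct static_key *key)
        {
                return (unsigned long)key->entries & JUMP_LABEL_TRUE_BRANCH;
        }

        static struct jump_entry *jump_label_get_entries(struct static_key *key)
        {
                return (struct jump_entry *)((unsigned long)key->entries
                                             & ~JUMP_LABEL_TRUE_BRANCH);
        }
]
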
@@ -197,8 +215,8 @@ void __init jump_label_init(void)
197 215
198#ifdef CONFIG_MODULES 216#ifdef CONFIG_MODULES
199 217
200struct jump_label_mod { 218struct static_key_mod {
201 struct jump_label_mod *next; 219 struct static_key_mod *next;
202 struct jump_entry *entries; 220 struct jump_entry *entries;
203 struct module *mod; 221 struct module *mod;
204}; 222};
@@ -218,9 +236,9 @@ static int __jump_label_mod_text_reserved(void *start, void *end)
218 start, end); 236 start, end);
219} 237}
220 238
221static void __jump_label_mod_update(struct jump_label_key *key, int enable) 239static void __jump_label_mod_update(struct static_key *key, int enable)
222{ 240{
223 struct jump_label_mod *mod = key->next; 241 struct static_key_mod *mod = key->next;
224 242
225 while (mod) { 243 while (mod) {
226 struct module *m = mod->mod; 244 struct module *m = mod->mod;
@@ -251,11 +269,7 @@ void jump_label_apply_nops(struct module *mod)
251 return; 269 return;
252 270
253 for (iter = iter_start; iter < iter_stop; iter++) { 271 for (iter = iter_start; iter < iter_stop; iter++) {
254 struct jump_label_key *iterk; 272 arch_jump_label_transform_static(iter, JUMP_LABEL_DISABLE);
255
256 iterk = (struct jump_label_key *)(unsigned long)iter->key;
257 arch_jump_label_transform_static(iter, jump_label_enabled(iterk) ?
258 JUMP_LABEL_ENABLE : JUMP_LABEL_DISABLE);
259 } 273 }
260} 274}
261 275
@@ -264,8 +278,8 @@ static int jump_label_add_module(struct module *mod)
264 struct jump_entry *iter_start = mod->jump_entries; 278 struct jump_entry *iter_start = mod->jump_entries;
265 struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; 279 struct jump_entry *iter_stop = iter_start + mod->num_jump_entries;
266 struct jump_entry *iter; 280 struct jump_entry *iter;
267 struct jump_label_key *key = NULL; 281 struct static_key *key = NULL;
268 struct jump_label_mod *jlm; 282 struct static_key_mod *jlm;
269 283
270 /* if the module doesn't have jump label entries, just return */ 284 /* if the module doesn't have jump label entries, just return */
271 if (iter_start == iter_stop) 285 if (iter_start == iter_stop)
@@ -274,28 +288,30 @@ static int jump_label_add_module(struct module *mod)
274 jump_label_sort_entries(iter_start, iter_stop); 288 jump_label_sort_entries(iter_start, iter_stop);
275 289
276 for (iter = iter_start; iter < iter_stop; iter++) { 290 for (iter = iter_start; iter < iter_stop; iter++) {
277 if (iter->key == (jump_label_t)(unsigned long)key) 291 struct static_key *iterk;
278 continue;
279 292
280 key = (struct jump_label_key *)(unsigned long)iter->key; 293 iterk = (struct static_key *)(unsigned long)iter->key;
294 if (iterk == key)
295 continue;
281 296
297 key = iterk;
282 if (__module_address(iter->key) == mod) { 298 if (__module_address(iter->key) == mod) {
283 atomic_set(&key->enabled, 0); 299 /*
284 key->entries = iter; 300 * Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH.
301 */
302 *((unsigned long *)&key->entries) += (unsigned long)iter;
285 key->next = NULL; 303 key->next = NULL;
286 continue; 304 continue;
287 } 305 }
288 306 jlm = kzalloc(sizeof(struct static_key_mod), GFP_KERNEL);
289 jlm = kzalloc(sizeof(struct jump_label_mod), GFP_KERNEL);
290 if (!jlm) 307 if (!jlm)
291 return -ENOMEM; 308 return -ENOMEM;
292
293 jlm->mod = mod; 309 jlm->mod = mod;
294 jlm->entries = iter; 310 jlm->entries = iter;
295 jlm->next = key->next; 311 jlm->next = key->next;
296 key->next = jlm; 312 key->next = jlm;
297 313
298 if (jump_label_enabled(key)) 314 if (jump_label_type(key) == JUMP_LABEL_ENABLE)
299 __jump_label_update(key, iter, iter_stop, JUMP_LABEL_ENABLE); 315 __jump_label_update(key, iter, iter_stop, JUMP_LABEL_ENABLE);
300 } 316 }
301 317
@@ -307,14 +323,14 @@ static void jump_label_del_module(struct module *mod)
307 struct jump_entry *iter_start = mod->jump_entries; 323 struct jump_entry *iter_start = mod->jump_entries;
308 struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; 324 struct jump_entry *iter_stop = iter_start + mod->num_jump_entries;
309 struct jump_entry *iter; 325 struct jump_entry *iter;
310 struct jump_label_key *key = NULL; 326 struct static_key *key = NULL;
311 struct jump_label_mod *jlm, **prev; 327 struct static_key_mod *jlm, **prev;
312 328
313 for (iter = iter_start; iter < iter_stop; iter++) { 329 for (iter = iter_start; iter < iter_stop; iter++) {
314 if (iter->key == (jump_label_t)(unsigned long)key) 330 if (iter->key == (jump_label_t)(unsigned long)key)
315 continue; 331 continue;
316 332
317 key = (struct jump_label_key *)(unsigned long)iter->key; 333 key = (struct static_key *)(unsigned long)iter->key;
318 334
319 if (__module_address(iter->key) == mod) 335 if (__module_address(iter->key) == mod)
320 continue; 336 continue;
@@ -416,12 +432,13 @@ int jump_label_text_reserved(void *start, void *end)
416 return ret; 432 return ret;
417} 433}
418 434
419static void jump_label_update(struct jump_label_key *key, int enable) 435static void jump_label_update(struct static_key *key, int enable)
420{ 436{
421 struct jump_entry *entry = key->entries, *stop = __stop___jump_table; 437 struct jump_entry *stop = __stop___jump_table;
438 struct jump_entry *entry = jump_label_get_entries(key);
422 439
423#ifdef CONFIG_MODULES 440#ifdef CONFIG_MODULES
424 struct module *mod = __module_address((jump_label_t)key); 441 struct module *mod = __module_address((unsigned long)key);
425 442
426 __jump_label_mod_update(key, enable); 443 __jump_label_mod_update(key, enable);
427 444
diff --git a/kernel/printk.c b/kernel/printk.c
index 32690a0b7a18..0b3ea2cbd5fb 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -44,6 +44,9 @@
44 44
45#include <asm/uaccess.h> 45#include <asm/uaccess.h>
46 46
47#define CREATE_TRACE_POINTS
48#include <trace/events/printk.h>
49
47/* 50/*
48 * Architectures can override it: 51 * Architectures can override it:
49 */ 52 */
@@ -542,6 +545,8 @@ MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to"
542static void _call_console_drivers(unsigned start, 545static void _call_console_drivers(unsigned start,
543 unsigned end, int msg_log_level) 546 unsigned end, int msg_log_level)
544{ 547{
548 trace_console(&LOG_BUF(0), start, end, log_buf_len);
549
545 if ((msg_log_level < console_loglevel || ignore_loglevel) && 550 if ((msg_log_level < console_loglevel || ignore_loglevel) &&
546 console_drivers && start != end) { 551 console_drivers && start != end) {
547 if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) { 552 if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b342f57879e6..6c41ba49767a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -162,13 +162,13 @@ static int sched_feat_show(struct seq_file *m, void *v)
162 162
163#ifdef HAVE_JUMP_LABEL 163#ifdef HAVE_JUMP_LABEL
164 164
165#define jump_label_key__true jump_label_key_enabled 165#define jump_label_key__true STATIC_KEY_INIT_TRUE
166#define jump_label_key__false jump_label_key_disabled 166#define jump_label_key__false STATIC_KEY_INIT_FALSE
167 167
168#define SCHED_FEAT(name, enabled) \ 168#define SCHED_FEAT(name, enabled) \
169 jump_label_key__##enabled , 169 jump_label_key__##enabled ,
170 170
171struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR] = { 171struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
172#include "features.h" 172#include "features.h"
173}; 173};
174 174
@@ -176,14 +176,14 @@ struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR] = {
176 176
177static void sched_feat_disable(int i) 177static void sched_feat_disable(int i)
178{ 178{
179 if (jump_label_enabled(&sched_feat_keys[i])) 179 if (static_key_enabled(&sched_feat_keys[i]))
180 jump_label_dec(&sched_feat_keys[i]); 180 static_key_slow_dec(&sched_feat_keys[i]);
181} 181}
182 182
183static void sched_feat_enable(int i) 183static void sched_feat_enable(int i)
184{ 184{
185 if (!jump_label_enabled(&sched_feat_keys[i])) 185 if (!static_key_enabled(&sched_feat_keys[i]))
186 jump_label_inc(&sched_feat_keys[i]); 186 static_key_slow_inc(&sched_feat_keys[i]);
187} 187}
188#else 188#else
189static void sched_feat_disable(int i) { }; 189static void sched_feat_disable(int i) { };
@@ -894,7 +894,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
894 delta -= irq_delta; 894 delta -= irq_delta;
895#endif 895#endif
896#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING 896#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
897 if (static_branch((&paravirt_steal_rq_enabled))) { 897 if (static_key_false((&paravirt_steal_rq_enabled))) {
898 u64 st; 898 u64 st;
899 899
900 steal = paravirt_steal_clock(cpu_of(rq)); 900 steal = paravirt_steal_clock(cpu_of(rq));
@@ -2755,7 +2755,7 @@ void account_idle_time(cputime_t cputime)
2755static __always_inline bool steal_account_process_tick(void) 2755static __always_inline bool steal_account_process_tick(void)
2756{ 2756{
2757#ifdef CONFIG_PARAVIRT 2757#ifdef CONFIG_PARAVIRT
2758 if (static_branch(&paravirt_steal_enabled)) { 2758 if (static_key_false(&paravirt_steal_enabled)) {
2759 u64 steal, st = 0; 2759 u64 steal, st = 0;
2760 2760
2761 steal = paravirt_steal_clock(smp_processor_id()); 2761 steal = paravirt_steal_clock(smp_processor_id());
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index aca16b843b7e..fd974faf467d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1401,20 +1401,20 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
1401#ifdef CONFIG_CFS_BANDWIDTH 1401#ifdef CONFIG_CFS_BANDWIDTH
1402 1402
1403#ifdef HAVE_JUMP_LABEL 1403#ifdef HAVE_JUMP_LABEL
1404static struct jump_label_key __cfs_bandwidth_used; 1404static struct static_key __cfs_bandwidth_used;
1405 1405
1406static inline bool cfs_bandwidth_used(void) 1406static inline bool cfs_bandwidth_used(void)
1407{ 1407{
1408 return static_branch(&__cfs_bandwidth_used); 1408 return static_key_false(&__cfs_bandwidth_used);
1409} 1409}
1410 1410
1411void account_cfs_bandwidth_used(int enabled, int was_enabled) 1411void account_cfs_bandwidth_used(int enabled, int was_enabled)
1412{ 1412{
1413 /* only need to count groups transitioning between enabled/!enabled */ 1413 /* only need to count groups transitioning between enabled/!enabled */
1414 if (enabled && !was_enabled) 1414 if (enabled && !was_enabled)
1415 jump_label_inc(&__cfs_bandwidth_used); 1415 static_key_slow_inc(&__cfs_bandwidth_used);
1416 else if (!enabled && was_enabled) 1416 else if (!enabled && was_enabled)
1417 jump_label_dec(&__cfs_bandwidth_used); 1417 static_key_slow_dec(&__cfs_bandwidth_used);
1418} 1418}
1419#else /* HAVE_JUMP_LABEL */ 1419#else /* HAVE_JUMP_LABEL */
1420static bool cfs_bandwidth_used(void) 1420static bool cfs_bandwidth_used(void)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 98c0c2623db8..b4cd6d8ea150 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -611,7 +611,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
611 * Tunables that become constants when CONFIG_SCHED_DEBUG is off: 611 * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
612 */ 612 */
613#ifdef CONFIG_SCHED_DEBUG 613#ifdef CONFIG_SCHED_DEBUG
614# include <linux/jump_label.h> 614# include <linux/static_key.h>
615# define const_debug __read_mostly 615# define const_debug __read_mostly
616#else 616#else
617# define const_debug const 617# define const_debug const
@@ -630,18 +630,18 @@ enum {
630#undef SCHED_FEAT 630#undef SCHED_FEAT
631 631
632#if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL) 632#if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL)
633static __always_inline bool static_branch__true(struct jump_label_key *key) 633static __always_inline bool static_branch__true(struct static_key *key)
634{ 634{
635 return likely(static_branch(key)); /* Not out of line branch. */ 635 return static_key_true(key); /* Not out of line branch. */
636} 636}
637 637
638static __always_inline bool static_branch__false(struct jump_label_key *key) 638static __always_inline bool static_branch__false(struct static_key *key)
639{ 639{
640 return unlikely(static_branch(key)); /* Out of line branch. */ 640 return static_key_false(key); /* Out of line branch. */
641} 641}
642 642
643#define SCHED_FEAT(name, enabled) \ 643#define SCHED_FEAT(name, enabled) \
644static __always_inline bool static_branch_##name(struct jump_label_key *key) \ 644static __always_inline bool static_branch_##name(struct static_key *key) \
645{ \ 645{ \
646 return static_branch__##enabled(key); \ 646 return static_branch__##enabled(key); \
647} 647}
@@ -650,7 +650,7 @@ static __always_inline bool static_branch_##name(struct jump_label_key *key) \
650 650
651#undef SCHED_FEAT 651#undef SCHED_FEAT
652 652
653extern struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR]; 653extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
654#define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x])) 654#define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x]))
655#else /* !(SCHED_DEBUG && HAVE_JUMP_LABEL) */ 655#else /* !(SCHED_DEBUG && HAVE_JUMP_LABEL) */
656#define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x)) 656#define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
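
[Editor's note: with both halves in place, a sched_feat(X) test compiles down to a patched jump when SCHED_DEBUG and jump labels are available, and to the old bitmask test otherwise. An illustrative call site — GENTLE_FAIR_SLEEPERS is an existing feature, the surrounding logic is made up:

        if (sched_feat(GENTLE_FAIR_SLEEPERS))  /* static_branch__true(...) */
                thresh >>= 1;                  /* no sysctl load on the hot path */
]
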
diff --git a/kernel/signal.c b/kernel/signal.c
index c73c4284160e..8511e39813c7 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1054,13 +1054,13 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
1054 struct sigpending *pending; 1054 struct sigpending *pending;
1055 struct sigqueue *q; 1055 struct sigqueue *q;
1056 int override_rlimit; 1056 int override_rlimit;
1057 1057 int ret = 0, result;
1058 trace_signal_generate(sig, info, t);
1059 1058
1060 assert_spin_locked(&t->sighand->siglock); 1059 assert_spin_locked(&t->sighand->siglock);
1061 1060
1061 result = TRACE_SIGNAL_IGNORED;
1062 if (!prepare_signal(sig, t, from_ancestor_ns)) 1062 if (!prepare_signal(sig, t, from_ancestor_ns))
1063 return 0; 1063 goto ret;
1064 1064
1065 pending = group ? &t->signal->shared_pending : &t->pending; 1065 pending = group ? &t->signal->shared_pending : &t->pending;
1066 /* 1066 /*
@@ -1068,8 +1068,11 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
1068 * exactly one non-rt signal, so that we can get more 1068 * exactly one non-rt signal, so that we can get more
1069 * detailed information about the cause of the signal. 1069 * detailed information about the cause of the signal.
1070 */ 1070 */
1071 result = TRACE_SIGNAL_ALREADY_PENDING;
1071 if (legacy_queue(pending, sig)) 1072 if (legacy_queue(pending, sig))
1072 return 0; 1073 goto ret;
1074
1075 result = TRACE_SIGNAL_DELIVERED;
1073 /* 1076 /*
1074 * fast-pathed signals for kernel-internal things like SIGSTOP 1077 * fast-pathed signals for kernel-internal things like SIGSTOP
1075 * or SIGKILL. 1078 * or SIGKILL.
@@ -1127,14 +1130,15 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
1127 * signal was rt and sent by user using something 1130 * signal was rt and sent by user using something
1128 * other than kill(). 1131 * other than kill().
1129 */ 1132 */
1130 trace_signal_overflow_fail(sig, group, info); 1133 result = TRACE_SIGNAL_OVERFLOW_FAIL;
1131 return -EAGAIN; 1134 ret = -EAGAIN;
1135 goto ret;
1132 } else { 1136 } else {
1133 /* 1137 /*
1134 * This is a silent loss of information. We still 1138 * This is a silent loss of information. We still
1135 * send the signal, but the *info bits are lost. 1139 * send the signal, but the *info bits are lost.
1136 */ 1140 */
1137 trace_signal_lose_info(sig, group, info); 1141 result = TRACE_SIGNAL_LOSE_INFO;
1138 } 1142 }
1139 } 1143 }
1140 1144
@@ -1142,7 +1146,9 @@ out_set:
1142 signalfd_notify(t, sig); 1146 signalfd_notify(t, sig);
1143 sigaddset(&pending->signal, sig); 1147 sigaddset(&pending->signal, sig);
1144 complete_signal(sig, t, group); 1148 complete_signal(sig, t, group);
1145 return 0; 1149ret:
1150 trace_signal_generate(sig, info, t, group, result);
1151 return ret;
1146} 1152}
1147 1153
1148static int send_signal(int sig, struct siginfo *info, struct task_struct *t, 1154static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
@@ -1585,7 +1591,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
1585 int sig = q->info.si_signo; 1591 int sig = q->info.si_signo;
1586 struct sigpending *pending; 1592 struct sigpending *pending;
1587 unsigned long flags; 1593 unsigned long flags;
1588 int ret; 1594 int ret, result;
1589 1595
1590 BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); 1596 BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
1591 1597
@@ -1594,6 +1600,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
1594 goto ret; 1600 goto ret;
1595 1601
1596 ret = 1; /* the signal is ignored */ 1602 ret = 1; /* the signal is ignored */
1603 result = TRACE_SIGNAL_IGNORED;
1597 if (!prepare_signal(sig, t, 0)) 1604 if (!prepare_signal(sig, t, 0))
1598 goto out; 1605 goto out;
1599 1606
@@ -1605,6 +1612,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
1605 */ 1612 */
1606 BUG_ON(q->info.si_code != SI_TIMER); 1613 BUG_ON(q->info.si_code != SI_TIMER);
1607 q->info.si_overrun++; 1614 q->info.si_overrun++;
1615 result = TRACE_SIGNAL_ALREADY_PENDING;
1608 goto out; 1616 goto out;
1609 } 1617 }
1610 q->info.si_overrun = 0; 1618 q->info.si_overrun = 0;
@@ -1614,7 +1622,9 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
1614 list_add_tail(&q->list, &pending->list); 1622 list_add_tail(&q->list, &pending->list);
1615 sigaddset(&pending->signal, sig); 1623 sigaddset(&pending->signal, sig);
1616 complete_signal(sig, t, group); 1624 complete_signal(sig, t, group);
1625 result = TRACE_SIGNAL_DELIVERED;
1617out: 1626out:
1627 trace_signal_generate(sig, &q->info, t, group, result);
1618 unlock_task_sighand(t, &flags); 1628 unlock_task_sighand(t, &flags);
1619ret: 1629ret:
1620 return ret; 1630 return ret;
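
[Editor's note: instead of three distinct tracepoints, signal generation now fires a single trace_signal_generate() at one exit point, carrying the outcome. A sketch of the result codes it distinguishes, assuming the definitions this series puts in include/trace/events/signal.h:

        enum {
                TRACE_SIGNAL_DELIVERED,
                TRACE_SIGNAL_IGNORED,
                TRACE_SIGNAL_ALREADY_PENDING,
                TRACE_SIGNAL_OVERFLOW_FAIL,
                TRACE_SIGNAL_LOSE_INFO,
        };
]
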
diff --git a/kernel/softirq.c b/kernel/softirq.c
index c82d95a022ef..8afc6a8d4d7c 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -375,6 +375,12 @@ void raise_softirq(unsigned int nr)
375 local_irq_restore(flags); 375 local_irq_restore(flags);
376} 376}
377 377
378void __raise_softirq_irqoff(unsigned int nr)
379{
380 trace_softirq_raise(nr);
381 or_softirq_pending(1UL << nr);
382}
383
378void open_softirq(int nr, void (*action)(struct softirq_action *)) 384void open_softirq(int nr, void (*action)(struct softirq_action *))
379{ 385{
380 softirq_vec[nr].action = action; 386 softirq_vec[nr].action = action;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 683d559a0eef..867bd1dd2dd0 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -62,6 +62,8 @@
62#define FTRACE_HASH_DEFAULT_BITS 10 62#define FTRACE_HASH_DEFAULT_BITS 10
63#define FTRACE_HASH_MAX_BITS 12 63#define FTRACE_HASH_MAX_BITS 12
64 64
65#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL)
66
65/* ftrace_enabled is a method to turn ftrace on or off */ 67/* ftrace_enabled is a method to turn ftrace on or off */
66int ftrace_enabled __read_mostly; 68int ftrace_enabled __read_mostly;
67static int last_ftrace_enabled; 69static int last_ftrace_enabled;
@@ -89,12 +91,14 @@ static struct ftrace_ops ftrace_list_end __read_mostly = {
89}; 91};
90 92
91static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end; 93static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end;
94static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end;
92static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; 95static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
93ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; 96ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
94static ftrace_func_t __ftrace_trace_function_delay __read_mostly = ftrace_stub; 97static ftrace_func_t __ftrace_trace_function_delay __read_mostly = ftrace_stub;
95ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; 98ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
96ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; 99ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
97static struct ftrace_ops global_ops; 100static struct ftrace_ops global_ops;
101static struct ftrace_ops control_ops;
98 102
99static void 103static void
100ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); 104ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip);
@@ -168,6 +172,32 @@ static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
168} 172}
169#endif 173#endif
170 174
175static void control_ops_disable_all(struct ftrace_ops *ops)
176{
177 int cpu;
178
179 for_each_possible_cpu(cpu)
180 *per_cpu_ptr(ops->disabled, cpu) = 1;
181}
182
183static int control_ops_alloc(struct ftrace_ops *ops)
184{
185 int __percpu *disabled;
186
187 disabled = alloc_percpu(int);
188 if (!disabled)
189 return -ENOMEM;
190
191 ops->disabled = disabled;
192 control_ops_disable_all(ops);
193 return 0;
194}
195
196static void control_ops_free(struct ftrace_ops *ops)
197{
198 free_percpu(ops->disabled);
199}
200
171static void update_global_ops(void) 201static void update_global_ops(void)
172{ 202{
173 ftrace_func_t func; 203 ftrace_func_t func;
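
[Editor's note: "control" ops carry a per-cpu disabled counter so a tracer can switch an ops on or off per CPU without (un)registering it. A sketch of the inline toggles callers use, assuming the helpers this series adds to include/linux/ftrace.h:

        static inline void ftrace_function_local_enable(struct ftrace_ops *ops)
        {
                (*this_cpu_ptr(ops->disabled))--;       /* 0 means enabled */
        }

        static inline void ftrace_function_local_disable(struct ftrace_ops *ops)
        {
                (*this_cpu_ptr(ops->disabled))++;
        }

        static inline int ftrace_function_local_disabled(struct ftrace_ops *ops)
        {
                return *this_cpu_ptr(ops->disabled);
        }
]
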
@@ -259,6 +289,26 @@ static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
259 return 0; 289 return 0;
260} 290}
261 291
292static void add_ftrace_list_ops(struct ftrace_ops **list,
293 struct ftrace_ops *main_ops,
294 struct ftrace_ops *ops)
295{
296 int first = *list == &ftrace_list_end;
297 add_ftrace_ops(list, ops);
298 if (first)
299 add_ftrace_ops(&ftrace_ops_list, main_ops);
300}
301
302static int remove_ftrace_list_ops(struct ftrace_ops **list,
303 struct ftrace_ops *main_ops,
304 struct ftrace_ops *ops)
305{
306 int ret = remove_ftrace_ops(list, ops);
307 if (!ret && *list == &ftrace_list_end)
308 ret = remove_ftrace_ops(&ftrace_ops_list, main_ops);
309 return ret;
310}
311
262static int __register_ftrace_function(struct ftrace_ops *ops) 312static int __register_ftrace_function(struct ftrace_ops *ops)
263{ 313{
264 if (ftrace_disabled) 314 if (ftrace_disabled)
@@ -270,15 +320,20 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
270 if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED)) 320 if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED))
271 return -EBUSY; 321 return -EBUSY;
272 322
323 /* We don't support both control and global flags set. */
324 if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
325 return -EINVAL;
326
273 if (!core_kernel_data((unsigned long)ops)) 327 if (!core_kernel_data((unsigned long)ops))
274 ops->flags |= FTRACE_OPS_FL_DYNAMIC; 328 ops->flags |= FTRACE_OPS_FL_DYNAMIC;
275 329
276 if (ops->flags & FTRACE_OPS_FL_GLOBAL) { 330 if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
277 int first = ftrace_global_list == &ftrace_list_end; 331 add_ftrace_list_ops(&ftrace_global_list, &global_ops, ops);
278 add_ftrace_ops(&ftrace_global_list, ops);
279 ops->flags |= FTRACE_OPS_FL_ENABLED; 332 ops->flags |= FTRACE_OPS_FL_ENABLED;
280 if (first) 333 } else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
281 add_ftrace_ops(&ftrace_ops_list, &global_ops); 334 if (control_ops_alloc(ops))
335 return -ENOMEM;
336 add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops);
282 } else 337 } else
283 add_ftrace_ops(&ftrace_ops_list, ops); 338 add_ftrace_ops(&ftrace_ops_list, ops);
284 339
@@ -302,11 +357,23 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
302 return -EINVAL; 357 return -EINVAL;
303 358
304 if (ops->flags & FTRACE_OPS_FL_GLOBAL) { 359 if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
305 ret = remove_ftrace_ops(&ftrace_global_list, ops); 360 ret = remove_ftrace_list_ops(&ftrace_global_list,
306 if (!ret && ftrace_global_list == &ftrace_list_end) 361 &global_ops, ops);
307 ret = remove_ftrace_ops(&ftrace_ops_list, &global_ops);
308 if (!ret) 362 if (!ret)
309 ops->flags &= ~FTRACE_OPS_FL_ENABLED; 363 ops->flags &= ~FTRACE_OPS_FL_ENABLED;
364 } else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
365 ret = remove_ftrace_list_ops(&ftrace_control_list,
366 &control_ops, ops);
367 if (!ret) {
368 /*
369 * The ftrace_ops is now removed from the list,
370 * so there'll be no new users. We must ensure
371 * all current users are done before we free
372 * the control data.
373 */
374 synchronize_sched();
375 control_ops_free(ops);
376 }
310 } else 377 } else
311 ret = remove_ftrace_ops(&ftrace_ops_list, ops); 378 ret = remove_ftrace_ops(&ftrace_ops_list, ops);
312 379
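
[Editor's note: from a user's perspective a control ops is registered like any other; only the flag differs. A hedged usage sketch — my_probe and my_ops are illustrative names:

        static void my_probe(unsigned long ip, unsigned long parent_ip)
        {
                /* runs only on CPUs where the ops is locally enabled */
        }

        static struct ftrace_ops my_ops = {
                .func  = my_probe,
                .flags = FTRACE_OPS_FL_CONTROL,
        };

        /* register_ftrace_function(&my_ops) allocates ops->disabled with
         * every CPU disabled (see control_ops_alloc() above); unregistering
         * waits out an RCU-sched grace period before freeing that data. */
]
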
@@ -1119,6 +1186,12 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash)
1119 call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu); 1186 call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu);
1120} 1187}
1121 1188
1189void ftrace_free_filter(struct ftrace_ops *ops)
1190{
1191 free_ftrace_hash(ops->filter_hash);
1192 free_ftrace_hash(ops->notrace_hash);
1193}
1194
1122static struct ftrace_hash *alloc_ftrace_hash(int size_bits) 1195static struct ftrace_hash *alloc_ftrace_hash(int size_bits)
1123{ 1196{
1124 struct ftrace_hash *hash; 1197 struct ftrace_hash *hash;
@@ -1129,7 +1202,7 @@ static struct ftrace_hash *alloc_ftrace_hash(int size_bits)
1129 return NULL; 1202 return NULL;
1130 1203
1131 size = 1 << size_bits; 1204 size = 1 << size_bits;
1132 hash->buckets = kzalloc(sizeof(*hash->buckets) * size, GFP_KERNEL); 1205 hash->buckets = kcalloc(size, sizeof(*hash->buckets), GFP_KERNEL);
1133 1206
1134 if (!hash->buckets) { 1207 if (!hash->buckets) {
1135 kfree(hash); 1208 kfree(hash);
@@ -3146,8 +3219,10 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
3146 mutex_lock(&ftrace_regex_lock); 3219 mutex_lock(&ftrace_regex_lock);
3147 if (reset) 3220 if (reset)
3148 ftrace_filter_reset(hash); 3221 ftrace_filter_reset(hash);
3149 if (buf) 3222 if (buf && !ftrace_match_records(hash, buf, len)) {
3150 ftrace_match_records(hash, buf, len); 3223 ret = -EINVAL;
3224 goto out_regex_unlock;
3225 }
3151 3226
3152 mutex_lock(&ftrace_lock); 3227 mutex_lock(&ftrace_lock);
3153 ret = ftrace_hash_move(ops, enable, orig_hash, hash); 3228 ret = ftrace_hash_move(ops, enable, orig_hash, hash);
@@ -3157,6 +3232,7 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
3157 3232
3158 mutex_unlock(&ftrace_lock); 3233 mutex_unlock(&ftrace_lock);
3159 3234
3235 out_regex_unlock:
3160 mutex_unlock(&ftrace_regex_lock); 3236 mutex_unlock(&ftrace_regex_lock);
3161 3237
3162 free_ftrace_hash(hash); 3238 free_ftrace_hash(hash);
@@ -3173,10 +3249,10 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
3173 * Filters denote which functions should be enabled when tracing is enabled. 3249 * Filters denote which functions should be enabled when tracing is enabled.
3174 * If @buf is NULL and reset is set, all functions will be enabled for tracing. 3250 * If @buf is NULL and reset is set, all functions will be enabled for tracing.
3175 */ 3251 */
3176void ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, 3252int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf,
3177 int len, int reset) 3253 int len, int reset)
3178{ 3254{
3179 ftrace_set_regex(ops, buf, len, reset, 1); 3255 return ftrace_set_regex(ops, buf, len, reset, 1);
3180} 3256}
3181EXPORT_SYMBOL_GPL(ftrace_set_filter); 3257EXPORT_SYMBOL_GPL(ftrace_set_filter);
3182 3258
@@ -3191,10 +3267,10 @@ EXPORT_SYMBOL_GPL(ftrace_set_filter);
3191 * is enabled. If @buf is NULL and reset is set, all functions will be enabled 3267 * is enabled. If @buf is NULL and reset is set, all functions will be enabled
3192 * for tracing. 3268 * for tracing.
3193 */ 3269 */
3194void ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, 3270int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
3195 int len, int reset) 3271 int len, int reset)
3196{ 3272{
3197 ftrace_set_regex(ops, buf, len, reset, 0); 3273 return ftrace_set_regex(ops, buf, len, reset, 0);
3198} 3274}
3199EXPORT_SYMBOL_GPL(ftrace_set_notrace); 3275EXPORT_SYMBOL_GPL(ftrace_set_notrace);
3200/** 3276/**
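
[Editor's note: since ftrace_set_filter()/ftrace_set_notrace() now return an error when the pattern matches nothing, callers can stop silently tracing everything (or nothing). An illustrative check, reusing the hypothetical my_ops from the sketch above:

        ret = ftrace_set_filter(&my_ops, "schedule", strlen("schedule"), 1);
        if (ret)        /* e.g. -EINVAL: no function matched the pattern */
                return ret;
]
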
@@ -3871,6 +3947,36 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
3871#endif /* CONFIG_DYNAMIC_FTRACE */ 3947#endif /* CONFIG_DYNAMIC_FTRACE */
3872 3948
3873static void 3949static void
3950ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip)
3951{
3952 struct ftrace_ops *op;
3953
3954 if (unlikely(trace_recursion_test(TRACE_CONTROL_BIT)))
3955 return;
3956
3957 /*
3958 * Some of the ops may be dynamically allocated,
3959 * they must be freed after a synchronize_sched().
3960 */
3961 preempt_disable_notrace();
3962 trace_recursion_set(TRACE_CONTROL_BIT);
3963 op = rcu_dereference_raw(ftrace_control_list);
3964 while (op != &ftrace_list_end) {
3965 if (!ftrace_function_local_disabled(op) &&
3966 ftrace_ops_test(op, ip))
3967 op->func(ip, parent_ip);
3968
3969 op = rcu_dereference_raw(op->next);
3970 };
3971 trace_recursion_clear(TRACE_CONTROL_BIT);
3972 preempt_enable_notrace();
3973}
3974
3975static struct ftrace_ops control_ops = {
3976 .func = ftrace_ops_control_func,
3977};
3978
3979static void
3874ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) 3980ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip)
3875{ 3981{
3876 struct ftrace_ops *op; 3982 struct ftrace_ops *op;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a3f1bc5d2a00..10d5503f0d04 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2764,12 +2764,12 @@ static const char readme_msg[] =
2764 "tracing mini-HOWTO:\n\n" 2764 "tracing mini-HOWTO:\n\n"
2765 "# mount -t debugfs nodev /sys/kernel/debug\n\n" 2765 "# mount -t debugfs nodev /sys/kernel/debug\n\n"
2766 "# cat /sys/kernel/debug/tracing/available_tracers\n" 2766 "# cat /sys/kernel/debug/tracing/available_tracers\n"
2767 "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n" 2767 "wakeup wakeup_rt preemptirqsoff preemptoff irqsoff function nop\n\n"
2768 "# cat /sys/kernel/debug/tracing/current_tracer\n" 2768 "# cat /sys/kernel/debug/tracing/current_tracer\n"
2769 "nop\n" 2769 "nop\n"
2770 "# echo sched_switch > /sys/kernel/debug/tracing/current_tracer\n" 2770 "# echo wakeup > /sys/kernel/debug/tracing/current_tracer\n"
2771 "# cat /sys/kernel/debug/tracing/current_tracer\n" 2771 "# cat /sys/kernel/debug/tracing/current_tracer\n"
2772 "sched_switch\n" 2772 "wakeup\n"
2773 "# cat /sys/kernel/debug/tracing/trace_options\n" 2773 "# cat /sys/kernel/debug/tracing/trace_options\n"
2774 "noprint-parent nosym-offset nosym-addr noverbose\n" 2774 "noprint-parent nosym-offset nosym-addr noverbose\n"
2775 "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n" 2775 "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n"
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index b93ecbadad6d..54faec790bc1 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -56,17 +56,23 @@ enum trace_type {
56#define F_STRUCT(args...) args 56#define F_STRUCT(args...) args
57 57
58#undef FTRACE_ENTRY 58#undef FTRACE_ENTRY
59#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ 59#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \
60 struct struct_name { \ 60 struct struct_name { \
61 struct trace_entry ent; \ 61 struct trace_entry ent; \
62 tstruct \ 62 tstruct \
63 } 63 }
64 64
65#undef TP_ARGS 65#undef TP_ARGS
66#define TP_ARGS(args...) args 66#define TP_ARGS(args...) args
67 67
68#undef FTRACE_ENTRY_DUP 68#undef FTRACE_ENTRY_DUP
69#define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk) 69#define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk, filter)
70
71#undef FTRACE_ENTRY_REG
72#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, \
73 filter, regfn) \
74 FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
75 filter)
70 76
71#include "trace_entries.h" 77#include "trace_entries.h"
72 78
@@ -288,6 +294,8 @@ struct tracer {
288/* for function tracing recursion */ 294/* for function tracing recursion */
289#define TRACE_INTERNAL_BIT (1<<11) 295#define TRACE_INTERNAL_BIT (1<<11)
290#define TRACE_GLOBAL_BIT (1<<12) 296#define TRACE_GLOBAL_BIT (1<<12)
297#define TRACE_CONTROL_BIT (1<<13)
298
291/* 299/*
292 * Abuse of the trace_recursion. 300 * Abuse of the trace_recursion.
293 * As we need a way to maintain state if we are tracing the function 301 * As we need a way to maintain state if we are tracing the function
@@ -589,6 +597,8 @@ static inline int ftrace_trace_task(struct task_struct *task)
589static inline int ftrace_is_dead(void) { return 0; } 597static inline int ftrace_is_dead(void) { return 0; }
590#endif 598#endif
591 599
600int ftrace_event_is_function(struct ftrace_event_call *call);
601
592/* 602/*
593 * struct trace_parser - servers for reading the user input separated by spaces 603 * struct trace_parser - servers for reading the user input separated by spaces
594 * @cont: set if the input is not complete - no final space char was found 604 * @cont: set if the input is not complete - no final space char was found
@@ -766,9 +776,7 @@ struct filter_pred {
766 u64 val; 776 u64 val;
767 struct regex regex; 777 struct regex regex;
768 unsigned short *ops; 778 unsigned short *ops;
769#ifdef CONFIG_FTRACE_STARTUP_TEST
770 struct ftrace_event_field *field; 779 struct ftrace_event_field *field;
771#endif
772 int offset; 780 int offset;
773 int not; 781 int not;
774 int op; 782 int op;
@@ -818,12 +826,22 @@ extern const char *__start___trace_bprintk_fmt[];
818extern const char *__stop___trace_bprintk_fmt[]; 826extern const char *__stop___trace_bprintk_fmt[];
819 827
820#undef FTRACE_ENTRY 828#undef FTRACE_ENTRY
821#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \ 829#define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \
822 extern struct ftrace_event_call \ 830 extern struct ftrace_event_call \
823 __attribute__((__aligned__(4))) event_##call; 831 __attribute__((__aligned__(4))) event_##call;
824#undef FTRACE_ENTRY_DUP 832#undef FTRACE_ENTRY_DUP
825#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \ 833#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print, filter) \
826 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) 834 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \
835 filter)
827#include "trace_entries.h" 836#include "trace_entries.h"
828 837
838#ifdef CONFIG_PERF_EVENTS
839#ifdef CONFIG_FUNCTION_TRACER
840int perf_ftrace_event_register(struct ftrace_event_call *call,
841 enum trace_reg type, void *data);
842#else
843#define perf_ftrace_event_register NULL
844#endif /* CONFIG_FUNCTION_TRACER */
845#endif /* CONFIG_PERF_EVENTS */
846
829#endif /* _LINUX_KERNEL_TRACE_H */ 847#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index 93365907f219..d91eb0541b3a 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -55,7 +55,7 @@
55/* 55/*
56 * Function trace entry - function address and parent function address: 56 * Function trace entry - function address and parent function address:
57 */ 57 */
58FTRACE_ENTRY(function, ftrace_entry, 58FTRACE_ENTRY_REG(function, ftrace_entry,
59 59
60 TRACE_FN, 60 TRACE_FN,
61 61
@@ -64,7 +64,11 @@ FTRACE_ENTRY(function, ftrace_entry,
64 __field( unsigned long, parent_ip ) 64 __field( unsigned long, parent_ip )
65 ), 65 ),
66 66
67 F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip) 67 F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip),
68
69 FILTER_TRACE_FN,
70
71 perf_ftrace_event_register
68); 72);
69 73
70/* Function call entry */ 74/* Function call entry */
@@ -78,7 +82,9 @@ FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry,
78 __field_desc( int, graph_ent, depth ) 82 __field_desc( int, graph_ent, depth )
79 ), 83 ),
80 84
81 F_printk("--> %lx (%d)", __entry->func, __entry->depth) 85 F_printk("--> %lx (%d)", __entry->func, __entry->depth),
86
87 FILTER_OTHER
82); 88);
83 89
84/* Function return entry */ 90/* Function return entry */
@@ -98,7 +104,9 @@ FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry,
98 F_printk("<-- %lx (%d) (start: %llx end: %llx) over: %d", 104 F_printk("<-- %lx (%d) (start: %llx end: %llx) over: %d",
99 __entry->func, __entry->depth, 105 __entry->func, __entry->depth,
100 __entry->calltime, __entry->rettime, 106 __entry->calltime, __entry->rettime,
101 __entry->depth) 107 __entry->depth),
108
109 FILTER_OTHER
102); 110);
103 111
104/* 112/*
@@ -127,8 +135,9 @@ FTRACE_ENTRY(context_switch, ctx_switch_entry,
127 F_printk("%u:%u:%u ==> %u:%u:%u [%03u]", 135 F_printk("%u:%u:%u ==> %u:%u:%u [%03u]",
128 __entry->prev_pid, __entry->prev_prio, __entry->prev_state, 136 __entry->prev_pid, __entry->prev_prio, __entry->prev_state,
129 __entry->next_pid, __entry->next_prio, __entry->next_state, 137 __entry->next_pid, __entry->next_prio, __entry->next_state,
130 __entry->next_cpu 138 __entry->next_cpu),
131 ) 139
140 FILTER_OTHER
132); 141);
133 142
134/* 143/*
@@ -146,8 +155,9 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
146 F_printk("%u:%u:%u ==+ %u:%u:%u [%03u]", 155 F_printk("%u:%u:%u ==+ %u:%u:%u [%03u]",
147 __entry->prev_pid, __entry->prev_prio, __entry->prev_state, 156 __entry->prev_pid, __entry->prev_prio, __entry->prev_state,
148 __entry->next_pid, __entry->next_prio, __entry->next_state, 157 __entry->next_pid, __entry->next_prio, __entry->next_state,
149 __entry->next_cpu 158 __entry->next_cpu),
150 ) 159
160 FILTER_OTHER
151); 161);
152 162
153/* 163/*
@@ -169,7 +179,9 @@ FTRACE_ENTRY(kernel_stack, stack_entry,
169 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", 179 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n",
170 __entry->caller[0], __entry->caller[1], __entry->caller[2], 180 __entry->caller[0], __entry->caller[1], __entry->caller[2],
171 __entry->caller[3], __entry->caller[4], __entry->caller[5], 181 __entry->caller[3], __entry->caller[4], __entry->caller[5],
172 __entry->caller[6], __entry->caller[7]) 182 __entry->caller[6], __entry->caller[7]),
183
184 FILTER_OTHER
173); 185);
174 186
175FTRACE_ENTRY(user_stack, userstack_entry, 187FTRACE_ENTRY(user_stack, userstack_entry,
@@ -185,7 +197,9 @@ FTRACE_ENTRY(user_stack, userstack_entry,
185 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", 197 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n",
186 __entry->caller[0], __entry->caller[1], __entry->caller[2], 198 __entry->caller[0], __entry->caller[1], __entry->caller[2],
187 __entry->caller[3], __entry->caller[4], __entry->caller[5], 199 __entry->caller[3], __entry->caller[4], __entry->caller[5],
188 __entry->caller[6], __entry->caller[7]) 200 __entry->caller[6], __entry->caller[7]),
201
202 FILTER_OTHER
189); 203);
190 204
191/* 205/*
@@ -202,7 +216,9 @@ FTRACE_ENTRY(bprint, bprint_entry,
202 ), 216 ),
203 217
204 F_printk("%08lx fmt:%p", 218 F_printk("%08lx fmt:%p",
205 __entry->ip, __entry->fmt) 219 __entry->ip, __entry->fmt),
220
221 FILTER_OTHER
206); 222);
207 223
208FTRACE_ENTRY(print, print_entry, 224FTRACE_ENTRY(print, print_entry,
@@ -215,7 +231,9 @@ FTRACE_ENTRY(print, print_entry,
215 ), 231 ),
216 232
217 F_printk("%08lx %s", 233 F_printk("%08lx %s",
218 __entry->ip, __entry->buf) 234 __entry->ip, __entry->buf),
235
236 FILTER_OTHER
219); 237);
220 238
221FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw, 239FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw,
@@ -234,7 +252,9 @@ FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw,
234 252
235 F_printk("%lx %lx %lx %d %x %x", 253 F_printk("%lx %lx %lx %d %x %x",
236 (unsigned long)__entry->phys, __entry->value, __entry->pc, 254 (unsigned long)__entry->phys, __entry->value, __entry->pc,
237 __entry->map_id, __entry->opcode, __entry->width) 255 __entry->map_id, __entry->opcode, __entry->width),
256
257 FILTER_OTHER
238); 258);
239 259
240FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map, 260FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
@@ -252,7 +272,9 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
252 272
253 F_printk("%lx %lx %lx %d %x", 273 F_printk("%lx %lx %lx %d %x",
254 (unsigned long)__entry->phys, __entry->virt, __entry->len, 274 (unsigned long)__entry->phys, __entry->virt, __entry->len,
255 __entry->map_id, __entry->opcode) 275 __entry->map_id, __entry->opcode),
276
277 FILTER_OTHER
256); 278);
257 279
258 280
@@ -272,6 +294,8 @@ FTRACE_ENTRY(branch, trace_branch,
272 294
273 F_printk("%u:%s:%s (%u)", 295 F_printk("%u:%s:%s (%u)",
274 __entry->line, 296 __entry->line,
275 __entry->func, __entry->file, __entry->correct) 297 __entry->func, __entry->file, __entry->correct),
298
299 FILTER_OTHER
276); 300);
277 301
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 19a359d5e6d5..fee3752ae8f6 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -24,6 +24,11 @@ static int total_ref_count;
24static int perf_trace_event_perm(struct ftrace_event_call *tp_event, 24static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
25 struct perf_event *p_event) 25 struct perf_event *p_event)
26{ 26{
27 /* The ftrace function trace is allowed only for root. */
28 if (ftrace_event_is_function(tp_event) &&
29 perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
30 return -EPERM;
31
27 /* No tracing, just counting, so no obvious leak */ 32 /* No tracing, just counting, so no obvious leak */
28 if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW)) 33 if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
29 return 0; 34 return 0;
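
[Editor's note: function-trace events can stream arbitrary kernel IPs to user space, hence the root-only gate above. The predicate itself is trivial; a sketch assuming the definition this series adds in kernel/trace/trace_export.c:

        int ftrace_event_is_function(struct ftrace_event_call *call)
        {
                return call == &event_function;
        }
]
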
@@ -44,23 +49,17 @@ static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
44 return 0; 49 return 0;
45} 50}
46 51
47static int perf_trace_event_init(struct ftrace_event_call *tp_event, 52static int perf_trace_event_reg(struct ftrace_event_call *tp_event,
48 struct perf_event *p_event) 53 struct perf_event *p_event)
49{ 54{
50 struct hlist_head __percpu *list; 55 struct hlist_head __percpu *list;
51 int ret; 56 int ret = -ENOMEM;
52 int cpu; 57 int cpu;
53 58
54 ret = perf_trace_event_perm(tp_event, p_event);
55 if (ret)
56 return ret;
57
58 p_event->tp_event = tp_event; 59 p_event->tp_event = tp_event;
59 if (tp_event->perf_refcount++ > 0) 60 if (tp_event->perf_refcount++ > 0)
60 return 0; 61 return 0;
61 62
62 ret = -ENOMEM;
63
64 list = alloc_percpu(struct hlist_head); 63 list = alloc_percpu(struct hlist_head);
65 if (!list) 64 if (!list)
66 goto fail; 65 goto fail;
@@ -83,7 +82,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,
83 } 82 }
84 } 83 }
85 84
86 ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER); 85 ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER, NULL);
87 if (ret) 86 if (ret)
88 goto fail; 87 goto fail;
89 88
@@ -108,6 +107,69 @@ fail:
108 return ret; 107 return ret;
109} 108}
110 109
110static void perf_trace_event_unreg(struct perf_event *p_event)
111{
112 struct ftrace_event_call *tp_event = p_event->tp_event;
113 int i;
114
115 if (--tp_event->perf_refcount > 0)
116 goto out;
117
118 tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER, NULL);
119
120 /*
121 * Ensure our callback won't be called anymore. The buffers
122 * will be freed after that.
123 */
124 tracepoint_synchronize_unregister();
125
126 free_percpu(tp_event->perf_events);
127 tp_event->perf_events = NULL;
128
129 if (!--total_ref_count) {
130 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
131 free_percpu(perf_trace_buf[i]);
132 perf_trace_buf[i] = NULL;
133 }
134 }
135out:
136 module_put(tp_event->mod);
137}
138
139static int perf_trace_event_open(struct perf_event *p_event)
140{
141 struct ftrace_event_call *tp_event = p_event->tp_event;
142 return tp_event->class->reg(tp_event, TRACE_REG_PERF_OPEN, p_event);
143}
144
145static void perf_trace_event_close(struct perf_event *p_event)
146{
147 struct ftrace_event_call *tp_event = p_event->tp_event;
148 tp_event->class->reg(tp_event, TRACE_REG_PERF_CLOSE, p_event);
149}
150
151static int perf_trace_event_init(struct ftrace_event_call *tp_event,
152 struct perf_event *p_event)
153{
154 int ret;
155
156 ret = perf_trace_event_perm(tp_event, p_event);
157 if (ret)
158 return ret;
159
160 ret = perf_trace_event_reg(tp_event, p_event);
161 if (ret)
162 return ret;
163
164 ret = perf_trace_event_open(p_event);
165 if (ret) {
166 perf_trace_event_unreg(p_event);
167 return ret;
168 }
169
170 return 0;
171}
172
111int perf_trace_init(struct perf_event *p_event) 173int perf_trace_init(struct perf_event *p_event)
112{ 174{
113 struct ftrace_event_call *tp_event; 175 struct ftrace_event_call *tp_event;
@@ -130,6 +192,14 @@ int perf_trace_init(struct perf_event *p_event)
130 return ret; 192 return ret;
131} 193}
132 194
195void perf_trace_destroy(struct perf_event *p_event)
196{
197 mutex_lock(&event_mutex);
198 perf_trace_event_close(p_event);
199 perf_trace_event_unreg(p_event);
200 mutex_unlock(&event_mutex);
201}
202
133int perf_trace_add(struct perf_event *p_event, int flags) 203int perf_trace_add(struct perf_event *p_event, int flags)
134{ 204{
135 struct ftrace_event_call *tp_event = p_event->tp_event; 205 struct ftrace_event_call *tp_event = p_event->tp_event;
@@ -146,43 +216,14 @@ int perf_trace_add(struct perf_event *p_event, int flags)
146 list = this_cpu_ptr(pcpu_list); 216 list = this_cpu_ptr(pcpu_list);
147 hlist_add_head_rcu(&p_event->hlist_entry, list); 217 hlist_add_head_rcu(&p_event->hlist_entry, list);
148 218
149 return 0; 219 return tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event);
150} 220}
151 221
152void perf_trace_del(struct perf_event *p_event, int flags) 222void perf_trace_del(struct perf_event *p_event, int flags)
153{ 223{
154 hlist_del_rcu(&p_event->hlist_entry);
155}
156
157void perf_trace_destroy(struct perf_event *p_event)
158{
159 struct ftrace_event_call *tp_event = p_event->tp_event; 224 struct ftrace_event_call *tp_event = p_event->tp_event;
160 int i; 225 hlist_del_rcu(&p_event->hlist_entry);
161 226 tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event);
162 mutex_lock(&event_mutex);
163 if (--tp_event->perf_refcount > 0)
164 goto out;
165
166 tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
167
168 /*
169 * Ensure our callback won't be called anymore. The buffers
170 * will be freed after that.
171 */
172 tracepoint_synchronize_unregister();
173
174 free_percpu(tp_event->perf_events);
175 tp_event->perf_events = NULL;
176
177 if (!--total_ref_count) {
178 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
179 free_percpu(perf_trace_buf[i]);
180 perf_trace_buf[i] = NULL;
181 }
182 }
183out:
184 module_put(tp_event->mod);
185 mutex_unlock(&event_mutex);
186} 227}
187 228
188__kprobes void *perf_trace_buf_prepare(int size, unsigned short type, 229__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
@@ -214,3 +255,86 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
214 return raw_data; 255 return raw_data;
215} 256}
216EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); 257EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
258
259#ifdef CONFIG_FUNCTION_TRACER
260static void
261perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip)
262{
263 struct ftrace_entry *entry;
264 struct hlist_head *head;
265 struct pt_regs regs;
266 int rctx;
267
268#define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \
269 sizeof(u64)) - sizeof(u32))
270
271 BUILD_BUG_ON(ENTRY_SIZE > PERF_MAX_TRACE_SIZE);
272
273 perf_fetch_caller_regs(&regs);
274
275 entry = perf_trace_buf_prepare(ENTRY_SIZE, TRACE_FN, NULL, &rctx);
276 if (!entry)
277 return;
278
279 entry->ip = ip;
280 entry->parent_ip = parent_ip;
281
282 head = this_cpu_ptr(event_function.perf_events);
283 perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0,
284 1, &regs, head);
285
286#undef ENTRY_SIZE
287}
288
289static int perf_ftrace_function_register(struct perf_event *event)
290{
291 struct ftrace_ops *ops = &event->ftrace_ops;
292
293 ops->flags |= FTRACE_OPS_FL_CONTROL;
294 ops->func = perf_ftrace_function_call;
295 return register_ftrace_function(ops);
296}
297
298static int perf_ftrace_function_unregister(struct perf_event *event)
299{
300 struct ftrace_ops *ops = &event->ftrace_ops;
301 int ret = unregister_ftrace_function(ops);
302 ftrace_free_filter(ops);
303 return ret;
304}
305
306static void perf_ftrace_function_enable(struct perf_event *event)
307{
308 ftrace_function_local_enable(&event->ftrace_ops);
309}
310
311static void perf_ftrace_function_disable(struct perf_event *event)
312{
313 ftrace_function_local_disable(&event->ftrace_ops);
314}
315
316int perf_ftrace_event_register(struct ftrace_event_call *call,
317 enum trace_reg type, void *data)
318{
319 switch (type) {
320 case TRACE_REG_REGISTER:
321 case TRACE_REG_UNREGISTER:
322 break;
323 case TRACE_REG_PERF_REGISTER:
324 case TRACE_REG_PERF_UNREGISTER:
325 return 0;
326 case TRACE_REG_PERF_OPEN:
327 return perf_ftrace_function_register(data);
328 case TRACE_REG_PERF_CLOSE:
329 return perf_ftrace_function_unregister(data);
330 case TRACE_REG_PERF_ADD:
331 perf_ftrace_function_enable(data);
332 return 0;
333 case TRACE_REG_PERF_DEL:
334 perf_ftrace_function_disable(data);
335 return 0;
336 }
337
338 return -EINVAL;
339}
340#endif /* CONFIG_FUNCTION_TRACER */
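Taken together, the trace_event_perf.c changes split the old init/destroy pair into reg/unreg plus open/close and add/del stages, all funneled through the event class ->reg() callback, and (under CONFIG_FUNCTION_TRACER) expose the function tracer as a perf event. Assuming the 'ftrace:function' event name exported by the tracer, a plausible invocation (root-only, as enforced by perf_trace_event_perm() above) would be:

   # hypothetical usage sketch; requires CONFIG_FUNCTION_TRACER
   perf record -e ftrace:function -a -- sleep 1
   perf report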
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index c212a7f934ec..079a93ae8a9d 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -147,7 +147,8 @@ int trace_event_raw_init(struct ftrace_event_call *call)
147} 147}
148EXPORT_SYMBOL_GPL(trace_event_raw_init); 148EXPORT_SYMBOL_GPL(trace_event_raw_init);
149 149
150int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type) 150int ftrace_event_reg(struct ftrace_event_call *call,
151 enum trace_reg type, void *data)
151{ 152{
152 switch (type) { 153 switch (type) {
153 case TRACE_REG_REGISTER: 154 case TRACE_REG_REGISTER:
@@ -170,6 +171,11 @@ int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type)
170 call->class->perf_probe, 171 call->class->perf_probe,
171 call); 172 call);
172 return 0; 173 return 0;
174 case TRACE_REG_PERF_OPEN:
175 case TRACE_REG_PERF_CLOSE:
176 case TRACE_REG_PERF_ADD:
177 case TRACE_REG_PERF_DEL:
178 return 0;
173#endif 179#endif
174 } 180 }
175 return 0; 181 return 0;
@@ -209,7 +215,7 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,
209 tracing_stop_cmdline_record(); 215 tracing_stop_cmdline_record();
210 call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD; 216 call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD;
211 } 217 }
212 call->class->reg(call, TRACE_REG_UNREGISTER); 218 call->class->reg(call, TRACE_REG_UNREGISTER, NULL);
213 } 219 }
214 break; 220 break;
215 case 1: 221 case 1:
@@ -218,7 +224,7 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,
218 tracing_start_cmdline_record(); 224 tracing_start_cmdline_record();
219 call->flags |= TRACE_EVENT_FL_RECORDED_CMD; 225 call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
220 } 226 }
221 ret = call->class->reg(call, TRACE_REG_REGISTER); 227 ret = call->class->reg(call, TRACE_REG_REGISTER, NULL);
222 if (ret) { 228 if (ret) {
223 tracing_stop_cmdline_record(); 229 tracing_stop_cmdline_record();
224 pr_info("event trace: Could not enable event " 230 pr_info("event trace: Could not enable event "
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 24aee7127451..431dba8b7542 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -81,6 +81,7 @@ enum {
81 FILT_ERR_TOO_MANY_PREDS, 81 FILT_ERR_TOO_MANY_PREDS,
82 FILT_ERR_MISSING_FIELD, 82 FILT_ERR_MISSING_FIELD,
83 FILT_ERR_INVALID_FILTER, 83 FILT_ERR_INVALID_FILTER,
84 FILT_ERR_IP_FIELD_ONLY,
84}; 85};
85 86
86static char *err_text[] = { 87static char *err_text[] = {
@@ -96,6 +97,7 @@ static char *err_text[] = {
96 "Too many terms in predicate expression", 97 "Too many terms in predicate expression",
97 "Missing field name and/or value", 98 "Missing field name and/or value",
98 "Meaningless filter expression", 99 "Meaningless filter expression",
100 "Only 'ip' field is supported for function trace",
99}; 101};
100 102
101struct opstack_op { 103struct opstack_op {
@@ -685,7 +687,7 @@ find_event_field(struct ftrace_event_call *call, char *name)
685 687
686static int __alloc_pred_stack(struct pred_stack *stack, int n_preds) 688static int __alloc_pred_stack(struct pred_stack *stack, int n_preds)
687{ 689{
688 stack->preds = kzalloc(sizeof(*stack->preds)*(n_preds + 1), GFP_KERNEL); 690 stack->preds = kcalloc(n_preds + 1, sizeof(*stack->preds), GFP_KERNEL);
689 if (!stack->preds) 691 if (!stack->preds)
690 return -ENOMEM; 692 return -ENOMEM;
691 stack->index = n_preds; 693 stack->index = n_preds;
@@ -826,8 +828,7 @@ static int __alloc_preds(struct event_filter *filter, int n_preds)
826 if (filter->preds) 828 if (filter->preds)
827 __free_preds(filter); 829 __free_preds(filter);
828 830
829 filter->preds = 831 filter->preds = kcalloc(n_preds, sizeof(*filter->preds), GFP_KERNEL);
830 kzalloc(sizeof(*filter->preds) * n_preds, GFP_KERNEL);
831 832
832 if (!filter->preds) 833 if (!filter->preds)
833 return -ENOMEM; 834 return -ENOMEM;
@@ -900,6 +901,11 @@ int filter_assign_type(const char *type)
900 return FILTER_OTHER; 901 return FILTER_OTHER;
901} 902}
902 903
904static bool is_function_field(struct ftrace_event_field *field)
905{
906 return field->filter_type == FILTER_TRACE_FN;
907}
908
903static bool is_string_field(struct ftrace_event_field *field) 909static bool is_string_field(struct ftrace_event_field *field)
904{ 910{
905 return field->filter_type == FILTER_DYN_STRING || 911 return field->filter_type == FILTER_DYN_STRING ||
@@ -987,6 +993,11 @@ static int init_pred(struct filter_parse_state *ps,
987 fn = filter_pred_strloc; 993 fn = filter_pred_strloc;
988 else 994 else
989 fn = filter_pred_pchar; 995 fn = filter_pred_pchar;
996 } else if (is_function_field(field)) {
997 if (strcmp(field->name, "ip")) {
998 parse_error(ps, FILT_ERR_IP_FIELD_ONLY, 0);
999 return -EINVAL;
1000 }
990 } else { 1001 } else {
991 if (field->is_signed) 1002 if (field->is_signed)
992 ret = strict_strtoll(pred->regex.pattern, 0, &val); 1003 ret = strict_strtoll(pred->regex.pattern, 0, &val);
@@ -1334,10 +1345,7 @@ static struct filter_pred *create_pred(struct filter_parse_state *ps,
1334 1345
1335 strcpy(pred.regex.pattern, operand2); 1346 strcpy(pred.regex.pattern, operand2);
1336 pred.regex.len = strlen(pred.regex.pattern); 1347 pred.regex.len = strlen(pred.regex.pattern);
1337
1338#ifdef CONFIG_FTRACE_STARTUP_TEST
1339 pred.field = field; 1348 pred.field = field;
1340#endif
1341 return init_pred(ps, field, &pred) ? NULL : &pred; 1349 return init_pred(ps, field, &pred) ? NULL : &pred;
1342} 1350}
1343 1351
@@ -1486,7 +1494,7 @@ static int fold_pred(struct filter_pred *preds, struct filter_pred *root)
1486 children = count_leafs(preds, &preds[root->left]); 1494 children = count_leafs(preds, &preds[root->left]);
1487 children += count_leafs(preds, &preds[root->right]); 1495 children += count_leafs(preds, &preds[root->right]);
1488 1496
1489 root->ops = kzalloc(sizeof(*root->ops) * children, GFP_KERNEL); 1497 root->ops = kcalloc(children, sizeof(*root->ops), GFP_KERNEL);
1490 if (!root->ops) 1498 if (!root->ops)
1491 return -ENOMEM; 1499 return -ENOMEM;
1492 1500
@@ -1950,6 +1958,148 @@ void ftrace_profile_free_filter(struct perf_event *event)
1950 __free_filter(filter); 1958 __free_filter(filter);
1951} 1959}
1952 1960
1961struct function_filter_data {
1962 struct ftrace_ops *ops;
1963 int first_filter;
1964 int first_notrace;
1965};
1966
1967#ifdef CONFIG_FUNCTION_TRACER
1968static char **
1969ftrace_function_filter_re(char *buf, int len, int *count)
1970{
1971 char *str, *sep, **re;
1972
1973 str = kstrndup(buf, len, GFP_KERNEL);
1974 if (!str)
1975 return NULL;
1976
1977 /*
1978 * The argv_split function takes white space
1979 * as a separator, so convert ',' into spaces.
1980 */
1981 while ((sep = strchr(str, ',')))
1982 *sep = ' ';
1983
1984 re = argv_split(GFP_KERNEL, str, count);
1985 kfree(str);
1986 return re;
1987}
1988
1989static int ftrace_function_set_regexp(struct ftrace_ops *ops, int filter,
1990 int reset, char *re, int len)
1991{
1992 int ret;
1993
1994 if (filter)
1995 ret = ftrace_set_filter(ops, re, len, reset);
1996 else
1997 ret = ftrace_set_notrace(ops, re, len, reset);
1998
1999 return ret;
2000}
2001
2002static int __ftrace_function_set_filter(int filter, char *buf, int len,
2003 struct function_filter_data *data)
2004{
2005 int i, re_cnt, ret;
2006 int *reset;
2007 char **re;
2008
2009 reset = filter ? &data->first_filter : &data->first_notrace;
2010
2011 /*
2012 * The 'ip' field could have multiple filters set, separated
2013 * either by space or comma. We first cut the filter and apply
 2014	 * all pieces separately.
2015 */
2016 re = ftrace_function_filter_re(buf, len, &re_cnt);
2017 if (!re)
2018 return -EINVAL;
2019
2020 for (i = 0; i < re_cnt; i++) {
2021 ret = ftrace_function_set_regexp(data->ops, filter, *reset,
2022 re[i], strlen(re[i]));
2023 if (ret)
2024 break;
2025
2026 if (*reset)
2027 *reset = 0;
2028 }
2029
2030 argv_free(re);
2031 return ret;
2032}
2033
2034static int ftrace_function_check_pred(struct filter_pred *pred, int leaf)
2035{
2036 struct ftrace_event_field *field = pred->field;
2037
2038 if (leaf) {
2039 /*
2040 * Check the leaf predicate for function trace, verify:
2041 * - only '==' and '!=' is used
2042 * - the 'ip' field is used
2043 */
2044 if ((pred->op != OP_EQ) && (pred->op != OP_NE))
2045 return -EINVAL;
2046
2047 if (strcmp(field->name, "ip"))
2048 return -EINVAL;
2049 } else {
2050 /*
2051 * Check the non leaf predicate for function trace, verify:
2052 * - only '||' is used
2053 */
2054 if (pred->op != OP_OR)
2055 return -EINVAL;
2056 }
2057
2058 return 0;
2059}
2060
2061static int ftrace_function_set_filter_cb(enum move_type move,
2062 struct filter_pred *pred,
2063 int *err, void *data)
2064{
2065 /* Checking the node is valid for function trace. */
2066 if ((move != MOVE_DOWN) ||
2067 (pred->left != FILTER_PRED_INVALID)) {
2068 *err = ftrace_function_check_pred(pred, 0);
2069 } else {
2070 *err = ftrace_function_check_pred(pred, 1);
2071 if (*err)
2072 return WALK_PRED_ABORT;
2073
2074 *err = __ftrace_function_set_filter(pred->op == OP_EQ,
2075 pred->regex.pattern,
2076 pred->regex.len,
2077 data);
2078 }
2079
2080 return (*err) ? WALK_PRED_ABORT : WALK_PRED_DEFAULT;
2081}
2082
2083static int ftrace_function_set_filter(struct perf_event *event,
2084 struct event_filter *filter)
2085{
2086 struct function_filter_data data = {
2087 .first_filter = 1,
2088 .first_notrace = 1,
2089 .ops = &event->ftrace_ops,
2090 };
2091
2092 return walk_pred_tree(filter->preds, filter->root,
2093 ftrace_function_set_filter_cb, &data);
2094}
2095#else
2096static int ftrace_function_set_filter(struct perf_event *event,
2097 struct event_filter *filter)
2098{
2099 return -ENODEV;
2100}
2101#endif /* CONFIG_FUNCTION_TRACER */
2102
1953int ftrace_profile_set_filter(struct perf_event *event, int event_id, 2103int ftrace_profile_set_filter(struct perf_event *event, int event_id,
1954 char *filter_str) 2104 char *filter_str)
1955{ 2105{
@@ -1970,9 +2120,16 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
1970 goto out_unlock; 2120 goto out_unlock;
1971 2121
1972 err = create_filter(call, filter_str, false, &filter); 2122 err = create_filter(call, filter_str, false, &filter);
1973 if (!err) 2123 if (err)
1974 event->filter = filter; 2124 goto free_filter;
2125
2126 if (ftrace_event_is_function(call))
2127 err = ftrace_function_set_filter(event, filter);
1975 else 2128 else
2129 event->filter = filter;
2130
2131free_filter:
2132 if (err || ftrace_event_is_function(call))
1976 __free_filter(filter); 2133 __free_filter(filter);
1977 2134
1978out_unlock: 2135out_unlock:
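The predicate walker wired up here (ftrace_function_set_filter_cb) accepts only '==' and '!=' on the 'ip' field, joined by '||', and maps matching leaves onto ftrace_set_filter()/ftrace_set_notrace(). A filter of the following shape should therefore pass validation (a sketch, assuming the 'ftrace:function' event from this series; quoting lets one value carry several comma- or space-separated patterns, which ftrace_function_filter_re() splits and applies one by one):

   perf record -e ftrace:function \
           --filter 'ip == vfs_read || ip == "vfs_write,vfs_fsync"' \
           -a -- sleep 1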
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index bbeec31e0ae3..7b46c9bd22ae 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -18,6 +18,16 @@
18#undef TRACE_SYSTEM 18#undef TRACE_SYSTEM
19#define TRACE_SYSTEM ftrace 19#define TRACE_SYSTEM ftrace
20 20
21/*
 22 * The FTRACE_ENTRY_REG macro allows an ftrace entry to define a register
 23 * function and thus become accessible via perf.
24 */
25#undef FTRACE_ENTRY_REG
26#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, \
27 filter, regfn) \
28 FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
29 filter)
30
21/* not needed for this file */ 31/* not needed for this file */
22#undef __field_struct 32#undef __field_struct
23#define __field_struct(type, item) 33#define __field_struct(type, item)
@@ -44,21 +54,22 @@
44#define F_printk(fmt, args...) fmt, args 54#define F_printk(fmt, args...) fmt, args
45 55
46#undef FTRACE_ENTRY 56#undef FTRACE_ENTRY
47#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ 57#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \
48struct ____ftrace_##name { \ 58struct ____ftrace_##name { \
49 tstruct \ 59 tstruct \
50}; \ 60}; \
51static void __always_unused ____ftrace_check_##name(void) \ 61static void __always_unused ____ftrace_check_##name(void) \
52{ \ 62{ \
53 struct ____ftrace_##name *__entry = NULL; \ 63 struct ____ftrace_##name *__entry = NULL; \
54 \ 64 \
55 /* force compile-time check on F_printk() */ \ 65 /* force compile-time check on F_printk() */ \
56 printk(print); \ 66 printk(print); \
57} 67}
58 68
59#undef FTRACE_ENTRY_DUP 69#undef FTRACE_ENTRY_DUP
60#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print) \ 70#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print, filter) \
61 FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) 71 FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
72 filter)
62 73
63#include "trace_entries.h" 74#include "trace_entries.h"
64 75
@@ -67,7 +78,7 @@ static void __always_unused ____ftrace_check_##name(void) \
67 ret = trace_define_field(event_call, #type, #item, \ 78 ret = trace_define_field(event_call, #type, #item, \
68 offsetof(typeof(field), item), \ 79 offsetof(typeof(field), item), \
69 sizeof(field.item), \ 80 sizeof(field.item), \
70 is_signed_type(type), FILTER_OTHER); \ 81 is_signed_type(type), filter_type); \
71 if (ret) \ 82 if (ret) \
72 return ret; 83 return ret;
73 84
@@ -77,7 +88,7 @@ static void __always_unused ____ftrace_check_##name(void) \
77 offsetof(typeof(field), \ 88 offsetof(typeof(field), \
78 container.item), \ 89 container.item), \
79 sizeof(field.container.item), \ 90 sizeof(field.container.item), \
80 is_signed_type(type), FILTER_OTHER); \ 91 is_signed_type(type), filter_type); \
81 if (ret) \ 92 if (ret) \
82 return ret; 93 return ret;
83 94
@@ -91,7 +102,7 @@ static void __always_unused ____ftrace_check_##name(void) \
91 ret = trace_define_field(event_call, event_storage, #item, \ 102 ret = trace_define_field(event_call, event_storage, #item, \
92 offsetof(typeof(field), item), \ 103 offsetof(typeof(field), item), \
93 sizeof(field.item), \ 104 sizeof(field.item), \
94 is_signed_type(type), FILTER_OTHER); \ 105 is_signed_type(type), filter_type); \
95 mutex_unlock(&event_storage_mutex); \ 106 mutex_unlock(&event_storage_mutex); \
96 if (ret) \ 107 if (ret) \
97 return ret; \ 108 return ret; \
@@ -104,7 +115,7 @@ static void __always_unused ____ftrace_check_##name(void) \
104 offsetof(typeof(field), \ 115 offsetof(typeof(field), \
105 container.item), \ 116 container.item), \
106 sizeof(field.container.item), \ 117 sizeof(field.container.item), \
107 is_signed_type(type), FILTER_OTHER); \ 118 is_signed_type(type), filter_type); \
108 if (ret) \ 119 if (ret) \
109 return ret; 120 return ret;
110 121
@@ -112,17 +123,18 @@ static void __always_unused ____ftrace_check_##name(void) \
112#define __dynamic_array(type, item) \ 123#define __dynamic_array(type, item) \
113 ret = trace_define_field(event_call, #type, #item, \ 124 ret = trace_define_field(event_call, #type, #item, \
114 offsetof(typeof(field), item), \ 125 offsetof(typeof(field), item), \
115 0, is_signed_type(type), FILTER_OTHER);\ 126 0, is_signed_type(type), filter_type);\
116 if (ret) \ 127 if (ret) \
117 return ret; 128 return ret;
118 129
119#undef FTRACE_ENTRY 130#undef FTRACE_ENTRY
120#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ 131#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \
121int \ 132int \
122ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ 133ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
123{ \ 134{ \
124 struct struct_name field; \ 135 struct struct_name field; \
125 int ret; \ 136 int ret; \
137 int filter_type = filter; \
126 \ 138 \
127 tstruct; \ 139 tstruct; \
128 \ 140 \
@@ -152,13 +164,15 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
152#undef F_printk 164#undef F_printk
153#define F_printk(fmt, args...) #fmt ", " __stringify(args) 165#define F_printk(fmt, args...) #fmt ", " __stringify(args)
154 166
155#undef FTRACE_ENTRY 167#undef FTRACE_ENTRY_REG
156#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print) \ 168#define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\
169 regfn) \
157 \ 170 \
158struct ftrace_event_class event_class_ftrace_##call = { \ 171struct ftrace_event_class event_class_ftrace_##call = { \
159 .system = __stringify(TRACE_SYSTEM), \ 172 .system = __stringify(TRACE_SYSTEM), \
160 .define_fields = ftrace_define_fields_##call, \ 173 .define_fields = ftrace_define_fields_##call, \
161 .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\ 174 .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\
175 .reg = regfn, \
162}; \ 176}; \
163 \ 177 \
164struct ftrace_event_call __used event_##call = { \ 178struct ftrace_event_call __used event_##call = { \
@@ -170,4 +184,14 @@ struct ftrace_event_call __used event_##call = { \
170struct ftrace_event_call __used \ 184struct ftrace_event_call __used \
171__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call; 185__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call;
172 186
187#undef FTRACE_ENTRY
188#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print, filter) \
189 FTRACE_ENTRY_REG(call, struct_name, etype, \
190 PARAMS(tstruct), PARAMS(print), filter, NULL)
191
192int ftrace_event_is_function(struct ftrace_event_call *call)
193{
194 return call == &event_function;
195}
196
173#include "trace_entries.h" 197#include "trace_entries.h"
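For the function event itself, FTRACE_ENTRY_REG is presumably instantiated in trace_entries.h (not part of this hunk), wiring in perf_ftrace_event_register as the ->reg() callback and FILTER_TRACE_FN as the filter type. A sketch of what that use would look like:

   FTRACE_ENTRY_REG(function, ftrace_entry,

   	TRACE_FN,

   	F_STRUCT(
   		__field(	unsigned long,	ip		)
   		__field(	unsigned long,	parent_ip	)
   	),

   	F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip),

   	FILTER_TRACE_FN,

   	perf_ftrace_event_register
   );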
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 00d527c945a4..580a05ec926b 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1892,7 +1892,8 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1892#endif /* CONFIG_PERF_EVENTS */ 1892#endif /* CONFIG_PERF_EVENTS */
1893 1893
1894static __kprobes 1894static __kprobes
1895int kprobe_register(struct ftrace_event_call *event, enum trace_reg type) 1895int kprobe_register(struct ftrace_event_call *event,
1896 enum trace_reg type, void *data)
1896{ 1897{
1897 struct trace_probe *tp = (struct trace_probe *)event->data; 1898 struct trace_probe *tp = (struct trace_probe *)event->data;
1898 1899
@@ -1909,6 +1910,11 @@ int kprobe_register(struct ftrace_event_call *event, enum trace_reg type)
1909 case TRACE_REG_PERF_UNREGISTER: 1910 case TRACE_REG_PERF_UNREGISTER:
1910 disable_trace_probe(tp, TP_FLAG_PROFILE); 1911 disable_trace_probe(tp, TP_FLAG_PROFILE);
1911 return 0; 1912 return 0;
1913 case TRACE_REG_PERF_OPEN:
1914 case TRACE_REG_PERF_CLOSE:
1915 case TRACE_REG_PERF_ADD:
1916 case TRACE_REG_PERF_DEL:
1917 return 0;
1912#endif 1918#endif
1913 } 1919 }
1914 return 0; 1920 return 0;
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 0d6ff3555942..c5a01873567d 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -300,7 +300,7 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
300 unsigned long mask; 300 unsigned long mask;
301 const char *str; 301 const char *str;
302 const char *ret = p->buffer + p->len; 302 const char *ret = p->buffer + p->len;
303 int i; 303 int i, first = 1;
304 304
305 for (i = 0; flag_array[i].name && flags; i++) { 305 for (i = 0; flag_array[i].name && flags; i++) {
306 306
@@ -310,14 +310,16 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
310 310
311 str = flag_array[i].name; 311 str = flag_array[i].name;
312 flags &= ~mask; 312 flags &= ~mask;
313 if (p->len && delim) 313 if (!first && delim)
314 trace_seq_puts(p, delim); 314 trace_seq_puts(p, delim);
315 else
316 first = 0;
315 trace_seq_puts(p, str); 317 trace_seq_puts(p, str);
316 } 318 }
317 319
318 /* check for left over flags */ 320 /* check for left over flags */
319 if (flags) { 321 if (flags) {
320 if (p->len && delim) 322 if (!first && delim)
321 trace_seq_puts(p, delim); 323 trace_seq_puts(p, delim);
322 trace_seq_printf(p, "0x%lx", flags); 324 trace_seq_printf(p, "0x%lx", flags);
323 } 325 }
@@ -344,7 +346,7 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
344 break; 346 break;
345 } 347 }
346 348
347 if (!p->len) 349 if (ret == (const char *)(p->buffer + p->len))
348 trace_seq_printf(p, "0x%lx", val); 350 trace_seq_printf(p, "0x%lx", val);
349 351
350 trace_seq_putc(p, 0); 352 trace_seq_putc(p, 0);
@@ -370,7 +372,7 @@ ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val,
370 break; 372 break;
371 } 373 }
372 374
373 if (!p->len) 375 if (ret == (const char *)(p->buffer + p->len))
374 trace_seq_printf(p, "0x%llx", val); 376 trace_seq_printf(p, "0x%llx", val);
375 377
376 trace_seq_putc(p, 0); 378 trace_seq_putc(p, 0);
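The old 'p->len && delim' test keyed off the whole trace_seq, so flags appended to a buffer that already held text picked up a spurious leading delimiter, e.g. "...|GFP_KERNEL|__GFP_ZERO" instead of "...GFP_KERNEL|__GFP_ZERO". The per-call 'first' flag, and the matching 'ret == p->buffer + p->len' comparison in the symbolic printers, restrict the check to output produced by the current call.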
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index cb654542c1a1..96fc73369099 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -17,9 +17,9 @@ static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
17static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); 17static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
18 18
19static int syscall_enter_register(struct ftrace_event_call *event, 19static int syscall_enter_register(struct ftrace_event_call *event,
20 enum trace_reg type); 20 enum trace_reg type, void *data);
21static int syscall_exit_register(struct ftrace_event_call *event, 21static int syscall_exit_register(struct ftrace_event_call *event,
22 enum trace_reg type); 22 enum trace_reg type, void *data);
23 23
24static int syscall_enter_define_fields(struct ftrace_event_call *call); 24static int syscall_enter_define_fields(struct ftrace_event_call *call);
25static int syscall_exit_define_fields(struct ftrace_event_call *call); 25static int syscall_exit_define_fields(struct ftrace_event_call *call);
@@ -468,8 +468,8 @@ int __init init_ftrace_syscalls(void)
468 unsigned long addr; 468 unsigned long addr;
469 int i; 469 int i;
470 470
471 syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * 471 syscalls_metadata = kcalloc(NR_syscalls, sizeof(*syscalls_metadata),
472 NR_syscalls, GFP_KERNEL); 472 GFP_KERNEL);
473 if (!syscalls_metadata) { 473 if (!syscalls_metadata) {
474 WARN_ON(1); 474 WARN_ON(1);
475 return -ENOMEM; 475 return -ENOMEM;
@@ -649,7 +649,7 @@ void perf_sysexit_disable(struct ftrace_event_call *call)
649#endif /* CONFIG_PERF_EVENTS */ 649#endif /* CONFIG_PERF_EVENTS */
650 650
651static int syscall_enter_register(struct ftrace_event_call *event, 651static int syscall_enter_register(struct ftrace_event_call *event,
652 enum trace_reg type) 652 enum trace_reg type, void *data)
653{ 653{
654 switch (type) { 654 switch (type) {
655 case TRACE_REG_REGISTER: 655 case TRACE_REG_REGISTER:
@@ -664,13 +664,18 @@ static int syscall_enter_register(struct ftrace_event_call *event,
664 case TRACE_REG_PERF_UNREGISTER: 664 case TRACE_REG_PERF_UNREGISTER:
665 perf_sysenter_disable(event); 665 perf_sysenter_disable(event);
666 return 0; 666 return 0;
667 case TRACE_REG_PERF_OPEN:
668 case TRACE_REG_PERF_CLOSE:
669 case TRACE_REG_PERF_ADD:
670 case TRACE_REG_PERF_DEL:
671 return 0;
667#endif 672#endif
668 } 673 }
669 return 0; 674 return 0;
670} 675}
671 676
672static int syscall_exit_register(struct ftrace_event_call *event, 677static int syscall_exit_register(struct ftrace_event_call *event,
673 enum trace_reg type) 678 enum trace_reg type, void *data)
674{ 679{
675 switch (type) { 680 switch (type) {
676 case TRACE_REG_REGISTER: 681 case TRACE_REG_REGISTER:
@@ -685,6 +690,11 @@ static int syscall_exit_register(struct ftrace_event_call *event,
685 case TRACE_REG_PERF_UNREGISTER: 690 case TRACE_REG_PERF_UNREGISTER:
686 perf_sysexit_disable(event); 691 perf_sysexit_disable(event);
687 return 0; 692 return 0;
693 case TRACE_REG_PERF_OPEN:
694 case TRACE_REG_PERF_CLOSE:
695 case TRACE_REG_PERF_ADD:
696 case TRACE_REG_PERF_DEL:
697 return 0;
688#endif 698#endif
689 } 699 }
690 return 0; 700 return 0;
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index f1539decd99d..d96ba22dabfa 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -25,7 +25,7 @@
25#include <linux/err.h> 25#include <linux/err.h>
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/jump_label.h> 28#include <linux/static_key.h>
29 29
30extern struct tracepoint * const __start___tracepoints_ptrs[]; 30extern struct tracepoint * const __start___tracepoints_ptrs[];
31extern struct tracepoint * const __stop___tracepoints_ptrs[]; 31extern struct tracepoint * const __stop___tracepoints_ptrs[];
@@ -256,9 +256,9 @@ static void set_tracepoint(struct tracepoint_entry **entry,
256{ 256{
257 WARN_ON(strcmp((*entry)->name, elem->name) != 0); 257 WARN_ON(strcmp((*entry)->name, elem->name) != 0);
258 258
259 if (elem->regfunc && !jump_label_enabled(&elem->key) && active) 259 if (elem->regfunc && !static_key_enabled(&elem->key) && active)
260 elem->regfunc(); 260 elem->regfunc();
261 else if (elem->unregfunc && jump_label_enabled(&elem->key) && !active) 261 else if (elem->unregfunc && static_key_enabled(&elem->key) && !active)
262 elem->unregfunc(); 262 elem->unregfunc();
263 263
264 /* 264 /*
@@ -269,10 +269,10 @@ static void set_tracepoint(struct tracepoint_entry **entry,
269 * is used. 269 * is used.
270 */ 270 */
271 rcu_assign_pointer(elem->funcs, (*entry)->funcs); 271 rcu_assign_pointer(elem->funcs, (*entry)->funcs);
272 if (active && !jump_label_enabled(&elem->key)) 272 if (active && !static_key_enabled(&elem->key))
273 jump_label_inc(&elem->key); 273 static_key_slow_inc(&elem->key);
274 else if (!active && jump_label_enabled(&elem->key)) 274 else if (!active && static_key_enabled(&elem->key))
275 jump_label_dec(&elem->key); 275 static_key_slow_dec(&elem->key);
276} 276}
277 277
278/* 278/*
@@ -283,11 +283,11 @@ static void set_tracepoint(struct tracepoint_entry **entry,
283 */ 283 */
284static void disable_tracepoint(struct tracepoint *elem) 284static void disable_tracepoint(struct tracepoint *elem)
285{ 285{
286 if (elem->unregfunc && jump_label_enabled(&elem->key)) 286 if (elem->unregfunc && static_key_enabled(&elem->key))
287 elem->unregfunc(); 287 elem->unregfunc();
288 288
289 if (jump_label_enabled(&elem->key)) 289 if (static_key_enabled(&elem->key))
290 jump_label_dec(&elem->key); 290 static_key_slow_dec(&elem->key);
291 rcu_assign_pointer(elem->funcs, NULL); 291 rcu_assign_pointer(elem->funcs, NULL);
292} 292}
293 293
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index d117262deba3..14bc092fb12c 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -3,12 +3,9 @@
3 * 3 *
4 * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. 4 * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
5 * 5 *
6 * this code detects hard lockups: incidents in where on a CPU 6 * Note: Most of this code is borrowed heavily from the original softlockup
7 * the kernel does not respond to anything except NMI. 7 * detector, so thanks to Ingo for the initial implementation.
8 * 8 * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
9 * Note: Most of this code is borrowed heavily from softlockup.c,
10 * so thanks to Ingo for the initial implementation.
11 * Some chunks also taken from arch/x86/kernel/apic/nmi.c, thanks
12 * to those contributors as well. 9 * to those contributors as well.
13 */ 10 */
14 11
@@ -117,9 +114,10 @@ static unsigned long get_sample_period(void)
117{ 114{
118 /* 115 /*
119 * convert watchdog_thresh from seconds to ns 116 * convert watchdog_thresh from seconds to ns
120 * the divide by 5 is to give hrtimer 5 chances to 117 * the divide by 5 is to give hrtimer several chances (two
121 * increment before the hardlockup detector generates 118 * or three with the current relation between the soft
122 * a warning 119 * and hard thresholds) to increment before the
120 * hardlockup detector generates a warning
123 */ 121 */
124 return get_softlockup_thresh() * (NSEC_PER_SEC / 5); 122 return get_softlockup_thresh() * (NSEC_PER_SEC / 5);
125} 123}
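Worked through with the defaults assumed by the updated Kconfig text below: watchdog_thresh = 10 s gives get_softlockup_thresh() = 20 s, so the sample period comes to 20 s / 5 = 4 s, and the 10 s hard lockup window spans 10 / 4 periods, i.e. the "two or three" hrtimer chances the comment mentions.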
@@ -336,9 +334,11 @@ static int watchdog(void *unused)
336 334
337 set_current_state(TASK_INTERRUPTIBLE); 335 set_current_state(TASK_INTERRUPTIBLE);
338 /* 336 /*
339 * Run briefly once per second to reset the softlockup timestamp. 337 * Run briefly (kicked by the hrtimer callback function) once every
340 * If this gets delayed for more than 60 seconds then the 338 * get_sample_period() seconds (4 seconds by default) to reset the
341 * debug-printout triggers in watchdog_timer_fn(). 339 * softlockup timestamp. If this gets delayed for more than
340 * 2*watchdog_thresh seconds then the debug-printout triggers in
341 * watchdog_timer_fn().
342 */ 342 */
343 while (!kthread_should_stop()) { 343 while (!kthread_should_stop()) {
344 __touch_watchdog(); 344 __touch_watchdog();
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index d27a2aa3e815..05037dc9bde7 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -166,18 +166,21 @@ config LOCKUP_DETECTOR
166 hard and soft lockups. 166 hard and soft lockups.
167 167
168 Softlockups are bugs that cause the kernel to loop in kernel 168 Softlockups are bugs that cause the kernel to loop in kernel
169 mode for more than 60 seconds, without giving other tasks a 169 mode for more than 20 seconds, without giving other tasks a
170 chance to run. The current stack trace is displayed upon 170 chance to run. The current stack trace is displayed upon
171 detection and the system will stay locked up. 171 detection and the system will stay locked up.
172 172
173 Hardlockups are bugs that cause the CPU to loop in kernel mode 173 Hardlockups are bugs that cause the CPU to loop in kernel mode
174 for more than 60 seconds, without letting other interrupts have a 174 for more than 10 seconds, without letting other interrupts have a
175 chance to run. The current stack trace is displayed upon detection 175 chance to run. The current stack trace is displayed upon detection
176 and the system will stay locked up. 176 and the system will stay locked up.
177 177
178 The overhead should be minimal. A periodic hrtimer runs to 178 The overhead should be minimal. A periodic hrtimer runs to
179 generate interrupts and kick the watchdog task every 10-12 seconds. 179 generate interrupts and kick the watchdog task every 4 seconds.
180 An NMI is generated every 60 seconds or so to check for hardlockups. 180 An NMI is generated every 10 seconds or so to check for hardlockups.
181
182 The frequency of hrtimer and NMI events and the soft and hard lockup
183 thresholds can be controlled through the sysctl watchdog_thresh.
181 184
182config HARDLOCKUP_DETECTOR 185config HARDLOCKUP_DETECTOR
183 def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \ 186 def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \
@@ -189,7 +192,8 @@ config BOOTPARAM_HARDLOCKUP_PANIC
189 help 192 help
190 Say Y here to enable the kernel to panic on "hard lockups", 193 Say Y here to enable the kernel to panic on "hard lockups",
191 which are bugs that cause the kernel to loop in kernel 194 which are bugs that cause the kernel to loop in kernel
192 mode with interrupts disabled for more than 60 seconds. 195 mode with interrupts disabled for more than 10 seconds (configurable
196 using the watchdog_thresh sysctl).
193 197
194 Say N if unsure. 198 Say N if unsure.
195 199
@@ -206,8 +210,8 @@ config BOOTPARAM_SOFTLOCKUP_PANIC
206 help 210 help
207 Say Y here to enable the kernel to panic on "soft lockups", 211 Say Y here to enable the kernel to panic on "soft lockups",
208 which are bugs that cause the kernel to loop in kernel 212 which are bugs that cause the kernel to loop in kernel
209 mode for more than 60 seconds, without giving other tasks a 213 mode for more than 20 seconds (configurable using the watchdog_thresh
210 chance to run. 214 sysctl), without giving other tasks a chance to run.
211 215
212 The panic can be used in combination with panic_timeout, 216 The panic can be used in combination with panic_timeout,
213 to cause the system to reboot automatically after a 217 to cause the system to reboot automatically after a
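Both panic knobs now key off the same tunable; at run time the window can presumably be adjusted with something like:

   # hypothetical example: 5 s hard lockup window, 10 s soft lockup window
   sysctl -w kernel.watchdog_thresh=5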
diff --git a/net/core/dev.c b/net/core/dev.c
index 6ca32f6b3105..6982bfd6a781 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -134,7 +134,7 @@
134#include <linux/inetdevice.h> 134#include <linux/inetdevice.h>
135#include <linux/cpu_rmap.h> 135#include <linux/cpu_rmap.h>
136#include <linux/net_tstamp.h> 136#include <linux/net_tstamp.h>
137#include <linux/jump_label.h> 137#include <linux/static_key.h>
138#include <net/flow_keys.h> 138#include <net/flow_keys.h>
139 139
140#include "net-sysfs.h" 140#include "net-sysfs.h"
@@ -1441,11 +1441,11 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1441} 1441}
1442EXPORT_SYMBOL(call_netdevice_notifiers); 1442EXPORT_SYMBOL(call_netdevice_notifiers);
1443 1443
1444static struct jump_label_key netstamp_needed __read_mostly; 1444static struct static_key netstamp_needed __read_mostly;
1445#ifdef HAVE_JUMP_LABEL 1445#ifdef HAVE_JUMP_LABEL
1446/* We are not allowed to call jump_label_dec() from irq context 1446/* We are not allowed to call static_key_slow_dec() from irq context
1447 * If net_disable_timestamp() is called from irq context, defer the 1447 * If net_disable_timestamp() is called from irq context, defer the
1448 * jump_label_dec() calls. 1448 * static_key_slow_dec() calls.
1449 */ 1449 */
1450static atomic_t netstamp_needed_deferred; 1450static atomic_t netstamp_needed_deferred;
1451#endif 1451#endif
@@ -1457,12 +1457,12 @@ void net_enable_timestamp(void)
1457 1457
1458 if (deferred) { 1458 if (deferred) {
1459 while (--deferred) 1459 while (--deferred)
1460 jump_label_dec(&netstamp_needed); 1460 static_key_slow_dec(&netstamp_needed);
1461 return; 1461 return;
1462 } 1462 }
1463#endif 1463#endif
1464 WARN_ON(in_interrupt()); 1464 WARN_ON(in_interrupt());
1465 jump_label_inc(&netstamp_needed); 1465 static_key_slow_inc(&netstamp_needed);
1466} 1466}
1467EXPORT_SYMBOL(net_enable_timestamp); 1467EXPORT_SYMBOL(net_enable_timestamp);
1468 1468
@@ -1474,19 +1474,19 @@ void net_disable_timestamp(void)
1474 return; 1474 return;
1475 } 1475 }
1476#endif 1476#endif
1477 jump_label_dec(&netstamp_needed); 1477 static_key_slow_dec(&netstamp_needed);
1478} 1478}
1479EXPORT_SYMBOL(net_disable_timestamp); 1479EXPORT_SYMBOL(net_disable_timestamp);
1480 1480
1481static inline void net_timestamp_set(struct sk_buff *skb) 1481static inline void net_timestamp_set(struct sk_buff *skb)
1482{ 1482{
1483 skb->tstamp.tv64 = 0; 1483 skb->tstamp.tv64 = 0;
1484 if (static_branch(&netstamp_needed)) 1484 if (static_key_false(&netstamp_needed))
1485 __net_timestamp(skb); 1485 __net_timestamp(skb);
1486} 1486}
1487 1487
1488#define net_timestamp_check(COND, SKB) \ 1488#define net_timestamp_check(COND, SKB) \
1489 if (static_branch(&netstamp_needed)) { \ 1489 if (static_key_false(&netstamp_needed)) { \
1490 if ((COND) && !(SKB)->tstamp.tv64) \ 1490 if ((COND) && !(SKB)->tstamp.tv64) \
1491 __net_timestamp(SKB); \ 1491 __net_timestamp(SKB); \
1492 } \ 1492 } \
@@ -2660,7 +2660,7 @@ EXPORT_SYMBOL(__skb_get_rxhash);
2660struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; 2660struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
2661EXPORT_SYMBOL(rps_sock_flow_table); 2661EXPORT_SYMBOL(rps_sock_flow_table);
2662 2662
2663struct jump_label_key rps_needed __read_mostly; 2663struct static_key rps_needed __read_mostly;
2664 2664
2665static struct rps_dev_flow * 2665static struct rps_dev_flow *
2666set_rps_cpu(struct net_device *dev, struct sk_buff *skb, 2666set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
@@ -2945,7 +2945,7 @@ int netif_rx(struct sk_buff *skb)
2945 2945
2946 trace_netif_rx(skb); 2946 trace_netif_rx(skb);
2947#ifdef CONFIG_RPS 2947#ifdef CONFIG_RPS
2948 if (static_branch(&rps_needed)) { 2948 if (static_key_false(&rps_needed)) {
2949 struct rps_dev_flow voidflow, *rflow = &voidflow; 2949 struct rps_dev_flow voidflow, *rflow = &voidflow;
2950 int cpu; 2950 int cpu;
2951 2951
@@ -3309,7 +3309,7 @@ int netif_receive_skb(struct sk_buff *skb)
3309 return NET_RX_SUCCESS; 3309 return NET_RX_SUCCESS;
3310 3310
3311#ifdef CONFIG_RPS 3311#ifdef CONFIG_RPS
3312 if (static_branch(&rps_needed)) { 3312 if (static_key_false(&rps_needed)) {
3313 struct rps_dev_flow voidflow, *rflow = &voidflow; 3313 struct rps_dev_flow voidflow, *rflow = &voidflow;
3314 int cpu, ret; 3314 int cpu, ret;
3315 3315
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index a1727cda03d7..495586232aa1 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -608,10 +608,10 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
608 spin_unlock(&rps_map_lock); 608 spin_unlock(&rps_map_lock);
609 609
610 if (map) 610 if (map)
611 jump_label_inc(&rps_needed); 611 static_key_slow_inc(&rps_needed);
612 if (old_map) { 612 if (old_map) {
613 kfree_rcu(old_map, rcu); 613 kfree_rcu(old_map, rcu);
614 jump_label_dec(&rps_needed); 614 static_key_slow_dec(&rps_needed);
615 } 615 }
616 free_cpumask_var(mask); 616 free_cpumask_var(mask);
617 return len; 617 return len;
diff --git a/net/core/sock.c b/net/core/sock.c
index 02f8dfe320b7..95aff9c7419b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -111,7 +111,7 @@
111#include <linux/init.h> 111#include <linux/init.h>
112#include <linux/highmem.h> 112#include <linux/highmem.h>
113#include <linux/user_namespace.h> 113#include <linux/user_namespace.h>
114#include <linux/jump_label.h> 114#include <linux/static_key.h>
115#include <linux/memcontrol.h> 115#include <linux/memcontrol.h>
116 116
117#include <asm/uaccess.h> 117#include <asm/uaccess.h>
@@ -184,7 +184,7 @@ void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss)
184static struct lock_class_key af_family_keys[AF_MAX]; 184static struct lock_class_key af_family_keys[AF_MAX];
185static struct lock_class_key af_family_slock_keys[AF_MAX]; 185static struct lock_class_key af_family_slock_keys[AF_MAX];
186 186
187struct jump_label_key memcg_socket_limit_enabled; 187struct static_key memcg_socket_limit_enabled;
188EXPORT_SYMBOL(memcg_socket_limit_enabled); 188EXPORT_SYMBOL(memcg_socket_limit_enabled);
189 189
190/* 190/*
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index d05559d4d9cd..0c2850874254 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -69,9 +69,9 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write,
69 if (sock_table != orig_sock_table) { 69 if (sock_table != orig_sock_table) {
70 rcu_assign_pointer(rps_sock_flow_table, sock_table); 70 rcu_assign_pointer(rps_sock_flow_table, sock_table);
71 if (sock_table) 71 if (sock_table)
72 jump_label_inc(&rps_needed); 72 static_key_slow_inc(&rps_needed);
73 if (orig_sock_table) { 73 if (orig_sock_table) {
74 jump_label_dec(&rps_needed); 74 static_key_slow_dec(&rps_needed);
75 synchronize_rcu(); 75 synchronize_rcu();
76 vfree(orig_sock_table); 76 vfree(orig_sock_table);
77 } 77 }
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index 49978788a9dc..602fb305365f 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -111,7 +111,7 @@ void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss)
111 val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); 111 val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);
112 112
113 if (val != RESOURCE_MAX) 113 if (val != RESOURCE_MAX)
114 jump_label_dec(&memcg_socket_limit_enabled); 114 static_key_slow_dec(&memcg_socket_limit_enabled);
115} 115}
116EXPORT_SYMBOL(tcp_destroy_cgroup); 116EXPORT_SYMBOL(tcp_destroy_cgroup);
117 117
@@ -143,9 +143,9 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
143 net->ipv4.sysctl_tcp_mem[i]); 143 net->ipv4.sysctl_tcp_mem[i]);
144 144
145 if (val == RESOURCE_MAX && old_lim != RESOURCE_MAX) 145 if (val == RESOURCE_MAX && old_lim != RESOURCE_MAX)
146 jump_label_dec(&memcg_socket_limit_enabled); 146 static_key_slow_dec(&memcg_socket_limit_enabled);
147 else if (old_lim == RESOURCE_MAX && val != RESOURCE_MAX) 147 else if (old_lim == RESOURCE_MAX && val != RESOURCE_MAX)
148 jump_label_inc(&memcg_socket_limit_enabled); 148 static_key_slow_inc(&memcg_socket_limit_enabled);
149 149
150 return 0; 150 return 0;
151} 151}
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index b4e8ff05b301..e1b7e051332e 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -56,7 +56,7 @@ struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly;
56EXPORT_SYMBOL(nf_hooks); 56EXPORT_SYMBOL(nf_hooks);
57 57
58#if defined(CONFIG_JUMP_LABEL) 58#if defined(CONFIG_JUMP_LABEL)
59struct jump_label_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; 59struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
60EXPORT_SYMBOL(nf_hooks_needed); 60EXPORT_SYMBOL(nf_hooks_needed);
61#endif 61#endif
62 62
@@ -77,7 +77,7 @@ int nf_register_hook(struct nf_hook_ops *reg)
77 list_add_rcu(&reg->list, elem->list.prev); 77 list_add_rcu(&reg->list, elem->list.prev);
78 mutex_unlock(&nf_hook_mutex); 78 mutex_unlock(&nf_hook_mutex);
79#if defined(CONFIG_JUMP_LABEL) 79#if defined(CONFIG_JUMP_LABEL)
80 jump_label_inc(&nf_hooks_needed[reg->pf][reg->hooknum]); 80 static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
81#endif 81#endif
82 return 0; 82 return 0;
83} 83}
@@ -89,7 +89,7 @@ void nf_unregister_hook(struct nf_hook_ops *reg)
89 list_del_rcu(&reg->list); 89 list_del_rcu(&reg->list);
90 mutex_unlock(&nf_hook_mutex); 90 mutex_unlock(&nf_hook_mutex);
91#if defined(CONFIG_JUMP_LABEL) 91#if defined(CONFIG_JUMP_LABEL)
92 jump_label_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); 92 static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
93#endif 93#endif
94 synchronize_net(); 94 synchronize_net();
95} 95}
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index 4626a398836a..ca600e09c8d4 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -1,3 +1,10 @@
1OUTPUT := ./
2ifeq ("$(origin O)", "command line")
3 ifneq ($(O),)
4 OUTPUT := $(O)/
5 endif
6endif
7
1MAN1_TXT= \ 8MAN1_TXT= \
2 $(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \ 9 $(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \
3 $(wildcard perf-*.txt)) \ 10 $(wildcard perf-*.txt)) \
@@ -6,10 +13,11 @@ MAN5_TXT=
6MAN7_TXT= 13MAN7_TXT=
7 14
8MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT) 15MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT)
9MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT)) 16_MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT))
10MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT)) 17_MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT))
11 18
12DOC_HTML=$(MAN_HTML) 19MAN_XML=$(addprefix $(OUTPUT),$(_MAN_XML))
20MAN_HTML=$(addprefix $(OUTPUT),$(_MAN_HTML))
13 21
14ARTICLES = 22ARTICLES =
15# with their own formatting rules. 23# with their own formatting rules.
@@ -18,11 +26,17 @@ API_DOCS = $(patsubst %.txt,%,$(filter-out technical/api-index-skel.txt technica
18SP_ARTICLES += $(API_DOCS) 26SP_ARTICLES += $(API_DOCS)
19SP_ARTICLES += technical/api-index 27SP_ARTICLES += technical/api-index
20 28
21DOC_HTML += $(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES)) 29_DOC_HTML = $(_MAN_HTML)
30_DOC_HTML+=$(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES))
31DOC_HTML=$(addprefix $(OUTPUT),$(_DOC_HTML))
22 32
23DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT)) 33_DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT))
24DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT)) 34_DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT))
25DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT)) 35_DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT))
36
37DOC_MAN1=$(addprefix $(OUTPUT),$(_DOC_MAN1))
38DOC_MAN5=$(addprefix $(OUTPUT),$(_DOC_MAN5))
39DOC_MAN7=$(addprefix $(OUTPUT),$(_DOC_MAN7))
26 40
27# Make the path relative to DESTDIR, not prefix 41# Make the path relative to DESTDIR, not prefix
28ifndef DESTDIR 42ifndef DESTDIR
@@ -150,9 +164,9 @@ man1: $(DOC_MAN1)
150man5: $(DOC_MAN5) 164man5: $(DOC_MAN5)
151man7: $(DOC_MAN7) 165man7: $(DOC_MAN7)
152 166
153info: perf.info perfman.info 167info: $(OUTPUT)perf.info $(OUTPUT)perfman.info
154 168
155pdf: user-manual.pdf 169pdf: $(OUTPUT)user-manual.pdf
156 170
157install: install-man 171install: install-man
158 172
@@ -166,7 +180,7 @@ install-man: man
166 180
167install-info: info 181install-info: info
168 $(INSTALL) -d -m 755 $(DESTDIR)$(infodir) 182 $(INSTALL) -d -m 755 $(DESTDIR)$(infodir)
169 $(INSTALL) -m 644 perf.info perfman.info $(DESTDIR)$(infodir) 183 $(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir)
170 if test -r $(DESTDIR)$(infodir)/dir; then \ 184 if test -r $(DESTDIR)$(infodir)/dir; then \
171 $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\ 185 $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\
172 $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\ 186 $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\
@@ -176,7 +190,7 @@ install-info: info
176 190
177install-pdf: pdf 191install-pdf: pdf
178 $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir) 192 $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir)
179 $(INSTALL) -m 644 user-manual.pdf $(DESTDIR)$(pdfdir) 193 $(INSTALL) -m 644 $(OUTPUT)user-manual.pdf $(DESTDIR)$(pdfdir)
180 194
181#install-html: html 195#install-html: html
182# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir) 196# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir)
@@ -189,14 +203,14 @@ install-pdf: pdf
189# 203#
190# Determine "include::" file references in asciidoc files. 204# Determine "include::" file references in asciidoc files.
191# 205#
192doc.dep : $(wildcard *.txt) build-docdep.perl 206$(OUTPUT)doc.dep : $(wildcard *.txt) build-docdep.perl
193 $(QUIET_GEN)$(RM) $@+ $@ && \ 207 $(QUIET_GEN)$(RM) $@+ $@ && \
194 $(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \ 208 $(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \
195 mv $@+ $@ 209 mv $@+ $@
196 210
 197-include doc.dep 211-include $(OUTPUT)doc.dep
198 212
199cmds_txt = cmds-ancillaryinterrogators.txt \ 213_cmds_txt = cmds-ancillaryinterrogators.txt \
200 cmds-ancillarymanipulators.txt \ 214 cmds-ancillarymanipulators.txt \
201 cmds-mainporcelain.txt \ 215 cmds-mainporcelain.txt \
202 cmds-plumbinginterrogators.txt \ 216 cmds-plumbinginterrogators.txt \
@@ -205,32 +219,36 @@ cmds_txt = cmds-ancillaryinterrogators.txt \
205 cmds-synchelpers.txt \ 219 cmds-synchelpers.txt \
206 cmds-purehelpers.txt \ 220 cmds-purehelpers.txt \
207 cmds-foreignscminterface.txt 221 cmds-foreignscminterface.txt
222cmds_txt=$(addprefix $(OUTPUT),$(_cmds_txt))
208 223
209$(cmds_txt): cmd-list.made 224$(cmds_txt): $(OUTPUT)cmd-list.made
210 225
211cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT) 226$(OUTPUT)cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT)
212 $(QUIET_GEN)$(RM) $@ && \ 227 $(QUIET_GEN)$(RM) $@ && \
213 $(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \ 228 $(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \
214 date >$@ 229 date >$@
215 230
216clean: 231clean:
217 $(RM) *.xml *.xml+ *.html *.html+ *.1 *.5 *.7 232 $(RM) $(MAN_XML) $(addsuffix +,$(MAN_XML))
218 $(RM) *.texi *.texi+ *.texi++ perf.info perfman.info 233 $(RM) $(MAN_HTML) $(addsuffix +,$(MAN_HTML))
219 $(RM) howto-index.txt howto/*.html doc.dep 234 $(RM) $(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7)
220 $(RM) technical/api-*.html technical/api-index.txt 235 $(RM) $(OUTPUT)*.texi $(OUTPUT)*.texi+ $(OUTPUT)*.texi++
221 $(RM) $(cmds_txt) *.made 236 $(RM) $(OUTPUT)perf.info $(OUTPUT)perfman.info
222 237 $(RM) $(OUTPUT)howto-index.txt $(OUTPUT)howto/*.html $(OUTPUT)doc.dep
223$(MAN_HTML): %.html : %.txt 238 $(RM) $(OUTPUT)technical/api-*.html $(OUTPUT)technical/api-index.txt
239 $(RM) $(cmds_txt) $(OUTPUT)*.made
240
241$(MAN_HTML): $(OUTPUT)%.html : %.txt
224 $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ 242 $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
225 $(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \ 243 $(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \
226 $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ 244 $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
227 mv $@+ $@ 245 mv $@+ $@
228 246
229%.1 %.5 %.7 : %.xml 247$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml
230 $(QUIET_XMLTO)$(RM) $@ && \ 248 $(QUIET_XMLTO)$(RM) $@ && \
231 xmlto -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< 249 xmlto -o $(OUTPUT) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
232 250
233%.xml : %.txt 251$(OUTPUT)%.xml : %.txt
234 $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ 252 $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
235 $(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \ 253 $(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \
236 $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ 254 $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
@@ -239,25 +257,25 @@ $(MAN_HTML): %.html : %.txt
239XSLT = docbook.xsl 257XSLT = docbook.xsl
240XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css 258XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css
241 259
242user-manual.html: user-manual.xml 260$(OUTPUT)user-manual.html: $(OUTPUT)user-manual.xml
243 $(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $< 261 $(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $<
244 262
245perf.info: user-manual.texi 263$(OUTPUT)perf.info: $(OUTPUT)user-manual.texi
246 $(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ user-manual.texi 264 $(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ $(OUTPUT)user-manual.texi
247 265
248user-manual.texi: user-manual.xml 266$(OUTPUT)user-manual.texi: $(OUTPUT)user-manual.xml
249 $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ 267 $(QUIET_DB2TEXI)$(RM) $@+ $@ && \
250 $(DOCBOOK2X_TEXI) user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \ 268 $(DOCBOOK2X_TEXI) $(OUTPUT)user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \
251 $(PERL_PATH) fix-texi.perl <$@++ >$@+ && \ 269 $(PERL_PATH) fix-texi.perl <$@++ >$@+ && \
252 rm $@++ && \ 270 rm $@++ && \
253 mv $@+ $@ 271 mv $@+ $@
254 272
255user-manual.pdf: user-manual.xml 273$(OUTPUT)user-manual.pdf: $(OUTPUT)user-manual.xml
256 $(QUIET_DBLATEX)$(RM) $@+ $@ && \ 274 $(QUIET_DBLATEX)$(RM) $@+ $@ && \
257 $(DBLATEX) -o $@+ -p /etc/asciidoc/dblatex/asciidoc-dblatex.xsl -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \ 275 $(DBLATEX) -o $@+ -p /etc/asciidoc/dblatex/asciidoc-dblatex.xsl -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \
258 mv $@+ $@ 276 mv $@+ $@
259 277
260perfman.texi: $(MAN_XML) cat-texi.perl 278$(OUTPUT)perfman.texi: $(MAN_XML) cat-texi.perl
261 $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ 279 $(QUIET_DB2TEXI)$(RM) $@+ $@ && \
262 ($(foreach xml,$(MAN_XML),$(DOCBOOK2X_TEXI) --encoding=UTF-8 \ 280 ($(foreach xml,$(MAN_XML),$(DOCBOOK2X_TEXI) --encoding=UTF-8 \
263 --to-stdout $(xml) &&) true) > $@++ && \ 281 --to-stdout $(xml) &&) true) > $@++ && \
@@ -265,7 +283,7 @@ perfman.texi: $(MAN_XML) cat-texi.perl
265 rm $@++ && \ 283 rm $@++ && \
266 mv $@+ $@ 284 mv $@+ $@
267 285
268perfman.info: perfman.texi 286$(OUTPUT)perfman.info: $(OUTPUT)perfman.texi
269 $(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi 287 $(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi
270 288
271$(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml 289$(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml
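The OUTPUT plumbing lets the documentation build honor the same O= convention as the main perf Makefile, keeping the source tree clean; for instance (a sketch, assuming an out-of-tree build directory):

   make -C tools/perf/Documentation O=/tmp/perf-build man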
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index d6b2a4f2108b..c7f5f55634ac 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -8,7 +8,7 @@ perf-lock - Analyze lock events
8SYNOPSIS 8SYNOPSIS
9-------- 9--------
10[verse] 10[verse]
11'perf lock' {record|report|trace} 11'perf lock' {record|report|script|info}
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
@@ -20,10 +20,13 @@ and statistics with this 'perf lock' command.
20 produces the file "perf.data" which contains tracing 20 produces the file "perf.data" which contains tracing
21 results of lock events. 21 results of lock events.
22 22
23 'perf lock trace' shows raw lock events.
24
25 'perf lock report' reports statistical data. 23 'perf lock report' reports statistical data.
26 24
25 'perf lock script' shows raw lock events.
26
27 'perf lock info' shows metadata like threads or addresses
28 of lock instances.
29
27COMMON OPTIONS 30COMMON OPTIONS
28-------------- 31--------------
29 32
@@ -47,6 +50,17 @@ REPORT OPTIONS
47 Sorting key. Possible values: acquired (default), contended, 50 Sorting key. Possible values: acquired (default), contended,
48 wait_total, wait_max, wait_min. 51 wait_total, wait_max, wait_min.
49 52
53INFO OPTIONS
54------------
55
56-t::
57--threads::
58 dump thread list in perf.data
59
60-m::
61--map::
62 dump map of lock instances (address:name table)
63
50SEE ALSO 64SEE ALSO
51-------- 65--------
52linkperf:perf[1] 66linkperf:perf[1]
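
A typical session with the reorganized subcommands documented above might
look like this (illustrative only; the workload and sort key are arbitrary):

    $ perf lock record -- find /          # produces perf.data with lock events
    $ perf lock report -k contended       # statistics, sorted by contention count
    $ perf lock info -t -m                # thread list and address:name lock map
    $ perf lock script                    # raw lock events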
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 2937f7e14bb7..a1386b2fff00 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -52,11 +52,15 @@ OPTIONS
52 52
53-p:: 53-p::
54--pid=:: 54--pid=::
55 Record events on existing process ID. 55 Record events on existing process ID (comma separated list).
56 56
57-t:: 57-t::
58--tid=:: 58--tid=::
59 Record events on existing thread ID. 59 Record events on existing thread ID (comma separated list).
60
61-u::
62--uid=::
63 Record events in threads owned by uid. Name or number.
60 64
61-r:: 65-r::
62--realtime=:: 66--realtime=::
@@ -148,6 +152,36 @@ an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must ha
148corresponding events, i.e., they always refer to events defined earlier on the command 152corresponding events, i.e., they always refer to events defined earlier on the command
149line. 153line.
150 154
155-b::
156--branch-any::
157Enable taken branch stack sampling. Any type of taken branch may be sampled.
158This is a shortcut for --branch-filter any. See --branch-filter for more information.
159
160-j::
161--branch-filter::
162Enable taken branch stack sampling. Each sample captures a series of consecutive
163taken branches. The number of branches captured with each sample depends on the
164underlying hardware, the type of branches of interest, and the executed code.
165It is possible to select the types of branches captured by enabling filters. The
166following filters are defined:
167
168 - any: any type of branches
169 - any_call: any function call or system call
170 - any_ret: any function return or system call return
171 - ind_call: any indirect branch
172 - u: only when the branch target is at the user level
173 - k: only when the branch target is in the kernel
174 - hv: only when the target is at the hypervisor level
175
176+
177The option requires at least one branch type among any, any_call, any_ret, ind_call.
178The privilege levels may be omitted, in which case the privilege levels of the associated
179event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
180levels are subject to permissions. When sampling on multiple events, branch stack sampling
181is enabled for all the sampling events. The sampled branch type is the same for all events.
182The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
183Note that this feature may not be available on all processors.
184
151SEE ALSO 185SEE ALSO
152-------- 186--------
153linkperf:perf-stat[1], linkperf:perf-list[1] 187linkperf:perf-stat[1], linkperf:perf-list[1]
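
The mapping from such a comma separated filter list to the new branch sample
bits is implemented by parse_branch_stack() in the builtin-record.c hunk
further down. A minimal standalone sketch of the same idea, assuming the
PERF_SAMPLE_BRANCH_* values introduced by this series are visible through
<linux/perf_event.h>:

    #include <stdint.h>
    #include <string.h>
    #include <strings.h>
    #include <linux/perf_event.h>

    struct branch_mode_map { const char *name; uint64_t bits; };

    static const struct branch_mode_map modes[] = {
            { "u",        PERF_SAMPLE_BRANCH_USER },
            { "k",        PERF_SAMPLE_BRANCH_KERNEL },
            { "hv",       PERF_SAMPLE_BRANCH_HV },
            { "any",      PERF_SAMPLE_BRANCH_ANY },
            { "any_call", PERF_SAMPLE_BRANCH_ANY_CALL },
            { "any_ret",  PERF_SAMPLE_BRANCH_ANY_RETURN },
            { "ind_call", PERF_SAMPLE_BRANCH_IND_CALL },
            { NULL,       0 },
    };

    /* e.g. "any_ret,u,k" -> ANY_RETURN | USER | KERNEL */
    static uint64_t branch_filter_mask(char *list)
    {
            uint64_t mask = 0;
            const struct branch_mode_map *m;
            char *tok;

            for (tok = strtok(list, ","); tok; tok = strtok(NULL, ","))
                    for (m = modes; m->name; m++)
                            if (!strcasecmp(tok, m->name))
                                    mask |= m->bits;
            return mask;
    }

Unlike this sketch, the real parser warns on unknown filter names and
defaults to 'any' when only privilege levels are given.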
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 9b430e98712e..87feeee8b90c 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -153,6 +153,16 @@ OPTIONS
153 information which may be very large and thus may clutter the display. 153 information which may be very large and thus may clutter the display.
154 It currently includes: cpu and numa topology of the host system. 154 It currently includes: cpu and numa topology of the host system.
155 155
156-b::
157--branch-stack::
158 Use the addresses of sampled taken branches instead of the instruction
159 address to build the histograms. To generate meaningful output, the
160 perf.data file must have been obtained using perf record -b or
161 perf record --branch-filter xxx where xxx is a branch filter option.
162 perf report is able to auto-detect whether a perf.data file contains
163 branch stacks and it will automatically switch to the branch view mode,
164 unless --no-branch-stack is used.
165
156SEE ALSO 166SEE ALSO
157-------- 167--------
158linkperf:perf-stat[1], linkperf:perf-annotate[1] 168linkperf:perf-stat[1], linkperf:perf-annotate[1]
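
The auto-detection described above costs one header feature test; the
builtin-report.c hunk later in this patch does essentially:

    /* branch view becomes the default when the perf.data header carries
     * the new HEADER_BRANCH_STACK feature and the user did not pass
     * --no-branch-stack (sort__branch_mode is left at -1 when unset):
     */
    if (sort__branch_mode == -1 && has_br_stack)
            sort__branch_mode = 1;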
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 2f6cef43da25..e9cbfcddfa3f 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -115,7 +115,7 @@ OPTIONS
115-f:: 115-f::
116--fields:: 116--fields::
117 Comma separated list of fields to print. Options are: 117 Comma separated list of fields to print. Options are:
118 comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr. 118 comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff.
119 Field list can be prepended with the type, trace, sw or hw, 119 Field list can be prepended with the type, trace, sw or hw,
120 to indicate to which event type the field list applies. 120 to indicate to which event type the field list applies.
121 e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace 121 e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace
@@ -200,6 +200,9 @@ OPTIONS
200 It currently includes: cpu and numa topology of the host system. 200 It currently includes: cpu and numa topology of the host system.
201 It can only be used with the perf script report mode. 201 It can only be used with the perf script report mode.
202 202
203--show-kernel-path::
204 Try to resolve the path of [kernel.kallsyms]
205
203SEE ALSO 206SEE ALSO
204-------- 207--------
205linkperf:perf-record[1], linkperf:perf-script-perl[1], 208linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 8966b9ab2014..2fa173b51970 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -35,11 +35,11 @@ OPTIONS
35 child tasks do not inherit counters 35 child tasks do not inherit counters
36-p:: 36-p::
37--pid=<pid>:: 37--pid=<pid>::
38 stat events on existing process id 38 stat events on existing process id (comma separated list)
39 39
40-t:: 40-t::
41--tid=<tid>:: 41--tid=<tid>::
42 stat events on existing thread id 42 stat events on existing thread id (comma separated list)
43 43
44 44
45-a:: 45-a::
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index b1a5bbbfebef..4a5680cb242e 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -72,11 +72,15 @@ Default is to monitor all CPUS.
72 72
73-p <pid>:: 73-p <pid>::
74--pid=<pid>:: 74--pid=<pid>::
75 Profile events on existing Process ID. 75 Profile events on existing Process ID (comma separated list).
76 76
77-t <tid>:: 77-t <tid>::
78--tid=<tid>:: 78--tid=<tid>::
79 Profile events on existing thread ID. 79 Profile events on existing thread ID (comma separated list).
80
81-u::
82--uid=::
83 Profile events in threads owned by uid. Name or number.
80 84
81-r <priority>:: 85-r <priority>::
82--realtime=<priority>:: 86--realtime=<priority>::
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 1078c5fadd5b..5476bc0a1eac 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -9,6 +9,7 @@ lib/rbtree.c
9include/linux/swab.h 9include/linux/swab.h
10arch/*/include/asm/unistd*.h 10arch/*/include/asm/unistd*.h
11arch/*/lib/memcpy*.S 11arch/*/lib/memcpy*.S
12arch/*/lib/memset*.S
12include/linux/poison.h 13include/linux/poison.h
13include/linux/magic.h 14include/linux/magic.h
14include/linux/hw_breakpoint.h 15include/linux/hw_breakpoint.h
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 8a4b9bccf8b2..74fd7f89208a 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -15,6 +15,16 @@ endif
15 15
16# Define V to have a more verbose compile. 16# Define V to have a more verbose compile.
17# 17#
18# Define O to save output files in a separate directory.
19#
20# Define ARCH as name of target architecture if you want cross-builds.
21#
22# Define CROSS_COMPILE as prefix name of compiler if you want cross-builds.
23#
24# Define NO_LIBPERL to disable perl script extension.
25#
26# Define NO_LIBPYTHON to disable python script extension.
27#
18# Define PYTHON to point to the python binary if the default 28# Define PYTHON to point to the python binary if the default
19# `python' is not correct; for example: PYTHON=python2 29# `python' is not correct; for example: PYTHON=python2
20# 30#
@@ -32,6 +42,10 @@ endif
32# Define NO_DWARF if you do not want debug-info analysis feature at all. 42# Define NO_DWARF if you do not want debug-info analysis feature at all.
33# 43#
34# Define WERROR=0 to disable treating any warnings as errors. 44# Define WERROR=0 to disable treating any warnings as errors.
45#
46# Define NO_NEWT if you do not want TUI support.
47#
48# Define NO_DEMANGLE if you do not want C++ symbol demangling.
35 49
36$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE 50$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
37 @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) 51 @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
@@ -61,7 +75,7 @@ ifeq ($(ARCH),x86_64)
61 ifeq (${IS_X86_64}, 1) 75 ifeq (${IS_X86_64}, 1)
62 RAW_ARCH := x86_64 76 RAW_ARCH := x86_64
63 ARCH_CFLAGS := -DARCH_X86_64 77 ARCH_CFLAGS := -DARCH_X86_64
64 ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S 78 ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
65 endif 79 endif
66endif 80endif
67 81
@@ -183,7 +197,10 @@ SCRIPT_SH += perf-archive.sh
183grep-libs = $(filter -l%,$(1)) 197grep-libs = $(filter -l%,$(1))
184strip-libs = $(filter-out -l%,$(1)) 198strip-libs = $(filter-out -l%,$(1))
185 199
186$(OUTPUT)python/perf.so: $(PYRF_OBJS) 200PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
201PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py
202
203$(OUTPUT)python/perf.so: $(PYRF_OBJS) $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
187 $(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \ 204 $(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \
188 --quiet build_ext; \ 205 --quiet build_ext; \
189 mkdir -p $(OUTPUT)python && \ 206 mkdir -p $(OUTPUT)python && \
@@ -258,6 +275,7 @@ LIB_H += util/callchain.h
258LIB_H += util/build-id.h 275LIB_H += util/build-id.h
259LIB_H += util/debug.h 276LIB_H += util/debug.h
260LIB_H += util/debugfs.h 277LIB_H += util/debugfs.h
278LIB_H += util/sysfs.h
261LIB_H += util/event.h 279LIB_H += util/event.h
262LIB_H += util/evsel.h 280LIB_H += util/evsel.h
263LIB_H += util/evlist.h 281LIB_H += util/evlist.h
@@ -304,6 +322,7 @@ LIB_OBJS += $(OUTPUT)util/build-id.o
304LIB_OBJS += $(OUTPUT)util/config.o 322LIB_OBJS += $(OUTPUT)util/config.o
305LIB_OBJS += $(OUTPUT)util/ctype.o 323LIB_OBJS += $(OUTPUT)util/ctype.o
306LIB_OBJS += $(OUTPUT)util/debugfs.o 324LIB_OBJS += $(OUTPUT)util/debugfs.o
325LIB_OBJS += $(OUTPUT)util/sysfs.o
307LIB_OBJS += $(OUTPUT)util/environment.o 326LIB_OBJS += $(OUTPUT)util/environment.o
308LIB_OBJS += $(OUTPUT)util/event.o 327LIB_OBJS += $(OUTPUT)util/event.o
309LIB_OBJS += $(OUTPUT)util/evlist.o 328LIB_OBJS += $(OUTPUT)util/evlist.o
@@ -361,8 +380,10 @@ BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o
361BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o 380BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o
362ifeq ($(RAW_ARCH),x86_64) 381ifeq ($(RAW_ARCH),x86_64)
363BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o 382BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
383BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o
364endif 384endif
365BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o 385BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
386BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o
366 387
367BUILTIN_OBJS += $(OUTPUT)builtin-diff.o 388BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
368BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o 389BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o
@@ -794,7 +815,6 @@ help:
794 @echo ' quick-install-html - install the html documentation quickly' 815 @echo ' quick-install-html - install the html documentation quickly'
795 @echo '' 816 @echo ''
796 @echo 'Perf maintainer targets:' 817 @echo 'Perf maintainer targets:'
797 @echo ' distclean - alias to clean'
798 @echo ' clean - clean all binary objects and build output' 818 @echo ' clean - clean all binary objects and build output'
799 819
800doc: 820doc:
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index f7781c6267c0..a09bece6dad2 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -4,6 +4,7 @@
4extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); 4extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
5extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); 5extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
6extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used); 6extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used);
7extern int bench_mem_memset(int argc, const char **argv, const char *prefix);
7 8
8#define BENCH_FORMAT_DEFAULT_STR "default" 9#define BENCH_FORMAT_DEFAULT_STR "default"
9#define BENCH_FORMAT_DEFAULT 0 10#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
index d588b87696fc..d66ab799b35f 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
@@ -2,3 +2,11 @@
2MEMCPY_FN(__memcpy, 2MEMCPY_FN(__memcpy,
3 "x86-64-unrolled", 3 "x86-64-unrolled",
4 "unrolled memcpy() in arch/x86/lib/memcpy_64.S") 4 "unrolled memcpy() in arch/x86/lib/memcpy_64.S")
5
6MEMCPY_FN(memcpy_c,
7 "x86-64-movsq",
8 "movsq-based memcpy() in arch/x86/lib/memcpy_64.S")
9
10MEMCPY_FN(memcpy_c_e,
11 "x86-64-movsb",
12 "movsb-based memcpy() in arch/x86/lib/memcpy_64.S")
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S
index 185a96d66dd1..fcd9cf00600a 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm.S
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S
@@ -1,4 +1,8 @@
1 1#define memcpy MEMCPY /* don't hide glibc's memcpy() */
2#define altinstr_replacement text
3#define globl p2align 4; .globl
4#define Lmemcpy_c globl memcpy_c; memcpy_c
5#define Lmemcpy_c_e globl memcpy_c_e; memcpy_c_e
2#include "../../../arch/x86/lib/memcpy_64.S" 6#include "../../../arch/x86/lib/memcpy_64.S"
3/* 7/*
4 * We need to provide note.GNU-stack section, saying that we want 8 * We need to provide note.GNU-stack section, saying that we want
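
The define block above is a symbol-visibility trick, not a functional
change: memcpy_64.S marks its alternative implementations with the local
labels .Lmemcpy_c and .Lmemcpy_c_e, and redefining them as 'globl' entries
exports them as global symbols, while renaming memcpy to MEMCPY keeps
glibc's memcpy() visible to the benchmark. The matching C declarations
(roughly what the MEMCPY_FN wrapper in mem-memcpy-arch.h expands to) are:

    extern void *memcpy_c(void *dst, const void *src, size_t len);
    extern void *memcpy_c_e(void *dst, const void *src, size_t len);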
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index db82021f4b91..71557225bf92 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -5,7 +5,6 @@
5 * 5 *
6 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> 6 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
7 */ 7 */
8#include <ctype.h>
9 8
10#include "../perf.h" 9#include "../perf.h"
11#include "../util/util.h" 10#include "../util/util.h"
@@ -24,6 +23,7 @@
24 23
25static const char *length_str = "1MB"; 24static const char *length_str = "1MB";
26static const char *routine = "default"; 25static const char *routine = "default";
26static int iterations = 1;
27static bool use_clock; 27static bool use_clock;
28static int clock_fd; 28static int clock_fd;
29static bool only_prefault; 29static bool only_prefault;
@@ -35,6 +35,8 @@ static const struct option options[] = {
35 "available unit: B, MB, GB (upper and lower)"), 35 "available unit: B, MB, GB (upper and lower)"),
36 OPT_STRING('r', "routine", &routine, "default", 36 OPT_STRING('r', "routine", &routine, "default",
37 "Specify routine to copy"), 37 "Specify routine to copy"),
38 OPT_INTEGER('i', "iterations", &iterations,
39 "repeat memcpy() invocation this number of times"),
38 OPT_BOOLEAN('c', "clock", &use_clock, 40 OPT_BOOLEAN('c', "clock", &use_clock,
39 "Use CPU clock for measuring"), 41 "Use CPU clock for measuring"),
40 OPT_BOOLEAN('o', "only-prefault", &only_prefault, 42 OPT_BOOLEAN('o', "only-prefault", &only_prefault,
@@ -121,6 +123,7 @@ static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
121{ 123{
122 u64 clock_start = 0ULL, clock_end = 0ULL; 124 u64 clock_start = 0ULL, clock_end = 0ULL;
123 void *src = NULL, *dst = NULL; 125 void *src = NULL, *dst = NULL;
126 int i;
124 127
125 alloc_mem(&src, &dst, len); 128 alloc_mem(&src, &dst, len);
126 129
@@ -128,7 +131,8 @@ static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
128 fn(dst, src, len); 131 fn(dst, src, len);
129 132
130 clock_start = get_clock(); 133 clock_start = get_clock();
131 fn(dst, src, len); 134 for (i = 0; i < iterations; ++i)
135 fn(dst, src, len);
132 clock_end = get_clock(); 136 clock_end = get_clock();
133 137
134 free(src); 138 free(src);
@@ -140,6 +144,7 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
140{ 144{
141 struct timeval tv_start, tv_end, tv_diff; 145 struct timeval tv_start, tv_end, tv_diff;
142 void *src = NULL, *dst = NULL; 146 void *src = NULL, *dst = NULL;
147 int i;
143 148
144 alloc_mem(&src, &dst, len); 149 alloc_mem(&src, &dst, len);
145 150
@@ -147,7 +152,8 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
147 fn(dst, src, len); 152 fn(dst, src, len);
148 153
149 BUG_ON(gettimeofday(&tv_start, NULL)); 154 BUG_ON(gettimeofday(&tv_start, NULL));
150 fn(dst, src, len); 155 for (i = 0; i < iterations; ++i)
156 fn(dst, src, len);
151 BUG_ON(gettimeofday(&tv_end, NULL)); 157 BUG_ON(gettimeofday(&tv_end, NULL));
152 158
153 timersub(&tv_end, &tv_start, &tv_diff); 159 timersub(&tv_end, &tv_start, &tv_diff);
diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h
new file mode 100644
index 000000000000..a040fa77665b
--- /dev/null
+++ b/tools/perf/bench/mem-memset-arch.h
@@ -0,0 +1,12 @@
1
2#ifdef ARCH_X86_64
3
4#define MEMSET_FN(fn, name, desc) \
5 extern void *fn(void *, int, size_t);
6
7#include "mem-memset-x86-64-asm-def.h"
8
9#undef MEMSET_FN
10
11#endif
12
diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h
new file mode 100644
index 000000000000..a71dff97c1f5
--- /dev/null
+++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h
@@ -0,0 +1,12 @@
1
2MEMSET_FN(__memset,
3 "x86-64-unrolled",
4 "unrolled memset() in arch/x86/lib/memset_64.S")
5
6MEMSET_FN(memset_c,
7 "x86-64-stosq",
8 "movsq-based memset() in arch/x86/lib/memset_64.S")
9
10MEMSET_FN(memset_c_e,
11 "x86-64-stosb",
12 "movsb-based memset() in arch/x86/lib/memset_64.S")
diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S
new file mode 100644
index 000000000000..9e5af89ed13a
--- /dev/null
+++ b/tools/perf/bench/mem-memset-x86-64-asm.S
@@ -0,0 +1,13 @@
1#define memset MEMSET /* don't hide glibc's memset() */
2#define altinstr_replacement text
3#define globl p2align 4; .globl
4#define Lmemset_c globl memset_c; memset_c
5#define Lmemset_c_e globl memset_c_e; memset_c_e
6#include "../../../arch/x86/lib/memset_64.S"
7
8/*
9 * We need to provide note.GNU-stack section, saying that we want
10 * NOT executable stack. Otherwise the final linking will assume that
11 * the ELF stack should not be restricted at all and set it RWX.
12 */
13.section .note.GNU-stack,"",@progbits
diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c
new file mode 100644
index 000000000000..e9079185bd72
--- /dev/null
+++ b/tools/perf/bench/mem-memset.c
@@ -0,0 +1,297 @@
1/*
2 * mem-memset.c
3 *
4 * memset: Simple memory set in various ways
5 *
6 * Trivial clone of mem-memcpy.c.
7 */
8
9#include "../perf.h"
10#include "../util/util.h"
11#include "../util/parse-options.h"
12#include "../util/header.h"
13#include "bench.h"
14#include "mem-memset-arch.h"
15
16#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
19#include <sys/time.h>
20#include <errno.h>
21
22#define K 1024
23
24static const char *length_str = "1MB";
25static const char *routine = "default";
26static int iterations = 1;
27static bool use_clock;
28static int clock_fd;
29static bool only_prefault;
30static bool no_prefault;
31
32static const struct option options[] = {
33 OPT_STRING('l', "length", &length_str, "1MB",
34 "Specify length of memory to copy. "
35 "available unit: B, MB, GB (upper and lower)"),
36 OPT_STRING('r', "routine", &routine, "default",
37 "Specify routine to copy"),
38 OPT_INTEGER('i', "iterations", &iterations,
39 "repeat memset() invocation this number of times"),
40 OPT_BOOLEAN('c', "clock", &use_clock,
41 "Use CPU clock for measuring"),
42 OPT_BOOLEAN('o', "only-prefault", &only_prefault,
43 "Show only the result with page faults before memset()"),
44 OPT_BOOLEAN('n', "no-prefault", &no_prefault,
45 "Show only the result without page faults before memset()"),
46 OPT_END()
47};
48
49typedef void *(*memset_t)(void *, int, size_t);
50
51struct routine {
52 const char *name;
53 const char *desc;
54 memset_t fn;
55};
56
57static const struct routine routines[] = {
58 { "default",
59 "Default memset() provided by glibc",
60 memset },
61#ifdef ARCH_X86_64
62
63#define MEMSET_FN(fn, name, desc) { name, desc, fn },
64#include "mem-memset-x86-64-asm-def.h"
65#undef MEMSET_FN
66
67#endif
68
69 { NULL,
70 NULL,
71 NULL }
72};
73
74static const char * const bench_mem_memset_usage[] = {
75 "perf bench mem memset <options>",
76 NULL
77};
78
79static struct perf_event_attr clock_attr = {
80 .type = PERF_TYPE_HARDWARE,
81 .config = PERF_COUNT_HW_CPU_CYCLES
82};
83
84static void init_clock(void)
85{
86 clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
87
88 if (clock_fd < 0 && errno == ENOSYS)
89 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
90 else
91 BUG_ON(clock_fd < 0);
92}
93
94static u64 get_clock(void)
95{
96 int ret;
97 u64 clk;
98
99 ret = read(clock_fd, &clk, sizeof(u64));
100 BUG_ON(ret != sizeof(u64));
101
102 return clk;
103}
104
105static double timeval2double(struct timeval *ts)
106{
107 return (double)ts->tv_sec +
108 (double)ts->tv_usec / (double)1000000;
109}
110
111static void alloc_mem(void **dst, size_t length)
112{
113 *dst = zalloc(length);
114 if (!*dst)
115 die("memory allocation failed - maybe length is too large?\n");
116}
117
118static u64 do_memset_clock(memset_t fn, size_t len, bool prefault)
119{
120 u64 clock_start = 0ULL, clock_end = 0ULL;
121 void *dst = NULL;
122 int i;
123
124 alloc_mem(&dst, len);
125
126 if (prefault)
127 fn(dst, -1, len);
128
129 clock_start = get_clock();
130 for (i = 0; i < iterations; ++i)
131 fn(dst, i, len);
132 clock_end = get_clock();
133
134 free(dst);
135 return clock_end - clock_start;
136}
137
138static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault)
139{
140 struct timeval tv_start, tv_end, tv_diff;
141 void *dst = NULL;
142 int i;
143
144 alloc_mem(&dst, len);
145
146 if (prefault)
147 fn(dst, -1, len);
148
149 BUG_ON(gettimeofday(&tv_start, NULL));
150 for (i = 0; i < iterations; ++i)
151 fn(dst, i, len);
152 BUG_ON(gettimeofday(&tv_end, NULL));
153
154 timersub(&tv_end, &tv_start, &tv_diff);
155
156 free(dst);
157 return (double)((double)len / timeval2double(&tv_diff));
158}
159
160#define pf (no_prefault ? 0 : 1)
161
162#define print_bps(x) do { \
163 if (x < K) \
164 printf(" %14lf B/Sec", x); \
165 else if (x < K * K) \
166 printf(" %14lfd KB/Sec", x / K); \
167 else if (x < K * K * K) \
168 printf(" %14lf MB/Sec", x / K / K); \
169 else \
170 printf(" %14lf GB/Sec", x / K / K / K); \
171 } while (0)
172
173int bench_mem_memset(int argc, const char **argv,
174 const char *prefix __used)
175{
176 int i;
177 size_t len;
178 double result_bps[2];
179 u64 result_clock[2];
180
181 argc = parse_options(argc, argv, options,
182 bench_mem_memset_usage, 0);
183
184 if (use_clock)
185 init_clock();
186
187 len = (size_t)perf_atoll((char *)length_str);
188
189 result_clock[0] = result_clock[1] = 0ULL;
190 result_bps[0] = result_bps[1] = 0.0;
191
192 if ((s64)len <= 0) {
193 fprintf(stderr, "Invalid length:%s\n", length_str);
194 return 1;
195 }
196
197 /* same as not specifying either of prefault and no-prefault */
198 if (only_prefault && no_prefault)
199 only_prefault = no_prefault = false;
200
201 for (i = 0; routines[i].name; i++) {
202 if (!strcmp(routines[i].name, routine))
203 break;
204 }
205 if (!routines[i].name) {
206 printf("Unknown routine:%s\n", routine);
207 printf("Available routines...\n");
208 for (i = 0; routines[i].name; i++) {
209 printf("\t%s ... %s\n",
210 routines[i].name, routines[i].desc);
211 }
212 return 1;
213 }
214
215 if (bench_format == BENCH_FORMAT_DEFAULT)
216 printf("# Copying %s Bytes ...\n\n", length_str);
217
218 if (!only_prefault && !no_prefault) {
219 /* show both of results */
220 if (use_clock) {
221 result_clock[0] =
222 do_memset_clock(routines[i].fn, len, false);
223 result_clock[1] =
224 do_memset_clock(routines[i].fn, len, true);
225 } else {
226 result_bps[0] =
227 do_memset_gettimeofday(routines[i].fn,
228 len, false);
229 result_bps[1] =
230 do_memset_gettimeofday(routines[i].fn,
231 len, true);
232 }
233 } else {
234 if (use_clock) {
235 result_clock[pf] =
236 do_memset_clock(routines[i].fn,
237 len, only_prefault);
238 } else {
239 result_bps[pf] =
240 do_memset_gettimeofday(routines[i].fn,
241 len, only_prefault);
242 }
243 }
244
245 switch (bench_format) {
246 case BENCH_FORMAT_DEFAULT:
247 if (!only_prefault && !no_prefault) {
248 if (use_clock) {
249 printf(" %14lf Clock/Byte\n",
250 (double)result_clock[0]
251 / (double)len);
252 printf(" %14lf Clock/Byte (with prefault)\n ",
253 (double)result_clock[1]
254 / (double)len);
255 } else {
256 print_bps(result_bps[0]);
257 printf("\n");
258 print_bps(result_bps[1]);
259 printf(" (with prefault)\n");
260 }
261 } else {
262 if (use_clock) {
263 printf(" %14lf Clock/Byte",
264 (double)result_clock[pf]
265 / (double)len);
266 } else
267 print_bps(result_bps[pf]);
268
269 printf("%s\n", only_prefault ? " (with prefault)" : "");
270 }
271 break;
272 case BENCH_FORMAT_SIMPLE:
273 if (!only_prefault && !no_prefault) {
274 if (use_clock) {
275 printf("%lf %lf\n",
276 (double)result_clock[0] / (double)len,
277 (double)result_clock[1] / (double)len);
278 } else {
279 printf("%lf %lf\n",
280 result_bps[0], result_bps[1]);
281 }
282 } else {
283 if (use_clock) {
284 printf("%lf\n", (double)result_clock[pf]
285 / (double)len);
286 } else
287 printf("%lf\n", result_bps[pf]);
288 }
289 break;
290 default:
291 /* reaching this means there's some disaster: */
292 die("unknown format: %d\n", bench_format);
293 break;
294 }
295
296 return 0;
297}
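
One caveat about the -i/--iterations loop above: do_memset_gettimeofday()
times 'iterations' calls of the routine but still returns len divided by the
elapsed time, and the Clock/Byte output likewise divides by len alone, so
with -i N both figures absorb an extra factor of N. A per-invocation
normalization (shown here only as a sketch, not as part of the patch) would
be:

    /* bytes set per second across the whole -i loop */
    static double bytes_per_sec(size_t len, int iterations, double secs)
    {
            return (double)len * (double)iterations / secs;
    }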
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index fcb96269852a..b0e74ab2d7a2 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -52,6 +52,9 @@ static struct bench_suite mem_suites[] = {
52 { "memcpy", 52 { "memcpy",
53 "Simple memory copy in various ways", 53 "Simple memory copy in various ways",
54 bench_mem_memcpy }, 54 bench_mem_memcpy },
55 { "memset",
56 "Simple memory set in various ways",
57 bench_mem_memset },
55 suite_all, 58 suite_all,
56 { NULL, 59 { NULL,
57 NULL, 60 NULL,
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 2296c391d0f5..12c814838993 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -922,12 +922,12 @@ static const struct option info_options[] = {
922 OPT_BOOLEAN('t', "threads", &info_threads, 922 OPT_BOOLEAN('t', "threads", &info_threads,
923 "dump thread list in perf.data"), 923 "dump thread list in perf.data"),
924 OPT_BOOLEAN('m', "map", &info_map, 924 OPT_BOOLEAN('m', "map", &info_map,
925 "map of lock instances (name:address table)"), 925 "map of lock instances (address:name table)"),
926 OPT_END() 926 OPT_END()
927}; 927};
928 928
929static const char * const lock_usage[] = { 929static const char * const lock_usage[] = {
930 "perf lock [<options>] {record|trace|report}", 930 "perf lock [<options>] {record|report|script|info}",
931 NULL 931 NULL
932}; 932};
933 933
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index fb8566181f27..4935c09dd5b5 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -58,7 +58,7 @@ static struct {
58 struct perf_probe_event events[MAX_PROBES]; 58 struct perf_probe_event events[MAX_PROBES];
59 struct strlist *dellist; 59 struct strlist *dellist;
60 struct line_range line_range; 60 struct line_range line_range;
61 const char *target_module; 61 const char *target;
62 int max_probe_points; 62 int max_probe_points;
63 struct strfilter *filter; 63 struct strfilter *filter;
64} params; 64} params;
@@ -246,7 +246,7 @@ static const struct option options[] = {
246 "file", "vmlinux pathname"), 246 "file", "vmlinux pathname"),
247 OPT_STRING('s', "source", &symbol_conf.source_prefix, 247 OPT_STRING('s', "source", &symbol_conf.source_prefix,
248 "directory", "path to kernel source"), 248 "directory", "path to kernel source"),
249 OPT_STRING('m', "module", &params.target_module, 249 OPT_STRING('m', "module", &params.target,
250 "modname|path", 250 "modname|path",
251 "target module name (for online) or path (for offline)"), 251 "target module name (for online) or path (for offline)"),
252#endif 252#endif
@@ -333,7 +333,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
333 if (!params.filter) 333 if (!params.filter)
334 params.filter = strfilter__new(DEFAULT_FUNC_FILTER, 334 params.filter = strfilter__new(DEFAULT_FUNC_FILTER,
335 NULL); 335 NULL);
336 ret = show_available_funcs(params.target_module, 336 ret = show_available_funcs(params.target,
337 params.filter); 337 params.filter);
338 strfilter__delete(params.filter); 338 strfilter__delete(params.filter);
339 if (ret < 0) 339 if (ret < 0)
@@ -354,7 +354,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
354 usage_with_options(probe_usage, options); 354 usage_with_options(probe_usage, options);
355 } 355 }
356 356
357 ret = show_line_range(&params.line_range, params.target_module); 357 ret = show_line_range(&params.line_range, params.target);
358 if (ret < 0) 358 if (ret < 0)
359 pr_err(" Error: Failed to show lines. (%d)\n", ret); 359 pr_err(" Error: Failed to show lines. (%d)\n", ret);
360 return ret; 360 return ret;
@@ -371,7 +371,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
371 371
372 ret = show_available_vars(params.events, params.nevents, 372 ret = show_available_vars(params.events, params.nevents,
373 params.max_probe_points, 373 params.max_probe_points,
374 params.target_module, 374 params.target,
375 params.filter, 375 params.filter,
376 params.show_ext_vars); 376 params.show_ext_vars);
377 strfilter__delete(params.filter); 377 strfilter__delete(params.filter);
@@ -393,7 +393,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
393 if (params.nevents) { 393 if (params.nevents) {
394 ret = add_perf_probe_events(params.events, params.nevents, 394 ret = add_perf_probe_events(params.events, params.nevents,
395 params.max_probe_points, 395 params.max_probe_points,
396 params.target_module, 396 params.target,
397 params.force_add); 397 params.force_add);
398 if (ret < 0) { 398 if (ret < 0) {
399 pr_err(" Error: Failed to add events. (%d)\n", ret); 399 pr_err(" Error: Failed to add events. (%d)\n", ret);
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 227b6ae99785..be4e1eee782e 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -44,6 +44,7 @@ struct perf_record {
44 struct perf_evlist *evlist; 44 struct perf_evlist *evlist;
45 struct perf_session *session; 45 struct perf_session *session;
46 const char *progname; 46 const char *progname;
47 const char *uid_str;
47 int output; 48 int output;
48 unsigned int page_size; 49 unsigned int page_size;
49 int realtime_prio; 50 int realtime_prio;
@@ -208,7 +209,7 @@ fallback_missing_features:
208 if (opts->exclude_guest_missing) 209 if (opts->exclude_guest_missing)
209 attr->exclude_guest = attr->exclude_host = 0; 210 attr->exclude_guest = attr->exclude_host = 0;
210retry_sample_id: 211retry_sample_id:
211 attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0; 212 attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
212try_again: 213try_again:
213 if (perf_evsel__open(pos, evlist->cpus, evlist->threads, 214 if (perf_evsel__open(pos, evlist->cpus, evlist->threads,
214 opts->group, group_fd) < 0) { 215 opts->group, group_fd) < 0) {
@@ -227,11 +228,11 @@ try_again:
227 "guest or host samples.\n"); 228 "guest or host samples.\n");
228 opts->exclude_guest_missing = true; 229 opts->exclude_guest_missing = true;
229 goto fallback_missing_features; 230 goto fallback_missing_features;
230 } else if (opts->sample_id_all_avail) { 231 } else if (!opts->sample_id_all_missing) {
231 /* 232 /*
232 * Old kernel, no attr->sample_id_type_all field 233 * Old kernel, no attr->sample_id_type_all field
233 */ 234 */
234 opts->sample_id_all_avail = false; 235 opts->sample_id_all_missing = true;
235 if (!opts->sample_time && !opts->raw_samples && !time_needed) 236 if (!opts->sample_time && !opts->raw_samples && !time_needed)
236 attr->sample_type &= ~PERF_SAMPLE_TIME; 237 attr->sample_type &= ~PERF_SAMPLE_TIME;
237 238
@@ -396,7 +397,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
396{ 397{
397 struct stat st; 398 struct stat st;
398 int flags; 399 int flags;
399 int err, output; 400 int err, output, feat;
400 unsigned long waking = 0; 401 unsigned long waking = 0;
401 const bool forks = argc > 0; 402 const bool forks = argc > 0;
402 struct machine *machine; 403 struct machine *machine;
@@ -463,8 +464,17 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
463 464
464 rec->session = session; 465 rec->session = session;
465 466
466 if (!rec->no_buildid) 467 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
467 perf_header__set_feat(&session->header, HEADER_BUILD_ID); 468 perf_header__set_feat(&session->header, feat);
469
470 if (rec->no_buildid)
471 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
472
473 if (!have_tracepoints(&evsel_list->entries))
474 perf_header__clear_feat(&session->header, HEADER_TRACE_INFO);
475
476 if (!rec->opts.branch_stack)
477 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
468 478
469 if (!rec->file_new) { 479 if (!rec->file_new) {
470 err = perf_session__read_header(session, output); 480 err = perf_session__read_header(session, output);
@@ -472,22 +482,6 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
472 goto out_delete_session; 482 goto out_delete_session;
473 } 483 }
474 484
475 if (have_tracepoints(&evsel_list->entries))
476 perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
477
478 perf_header__set_feat(&session->header, HEADER_HOSTNAME);
479 perf_header__set_feat(&session->header, HEADER_OSRELEASE);
480 perf_header__set_feat(&session->header, HEADER_ARCH);
481 perf_header__set_feat(&session->header, HEADER_CPUDESC);
482 perf_header__set_feat(&session->header, HEADER_NRCPUS);
483 perf_header__set_feat(&session->header, HEADER_EVENT_DESC);
484 perf_header__set_feat(&session->header, HEADER_CMDLINE);
485 perf_header__set_feat(&session->header, HEADER_VERSION);
486 perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
487 perf_header__set_feat(&session->header, HEADER_TOTAL_MEM);
488 perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY);
489 perf_header__set_feat(&session->header, HEADER_CPUID);
490
491 if (forks) { 485 if (forks) {
492 err = perf_evlist__prepare_workload(evsel_list, opts, argv); 486 err = perf_evlist__prepare_workload(evsel_list, opts, argv);
493 if (err < 0) { 487 if (err < 0) {
@@ -647,6 +641,90 @@ out_delete_session:
647 return err; 641 return err;
648} 642}
649 643
644#define BRANCH_OPT(n, m) \
645 { .name = n, .mode = (m) }
646
647#define BRANCH_END { .name = NULL }
648
649struct branch_mode {
650 const char *name;
651 int mode;
652};
653
654static const struct branch_mode branch_modes[] = {
655 BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
656 BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
657 BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
658 BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
659 BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
660 BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
661 BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
662 BRANCH_END
663};
664
665static int
666parse_branch_stack(const struct option *opt, const char *str, int unset)
667{
668#define ONLY_PLM \
669 (PERF_SAMPLE_BRANCH_USER |\
670 PERF_SAMPLE_BRANCH_KERNEL |\
671 PERF_SAMPLE_BRANCH_HV)
672
673 uint64_t *mode = (uint64_t *)opt->value;
674 const struct branch_mode *br;
675 char *s, *os = NULL, *p;
676 int ret = -1;
677
678 if (unset)
679 return 0;
680
681 /*
682 * cannot set it twice, -b + --branch-filter for instance
683 */
684 if (*mode)
685 return -1;
686
687 /* str may be NULL in case no arg is passed to -b */
688 if (str) {
689 /* because str is read-only */
690 s = os = strdup(str);
691 if (!s)
692 return -1;
693
694 for (;;) {
695 p = strchr(s, ',');
696 if (p)
697 *p = '\0';
698
699 for (br = branch_modes; br->name; br++) {
700 if (!strcasecmp(s, br->name))
701 break;
702 }
703 if (!br->name) {
704 ui__warning("unknown branch filter %s,"
705 " check man page\n", s);
706 goto error;
707 }
708
709 *mode |= br->mode;
710
711 if (!p)
712 break;
713
714 s = p + 1;
715 }
716 }
717 ret = 0;
718
719 /* default to any branch */
720 if ((*mode & ~ONLY_PLM) == 0) {
721 *mode = PERF_SAMPLE_BRANCH_ANY;
722 }
723error:
724 free(os);
725 return ret;
726}
727
650static const char * const record_usage[] = { 728static const char * const record_usage[] = {
651 "perf record [<options>] [<command>]", 729 "perf record [<options>] [<command>]",
652 "perf record [<options>] -- <command> [<options>]", 730 "perf record [<options>] -- <command> [<options>]",
@@ -665,13 +743,10 @@ static const char * const record_usage[] = {
665 */ 743 */
666static struct perf_record record = { 744static struct perf_record record = {
667 .opts = { 745 .opts = {
668 .target_pid = -1,
669 .target_tid = -1,
670 .mmap_pages = UINT_MAX, 746 .mmap_pages = UINT_MAX,
671 .user_freq = UINT_MAX, 747 .user_freq = UINT_MAX,
672 .user_interval = ULLONG_MAX, 748 .user_interval = ULLONG_MAX,
673 .freq = 1000, 749 .freq = 1000,
674 .sample_id_all_avail = true,
675 }, 750 },
676 .write_mode = WRITE_FORCE, 751 .write_mode = WRITE_FORCE,
677 .file_new = true, 752 .file_new = true,
@@ -690,9 +765,9 @@ const struct option record_options[] = {
690 parse_events_option), 765 parse_events_option),
691 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 766 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
692 "event filter", parse_filter), 767 "event filter", parse_filter),
693 OPT_INTEGER('p', "pid", &record.opts.target_pid, 768 OPT_STRING('p', "pid", &record.opts.target_pid, "pid",
694 "record events on existing process id"), 769 "record events on existing process id"),
695 OPT_INTEGER('t', "tid", &record.opts.target_tid, 770 OPT_STRING('t', "tid", &record.opts.target_tid, "tid",
696 "record events on existing thread id"), 771 "record events on existing thread id"),
697 OPT_INTEGER('r', "realtime", &record.realtime_prio, 772 OPT_INTEGER('r', "realtime", &record.realtime_prio,
698 "collect data with this RT SCHED_FIFO priority"), 773 "collect data with this RT SCHED_FIFO priority"),
@@ -738,6 +813,15 @@ const struct option record_options[] = {
738 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 813 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
739 "monitor event in cgroup name only", 814 "monitor event in cgroup name only",
740 parse_cgroups), 815 parse_cgroups),
816 OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),
817
818 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
819 "branch any", "sample any taken branches",
820 parse_branch_stack),
821
822 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
823 "branch filter mask", "branch stack filter modes",
824 parse_branch_stack),
741 OPT_END() 825 OPT_END()
742}; 826};
743 827
@@ -758,8 +842,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
758 842
759 argc = parse_options(argc, argv, record_options, record_usage, 843 argc = parse_options(argc, argv, record_options, record_usage,
760 PARSE_OPT_STOP_AT_NON_OPTION); 844 PARSE_OPT_STOP_AT_NON_OPTION);
761 if (!argc && rec->opts.target_pid == -1 && rec->opts.target_tid == -1 && 845 if (!argc && !rec->opts.target_pid && !rec->opts.target_tid &&
762 !rec->opts.system_wide && !rec->opts.cpu_list) 846 !rec->opts.system_wide && !rec->opts.cpu_list && !rec->uid_str)
763 usage_with_options(record_usage, record_options); 847 usage_with_options(record_usage, record_options);
764 848
765 if (rec->force && rec->append_file) { 849 if (rec->force && rec->append_file) {
@@ -799,11 +883,17 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
799 goto out_symbol_exit; 883 goto out_symbol_exit;
800 } 884 }
801 885
802 if (rec->opts.target_pid != -1) 886 rec->opts.uid = parse_target_uid(rec->uid_str, rec->opts.target_tid,
887 rec->opts.target_pid);
888 if (rec->uid_str != NULL && rec->opts.uid == UINT_MAX - 1)
889 goto out_free_fd;
890
891 if (rec->opts.target_pid)
803 rec->opts.target_tid = rec->opts.target_pid; 892 rec->opts.target_tid = rec->opts.target_pid;
804 893
805 if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid, 894 if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid,
806 rec->opts.target_tid, rec->opts.cpu_list) < 0) 895 rec->opts.target_tid, rec->opts.uid,
896 rec->opts.cpu_list) < 0)
807 usage_with_options(record_usage, record_options); 897 usage_with_options(record_usage, record_options);
808 898
809 list_for_each_entry(pos, &evsel_list->entries, node) { 899 list_for_each_entry(pos, &evsel_list->entries, node) {
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 25d34d483e49..8e91c6eba18a 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -53,6 +53,82 @@ struct perf_report {
53 DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); 53 DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
54}; 54};
55 55
56static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
57 struct addr_location *al,
58 struct perf_sample *sample,
59 struct perf_evsel *evsel,
60 struct machine *machine)
61{
62 struct perf_report *rep = container_of(tool, struct perf_report, tool);
63 struct symbol *parent = NULL;
64 int err = 0;
65 unsigned i;
66 struct hist_entry *he;
67 struct branch_info *bi, *bx;
68
69 if ((sort__has_parent || symbol_conf.use_callchain)
70 && sample->callchain) {
71 err = machine__resolve_callchain(machine, evsel, al->thread,
72 sample->callchain, &parent);
73 if (err)
74 return err;
75 }
76
77 bi = machine__resolve_bstack(machine, al->thread,
78 sample->branch_stack);
79 if (!bi)
80 return -ENOMEM;
81
82 for (i = 0; i < sample->branch_stack->nr; i++) {
83 if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
84 continue;
85 /*
86 * The report shows the percentage of total branches captured
87 * and not events sampled. Thus we use a pseudo period of 1.
88 */
89 he = __hists__add_branch_entry(&evsel->hists, al, parent,
90 &bi[i], 1);
91 if (he) {
92 struct annotation *notes;
93 err = -ENOMEM;
94 bx = he->branch_info;
95 if (bx->from.sym && use_browser > 0) {
96 notes = symbol__annotation(bx->from.sym);
97 if (!notes->src
98 && symbol__alloc_hist(bx->from.sym) < 0)
99 goto out;
100
101 err = symbol__inc_addr_samples(bx->from.sym,
102 bx->from.map,
103 evsel->idx,
104 bx->from.al_addr);
105 if (err)
106 goto out;
107 }
108
109 if (bx->to.sym && use_browser > 0) {
110 notes = symbol__annotation(bx->to.sym);
111 if (!notes->src
112 && symbol__alloc_hist(bx->to.sym) < 0)
113 goto out;
114
115 err = symbol__inc_addr_samples(bx->to.sym,
116 bx->to.map,
117 evsel->idx,
118 bx->to.al_addr);
119 if (err)
120 goto out;
121 }
122 evsel->hists.stats.total_period += 1;
123 hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
124 err = 0;
125 } else
126 return -ENOMEM;
127 }
128out:
129 return err;
130}
131
56static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, 132static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
57 struct addr_location *al, 133 struct addr_location *al,
58 struct perf_sample *sample, 134 struct perf_sample *sample,
@@ -126,14 +202,21 @@ static int process_sample_event(struct perf_tool *tool,
126 if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) 202 if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
127 return 0; 203 return 0;
128 204
129 if (al.map != NULL) 205 if (sort__branch_mode == 1) {
130 al.map->dso->hit = 1; 206 if (perf_report__add_branch_hist_entry(tool, &al, sample,
207 evsel, machine)) {
208 pr_debug("problem adding lbr entry, skipping event\n");
209 return -1;
210 }
211 } else {
212 if (al.map != NULL)
213 al.map->dso->hit = 1;
131 214
132 if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) { 215 if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) {
133 pr_debug("problem incrementing symbol period, skipping event\n"); 216 pr_debug("problem incrementing symbol period, skipping event\n");
134 return -1; 217 return -1;
218 }
135 } 219 }
136
137 return 0; 220 return 0;
138} 221}
139 222
@@ -188,6 +271,15 @@ static int perf_report__setup_sample_type(struct perf_report *rep)
188 } 271 }
189 } 272 }
190 273
274 if (sort__branch_mode == 1) {
275 if (!(self->sample_type & PERF_SAMPLE_BRANCH_STACK)) {
276 fprintf(stderr, "selected -b but no branch data."
277 " Did you call perf record without"
278 " -b?\n");
279 return -1;
280 }
281 }
282
191 return 0; 283 return 0;
192} 284}
193 285
@@ -246,7 +338,7 @@ static int __cmd_report(struct perf_report *rep)
246{ 338{
247 int ret = -EINVAL; 339 int ret = -EINVAL;
248 u64 nr_samples; 340 u64 nr_samples;
249 struct perf_session *session; 341 struct perf_session *session = rep->session;
250 struct perf_evsel *pos; 342 struct perf_evsel *pos;
251 struct map *kernel_map; 343 struct map *kernel_map;
252 struct kmap *kernel_kmap; 344 struct kmap *kernel_kmap;
@@ -254,13 +346,6 @@ static int __cmd_report(struct perf_report *rep)
254 346
255 signal(SIGINT, sig_handler); 347 signal(SIGINT, sig_handler);
256 348
257 session = perf_session__new(rep->input_name, O_RDONLY,
258 rep->force, false, &rep->tool);
259 if (session == NULL)
260 return -ENOMEM;
261
262 rep->session = session;
263
264 if (rep->cpu_list) { 349 if (rep->cpu_list) {
265 ret = perf_session__cpu_bitmap(session, rep->cpu_list, 350 ret = perf_session__cpu_bitmap(session, rep->cpu_list,
266 rep->cpu_bitmap); 351 rep->cpu_bitmap);
@@ -427,9 +512,19 @@ setup:
427 return 0; 512 return 0;
428} 513}
429 514
515static int
516parse_branch_mode(const struct option *opt __used, const char *str __used, int unset)
517{
518 sort__branch_mode = !unset;
519 return 0;
520}
521
430int cmd_report(int argc, const char **argv, const char *prefix __used) 522int cmd_report(int argc, const char **argv, const char *prefix __used)
431{ 523{
524 struct perf_session *session;
432 struct stat st; 525 struct stat st;
526 bool has_br_stack = false;
527 int ret = -1;
433 char callchain_default_opt[] = "fractal,0.5,callee"; 528 char callchain_default_opt[] = "fractal,0.5,callee";
434 const char * const report_usage[] = { 529 const char * const report_usage[] = {
435 "perf report [<options>]", 530 "perf report [<options>]",
@@ -477,7 +572,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
477 OPT_BOOLEAN(0, "stdio", &report.use_stdio, 572 OPT_BOOLEAN(0, "stdio", &report.use_stdio,
478 "Use the stdio interface"), 573 "Use the stdio interface"),
479 OPT_STRING('s', "sort", &sort_order, "key[,key2...]", 574 OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
480 "sort by key(s): pid, comm, dso, symbol, parent"), 575 "sort by key(s): pid, comm, dso, symbol, parent, dso_to,"
576 " dso_from, symbol_to, symbol_from, mispredict"),
481 OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, 577 OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
482 "Show sample percentage for different cpu modes"), 578 "Show sample percentage for different cpu modes"),
483 OPT_STRING('p', "parent", &parent_pattern, "regex", 579 OPT_STRING('p', "parent", &parent_pattern, "regex",
@@ -517,6 +613,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
517 "Specify disassembler style (e.g. -M intel for intel syntax)"), 613 "Specify disassembler style (e.g. -M intel for intel syntax)"),
518 OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, 614 OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
519 "Show a column with the sum of periods"), 615 "Show a column with the sum of periods"),
616 OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "",
617 "use branch records for histogram filling", parse_branch_mode),
520 OPT_END() 618 OPT_END()
521 }; 619 };
522 620
@@ -536,11 +634,36 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
536 else 634 else
537 report.input_name = "perf.data"; 635 report.input_name = "perf.data";
538 } 636 }
637 session = perf_session__new(report.input_name, O_RDONLY,
638 report.force, false, &report.tool);
639 if (session == NULL)
640 return -ENOMEM;
539 641
540 if (strcmp(report.input_name, "-") != 0) 642 report.session = session;
643
644 has_br_stack = perf_header__has_feat(&session->header,
645 HEADER_BRANCH_STACK);
646
647 if (sort__branch_mode == -1 && has_br_stack)
648 sort__branch_mode = 1;
649
650 /* sort__branch_mode could be 0 if --no-branch-stack */
651 if (sort__branch_mode == 1) {
652 /*
653 * if no sort_order is provided, then specify
654 * branch-mode specific order
655 */
656 if (sort_order == default_sort_order)
657 sort_order = "comm,dso_from,symbol_from,"
658 "dso_to,symbol_to";
659
660 }
661
662 if (strcmp(report.input_name, "-") != 0) {
541 setup_browser(true); 663 setup_browser(true);
542 else 664 } else {
543 use_browser = 0; 665 use_browser = 0;
666 }
544 667
545 /* 668 /*
546 * Only in the newt browser we are doing integrated annotation, 669 * Only in the newt browser we are doing integrated annotation,
@@ -568,13 +691,13 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
568 } 691 }
569 692
570 if (symbol__init() < 0) 693 if (symbol__init() < 0)
571 return -1; 694 goto error;
572 695
573 setup_sorting(report_usage, options); 696 setup_sorting(report_usage, options);
574 697
575 if (parent_pattern != default_parent_pattern) { 698 if (parent_pattern != default_parent_pattern) {
576 if (sort_dimension__add("parent") < 0) 699 if (sort_dimension__add("parent") < 0)
577 return -1; 700 goto error;
578 701
579 /* 702 /*
580 * Only show the parent fields if we explicitly 703 * Only show the parent fields if we explicitly
@@ -592,9 +715,20 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
592 if (argc) 715 if (argc)
593 usage_with_options(report_usage, options); 716 usage_with_options(report_usage, options);
594 717
595 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
596 sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); 718 sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);
597 sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);
598 719
599 return __cmd_report(&report); 720 if (sort__branch_mode == 1) {
721 sort_entry__setup_elide(&sort_dso_from, symbol_conf.dso_from_list, "dso_from", stdout);
722 sort_entry__setup_elide(&sort_dso_to, symbol_conf.dso_to_list, "dso_to", stdout);
723 sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout);
724 sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout);
725 } else {
726 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
727 sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);
728 }
729
730 ret = __cmd_report(&report);
731error:
732 perf_session__delete(session);
733 return ret;
600} 734}
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index bb68ddf257b7..d4ce733b9eba 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -40,6 +40,7 @@ enum perf_output_field {
40 PERF_OUTPUT_SYM = 1U << 8, 40 PERF_OUTPUT_SYM = 1U << 8,
41 PERF_OUTPUT_DSO = 1U << 9, 41 PERF_OUTPUT_DSO = 1U << 9,
42 PERF_OUTPUT_ADDR = 1U << 10, 42 PERF_OUTPUT_ADDR = 1U << 10,
43 PERF_OUTPUT_SYMOFFSET = 1U << 11,
43}; 44};
44 45
45struct output_option { 46struct output_option {
@@ -57,6 +58,7 @@ struct output_option {
57 {.str = "sym", .field = PERF_OUTPUT_SYM}, 58 {.str = "sym", .field = PERF_OUTPUT_SYM},
58 {.str = "dso", .field = PERF_OUTPUT_DSO}, 59 {.str = "dso", .field = PERF_OUTPUT_DSO},
59 {.str = "addr", .field = PERF_OUTPUT_ADDR}, 60 {.str = "addr", .field = PERF_OUTPUT_ADDR},
61 {.str = "symoff", .field = PERF_OUTPUT_SYMOFFSET},
60}; 62};
61 63
62/* default set to maintain compatibility with current format */ 64/* default set to maintain compatibility with current format */
@@ -193,6 +195,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
193 "to symbols.\n"); 195 "to symbols.\n");
194 return -EINVAL; 196 return -EINVAL;
195 } 197 }
198 if (PRINT_FIELD(SYMOFFSET) && !PRINT_FIELD(SYM)) {
199 pr_err("Display of offsets requested but symbol is not"
200 "selected.\n");
201 return -EINVAL;
202 }
196 if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { 203 if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
197 pr_err("Display of DSO requested but neither sample IP nor " 204 pr_err("Display of DSO requested but neither sample IP nor "
198 "sample address\nis selected. Hence, no addresses to convert " 205 "sample address\nis selected. Hence, no addresses to convert "
@@ -300,10 +307,17 @@ static void print_sample_start(struct perf_sample *sample,
300 } else 307 } else
301 evname = __event_name(attr->type, attr->config); 308 evname = __event_name(attr->type, attr->config);
302 309
303 printf("%s: ", evname ? evname : "(unknown)"); 310 printf("%s: ", evname ? evname : "[unknown]");
304 } 311 }
305} 312}
306 313
314static bool is_bts_event(struct perf_event_attr *attr)
315{
316 return ((attr->type == PERF_TYPE_HARDWARE) &&
317 (attr->config & PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
318 (attr->sample_period == 1));
319}
320
307static bool sample_addr_correlates_sym(struct perf_event_attr *attr) 321static bool sample_addr_correlates_sym(struct perf_event_attr *attr)
308{ 322{
309 if ((attr->type == PERF_TYPE_SOFTWARE) && 323 if ((attr->type == PERF_TYPE_SOFTWARE) &&
@@ -312,6 +326,9 @@ static bool sample_addr_correlates_sym(struct perf_event_attr *attr)
312 (attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ))) 326 (attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)))
313 return true; 327 return true;
314 328
329 if (is_bts_event(attr))
330 return true;
331
315 return false; 332 return false;
316} 333}
317 334
@@ -323,7 +340,6 @@ static void print_sample_addr(union perf_event *event,
323{ 340{
324 struct addr_location al; 341 struct addr_location al;
325 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 342 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
326 const char *symname, *dsoname;
327 343
328 printf("%16" PRIx64, sample->addr); 344 printf("%16" PRIx64, sample->addr);
329 345
@@ -343,22 +359,46 @@ static void print_sample_addr(union perf_event *event,
343 al.sym = map__find_symbol(al.map, al.addr, NULL); 359 al.sym = map__find_symbol(al.map, al.addr, NULL);
344 360
345 if (PRINT_FIELD(SYM)) { 361 if (PRINT_FIELD(SYM)) {
346 if (al.sym && al.sym->name) 362 printf(" ");
347 symname = al.sym->name; 363 if (PRINT_FIELD(SYMOFFSET))
364 symbol__fprintf_symname_offs(al.sym, &al, stdout);
348 else 365 else
349 symname = ""; 366 symbol__fprintf_symname(al.sym, stdout);
350
351 printf(" %16s", symname);
352 } 367 }
353 368
354 if (PRINT_FIELD(DSO)) { 369 if (PRINT_FIELD(DSO)) {
355 if (al.map && al.map->dso && al.map->dso->name) 370 printf(" (");
356 dsoname = al.map->dso->name; 371 map__fprintf_dsoname(al.map, stdout);
357 else 372 printf(")");
358 dsoname = ""; 373 }
374}
359 375
360 printf(" (%s)", dsoname); 376static void print_sample_bts(union perf_event *event,
377 struct perf_sample *sample,
378 struct perf_evsel *evsel,
379 struct machine *machine,
380 struct thread *thread)
381{
382 struct perf_event_attr *attr = &evsel->attr;
383
384 /* print branch_from information */
385 if (PRINT_FIELD(IP)) {
386 if (!symbol_conf.use_callchain)
387 printf(" ");
388 else
389 printf("\n");
390 perf_event__print_ip(event, sample, machine, evsel,
391 PRINT_FIELD(SYM), PRINT_FIELD(DSO),
392 PRINT_FIELD(SYMOFFSET));
361 } 393 }
394
395 printf(" => ");
396
397 /* print branch_to information */
398 if (PRINT_FIELD(ADDR))
399 print_sample_addr(event, sample, machine, thread, attr);
400
401 printf("\n");
362} 402}
363 403
364static void process_event(union perf_event *event __unused, 404static void process_event(union perf_event *event __unused,
@@ -374,6 +414,11 @@ static void process_event(union perf_event *event __unused,
374 414
375 print_sample_start(sample, thread, attr); 415 print_sample_start(sample, thread, attr);
376 416
417 if (is_bts_event(attr)) {
418 print_sample_bts(event, sample, evsel, machine, thread);
419 return;
420 }
421
377 if (PRINT_FIELD(TRACE)) 422 if (PRINT_FIELD(TRACE))
378 print_trace_event(sample->cpu, sample->raw_data, 423 print_trace_event(sample->cpu, sample->raw_data,
379 sample->raw_size); 424 sample->raw_size);
@@ -387,7 +432,8 @@ static void process_event(union perf_event *event __unused,
387 else 432 else
388 printf("\n"); 433 printf("\n");
389 perf_event__print_ip(event, sample, machine, evsel, 434 perf_event__print_ip(event, sample, machine, evsel,
390 PRINT_FIELD(SYM), PRINT_FIELD(DSO)); 435 PRINT_FIELD(SYM), PRINT_FIELD(DSO),
436 PRINT_FIELD(SYMOFFSET));
391 } 437 }
392 438
393 printf("\n"); 439 printf("\n");
@@ -1097,7 +1143,10 @@ static const struct option options[] = {
1097 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", 1143 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
1098 "Look for files with symbols relative to this directory"), 1144 "Look for files with symbols relative to this directory"),
1099 OPT_CALLBACK('f', "fields", NULL, "str", 1145 OPT_CALLBACK('f', "fields", NULL, "str",
1100 "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr", 1146 "comma separated output fields prepend with 'type:'. "
1147 "Valid types: hw,sw,trace,raw. "
1148 "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
1149 "addr,symoff",
1101 parse_output_fields), 1150 parse_output_fields),
1102 OPT_BOOLEAN('a', "all-cpus", &system_wide, 1151 OPT_BOOLEAN('a', "all-cpus", &system_wide,
1103 "system-wide collection from all CPUs"), 1152 "system-wide collection from all CPUs"),
@@ -1106,6 +1155,9 @@ static const struct option options[] = {
1106 "only display events for these comms"), 1155 "only display events for these comms"),
1107 OPT_BOOLEAN('I', "show-info", &show_full_info, 1156 OPT_BOOLEAN('I', "show-info", &show_full_info,
1108 "display extended information from perf.data file"), 1157 "display extended information from perf.data file"),
1158 OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path,
1159 "Show the path of [kernel.kallsyms]"),
1160
1109 OPT_END() 1161 OPT_END()
1110}; 1162};
1111 1163
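
For orientation, the BTS path added above prints one line per branch sample: the sample IP is the branch source and the sample address is the branch target, rendered as "from => to". A standalone sketch of that output shape (illustrative struct and names, not the tool's API):

	#include <inttypes.h>
	#include <stdio.h>

	struct bts_sample { uint64_t ip, addr; };	/* hypothetical holder */

	/* prints e.g. "0x400534 => 0x400560" */
	static void print_bts_line(const struct bts_sample *s)
	{
		printf("%#" PRIx64 " => %#" PRIx64 "\n", s->ip, s->addr);
	}
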
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f5d2a63eba66..ea40e4e8b227 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -182,8 +182,8 @@ static int run_count = 1;
182static bool no_inherit = false; 182static bool no_inherit = false;
183static bool scale = true; 183static bool scale = true;
184static bool no_aggr = false; 184static bool no_aggr = false;
185static pid_t target_pid = -1; 185static const char *target_pid;
186static pid_t target_tid = -1; 186static const char *target_tid;
187static pid_t child_pid = -1; 187static pid_t child_pid = -1;
188static bool null_run = false; 188static bool null_run = false;
189static int detailed_run = 0; 189static int detailed_run = 0;
@@ -296,7 +296,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel,
296 if (system_wide) 296 if (system_wide)
297 return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, 297 return perf_evsel__open_per_cpu(evsel, evsel_list->cpus,
298 group, group_fd); 298 group, group_fd);
299 if (target_pid == -1 && target_tid == -1) { 299 if (!target_pid && !target_tid) {
300 attr->disabled = 1; 300 attr->disabled = 1;
301 attr->enable_on_exec = 1; 301 attr->enable_on_exec = 1;
302 } 302 }
@@ -446,7 +446,7 @@ static int run_perf_stat(int argc __used, const char **argv)
446 exit(-1); 446 exit(-1);
447 } 447 }
448 448
449 if (target_tid == -1 && target_pid == -1 && !system_wide) 449 if (!target_tid && !target_pid && !system_wide)
450 evsel_list->threads->map[0] = child_pid; 450 evsel_list->threads->map[0] = child_pid;
451 451
452 /* 452 /*
@@ -576,6 +576,8 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
576 if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 576 if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
577 fprintf(output, " # %8.3f CPUs utilized ", 577 fprintf(output, " # %8.3f CPUs utilized ",
578 avg / avg_stats(&walltime_nsecs_stats)); 578 avg / avg_stats(&walltime_nsecs_stats));
579 else
580 fprintf(output, " ");
579} 581}
580 582
581/* used for get_ratio_color() */ 583/* used for get_ratio_color() */
@@ -844,12 +846,18 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
844 846
845 fprintf(output, " # %8.3f GHz ", ratio); 847 fprintf(output, " # %8.3f GHz ", ratio);
846 } else if (runtime_nsecs_stats[cpu].n != 0) { 848 } else if (runtime_nsecs_stats[cpu].n != 0) {
849 char unit = 'M';
850
847 total = avg_stats(&runtime_nsecs_stats[cpu]); 851 total = avg_stats(&runtime_nsecs_stats[cpu]);
848 852
849 if (total) 853 if (total)
850 ratio = 1000.0 * avg / total; 854 ratio = 1000.0 * avg / total;
855 if (ratio < 0.001) {
856 ratio *= 1000;
857 unit = 'K';
858 }
851 859
852 fprintf(output, " # %8.3f M/sec ", ratio); 860 fprintf(output, " # %8.3f %c/sec ", ratio, unit);
853 } else { 861 } else {
854 fprintf(output, " "); 862 fprintf(output, " ");
855 } 863 }
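
A quick worked example of the rescaling added above (standalone sketch, not tool code): 500 events over an average runtime of 1.0e9 ns give ratio = 1000.0 * 500 / 1.0e9 = 0.0005 M/sec; that is below the 0.001 threshold, so it is multiplied by 1000 and printed as 0.500 K/sec:

	#include <stdio.h>

	int main(void)
	{
		double avg = 500.0, total = 1.0e9;	/* events, nanoseconds */
		double ratio = 1000.0 * avg / total;	/* 0.0005 */
		char unit = 'M';

		if (ratio < 0.001) {
			ratio *= 1000;			/* 0.5 */
			unit = 'K';
		}
		printf(" # %8.3f %c/sec\n", ratio, unit);	/* 0.500 K/sec */
		return 0;
	}
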
@@ -960,14 +968,14 @@ static void print_stat(int argc, const char **argv)
960 if (!csv_output) { 968 if (!csv_output) {
961 fprintf(output, "\n"); 969 fprintf(output, "\n");
962 fprintf(output, " Performance counter stats for "); 970 fprintf(output, " Performance counter stats for ");
963 if(target_pid == -1 && target_tid == -1) { 971 if (!target_pid && !target_tid) {
964 fprintf(output, "\'%s", argv[0]); 972 fprintf(output, "\'%s", argv[0]);
965 for (i = 1; i < argc; i++) 973 for (i = 1; i < argc; i++)
966 fprintf(output, " %s", argv[i]); 974 fprintf(output, " %s", argv[i]);
967 } else if (target_pid != -1) 975 } else if (target_pid)
968 fprintf(output, "process id \'%d", target_pid); 976 fprintf(output, "process id \'%s", target_pid);
969 else 977 else
970 fprintf(output, "thread id \'%d", target_tid); 978 fprintf(output, "thread id \'%s", target_tid);
971 979
972 fprintf(output, "\'"); 980 fprintf(output, "\'");
973 if (run_count > 1) 981 if (run_count > 1)
@@ -1041,10 +1049,10 @@ static const struct option options[] = {
1041 "event filter", parse_filter), 1049 "event filter", parse_filter),
1042 OPT_BOOLEAN('i', "no-inherit", &no_inherit, 1050 OPT_BOOLEAN('i', "no-inherit", &no_inherit,
1043 "child tasks do not inherit counters"), 1051 "child tasks do not inherit counters"),
1044 OPT_INTEGER('p', "pid", &target_pid, 1052 OPT_STRING('p', "pid", &target_pid, "pid",
1045 "stat events on existing process id"), 1053 "stat events on existing process id"),
1046 OPT_INTEGER('t', "tid", &target_tid, 1054 OPT_STRING('t', "tid", &target_tid, "tid",
1047 "stat events on existing thread id"), 1055 "stat events on existing thread id"),
1048 OPT_BOOLEAN('a', "all-cpus", &system_wide, 1056 OPT_BOOLEAN('a', "all-cpus", &system_wide,
1049 "system-wide collection from all CPUs"), 1057 "system-wide collection from all CPUs"),
1050 OPT_BOOLEAN('g', "group", &group, 1058 OPT_BOOLEAN('g', "group", &group,
@@ -1182,7 +1190,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
1182 } else if (big_num_opt == 0) /* User passed --no-big-num */ 1190 } else if (big_num_opt == 0) /* User passed --no-big-num */
1183 big_num = false; 1191 big_num = false;
1184 1192
1185 if (!argc && target_pid == -1 && target_tid == -1) 1193 if (!argc && !target_pid && !target_tid)
1186 usage_with_options(stat_usage, options); 1194 usage_with_options(stat_usage, options);
1187 if (run_count <= 0) 1195 if (run_count <= 0)
1188 usage_with_options(stat_usage, options); 1196 usage_with_options(stat_usage, options);
@@ -1198,10 +1206,11 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
1198 if (add_default_attributes()) 1206 if (add_default_attributes())
1199 goto out; 1207 goto out;
1200 1208
1201 if (target_pid != -1) 1209 if (target_pid)
1202 target_tid = target_pid; 1210 target_tid = target_pid;
1203 1211
1204 evsel_list->threads = thread_map__new(target_pid, target_tid); 1212 evsel_list->threads = thread_map__new_str(target_pid,
1213 target_tid, UINT_MAX);
1205 if (evsel_list->threads == NULL) { 1214 if (evsel_list->threads == NULL) {
1206 pr_err("Problems finding threads of monitor\n"); 1215 pr_err("Problems finding threads of monitor\n");
1207 usage_with_options(stat_usage, options); 1216 usage_with_options(stat_usage, options);
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 3854e869dce1..3e087ce8daa6 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -15,6 +15,8 @@
15#include "util/thread_map.h" 15#include "util/thread_map.h"
16#include "../../include/linux/hw_breakpoint.h" 16#include "../../include/linux/hw_breakpoint.h"
17 17
18#include <sys/mman.h>
19
18static int vmlinux_matches_kallsyms_filter(struct map *map __used, struct symbol *sym) 20static int vmlinux_matches_kallsyms_filter(struct map *map __used, struct symbol *sym)
19{ 21{
20 bool *visited = symbol__priv(sym); 22 bool *visited = symbol__priv(sym);
@@ -276,7 +278,7 @@ static int test__open_syscall_event(void)
276 return -1; 278 return -1;
277 } 279 }
278 280
279 threads = thread_map__new(-1, getpid()); 281 threads = thread_map__new(-1, getpid(), UINT_MAX);
280 if (threads == NULL) { 282 if (threads == NULL) {
281 pr_debug("thread_map__new\n"); 283 pr_debug("thread_map__new\n");
282 return -1; 284 return -1;
@@ -342,7 +344,7 @@ static int test__open_syscall_event_on_all_cpus(void)
342 return -1; 344 return -1;
343 } 345 }
344 346
345 threads = thread_map__new(-1, getpid()); 347 threads = thread_map__new(-1, getpid(), UINT_MAX);
346 if (threads == NULL) { 348 if (threads == NULL) {
347 pr_debug("thread_map__new\n"); 349 pr_debug("thread_map__new\n");
348 return -1; 350 return -1;
@@ -490,7 +492,7 @@ static int test__basic_mmap(void)
490 expected_nr_events[i] = random() % 257; 492 expected_nr_events[i] = random() % 257;
491 } 493 }
492 494
493 threads = thread_map__new(-1, getpid()); 495 threads = thread_map__new(-1, getpid(), UINT_MAX);
494 if (threads == NULL) { 496 if (threads == NULL) {
495 pr_debug("thread_map__new\n"); 497 pr_debug("thread_map__new\n");
496 return -1; 498 return -1;
@@ -1008,12 +1010,9 @@ realloc:
1008static int test__PERF_RECORD(void) 1010static int test__PERF_RECORD(void)
1009{ 1011{
1010 struct perf_record_opts opts = { 1012 struct perf_record_opts opts = {
1011 .target_pid = -1,
1012 .target_tid = -1,
1013 .no_delay = true, 1013 .no_delay = true,
1014 .freq = 10, 1014 .freq = 10,
1015 .mmap_pages = 256, 1015 .mmap_pages = 256,
1016 .sample_id_all_avail = true,
1017 }; 1016 };
1018 cpu_set_t *cpu_mask = NULL; 1017 cpu_set_t *cpu_mask = NULL;
1019 size_t cpu_mask_size = 0; 1018 size_t cpu_mask_size = 0;
@@ -1054,7 +1053,7 @@ static int test__PERF_RECORD(void)
1054 * we're monitoring, the one forked there. 1053 * we're monitoring, the one forked there.
1055 */ 1054 */
1056 err = perf_evlist__create_maps(evlist, opts.target_pid, 1055 err = perf_evlist__create_maps(evlist, opts.target_pid,
1057 opts.target_tid, opts.cpu_list); 1056 opts.target_tid, UINT_MAX, opts.cpu_list);
1058 if (err < 0) { 1057 if (err < 0) {
1059 pr_debug("Not enough memory to create thread/cpu maps\n"); 1058 pr_debug("Not enough memory to create thread/cpu maps\n");
1060 goto out_delete_evlist; 1059 goto out_delete_evlist;
@@ -1296,6 +1295,173 @@ out:
1296 return (err < 0 || errs > 0) ? -1 : 0; 1295 return (err < 0 || errs > 0) ? -1 : 0;
1297} 1296}
1298 1297
1298
1299#if defined(__x86_64__) || defined(__i386__)
1300
1301#define barrier() asm volatile("" ::: "memory")
1302
1303static u64 rdpmc(unsigned int counter)
1304{
1305 unsigned int low, high;
1306
1307 asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
1308
1309 return low | ((u64)high) << 32;
1310}
1311
1312static u64 rdtsc(void)
1313{
1314 unsigned int low, high;
1315
1316 asm volatile("rdtsc" : "=a" (low), "=d" (high));
1317
1318 return low | ((u64)high) << 32;
1319}
1320
1321static u64 mmap_read_self(void *addr)
1322{
1323 struct perf_event_mmap_page *pc = addr;
1324 u32 seq, idx, time_mult = 0, time_shift = 0;
1325 u64 count, cyc = 0, time_offset = 0, enabled, running, delta;
1326
1327 do {
1328 seq = pc->lock;
1329 barrier();
1330
1331 enabled = pc->time_enabled;
1332 running = pc->time_running;
1333
1334 if (enabled != running) {
1335 cyc = rdtsc();
1336 time_mult = pc->time_mult;
1337 time_shift = pc->time_shift;
1338 time_offset = pc->time_offset;
1339 }
1340
1341 idx = pc->index;
1342 count = pc->offset;
1343 if (idx)
1344 count += rdpmc(idx - 1);
1345
1346 barrier();
1347 } while (pc->lock != seq);
1348
1349 if (enabled != running) {
1350 u64 quot, rem;
1351
1352 quot = (cyc >> time_shift);
1353 rem = cyc & ((1 << time_shift) - 1);
1354 delta = time_offset + quot * time_mult +
1355 ((rem * time_mult) >> time_shift);
1356
1357 enabled += delta;
1358 if (idx)
1359 running += delta;
1360
1361 quot = count / running;
1362 rem = count % running;
1363 count = quot * enabled + (rem * enabled) / running;
1364 }
1365
1366 return count;
1367}
1368
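
Two things happen in mmap_read_self() above: the pc->lock/barrier() loop is a seqlock-style retry that rereads the page until the sequence count is stable, and the enabled != running arm extrapolates the raw count to compensate for counter multiplexing. A standalone sketch of just the compensation (illustrative numbers, not tool code):

	#include <stdint.h>
	#include <stdio.h>

	/* Extrapolate a count observed while the event was scheduled in for
	 * only 'running' ns out of 'enabled' ns. The quotient/remainder
	 * split avoids overflowing the 64-bit product count * enabled. */
	static uint64_t scale_count(uint64_t count, uint64_t enabled,
				    uint64_t running)
	{
		uint64_t quot = count / running;
		uint64_t rem = count % running;

		return quot * enabled + (rem * enabled) / running;
	}

	int main(void)
	{
		/* scheduled in for half of the window: 1000 -> 2000 */
		printf("%llu\n", (unsigned long long)
		       scale_count(1000, 4000000, 2000000));
		return 0;
	}
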
1369/*
1370 * If the RDPMC instruction faults then signal this back to the test parent task:
1371 */
1372static void segfault_handler(int sig __used, siginfo_t *info __used, void *uc __used)
1373{
1374 exit(-1);
1375}
1376
1377static int __test__rdpmc(void)
1378{
1379 long page_size = sysconf(_SC_PAGE_SIZE);
1380 volatile int tmp = 0;
1381 u64 i, loops = 1000;
1382 int n;
1383 int fd;
1384 void *addr;
1385 struct perf_event_attr attr = {
1386 .type = PERF_TYPE_HARDWARE,
1387 .config = PERF_COUNT_HW_INSTRUCTIONS,
1388 .exclude_kernel = 1,
1389 };
1390 u64 delta_sum = 0;
 1391 struct sigaction sa = { .sa_flags = SA_SIGINFO };
1392
1393 sigfillset(&sa.sa_mask);
1394 sa.sa_sigaction = segfault_handler;
1395 sigaction(SIGSEGV, &sa, NULL);
1396
1397 fprintf(stderr, "\n\n");
1398
1399 fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
1400 if (fd < 0) {
1401 die("Error: sys_perf_event_open() syscall returned "
1402 "with %d (%s)\n", fd, strerror(errno));
1403 }
1404
1405 addr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0);
 1406 if (addr == MAP_FAILED) {
1407 die("Error: mmap() syscall returned "
1408 "with (%s)\n", strerror(errno));
1409 }
1410
1411 for (n = 0; n < 6; n++) {
1412 u64 stamp, now, delta;
1413
1414 stamp = mmap_read_self(addr);
1415
1416 for (i = 0; i < loops; i++)
1417 tmp++;
1418
1419 now = mmap_read_self(addr);
1420 loops *= 10;
1421
1422 delta = now - stamp;
 1423 fprintf(stderr, "%14d: %14llu\n", n, (unsigned long long)delta);
1424
1425 delta_sum += delta;
1426 }
1427
1428 munmap(addr, page_size);
1429 close(fd);
1430
1431 fprintf(stderr, " ");
1432
1433 if (!delta_sum)
1434 return -1;
1435
1436 return 0;
1437}
1438
1439static int test__rdpmc(void)
1440{
1441 int status = 0;
1442 int wret = 0;
1443 int ret;
1444 int pid;
1445
1446 pid = fork();
1447 if (pid < 0)
1448 return -1;
1449
1450 if (!pid) {
1451 ret = __test__rdpmc();
1452
1453 exit(ret);
1454 }
1455
1456 wret = waitpid(pid, &status, 0);
1457 if (wret < 0 || status)
1458 return -1;
1459
1460 return 0;
1461}
1462
1463#endif
1464
1299static struct test { 1465static struct test {
1300 const char *desc; 1466 const char *desc;
1301 int (*func)(void); 1467 int (*func)(void);
@@ -1320,6 +1486,12 @@ static struct test {
1320 .desc = "parse events tests", 1486 .desc = "parse events tests",
1321 .func = test__parse_events, 1487 .func = test__parse_events,
1322 }, 1488 },
1489#if defined(__x86_64__) || defined(__i386__)
1490 {
1491 .desc = "x86 rdpmc test",
1492 .func = test__rdpmc,
1493 },
1494#endif
1323 { 1495 {
1324 .desc = "Validate PERF_RECORD_* events & perf_sample fields", 1496 .desc = "Validate PERF_RECORD_* events & perf_sample fields",
1325 .func = test__PERF_RECORD, 1497 .func = test__PERF_RECORD,
@@ -1412,7 +1584,5 @@ int cmd_test(int argc, const char **argv, const char *prefix __used)
1412 if (symbol__init() < 0) 1584 if (symbol__init() < 0)
1413 return -1; 1585 return -1;
1414 1586
1415 setup_pager();
1416
1417 return __cmd_test(argc, argv); 1587 return __cmd_test(argc, argv);
1418} 1588}
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index ecff31257eb3..e3c63aef8efc 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -64,7 +64,6 @@
64#include <linux/unistd.h> 64#include <linux/unistd.h>
65#include <linux/types.h> 65#include <linux/types.h>
66 66
67
68void get_term_dimensions(struct winsize *ws) 67void get_term_dimensions(struct winsize *ws)
69{ 68{
70 char *s = getenv("LINES"); 69 char *s = getenv("LINES");
@@ -544,10 +543,20 @@ static void perf_top__sort_new_samples(void *arg)
544 543
545static void *display_thread_tui(void *arg) 544static void *display_thread_tui(void *arg)
546{ 545{
546 struct perf_evsel *pos;
547 struct perf_top *top = arg; 547 struct perf_top *top = arg;
548 const char *help = "For a higher level overview, try: perf top --sort comm,dso"; 548 const char *help = "For a higher level overview, try: perf top --sort comm,dso";
549 549
550 perf_top__sort_new_samples(top); 550 perf_top__sort_new_samples(top);
551
552 /*
 553 * Initialize the uid_filter_str; in the future the TUI will allow
 554 * zooming in/out of UIDs. For now, just use whatever the user passed
555 * via --uid.
556 */
557 list_for_each_entry(pos, &top->evlist->entries, node)
558 pos->hists.uid_filter_str = top->uid_str;
559
551 perf_evlist__tui_browse_hists(top->evlist, help, 560 perf_evlist__tui_browse_hists(top->evlist, help,
552 perf_top__sort_new_samples, 561 perf_top__sort_new_samples,
553 top, top->delay_secs); 562 top, top->delay_secs);
@@ -668,6 +677,12 @@ static void perf_event__process_sample(struct perf_tool *tool,
668 return; 677 return;
669 } 678 }
670 679
680 if (!machine) {
681 pr_err("%u unprocessable samples recorded.",
682 top->session->hists.stats.nr_unprocessable_samples++);
683 return;
684 }
685
671 if (event->header.misc & PERF_RECORD_MISC_EXACT_IP) 686 if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
672 top->exact_samples++; 687 top->exact_samples++;
673 688
@@ -861,7 +876,7 @@ fallback_missing_features:
861 if (top->exclude_guest_missing) 876 if (top->exclude_guest_missing)
862 attr->exclude_guest = attr->exclude_host = 0; 877 attr->exclude_guest = attr->exclude_host = 0;
863retry_sample_id: 878retry_sample_id:
864 attr->sample_id_all = top->sample_id_all_avail ? 1 : 0; 879 attr->sample_id_all = top->sample_id_all_missing ? 0 : 1;
865try_again: 880try_again:
866 if (perf_evsel__open(counter, top->evlist->cpus, 881 if (perf_evsel__open(counter, top->evlist->cpus,
867 top->evlist->threads, top->group, 882 top->evlist->threads, top->group,
@@ -878,11 +893,11 @@ try_again:
878 "guest or host samples.\n"); 893 "guest or host samples.\n");
879 top->exclude_guest_missing = true; 894 top->exclude_guest_missing = true;
880 goto fallback_missing_features; 895 goto fallback_missing_features;
881 } else if (top->sample_id_all_avail) { 896 } else if (!top->sample_id_all_missing) {
882 /* 897 /*
883 * Old kernel, no attr->sample_id_type_all field 898 * Old kernel, no attr->sample_id_type_all field
884 */ 899 */
885 top->sample_id_all_avail = false; 900 top->sample_id_all_missing = true;
886 goto retry_sample_id; 901 goto retry_sample_id;
887 } 902 }
888 } 903 }
@@ -967,7 +982,7 @@ static int __cmd_top(struct perf_top *top)
967 if (ret) 982 if (ret)
968 goto out_delete; 983 goto out_delete;
969 984
970 if (top->target_tid != -1) 985 if (top->target_tid || top->uid != UINT_MAX)
971 perf_event__synthesize_thread_map(&top->tool, top->evlist->threads, 986 perf_event__synthesize_thread_map(&top->tool, top->evlist->threads,
972 perf_event__process, 987 perf_event__process,
973 &top->session->host_machine); 988 &top->session->host_machine);
@@ -1105,10 +1120,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1105 struct perf_top top = { 1120 struct perf_top top = {
1106 .count_filter = 5, 1121 .count_filter = 5,
1107 .delay_secs = 2, 1122 .delay_secs = 2,
1108 .target_pid = -1, 1123 .uid = UINT_MAX,
1109 .target_tid = -1,
1110 .freq = 1000, /* 1 KHz */ 1124 .freq = 1000, /* 1 KHz */
1111 .sample_id_all_avail = true,
1112 .mmap_pages = 128, 1125 .mmap_pages = 128,
1113 .sym_pcnt_filter = 5, 1126 .sym_pcnt_filter = 5,
1114 }; 1127 };
@@ -1119,9 +1132,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1119 parse_events_option), 1132 parse_events_option),
1120 OPT_INTEGER('c', "count", &top.default_interval, 1133 OPT_INTEGER('c', "count", &top.default_interval,
1121 "event period to sample"), 1134 "event period to sample"),
1122 OPT_INTEGER('p', "pid", &top.target_pid, 1135 OPT_STRING('p', "pid", &top.target_pid, "pid",
1123 "profile events on existing process id"), 1136 "profile events on existing process id"),
1124 OPT_INTEGER('t', "tid", &top.target_tid, 1137 OPT_STRING('t', "tid", &top.target_tid, "tid",
1125 "profile events on existing thread id"), 1138 "profile events on existing thread id"),
1126 OPT_BOOLEAN('a', "all-cpus", &top.system_wide, 1139 OPT_BOOLEAN('a', "all-cpus", &top.system_wide,
1127 "system-wide collection from all CPUs"), 1140 "system-wide collection from all CPUs"),
@@ -1180,6 +1193,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1180 "Display raw encoding of assembly instructions (default)"), 1193 "Display raw encoding of assembly instructions (default)"),
1181 OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", 1194 OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
1182 "Specify disassembler style (e.g. -M intel for intel syntax)"), 1195 "Specify disassembler style (e.g. -M intel for intel syntax)"),
1196 OPT_STRING('u', "uid", &top.uid_str, "user", "user to profile"),
1183 OPT_END() 1197 OPT_END()
1184 }; 1198 };
1185 1199
@@ -1205,18 +1219,22 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1205 1219
1206 setup_browser(false); 1220 setup_browser(false);
1207 1221
1222 top.uid = parse_target_uid(top.uid_str, top.target_tid, top.target_pid);
1223 if (top.uid_str != NULL && top.uid == UINT_MAX - 1)
1224 goto out_delete_evlist;
1225
1208 /* CPU and PID are mutually exclusive */ 1226 /* CPU and PID are mutually exclusive */
1209 if (top.target_tid > 0 && top.cpu_list) { 1227 if (top.target_tid && top.cpu_list) {
1210 printf("WARNING: PID switch overriding CPU\n"); 1228 printf("WARNING: PID switch overriding CPU\n");
1211 sleep(1); 1229 sleep(1);
1212 top.cpu_list = NULL; 1230 top.cpu_list = NULL;
1213 } 1231 }
1214 1232
1215 if (top.target_pid != -1) 1233 if (top.target_pid)
1216 top.target_tid = top.target_pid; 1234 top.target_tid = top.target_pid;
1217 1235
1218 if (perf_evlist__create_maps(top.evlist, top.target_pid, 1236 if (perf_evlist__create_maps(top.evlist, top.target_pid,
1219 top.target_tid, top.cpu_list) < 0) 1237 top.target_tid, top.uid, top.cpu_list) < 0)
1220 usage_with_options(top_usage, options); 1238 usage_with_options(top_usage, options);
1221 1239
1222 if (!top.evlist->nr_entries && 1240 if (!top.evlist->nr_entries &&
@@ -1280,6 +1298,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1280 1298
1281 status = __cmd_top(&top); 1299 status = __cmd_top(&top);
1282 1300
1301out_delete_evlist:
1283 perf_evlist__delete(top.evlist); 1302 perf_evlist__delete(top.evlist);
1284 1303
1285 return status; 1304 return status;
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 3afa39ac1d40..89e3355ab173 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -173,7 +173,6 @@ sys_perf_event_open(struct perf_event_attr *attr,
173 pid_t pid, int cpu, int group_fd, 173 pid_t pid, int cpu, int group_fd,
174 unsigned long flags) 174 unsigned long flags)
175{ 175{
176 attr->size = sizeof(*attr);
177 return syscall(__NR_perf_event_open, attr, pid, cpu, 176 return syscall(__NR_perf_event_open, attr, pid, cpu,
178 group_fd, flags); 177 group_fd, flags);
179} 178}
@@ -186,14 +185,32 @@ struct ip_callchain {
186 u64 ips[0]; 185 u64 ips[0];
187}; 186};
188 187
188struct branch_flags {
189 u64 mispred:1;
190 u64 predicted:1;
191 u64 reserved:62;
192};
193
194struct branch_entry {
195 u64 from;
196 u64 to;
197 struct branch_flags flags;
198};
199
200struct branch_stack {
201 u64 nr;
202 struct branch_entry entries[0];
203};
204
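
A sample's branch stack is consumed by walking 'nr' {from, to, flags} triples. A minimal sketch using the structs just defined (assumes the tools' u64 typedef; not part of the patch):

	#include <inttypes.h>
	#include <stdio.h>

	static void dump_branch_stack(const struct branch_stack *bs)
	{
		u64 i;

		for (i = 0; i < bs->nr; i++)
			printf("%#" PRIx64 " -> %#" PRIx64 "%s\n",
			       (uint64_t)bs->entries[i].from,
			       (uint64_t)bs->entries[i].to,
			       bs->entries[i].flags.mispred ?
			       " (mispredicted)" : "");
	}
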
189extern bool perf_host, perf_guest; 205extern bool perf_host, perf_guest;
190extern const char perf_version_string[]; 206extern const char perf_version_string[];
191 207
192void pthread__unblock_sigwinch(void); 208void pthread__unblock_sigwinch(void);
193 209
194struct perf_record_opts { 210struct perf_record_opts {
195 pid_t target_pid; 211 const char *target_pid;
196 pid_t target_tid; 212 const char *target_tid;
213 uid_t uid;
197 bool call_graph; 214 bool call_graph;
198 bool group; 215 bool group;
199 bool inherit_stat; 216 bool inherit_stat;
@@ -204,13 +221,14 @@ struct perf_record_opts {
204 bool raw_samples; 221 bool raw_samples;
205 bool sample_address; 222 bool sample_address;
206 bool sample_time; 223 bool sample_time;
207 bool sample_id_all_avail; 224 bool sample_id_all_missing;
208 bool exclude_guest_missing; 225 bool exclude_guest_missing;
209 bool system_wide; 226 bool system_wide;
210 bool period; 227 bool period;
211 unsigned int freq; 228 unsigned int freq;
212 unsigned int mmap_pages; 229 unsigned int mmap_pages;
213 unsigned int user_freq; 230 unsigned int user_freq;
231 int branch_stack;
214 u64 default_interval; 232 u64 default_interval;
215 u64 user_interval; 233 u64 user_interval;
216 const char *cpu_list; 234 const char *cpu_list;
diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py
index df638c438a9f..b11cca584238 100755
--- a/tools/perf/python/twatch.py
+++ b/tools/perf/python/twatch.py
@@ -19,7 +19,7 @@ def main():
19 cpus = perf.cpu_map() 19 cpus = perf.cpu_map()
20 threads = perf.thread_map() 20 threads = perf.thread_map()
21 evsel = perf.evsel(task = 1, comm = 1, mmap = 0, 21 evsel = perf.evsel(task = 1, comm = 1, mmap = 0,
22 wakeup_events = 1, sample_period = 1, 22 wakeup_events = 1, watermark = 1,
23 sample_id_all = 1, 23 sample_id_all = 1,
24 sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU | perf.SAMPLE_TID) 24 sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU | perf.SAMPLE_TID)
25 evsel.open(cpus = cpus, threads = threads); 25 evsel.open(cpus = cpus, threads = threads);
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 011ed2676604..e5a462f1d07c 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -315,7 +315,7 @@ fallback:
315 "Please use:\n\n" 315 "Please use:\n\n"
316 " perf buildid-cache -av vmlinux\n\n" 316 " perf buildid-cache -av vmlinux\n\n"
317 "or:\n\n" 317 "or:\n\n"
318 " --vmlinux vmlinux", 318 " --vmlinux vmlinux\n",
319 sym->name, build_id_msg ?: ""); 319 sym->name, build_id_msg ?: "");
320 goto out_free_filename; 320 goto out_free_filename;
321 } 321 }
diff --git a/tools/perf/util/bitmap.c b/tools/perf/util/bitmap.c
index 5e230acae1e9..0a1adc1111fd 100644
--- a/tools/perf/util/bitmap.c
+++ b/tools/perf/util/bitmap.c
@@ -19,3 +19,13 @@ int __bitmap_weight(const unsigned long *bitmap, int bits)
19 19
20 return w; 20 return w;
21} 21}
22
23void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
24 const unsigned long *bitmap2, int bits)
25{
26 int k;
27 int nr = BITS_TO_LONGS(bits);
28
29 for (k = 0; k < nr; k++)
30 dst[k] = bitmap1[k] | bitmap2[k];
31}
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 6893eec693ab..adc72f09914d 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -166,6 +166,17 @@ out:
166 return cpus; 166 return cpus;
167} 167}
168 168
169size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp)
170{
171 int i;
172 size_t printed = fprintf(fp, "%d cpu%s: ",
173 map->nr, map->nr > 1 ? "s" : "");
174 for (i = 0; i < map->nr; ++i)
175 printed += fprintf(fp, "%s%d", i ? ", " : "", map->map[i]);
176
177 return printed + fprintf(fp, "\n");
178}
179
169struct cpu_map *cpu_map__dummy_new(void) 180struct cpu_map *cpu_map__dummy_new(void)
170{ 181{
171 struct cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int)); 182 struct cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int));
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 072c0a374794..c41518573c6a 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -1,6 +1,8 @@
1#ifndef __PERF_CPUMAP_H 1#ifndef __PERF_CPUMAP_H
2#define __PERF_CPUMAP_H 2#define __PERF_CPUMAP_H
3 3
4#include <stdio.h>
5
4struct cpu_map { 6struct cpu_map {
5 int nr; 7 int nr;
6 int map[]; 8 int map[];
@@ -10,4 +12,6 @@ struct cpu_map *cpu_map__new(const char *cpu_list);
10struct cpu_map *cpu_map__dummy_new(void); 12struct cpu_map *cpu_map__dummy_new(void);
11void cpu_map__delete(struct cpu_map *map); 13void cpu_map__delete(struct cpu_map *map);
12 14
15size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
16
13#endif /* __PERF_CPUMAP_H */ 17#endif /* __PERF_CPUMAP_H */
diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c
index 35073621e5de..aada3ac5e891 100644
--- a/tools/perf/util/ctype.c
+++ b/tools/perf/util/ctype.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * No surprises, and works with signed and unsigned chars. 4 * No surprises, and works with signed and unsigned chars.
5 */ 5 */
6#include "cache.h" 6#include "util.h"
7 7
8enum { 8enum {
9 S = GIT_SPACE, 9 S = GIT_SPACE,
diff --git a/tools/perf/util/debugfs.c b/tools/perf/util/debugfs.c
index ffc35e748e89..dd8b19319c03 100644
--- a/tools/perf/util/debugfs.c
+++ b/tools/perf/util/debugfs.c
@@ -15,32 +15,6 @@ static const char *debugfs_known_mountpoints[] = {
15 0, 15 0,
16}; 16};
17 17
18/* use this to force a umount */
19void debugfs_force_cleanup(void)
20{
21 debugfs_find_mountpoint();
22 debugfs_premounted = 0;
23 debugfs_umount();
24}
25
26/* construct a full path to a debugfs element */
27int debugfs_make_path(const char *element, char *buffer, int size)
28{
29 int len;
30
31 if (strlen(debugfs_mountpoint) == 0) {
32 buffer[0] = '\0';
33 return -1;
34 }
35
36 len = strlen(debugfs_mountpoint) + strlen(element) + 1;
37 if (len >= size)
38 return len+1;
39
40 snprintf(buffer, size-1, "%s/%s", debugfs_mountpoint, element);
41 return 0;
42}
43
44static int debugfs_found; 18static int debugfs_found;
45 19
46/* find the path to the mounted debugfs */ 20/* find the path to the mounted debugfs */
@@ -97,17 +71,6 @@ int debugfs_valid_mountpoint(const char *debugfs)
97 return 0; 71 return 0;
98} 72}
99 73
100
101int debugfs_valid_entry(const char *path)
102{
103 struct stat st;
104
105 if (stat(path, &st))
106 return -errno;
107
108 return 0;
109}
110
111static void debugfs_set_tracing_events_path(const char *mountpoint) 74static void debugfs_set_tracing_events_path(const char *mountpoint)
112{ 75{
113 snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s", 76 snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s",
@@ -149,107 +112,3 @@ void debugfs_set_path(const char *mountpoint)
149 snprintf(debugfs_mountpoint, sizeof(debugfs_mountpoint), "%s", mountpoint); 112 snprintf(debugfs_mountpoint, sizeof(debugfs_mountpoint), "%s", mountpoint);
150 debugfs_set_tracing_events_path(mountpoint); 113 debugfs_set_tracing_events_path(mountpoint);
151} 114}
152
153/* umount the debugfs */
154
155int debugfs_umount(void)
156{
157 char umountcmd[128];
158 int ret;
159
160 /* if it was already mounted, leave it */
161 if (debugfs_premounted)
162 return 0;
163
164 /* make sure it's a valid mount point */
165 ret = debugfs_valid_mountpoint(debugfs_mountpoint);
166 if (ret)
167 return ret;
168
169 snprintf(umountcmd, sizeof(umountcmd),
170 "/bin/umount %s", debugfs_mountpoint);
171 return system(umountcmd);
172}
173
174int debugfs_write(const char *entry, const char *value)
175{
176 char path[PATH_MAX + 1];
177 int ret, count;
178 int fd;
179
180 /* construct the path */
181 snprintf(path, sizeof(path), "%s/%s", debugfs_mountpoint, entry);
182
183 /* verify that it exists */
184 ret = debugfs_valid_entry(path);
185 if (ret)
186 return ret;
187
188 /* get how many chars we're going to write */
189 count = strlen(value);
190
191 /* open the debugfs entry */
192 fd = open(path, O_RDWR);
193 if (fd < 0)
194 return -errno;
195
196 while (count > 0) {
197 /* write it */
198 ret = write(fd, value, count);
199 if (ret <= 0) {
200 if (ret == EAGAIN)
201 continue;
202 close(fd);
203 return -errno;
204 }
205 count -= ret;
206 }
207
208 /* close it */
209 close(fd);
210
211 /* return success */
212 return 0;
213}
214
215/*
216 * read a debugfs entry
217 * returns the number of chars read or a negative errno
218 */
219int debugfs_read(const char *entry, char *buffer, size_t size)
220{
221 char path[PATH_MAX + 1];
222 int ret;
223 int fd;
224
225 /* construct the path */
226 snprintf(path, sizeof(path), "%s/%s", debugfs_mountpoint, entry);
227
228 /* verify that it exists */
229 ret = debugfs_valid_entry(path);
230 if (ret)
231 return ret;
232
233 /* open the debugfs entry */
234 fd = open(path, O_RDONLY);
235 if (fd < 0)
236 return -errno;
237
238 do {
239 /* read it */
240 ret = read(fd, buffer, size);
241 if (ret == 0) {
242 close(fd);
243 return EOF;
244 }
245 } while (ret < 0 && errno == EAGAIN);
246
247 /* close it */
248 close(fd);
249
250 /* make *sure* there's a null character at the end */
251 buffer[ret] = '\0';
252
253 /* return the number of chars read */
254 return ret;
255}
diff --git a/tools/perf/util/debugfs.h b/tools/perf/util/debugfs.h
index 4a878f735eb0..68f3e87ec57f 100644
--- a/tools/perf/util/debugfs.h
+++ b/tools/perf/util/debugfs.h
@@ -3,14 +3,8 @@
3 3
4const char *debugfs_find_mountpoint(void); 4const char *debugfs_find_mountpoint(void);
5int debugfs_valid_mountpoint(const char *debugfs); 5int debugfs_valid_mountpoint(const char *debugfs);
6int debugfs_valid_entry(const char *path);
7char *debugfs_mount(const char *mountpoint); 6char *debugfs_mount(const char *mountpoint);
8int debugfs_umount(void);
9void debugfs_set_path(const char *mountpoint); 7void debugfs_set_path(const char *mountpoint);
10int debugfs_write(const char *entry, const char *value);
11int debugfs_read(const char *entry, char *buffer, size_t size);
12void debugfs_force_cleanup(void);
13int debugfs_make_path(const char *element, char *buffer, int size);
14 8
15extern char debugfs_mountpoint[]; 9extern char debugfs_mountpoint[];
16extern char tracing_events_path[]; 10extern char tracing_events_path[];
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index cbdeaad9c5e5..1b197280c621 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -81,6 +81,7 @@ struct perf_sample {
81 u32 raw_size; 81 u32 raw_size;
82 void *raw_data; 82 void *raw_data;
83 struct ip_callchain *callchain; 83 struct ip_callchain *callchain;
84 struct branch_stack *branch_stack;
84}; 85};
85 86
86#define BUILD_ID_SIZE 20 87#define BUILD_ID_SIZE 20
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index ea32a061f1c8..159263d17c2d 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -97,9 +97,9 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
97 ++evlist->nr_entries; 97 ++evlist->nr_entries;
98} 98}
99 99
100static void perf_evlist__splice_list_tail(struct perf_evlist *evlist, 100void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
101 struct list_head *list, 101 struct list_head *list,
102 int nr_entries) 102 int nr_entries)
103{ 103{
104 list_splice_tail(list, &evlist->entries); 104 list_splice_tail(list, &evlist->entries);
105 evlist->nr_entries += nr_entries; 105 evlist->nr_entries += nr_entries;
@@ -597,15 +597,15 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
597 return perf_evlist__mmap_per_cpu(evlist, prot, mask); 597 return perf_evlist__mmap_per_cpu(evlist, prot, mask);
598} 598}
599 599
600int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid, 600int perf_evlist__create_maps(struct perf_evlist *evlist, const char *target_pid,
601 pid_t target_tid, const char *cpu_list) 601 const char *target_tid, uid_t uid, const char *cpu_list)
602{ 602{
603 evlist->threads = thread_map__new(target_pid, target_tid); 603 evlist->threads = thread_map__new_str(target_pid, target_tid, uid);
604 604
605 if (evlist->threads == NULL) 605 if (evlist->threads == NULL)
606 return -1; 606 return -1;
607 607
608 if (cpu_list == NULL && target_tid != -1) 608 if (uid != UINT_MAX || (cpu_list == NULL && target_tid))
609 evlist->cpus = cpu_map__dummy_new(); 609 evlist->cpus = cpu_map__dummy_new();
610 else 610 else
611 evlist->cpus = cpu_map__new(cpu_list); 611 evlist->cpus = cpu_map__new(cpu_list);
@@ -765,6 +765,7 @@ out_err:
765 list_for_each_entry_reverse(evsel, &evlist->entries, node) 765 list_for_each_entry_reverse(evsel, &evlist->entries, node)
766 perf_evsel__close(evsel, ncpus, nthreads); 766 perf_evsel__close(evsel, ncpus, nthreads);
767 767
768 errno = -err;
768 return err; 769 return err;
769} 770}
770 771
@@ -824,7 +825,7 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist,
824 exit(-1); 825 exit(-1);
825 } 826 }
826 827
827 if (!opts->system_wide && opts->target_tid == -1 && opts->target_pid == -1) 828 if (!opts->system_wide && !opts->target_tid && !opts->target_pid)
828 evlist->threads->map[0] = evlist->workload.pid; 829 evlist->threads->map[0] = evlist->workload.pid;
829 830
830 close(child_ready_pipe[1]); 831 close(child_ready_pipe[1]);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 8922aeed0467..21f1c9e57f13 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -106,8 +106,8 @@ static inline void perf_evlist__set_maps(struct perf_evlist *evlist,
106 evlist->threads = threads; 106 evlist->threads = threads;
107} 107}
108 108
109int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid, 109int perf_evlist__create_maps(struct perf_evlist *evlist, const char *target_pid,
110 pid_t target_tid, const char *cpu_list); 110 const char *tid, uid_t uid, const char *cpu_list);
111void perf_evlist__delete_maps(struct perf_evlist *evlist); 111void perf_evlist__delete_maps(struct perf_evlist *evlist);
112int perf_evlist__set_filters(struct perf_evlist *evlist); 112int perf_evlist__set_filters(struct perf_evlist *evlist);
113 113
@@ -117,4 +117,9 @@ u16 perf_evlist__id_hdr_size(const struct perf_evlist *evlist);
117 117
118bool perf_evlist__valid_sample_type(const struct perf_evlist *evlist); 118bool perf_evlist__valid_sample_type(const struct perf_evlist *evlist);
119bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist); 119bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist);
120
121void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
122 struct list_head *list,
123 int nr_entries);
124
120#endif /* __PERF_EVLIST_H */ 125#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 7132ee834e0e..f421f7cbc0d3 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -68,7 +68,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts)
68 struct perf_event_attr *attr = &evsel->attr; 68 struct perf_event_attr *attr = &evsel->attr;
69 int track = !evsel->idx; /* only the first counter needs these */ 69 int track = !evsel->idx; /* only the first counter needs these */
70 70
71 attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0; 71 attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
72 attr->inherit = !opts->no_inherit; 72 attr->inherit = !opts->no_inherit;
73 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 73 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
74 PERF_FORMAT_TOTAL_TIME_RUNNING | 74 PERF_FORMAT_TOTAL_TIME_RUNNING |
@@ -111,7 +111,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts)
111 if (opts->period) 111 if (opts->period)
112 attr->sample_type |= PERF_SAMPLE_PERIOD; 112 attr->sample_type |= PERF_SAMPLE_PERIOD;
113 113
114 if (opts->sample_id_all_avail && 114 if (!opts->sample_id_all_missing &&
115 (opts->sample_time || opts->system_wide || 115 (opts->sample_time || opts->system_wide ||
116 !opts->no_inherit || opts->cpu_list)) 116 !opts->no_inherit || opts->cpu_list))
117 attr->sample_type |= PERF_SAMPLE_TIME; 117 attr->sample_type |= PERF_SAMPLE_TIME;
@@ -126,11 +126,15 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts)
126 attr->watermark = 0; 126 attr->watermark = 0;
127 attr->wakeup_events = 1; 127 attr->wakeup_events = 1;
128 } 128 }
129 if (opts->branch_stack) {
130 attr->sample_type |= PERF_SAMPLE_BRANCH_STACK;
131 attr->branch_sample_type = opts->branch_stack;
132 }
129 133
130 attr->mmap = track; 134 attr->mmap = track;
131 attr->comm = track; 135 attr->comm = track;
132 136
133 if (opts->target_pid == -1 && opts->target_tid == -1 && !opts->system_wide) { 137 if (!opts->target_pid && !opts->target_tid && !opts->system_wide) {
134 attr->disabled = 1; 138 attr->disabled = 1;
135 attr->enable_on_exec = 1; 139 attr->enable_on_exec = 1;
136 } 140 }
@@ -536,7 +540,7 @@ int perf_event__parse_sample(const union perf_event *event, u64 type,
536 } 540 }
537 541
538 if (type & PERF_SAMPLE_READ) { 542 if (type & PERF_SAMPLE_READ) {
539 fprintf(stderr, "PERF_SAMPLE_READ is unsuported for now\n"); 543 fprintf(stderr, "PERF_SAMPLE_READ is unsupported for now\n");
540 return -1; 544 return -1;
541 } 545 }
542 546
@@ -576,6 +580,16 @@ int perf_event__parse_sample(const union perf_event *event, u64 type,
576 data->raw_data = (void *) pdata; 580 data->raw_data = (void *) pdata;
577 } 581 }
578 582
583 if (type & PERF_SAMPLE_BRANCH_STACK) {
584 u64 sz;
585
586 data->branch_stack = (struct branch_stack *)array;
587 array++; /* nr */
588
589 sz = data->branch_stack->nr * sizeof(struct branch_entry);
590 sz /= sizeof(u64);
591 array += sz;
592 }
579 return 0; 593 return 0;
580} 594}
581 595
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 14bb035c5fd9..fcd9cf3ea63e 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -63,9 +63,20 @@ char *perf_header__find_event(u64 id)
63 return NULL; 63 return NULL;
64} 64}
65 65
66static const char *__perf_magic = "PERFFILE"; 66/*
67 * magic2 = "PERFILE2"
 68 * must be stored as a numerical value so that its in-memory
 69 * layout depends on the byte order. That way we are able
 70 * to detect the endianness when reading the perf.data file
71 * back.
72 *
73 * we check for legacy (PERFFILE) format.
74 */
75static const char *__perf_magic1 = "PERFFILE";
76static const u64 __perf_magic2 = 0x32454c4946524550ULL;
77static const u64 __perf_magic2_sw = 0x50455246494c4532ULL;
67 78
68#define PERF_MAGIC (*(u64 *)__perf_magic) 79#define PERF_MAGIC __perf_magic2
69 80
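
To make the encoding concrete: the eight ASCII bytes of "PERFILE2" read as a little-endian u64 are exactly __perf_magic2, and the byte-swapped reading is __perf_magic2_sw, so a plain integer compare against the two constants reveals the writer's byte order. A standalone check (sketch, passes on either host byte order):

	#include <assert.h>
	#include <stdint.h>
	#include <string.h>

	int main(void)
	{
		uint64_t magic;

		memcpy(&magic, "PERFILE2", sizeof(magic));
		assert(magic == 0x32454c4946524550ULL ||	/* little-endian host */
		       magic == 0x50455246494c4532ULL);		/* big-endian host */
		return 0;
	}
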
70struct perf_file_attr { 81struct perf_file_attr {
71 struct perf_event_attr attr; 82 struct perf_event_attr attr;
@@ -1012,6 +1023,12 @@ write_it:
1012 return do_write_string(fd, buffer); 1023 return do_write_string(fd, buffer);
1013} 1024}
1014 1025
1026static int write_branch_stack(int fd __used, struct perf_header *h __used,
1027 struct perf_evlist *evlist __used)
1028{
1029 return 0;
1030}
1031
1015static void print_hostname(struct perf_header *ph, int fd, FILE *fp) 1032static void print_hostname(struct perf_header *ph, int fd, FILE *fp)
1016{ 1033{
1017 char *str = do_read_string(fd, ph); 1034 char *str = do_read_string(fd, ph);
@@ -1133,8 +1150,9 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp)
1133 uint64_t id; 1150 uint64_t id;
1134 void *buf = NULL; 1151 void *buf = NULL;
1135 char *str; 1152 char *str;
1136 u32 nre, sz, nr, i, j, msz; 1153 u32 nre, sz, nr, i, j;
1137 int ret; 1154 ssize_t ret;
1155 size_t msz;
1138 1156
1139 /* number of events */ 1157 /* number of events */
1140 ret = read(fd, &nre, sizeof(nre)); 1158 ret = read(fd, &nre, sizeof(nre));
@@ -1151,25 +1169,23 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp)
1151 if (ph->needs_swap) 1169 if (ph->needs_swap)
1152 sz = bswap_32(sz); 1170 sz = bswap_32(sz);
1153 1171
1154 /*
1155 * ensure it is at least to our ABI rev
1156 */
1157 if (sz < (u32)sizeof(attr))
1158 goto error;
1159
1160 memset(&attr, 0, sizeof(attr)); 1172 memset(&attr, 0, sizeof(attr));
1161 1173
 1162 /* read entire region to sync up to next field */ 1174 /* buffer to hold the on-file attr struct */
1163 buf = malloc(sz); 1175 buf = malloc(sz);
1164 if (!buf) 1176 if (!buf)
1165 goto error; 1177 goto error;
1166 1178
1167 msz = sizeof(attr); 1179 msz = sizeof(attr);
1168 if (sz < msz) 1180 if (sz < (ssize_t)msz)
1169 msz = sz; 1181 msz = sz;
1170 1182
1171 for (i = 0 ; i < nre; i++) { 1183 for (i = 0 ; i < nre; i++) {
1172 1184
1185 /*
 1186 * must read the entire on-file attr struct to
 1187 * stay in sync with its layout.
1188 */
1173 ret = read(fd, buf, sz); 1189 ret = read(fd, buf, sz);
1174 if (ret != (ssize_t)sz) 1190 if (ret != (ssize_t)sz)
1175 goto error; 1191 goto error;
@@ -1305,25 +1321,204 @@ static void print_cpuid(struct perf_header *ph, int fd, FILE *fp)
1305 free(str); 1321 free(str);
1306} 1322}
1307 1323
1324static void print_branch_stack(struct perf_header *ph __used, int fd __used,
1325 FILE *fp)
1326{
1327 fprintf(fp, "# contains samples with branch stack\n");
1328}
1329
1330static int __event_process_build_id(struct build_id_event *bev,
1331 char *filename,
1332 struct perf_session *session)
1333{
1334 int err = -1;
1335 struct list_head *head;
1336 struct machine *machine;
1337 u16 misc;
1338 struct dso *dso;
1339 enum dso_kernel_type dso_type;
1340
1341 machine = perf_session__findnew_machine(session, bev->pid);
1342 if (!machine)
1343 goto out;
1344
1345 misc = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1346
1347 switch (misc) {
1348 case PERF_RECORD_MISC_KERNEL:
1349 dso_type = DSO_TYPE_KERNEL;
1350 head = &machine->kernel_dsos;
1351 break;
1352 case PERF_RECORD_MISC_GUEST_KERNEL:
1353 dso_type = DSO_TYPE_GUEST_KERNEL;
1354 head = &machine->kernel_dsos;
1355 break;
1356 case PERF_RECORD_MISC_USER:
1357 case PERF_RECORD_MISC_GUEST_USER:
1358 dso_type = DSO_TYPE_USER;
1359 head = &machine->user_dsos;
1360 break;
1361 default:
1362 goto out;
1363 }
1364
1365 dso = __dsos__findnew(head, filename);
1366 if (dso != NULL) {
1367 char sbuild_id[BUILD_ID_SIZE * 2 + 1];
1368
1369 dso__set_build_id(dso, &bev->build_id);
1370
1371 if (filename[0] == '[')
1372 dso->kernel = dso_type;
1373
1374 build_id__sprintf(dso->build_id, sizeof(dso->build_id),
1375 sbuild_id);
1376 pr_debug("build id event received for %s: %s\n",
1377 dso->long_name, sbuild_id);
1378 }
1379
1380 err = 0;
1381out:
1382 return err;
1383}
1384
1385static int perf_header__read_build_ids_abi_quirk(struct perf_header *header,
1386 int input, u64 offset, u64 size)
1387{
1388 struct perf_session *session = container_of(header, struct perf_session, header);
1389 struct {
1390 struct perf_event_header header;
1391 u8 build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))];
1392 char filename[0];
1393 } old_bev;
1394 struct build_id_event bev;
1395 char filename[PATH_MAX];
1396 u64 limit = offset + size;
1397
1398 while (offset < limit) {
1399 ssize_t len;
1400
1401 if (read(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev))
1402 return -1;
1403
1404 if (header->needs_swap)
1405 perf_event_header__bswap(&old_bev.header);
1406
1407 len = old_bev.header.size - sizeof(old_bev);
1408 if (read(input, filename, len) != len)
1409 return -1;
1410
1411 bev.header = old_bev.header;
1412
1413 /*
 1414 * As the pid is the missing value, we need to fill
 1415 * it in properly. The header.misc value gives us a nice hint.
1416 */
1417 bev.pid = HOST_KERNEL_ID;
1418 if (bev.header.misc == PERF_RECORD_MISC_GUEST_USER ||
1419 bev.header.misc == PERF_RECORD_MISC_GUEST_KERNEL)
1420 bev.pid = DEFAULT_GUEST_KERNEL_ID;
1421
1422 memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id));
1423 __event_process_build_id(&bev, filename, session);
1424
1425 offset += bev.header.size;
1426 }
1427
1428 return 0;
1429}
1430
1431static int perf_header__read_build_ids(struct perf_header *header,
1432 int input, u64 offset, u64 size)
1433{
1434 struct perf_session *session = container_of(header, struct perf_session, header);
1435 struct build_id_event bev;
1436 char filename[PATH_MAX];
1437 u64 limit = offset + size, orig_offset = offset;
1438 int err = -1;
1439
1440 while (offset < limit) {
1441 ssize_t len;
1442
1443 if (read(input, &bev, sizeof(bev)) != sizeof(bev))
1444 goto out;
1445
1446 if (header->needs_swap)
1447 perf_event_header__bswap(&bev.header);
1448
1449 len = bev.header.size - sizeof(bev);
1450 if (read(input, filename, len) != len)
1451 goto out;
1452 /*
1453 * The a1645ce1 changeset:
1454 *
1455 * "perf: 'perf kvm' tool for monitoring guest performance from host"
1456 *
1457 * Added a field to struct build_id_event that broke the file
1458 * format.
1459 *
1460 * Since the kernel build-id is the first entry, process the
1461 * table using the old format if the well known
1462 * '[kernel.kallsyms]' string for the kernel build-id has the
1463 * first 4 characters chopped off (where the pid_t sits).
1464 */
1465 if (memcmp(filename, "nel.kallsyms]", 13) == 0) {
1466 if (lseek(input, orig_offset, SEEK_SET) == (off_t)-1)
1467 return -1;
1468 return perf_header__read_build_ids_abi_quirk(header, input, offset, size);
1469 }
1470
1471 __event_process_build_id(&bev, filename, session);
1472
1473 offset += bev.header.size;
1474 }
1475 err = 0;
1476out:
1477 return err;
1478}
1479
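
The '[kernel.kallsyms]' heuristic above exploits the layout skew directly: the pre-a1645ce1 build_id_event had no 32-bit pid, so parsing an old record with the new, 4-bytes-larger struct starts the filename four bytes too late, and "[kernel.kallsyms]" reads back as "nel.kallsyms]...". A standalone illustration (sketch, not tool code):

	#include <assert.h>
	#include <string.h>

	int main(void)
	{
		const char on_disk[] = "[kernel.kallsyms]";

		/* a new-format parser begins sizeof(pid) == 4 bytes too far
		 * into an old-format record */
		assert(memcmp(on_disk + 4, "nel.kallsyms]", 13) == 0);
		return 0;
	}
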
1480static int process_trace_info(struct perf_file_section *section __unused,
1481 struct perf_header *ph __unused,
1482 int feat __unused, int fd)
1483{
1484 trace_report(fd, false);
1485 return 0;
1486}
1487
1488static int process_build_id(struct perf_file_section *section,
1489 struct perf_header *ph,
1490 int feat __unused, int fd)
1491{
1492 if (perf_header__read_build_ids(ph, fd, section->offset, section->size))
1493 pr_debug("Failed to read buildids, continuing...\n");
1494 return 0;
1495}
1496
1308struct feature_ops { 1497struct feature_ops {
1309 int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist); 1498 int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist);
1310 void (*print)(struct perf_header *h, int fd, FILE *fp); 1499 void (*print)(struct perf_header *h, int fd, FILE *fp);
1500 int (*process)(struct perf_file_section *section,
1501 struct perf_header *h, int feat, int fd);
1311 const char *name; 1502 const char *name;
1312 bool full_only; 1503 bool full_only;
1313}; 1504};
1314 1505
1315#define FEAT_OPA(n, func) \ 1506#define FEAT_OPA(n, func) \
1316 [n] = { .name = #n, .write = write_##func, .print = print_##func } 1507 [n] = { .name = #n, .write = write_##func, .print = print_##func }
1508#define FEAT_OPP(n, func) \
1509 [n] = { .name = #n, .write = write_##func, .print = print_##func, \
1510 .process = process_##func }
1317#define FEAT_OPF(n, func) \ 1511#define FEAT_OPF(n, func) \
1318 [n] = { .name = #n, .write = write_##func, .print = print_##func, .full_only = true } 1512 [n] = { .name = #n, .write = write_##func, .print = print_##func, \
1513 .full_only = true }
1319 1514
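
Spelled out, the designated-initializer macro above expands, for one feature, to:

	/* FEAT_OPP(HEADER_BUILD_ID, build_id) becomes: */
	[HEADER_BUILD_ID] = {
		.name    = "HEADER_BUILD_ID",
		.write   = write_build_id,
		.print   = print_build_id,	/* NULL, via the #define below */
		.process = process_build_id,
	},
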
1320/* feature_ops not implemented: */ 1515/* feature_ops not implemented: */
1321#define print_trace_info NULL 1516#define print_trace_info NULL
1322#define print_build_id NULL 1517#define print_build_id NULL
1323 1518
1324static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { 1519static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
1325 FEAT_OPA(HEADER_TRACE_INFO, trace_info), 1520 FEAT_OPP(HEADER_TRACE_INFO, trace_info),
1326 FEAT_OPA(HEADER_BUILD_ID, build_id), 1521 FEAT_OPP(HEADER_BUILD_ID, build_id),
1327 FEAT_OPA(HEADER_HOSTNAME, hostname), 1522 FEAT_OPA(HEADER_HOSTNAME, hostname),
1328 FEAT_OPA(HEADER_OSRELEASE, osrelease), 1523 FEAT_OPA(HEADER_OSRELEASE, osrelease),
1329 FEAT_OPA(HEADER_VERSION, version), 1524 FEAT_OPA(HEADER_VERSION, version),
@@ -1336,6 +1531,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
1336 FEAT_OPA(HEADER_CMDLINE, cmdline), 1531 FEAT_OPA(HEADER_CMDLINE, cmdline),
1337 FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology), 1532 FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology),
1338 FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology), 1533 FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology),
1534 FEAT_OPA(HEADER_BRANCH_STACK, branch_stack),
1339}; 1535};
1340 1536
1341struct header_print_data { 1537struct header_print_data {
@@ -1620,24 +1816,128 @@ out_free:
1620 return err; 1816 return err;
1621} 1817}
1622 1818
1819static const int attr_file_abi_sizes[] = {
1820 [0] = PERF_ATTR_SIZE_VER0,
1821 [1] = PERF_ATTR_SIZE_VER1,
1822 0,
1823};
1824
1825/*
 1826 * In the legacy file format, the magic number is not used to encode endianness;
 1827 * hdr_sz was used instead. But given that hdr_sz can vary based
 1828 * on ABI revisions, we need to try every known size in both byte orders
 1829 * to detect the endianness.
1830 */
1831static int try_all_file_abis(uint64_t hdr_sz, struct perf_header *ph)
1832{
1833 uint64_t ref_size, attr_size;
1834 int i;
1835
1836 for (i = 0 ; attr_file_abi_sizes[i]; i++) {
1837 ref_size = attr_file_abi_sizes[i]
1838 + sizeof(struct perf_file_section);
1839 if (hdr_sz != ref_size) {
1840 attr_size = bswap_64(hdr_sz);
1841 if (attr_size != ref_size)
1842 continue;
1843
1844 ph->needs_swap = true;
1845 }
1846 pr_debug("ABI%d perf.data file detected, need_swap=%d\n",
1847 i,
1848 ph->needs_swap);
1849 return 0;
1850 }
1851 /* could not determine endianness */
1852 return -1;
1853}
1854
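
A concrete instance of the probe above, with sizes taken from the ABI macros (PERF_ATTR_SIZE_VER0 is 64 and struct perf_file_section is two u64s, 16 bytes): an ABI0 file stores an attr size of 80, and a reader of the opposite byte order sees bswap_64(80) == 0x5000000000000000, so matching the swapped value both picks the revision and sets needs_swap. Sketch:

	#include <byteswap.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned long long ref_size = 64 + 16;	/* ABI0 attr + file section */

		/* prints 0x5000000000000000 */
		printf("%#llx\n", (unsigned long long)bswap_64(ref_size));
		return 0;
	}
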
1855#define PERF_PIPE_HDR_VER0 16
1856
1857static const size_t attr_pipe_abi_sizes[] = {
1858 [0] = PERF_PIPE_HDR_VER0,
1859 0,
1860};
1861
1862/*
 1863 * In the legacy pipe format, there is an implicit assumption that the
 1864 * endianness of the host recording the samples and of the host parsing
 1865 * them is the same. This is not always the case, given that the pipe
 1866 * output may be redirected into a file and analyzed on a different machine
 1867 * with a possibly different endianness and perf_event ABI revision in the perf tool itself.
1868 */
1869static int try_all_pipe_abis(uint64_t hdr_sz, struct perf_header *ph)
1870{
1871 u64 attr_size;
1872 int i;
1873
1874 for (i = 0 ; attr_pipe_abi_sizes[i]; i++) {
1875 if (hdr_sz != attr_pipe_abi_sizes[i]) {
1876 attr_size = bswap_64(hdr_sz);
 1877 if (attr_size != attr_pipe_abi_sizes[i])
1878 continue;
1879
1880 ph->needs_swap = true;
1881 }
1882 pr_debug("Pipe ABI%d perf.data file detected\n", i);
1883 return 0;
1884 }
1885 return -1;
1886}
1887
1888static int check_magic_endian(u64 magic, uint64_t hdr_sz,
1889 bool is_pipe, struct perf_header *ph)
1890{
1891 int ret;
1892
1893 /* check for legacy format */
1894 ret = memcmp(&magic, __perf_magic1, sizeof(magic));
1895 if (ret == 0) {
1896 pr_debug("legacy perf.data format\n");
1897 if (is_pipe)
1898 return try_all_pipe_abis(hdr_sz, ph);
1899
1900 return try_all_file_abis(hdr_sz, ph);
1901 }
1902 /*
1903 * the new magic number serves two purposes:
1904 * - unique number to identify actual perf.data files
1905 * - encode endianness of file
1906 */
1907
1908 /* check magic number with one endianness */
1909 if (magic == __perf_magic2)
1910 return 0;
1911
1912 /* check magic number with opposite endianness */
1913 if (magic != __perf_magic2_sw)
1914 return -1;
1915
1916 ph->needs_swap = true;
1917
1918 return 0;
1919}
1920
1623int perf_file_header__read(struct perf_file_header *header, 1921int perf_file_header__read(struct perf_file_header *header,
1624 struct perf_header *ph, int fd) 1922 struct perf_header *ph, int fd)
1625{ 1923{
1924 int ret;
1925
1626 lseek(fd, 0, SEEK_SET); 1926 lseek(fd, 0, SEEK_SET);
1627 1927
1628 if (readn(fd, header, sizeof(*header)) <= 0 || 1928 ret = readn(fd, header, sizeof(*header));
1629 memcmp(&header->magic, __perf_magic, sizeof(header->magic))) 1929 if (ret <= 0)
1630 return -1; 1930 return -1;
1631 1931
1632 if (header->attr_size != sizeof(struct perf_file_attr)) { 1932 if (check_magic_endian(header->magic,
1633 u64 attr_size = bswap_64(header->attr_size); 1933 header->attr_size, false, ph) < 0) {
1634 1934 pr_debug("magic/endian check failed\n");
1635 if (attr_size != sizeof(struct perf_file_attr)) 1935 return -1;
1636 return -1; 1936 }
1637 1937
1938 if (ph->needs_swap) {
1638 mem_bswap_64(header, offsetof(struct perf_file_header, 1939 mem_bswap_64(header, offsetof(struct perf_file_header,
1639 adds_features)); 1940 adds_features));
1640 ph->needs_swap = true;
1641 } 1941 }
1642 1942
1643 if (header->size != sizeof(*header)) { 1943 if (header->size != sizeof(*header)) {
@@ -1689,156 +1989,6 @@ int perf_file_header__read(struct perf_file_header *header,
1689 return 0; 1989 return 0;
1690} 1990}
1691 1991
1692static int __event_process_build_id(struct build_id_event *bev,
1693 char *filename,
1694 struct perf_session *session)
1695{
1696 int err = -1;
1697 struct list_head *head;
1698 struct machine *machine;
1699 u16 misc;
1700 struct dso *dso;
1701 enum dso_kernel_type dso_type;
1702
1703 machine = perf_session__findnew_machine(session, bev->pid);
1704 if (!machine)
1705 goto out;
1706
1707 misc = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1708
1709 switch (misc) {
1710 case PERF_RECORD_MISC_KERNEL:
1711 dso_type = DSO_TYPE_KERNEL;
1712 head = &machine->kernel_dsos;
1713 break;
1714 case PERF_RECORD_MISC_GUEST_KERNEL:
1715 dso_type = DSO_TYPE_GUEST_KERNEL;
1716 head = &machine->kernel_dsos;
1717 break;
1718 case PERF_RECORD_MISC_USER:
1719 case PERF_RECORD_MISC_GUEST_USER:
1720 dso_type = DSO_TYPE_USER;
1721 head = &machine->user_dsos;
1722 break;
1723 default:
1724 goto out;
1725 }
1726
1727 dso = __dsos__findnew(head, filename);
1728 if (dso != NULL) {
1729 char sbuild_id[BUILD_ID_SIZE * 2 + 1];
1730
1731 dso__set_build_id(dso, &bev->build_id);
1732
1733 if (filename[0] == '[')
1734 dso->kernel = dso_type;
1735
1736 build_id__sprintf(dso->build_id, sizeof(dso->build_id),
1737 sbuild_id);
1738 pr_debug("build id event received for %s: %s\n",
1739 dso->long_name, sbuild_id);
1740 }
1741
1742 err = 0;
1743out:
1744 return err;
1745}
1746
1747static int perf_header__read_build_ids_abi_quirk(struct perf_header *header,
1748 int input, u64 offset, u64 size)
1749{
1750 struct perf_session *session = container_of(header, struct perf_session, header);
1751 struct {
1752 struct perf_event_header header;
1753 u8 build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))];
1754 char filename[0];
1755 } old_bev;
1756 struct build_id_event bev;
1757 char filename[PATH_MAX];
1758 u64 limit = offset + size;
1759
1760 while (offset < limit) {
1761 ssize_t len;
1762
1763 if (read(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev))
1764 return -1;
1765
1766 if (header->needs_swap)
1767 perf_event_header__bswap(&old_bev.header);
1768
1769 len = old_bev.header.size - sizeof(old_bev);
1770 if (read(input, filename, len) != len)
1771 return -1;
1772
1773 bev.header = old_bev.header;
1774
1775 /*
1776		 * As the pid is the missing value, we need to fill
1777		 * it in properly. The header.misc value gives us a nice hint.
1778 */
1779 bev.pid = HOST_KERNEL_ID;
1780 if (bev.header.misc == PERF_RECORD_MISC_GUEST_USER ||
1781 bev.header.misc == PERF_RECORD_MISC_GUEST_KERNEL)
1782 bev.pid = DEFAULT_GUEST_KERNEL_ID;
1783
1784 memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id));
1785 __event_process_build_id(&bev, filename, session);
1786
1787 offset += bev.header.size;
1788 }
1789
1790 return 0;
1791}
1792
1793static int perf_header__read_build_ids(struct perf_header *header,
1794 int input, u64 offset, u64 size)
1795{
1796 struct perf_session *session = container_of(header, struct perf_session, header);
1797 struct build_id_event bev;
1798 char filename[PATH_MAX];
1799 u64 limit = offset + size, orig_offset = offset;
1800 int err = -1;
1801
1802 while (offset < limit) {
1803 ssize_t len;
1804
1805 if (read(input, &bev, sizeof(bev)) != sizeof(bev))
1806 goto out;
1807
1808 if (header->needs_swap)
1809 perf_event_header__bswap(&bev.header);
1810
1811 len = bev.header.size - sizeof(bev);
1812 if (read(input, filename, len) != len)
1813 goto out;
1814 /*
1815 * The a1645ce1 changeset:
1816 *
1817 * "perf: 'perf kvm' tool for monitoring guest performance from host"
1818 *
1819 * Added a field to struct build_id_event that broke the file
1820 * format.
1821 *
1822 * Since the kernel build-id is the first entry, process the
1823 * table using the old format if the well known
1824 * '[kernel.kallsyms]' string for the kernel build-id has the
1825 * first 4 characters chopped off (where the pid_t sits).
1826 */
1827 if (memcmp(filename, "nel.kallsyms]", 13) == 0) {
1828 if (lseek(input, orig_offset, SEEK_SET) == (off_t)-1)
1829 return -1;
1830 return perf_header__read_build_ids_abi_quirk(header, input, offset, size);
1831 }
1832
1833 __event_process_build_id(&bev, filename, session);
1834
1835 offset += bev.header.size;
1836 }
1837 err = 0;
1838out:
1839 return err;
1840}
1841
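Both build-id readers above share one shape: a fixed-size record header whose size field covers the variable-length filename that follows, walked until the section limit; the quirk handler simply re-walks the same bytes with the older record layout once the shifted '[kernel.kallsyms]' marker betrays it. A generic sketch of that walk, with a hypothetical record type standing in for perf_event_header:

  #include <stdint.h>
  #include <stddef.h>
  #include <unistd.h>

  /* hypothetical fixed prefix of each on-disk record */
  struct rec_hdr { uint32_t type; uint16_t misc; uint16_t size; };

  /* walk a [header][variable payload] table occupying `size` bytes */
  static int walk_records(int fd, uint64_t offset, uint64_t size)
  {
          uint64_t limit = offset + size;
          struct rec_hdr hdr;
          char payload[4096];

          while (offset < limit) {
                  ssize_t len;

                  if (read(fd, &hdr, sizeof(hdr)) != sizeof(hdr))
                          return -1;
                  /* hdr.size covers header + payload, payload is the rest */
                  len = (ssize_t)hdr.size - (ssize_t)sizeof(hdr);
                  if (len < 0 || (size_t)len > sizeof(payload))
                          return -1;
                  if (read(fd, payload, len) != len)
                          return -1;
                  /* ... process one record ... */
                  offset += hdr.size;
          }
          return 0;
  }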
1842static int perf_file_section__process(struct perf_file_section *section, 1992static int perf_file_section__process(struct perf_file_section *section,
1843 struct perf_header *ph, 1993 struct perf_header *ph,
1844 int feat, int fd, void *data __used) 1994 int feat, int fd, void *data __used)
@@ -1854,40 +2004,32 @@ static int perf_file_section__process(struct perf_file_section *section,
1854 return 0; 2004 return 0;
1855 } 2005 }
1856 2006
1857 switch (feat) { 2007 if (!feat_ops[feat].process)
1858 case HEADER_TRACE_INFO: 2008 return 0;
1859 trace_report(fd, false);
1860 break;
1861 case HEADER_BUILD_ID:
1862 if (perf_header__read_build_ids(ph, fd, section->offset, section->size))
1863 pr_debug("Failed to read buildids, continuing...\n");
1864 break;
1865 default:
1866 break;
1867 }
1868 2009
1869 return 0; 2010 return feat_ops[feat].process(section, ph, feat, fd);
1870} 2011}
1871 2012
1872static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, 2013static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
1873 struct perf_header *ph, int fd, 2014 struct perf_header *ph, int fd,
1874 bool repipe) 2015 bool repipe)
1875{ 2016{
1876 if (readn(fd, header, sizeof(*header)) <= 0 || 2017 int ret;
1877 memcmp(&header->magic, __perf_magic, sizeof(header->magic)))
1878 return -1;
1879 2018
1880 if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0) 2019 ret = readn(fd, header, sizeof(*header));
2020 if (ret <= 0)
1881 return -1; 2021 return -1;
1882 2022
1883 if (header->size != sizeof(*header)) { 2023 if (check_magic_endian(header->magic, header->size, true, ph) < 0) {
1884 		u64 size = bswap_64(header->size);	 2024		pr_debug("magic/endian check failed\n");
2025 return -1;
2026 }
1885 2027
1886 if (size != sizeof(*header)) 2028 if (ph->needs_swap)
1887 return -1; 2029 header->size = bswap_64(header->size);
1888 2030
1889 ph->needs_swap = true; 2031 if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0)
1890 } 2032 return -1;
1891 2033
1892 return 0; 2034 return 0;
1893} 2035}
@@ -1908,6 +2050,52 @@ static int perf_header__read_pipe(struct perf_session *session, int fd)
1908 return 0; 2050 return 0;
1909} 2051}
1910 2052
2053static int read_attr(int fd, struct perf_header *ph,
2054 struct perf_file_attr *f_attr)
2055{
2056 struct perf_event_attr *attr = &f_attr->attr;
2057 size_t sz, left;
2058 size_t our_sz = sizeof(f_attr->attr);
2059 int ret;
2060
2061 memset(f_attr, 0, sizeof(*f_attr));
2062
2063 /* read minimal guaranteed structure */
2064 ret = readn(fd, attr, PERF_ATTR_SIZE_VER0);
2065 if (ret <= 0) {
2066 pr_debug("cannot read %d bytes of header attr\n",
2067 PERF_ATTR_SIZE_VER0);
2068 return -1;
2069 }
2070
2071 /* on file perf_event_attr size */
2072 sz = attr->size;
2073
2074 if (ph->needs_swap)
2075 sz = bswap_32(sz);
2076
2077 if (sz == 0) {
2078 /* assume ABI0 */
2079 sz = PERF_ATTR_SIZE_VER0;
2080 } else if (sz > our_sz) {
2081 pr_debug("file uses a more recent and unsupported ABI"
2082 " (%zu bytes extra)\n", sz - our_sz);
2083 return -1;
2084 }
2085 /* what we have not yet read and that we know about */
2086 left = sz - PERF_ATTR_SIZE_VER0;
2087 if (left) {
2088 void *ptr = attr;
2089 ptr += PERF_ATTR_SIZE_VER0;
2090
2091 ret = readn(fd, ptr, left);
2092 }
2093 /* read perf_file_section, ids are read in caller */
2094 ret = readn(fd, &f_attr->ids, sizeof(f_attr->ids));
2095
2096 return ret <= 0 ? -1 : 0;
2097}
2098
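read_attr() is the classic recipe for a versioned, growing on-disk struct: zero the in-memory copy, read the guaranteed v0 prefix, then trust the embedded size field to decide how much more to read, or to bail out on an ABI newer than the tool understands. A condensed sketch of the same recipe (hypothetical struct and size; the needs_swap handling is omitted for brevity):

  #include <stdint.h>
  #include <string.h>
  #include <unistd.h>

  #define V0_SIZE 64 /* illustrative size of the oldest known layout */

  /* stand-in for perf_event_attr: a versioned, growing on-disk struct */
  struct vattr { uint32_t type; uint32_t size; char newer_fields[120]; };

  static int read_versioned(int fd, struct vattr *a)
  {
          uint32_t sz;

          memset(a, 0, sizeof(*a)); /* fields the file lacks read as zero */
          if (read(fd, a, V0_SIZE) != V0_SIZE)
                  return -1;

          sz = a->size;
          if (sz == 0)
                  sz = V0_SIZE;      /* ancient writer: assume the v0 layout */
          else if (sz > sizeof(*a))
                  return -1;         /* file is from a newer, unknown ABI */

          if (sz > V0_SIZE &&
              read(fd, (char *)a + V0_SIZE, sz - V0_SIZE) !=
              (ssize_t)(sz - V0_SIZE))
                  return -1;
          return 0;
  }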
1911int perf_session__read_header(struct perf_session *session, int fd) 2099int perf_session__read_header(struct perf_session *session, int fd)
1912{ 2100{
1913 struct perf_header *header = &session->header; 2101 struct perf_header *header = &session->header;
@@ -1923,19 +2111,17 @@ int perf_session__read_header(struct perf_session *session, int fd)
1923 if (session->fd_pipe) 2111 if (session->fd_pipe)
1924 return perf_header__read_pipe(session, fd); 2112 return perf_header__read_pipe(session, fd);
1925 2113
1926 if (perf_file_header__read(&f_header, header, fd) < 0) { 2114 if (perf_file_header__read(&f_header, header, fd) < 0)
1927 pr_debug("incompatible file format\n");
1928 return -EINVAL; 2115 return -EINVAL;
1929 }
1930 2116
1931 nr_attrs = f_header.attrs.size / sizeof(f_attr); 2117 nr_attrs = f_header.attrs.size / f_header.attr_size;
1932 lseek(fd, f_header.attrs.offset, SEEK_SET); 2118 lseek(fd, f_header.attrs.offset, SEEK_SET);
1933 2119
1934 for (i = 0; i < nr_attrs; i++) { 2120 for (i = 0; i < nr_attrs; i++) {
1935 struct perf_evsel *evsel; 2121 struct perf_evsel *evsel;
1936 off_t tmp; 2122 off_t tmp;
1937 2123
1938 if (readn(fd, &f_attr, sizeof(f_attr)) <= 0) 2124 if (read_attr(fd, header, &f_attr) < 0)
1939 goto out_errno; 2125 goto out_errno;
1940 2126
1941 if (header->needs_swap) 2127 if (header->needs_swap)
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index ac4ec956024e..21a6be09c129 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -11,6 +11,7 @@
11 11
12enum { 12enum {
13 HEADER_RESERVED = 0, /* always cleared */ 13 HEADER_RESERVED = 0, /* always cleared */
14 HEADER_FIRST_FEATURE = 1,
14 HEADER_TRACE_INFO = 1, 15 HEADER_TRACE_INFO = 1,
15 HEADER_BUILD_ID, 16 HEADER_BUILD_ID,
16 17
@@ -26,7 +27,7 @@ enum {
26 HEADER_EVENT_DESC, 27 HEADER_EVENT_DESC,
27 HEADER_CPU_TOPOLOGY, 28 HEADER_CPU_TOPOLOGY,
28 HEADER_NUMA_TOPOLOGY, 29 HEADER_NUMA_TOPOLOGY,
29 30 HEADER_BRANCH_STACK,
30 HEADER_LAST_FEATURE, 31 HEADER_LAST_FEATURE,
31 HEADER_FEAT_BITS = 256, 32 HEADER_FEAT_BITS = 256,
32}; 33};
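The HEADER_FIRST_FEATURE/HEADER_LAST_FEATURE sentinels exist so feature loops never hard-code feature numbers: anything added between the two is picked up automatically. A toy illustration of the sentinel pattern (note FIRST aliases the first real value, just as HEADER_FIRST_FEATURE aliases HEADER_TRACE_INFO):

  #include <stdio.h>

  enum { FIRST_FEATURE = 1, FEAT_A = 1, FEAT_B, FEAT_C, LAST_FEATURE };

  int main(void)
  {
          unsigned long bits = (1UL << FEAT_A) | (1UL << FEAT_C);
          int feat;

          /* sentinels let new features slot in without touching this loop */
          for (feat = FIRST_FEATURE; feat < LAST_FEATURE; feat++)
                  if (bits & (1UL << feat))
                          printf("feature %d present\n", feat);
          return 0;
  }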
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index e11e482bd185..3dc99a9b71f5 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -50,21 +50,25 @@ static void hists__reset_col_len(struct hists *hists)
50 hists__set_col_len(hists, col, 0); 50 hists__set_col_len(hists, col, 0);
51} 51}
52 52
53static void hists__set_unres_dso_col_len(struct hists *hists, int dso)
54{
55 const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
56
57 if (hists__col_len(hists, dso) < unresolved_col_width &&
58 !symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
59 !symbol_conf.dso_list)
60 hists__set_col_len(hists, dso, unresolved_col_width);
61}
62
53static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) 63static void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
54{ 64{
65 const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
55 u16 len; 66 u16 len;
56 67
57 if (h->ms.sym) 68 if (h->ms.sym)
58 hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen); 69 hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen + 4);
59 else { 70 else
60 const unsigned int unresolved_col_width = BITS_PER_LONG / 4; 71 hists__set_unres_dso_col_len(hists, HISTC_DSO);
61
62 if (hists__col_len(hists, HISTC_DSO) < unresolved_col_width &&
63 !symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
64 !symbol_conf.dso_list)
65 hists__set_col_len(hists, HISTC_DSO,
66 unresolved_col_width);
67 }
68 72
69 len = thread__comm_len(h->thread); 73 len = thread__comm_len(h->thread);
70 if (hists__new_col_len(hists, HISTC_COMM, len)) 74 if (hists__new_col_len(hists, HISTC_COMM, len))
@@ -74,6 +78,37 @@ static void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
74 len = dso__name_len(h->ms.map->dso); 78 len = dso__name_len(h->ms.map->dso);
75 hists__new_col_len(hists, HISTC_DSO, len); 79 hists__new_col_len(hists, HISTC_DSO, len);
76 } 80 }
81
82 if (h->branch_info) {
83 int symlen;
84 /*
85 * +4 accounts for '[x] ' priv level info
86		 * +2 accounts for the '0x' prefix on raw addresses
87 */
88 if (h->branch_info->from.sym) {
89 symlen = (int)h->branch_info->from.sym->namelen + 4;
90 hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen);
91
92 symlen = dso__name_len(h->branch_info->from.map->dso);
93 hists__new_col_len(hists, HISTC_DSO_FROM, symlen);
94 } else {
95 symlen = unresolved_col_width + 4 + 2;
96 hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen);
97 hists__set_unres_dso_col_len(hists, HISTC_DSO_FROM);
98 }
99
100 if (h->branch_info->to.sym) {
101 symlen = (int)h->branch_info->to.sym->namelen + 4;
102 hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen);
103
104 symlen = dso__name_len(h->branch_info->to.map->dso);
105 hists__new_col_len(hists, HISTC_DSO_TO, symlen);
106 } else {
107 symlen = unresolved_col_width + 4 + 2;
108 hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen);
109 hists__set_unres_dso_col_len(hists, HISTC_DSO_TO);
110 }
111 }
77} 112}
78 113
79static void hist_entry__add_cpumode_period(struct hist_entry *he, 114static void hist_entry__add_cpumode_period(struct hist_entry *he,
@@ -195,26 +230,14 @@ static u8 symbol__parent_filter(const struct symbol *parent)
195 return 0; 230 return 0;
196} 231}
197 232
198struct hist_entry *__hists__add_entry(struct hists *hists, 233static struct hist_entry *add_hist_entry(struct hists *hists,
234 struct hist_entry *entry,
199 struct addr_location *al, 235 struct addr_location *al,
200 struct symbol *sym_parent, u64 period) 236 u64 period)
201{ 237{
202 struct rb_node **p; 238 struct rb_node **p;
203 struct rb_node *parent = NULL; 239 struct rb_node *parent = NULL;
204 struct hist_entry *he; 240 struct hist_entry *he;
205 struct hist_entry entry = {
206 .thread = al->thread,
207 .ms = {
208 .map = al->map,
209 .sym = al->sym,
210 },
211 .cpu = al->cpu,
212 .ip = al->addr,
213 .level = al->level,
214 .period = period,
215 .parent = sym_parent,
216 .filtered = symbol__parent_filter(sym_parent),
217 };
218 int cmp; 241 int cmp;
219 242
220 pthread_mutex_lock(&hists->lock); 243 pthread_mutex_lock(&hists->lock);
@@ -225,7 +248,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
225 parent = *p; 248 parent = *p;
226 he = rb_entry(parent, struct hist_entry, rb_node_in); 249 he = rb_entry(parent, struct hist_entry, rb_node_in);
227 250
228 cmp = hist_entry__cmp(&entry, he); 251 cmp = hist_entry__cmp(entry, he);
229 252
230 if (!cmp) { 253 if (!cmp) {
231 he->period += period; 254 he->period += period;
@@ -239,7 +262,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
239 p = &(*p)->rb_right; 262 p = &(*p)->rb_right;
240 } 263 }
241 264
242 he = hist_entry__new(&entry); 265 he = hist_entry__new(entry);
243 if (!he) 266 if (!he)
244 goto out_unlock; 267 goto out_unlock;
245 268
@@ -252,6 +275,51 @@ out_unlock:
252 return he; 275 return he;
253} 276}
254 277
278struct hist_entry *__hists__add_branch_entry(struct hists *self,
279 struct addr_location *al,
280 struct symbol *sym_parent,
281 struct branch_info *bi,
282 u64 period)
283{
284 struct hist_entry entry = {
285 .thread = al->thread,
286 .ms = {
287 .map = bi->to.map,
288 .sym = bi->to.sym,
289 },
290 .cpu = al->cpu,
291 .ip = bi->to.addr,
292 .level = al->level,
293 .period = period,
294 .parent = sym_parent,
295 .filtered = symbol__parent_filter(sym_parent),
296 .branch_info = bi,
297 };
298
299 return add_hist_entry(self, &entry, al, period);
300}
301
302struct hist_entry *__hists__add_entry(struct hists *self,
303 struct addr_location *al,
304 struct symbol *sym_parent, u64 period)
305{
306 struct hist_entry entry = {
307 .thread = al->thread,
308 .ms = {
309 .map = al->map,
310 .sym = al->sym,
311 },
312 .cpu = al->cpu,
313 .ip = al->addr,
314 .level = al->level,
315 .period = period,
316 .parent = sym_parent,
317 .filtered = symbol__parent_filter(sym_parent),
318 };
319
320 return add_hist_entry(self, &entry, al, period);
321}
322
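add_hist_entry() is a find-or-accumulate insert: walk the sorted structure, bump the period on an exact match, otherwise allocate a new node from the template entry built on the caller's stack. The same pattern on a plain list, stripped of the rbtree and the locking (hypothetical key type):

  #include <stdlib.h>
  #include <string.h>

  struct entry { struct entry *next; char key[32]; unsigned long period; };

  /* find-or-insert: an existing key accumulates, a new key is allocated */
  static struct entry *add_entry(struct entry **head, const char *key,
                                 unsigned long period)
  {
          struct entry *e;

          for (e = *head; e; e = e->next) {
                  if (!strcmp(e->key, key)) {
                          e->period += period; /* like he->period += period */
                          return e;
                  }
          }
          e = calloc(1, sizeof(*e));
          if (!e)
                  return NULL;
          strncpy(e->key, key, sizeof(e->key) - 1);
          e->period = period;
          e->next = *head;
          *head = e;
          return e;
  }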
255int64_t 323int64_t
256hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) 324hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
257{ 325{
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index f55f0a8d1f81..9413f3e31fea 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -32,6 +32,7 @@ struct events_stats {
32 u32 nr_unknown_events; 32 u32 nr_unknown_events;
33 u32 nr_invalid_chains; 33 u32 nr_invalid_chains;
34 u32 nr_unknown_id; 34 u32 nr_unknown_id;
35 u32 nr_unprocessable_samples;
35}; 36};
36 37
37enum hist_column { 38enum hist_column {
@@ -41,6 +42,11 @@ enum hist_column {
41 HISTC_COMM, 42 HISTC_COMM,
42 HISTC_PARENT, 43 HISTC_PARENT,
43 HISTC_CPU, 44 HISTC_CPU,
45 HISTC_MISPREDICT,
46 HISTC_SYMBOL_FROM,
47 HISTC_SYMBOL_TO,
48 HISTC_DSO_FROM,
49 HISTC_DSO_TO,
44 HISTC_NR_COLS, /* Last entry */ 50 HISTC_NR_COLS, /* Last entry */
45}; 51};
46 52
@@ -55,6 +61,7 @@ struct hists {
55 u64 nr_entries; 61 u64 nr_entries;
56 const struct thread *thread_filter; 62 const struct thread *thread_filter;
57 const struct dso *dso_filter; 63 const struct dso *dso_filter;
64 const char *uid_filter_str;
58 pthread_mutex_t lock; 65 pthread_mutex_t lock;
59 struct events_stats stats; 66 struct events_stats stats;
60 u64 event_stream; 67 u64 event_stream;
@@ -72,6 +79,12 @@ int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size,
72 struct hists *hists); 79 struct hists *hists);
73void hist_entry__free(struct hist_entry *); 80void hist_entry__free(struct hist_entry *);
74 81
82struct hist_entry *__hists__add_branch_entry(struct hists *self,
83 struct addr_location *al,
84 struct symbol *sym_parent,
85 struct branch_info *bi,
86 u64 period);
87
75void hists__output_resort(struct hists *self); 88void hists__output_resort(struct hists *self);
76void hists__output_resort_threaded(struct hists *hists); 89void hists__output_resort_threaded(struct hists *hists);
77void hists__collapse_resort(struct hists *self); 90void hists__collapse_resort(struct hists *self);
diff --git a/tools/perf/util/include/asm/dwarf2.h b/tools/perf/util/include/asm/dwarf2.h
index bb4198e7837a..afe38199e922 100644
--- a/tools/perf/util/include/asm/dwarf2.h
+++ b/tools/perf/util/include/asm/dwarf2.h
@@ -2,10 +2,12 @@
2#ifndef PERF_DWARF2_H 2#ifndef PERF_DWARF2_H
3#define PERF_DWARF2_H 3#define PERF_DWARF2_H
4 4
5/* dwarf2.h ... dummy header file for including arch/x86/lib/memcpy_64.S */ 5/* dwarf2.h ... dummy header file for including arch/x86/lib/mem{cpy,set}_64.S */
6 6
7#define CFI_STARTPROC 7#define CFI_STARTPROC
8#define CFI_ENDPROC 8#define CFI_ENDPROC
9#define CFI_REMEMBER_STATE
10#define CFI_RESTORE_STATE
9 11
10#endif /* PERF_DWARF2_H */ 12#endif /* PERF_DWARF2_H */
11 13
diff --git a/tools/perf/util/include/linux/bitmap.h b/tools/perf/util/include/linux/bitmap.h
index eda4416efa0a..bb162e40c76c 100644
--- a/tools/perf/util/include/linux/bitmap.h
+++ b/tools/perf/util/include/linux/bitmap.h
@@ -5,6 +5,8 @@
5#include <linux/bitops.h> 5#include <linux/bitops.h>
6 6
7int __bitmap_weight(const unsigned long *bitmap, int bits); 7int __bitmap_weight(const unsigned long *bitmap, int bits);
8void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
9 const unsigned long *bitmap2, int bits);
8 10
9#define BITMAP_LAST_WORD_MASK(nbits) \ 11#define BITMAP_LAST_WORD_MASK(nbits) \
10( \ 12( \
@@ -32,4 +34,13 @@ static inline int bitmap_weight(const unsigned long *src, int nbits)
32 return __bitmap_weight(src, nbits); 34 return __bitmap_weight(src, nbits);
33} 35}
34 36
37static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
38 const unsigned long *src2, int nbits)
39{
40 if (small_const_nbits(nbits))
41 *dst = *src1 | *src2;
42 else
43 __bitmap_or(dst, src1, src2, nbits);
44}
45
35#endif /* _PERF_BITOPS_H */ 46#endif /* _PERF_BITOPS_H */
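bitmap_or() relies on small_const_nbits() to collapse the common single-word case into one OR at compile time, falling back to a word-by-word loop otherwise. A self-contained sketch of that fast path, assuming GCC's __builtin_constant_p:

  #include <stdio.h>

  #define BITS_PER_LONG (8 * (int)sizeof(unsigned long))

  /* sketch of the constant-folding test used by bitmap_or() */
  #define small_const_nbits(nbits) \
          (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG)

  static void bitmap_or_slow(unsigned long *dst, const unsigned long *a,
                             const unsigned long *b, int nbits)
  {
          int i, words = (nbits + BITS_PER_LONG - 1) / BITS_PER_LONG;

          for (i = 0; i < words; i++)
                  dst[i] = a[i] | b[i];
  }

  int main(void)
  {
          unsigned long a = 0x5, b = 0x3, dst = 0;

          if (small_const_nbits(BITS_PER_LONG))
                  dst = a | b;            /* folds to a single instruction */
          else
                  bitmap_or_slow(&dst, &a, &b, BITS_PER_LONG);
          printf("%lx\n", dst);           /* prints 7 */
          return 0;
  }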
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 316aa0ab7122..dea6d1c1a954 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -212,6 +212,21 @@ size_t map__fprintf(struct map *self, FILE *fp)
212 self->start, self->end, self->pgoff, self->dso->name); 212 self->start, self->end, self->pgoff, self->dso->name);
213} 213}
214 214
215size_t map__fprintf_dsoname(struct map *map, FILE *fp)
216{
217	const char *dsoname = "[unknown]"; /* fallback if no branch below hits */
218
219 if (map && map->dso && (map->dso->name || map->dso->long_name)) {
220 if (symbol_conf.show_kernel_path && map->dso->long_name)
221 dsoname = map->dso->long_name;
222 else if (map->dso->name)
223 dsoname = map->dso->name;
224 } else
225 dsoname = "[unknown]";
226
227 return fprintf(fp, "%s", dsoname);
228}
229
215/* 230/*
216 * objdump wants/reports absolute IPs for ET_EXEC, and RIPs for ET_DYN. 231 * objdump wants/reports absolute IPs for ET_EXEC, and RIPs for ET_DYN.
217 * map->dso->adjust_symbols==1 for ET_EXEC-like cases. 232 * map->dso->adjust_symbols==1 for ET_EXEC-like cases.
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 2b8017f8a930..b100c20b7f94 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -118,6 +118,7 @@ void map__delete(struct map *self);
118struct map *map__clone(struct map *self); 118struct map *map__clone(struct map *self);
119int map__overlap(struct map *l, struct map *r); 119int map__overlap(struct map *l, struct map *r);
120size_t map__fprintf(struct map *self, FILE *fp); 120size_t map__fprintf(struct map *self, FILE *fp);
121size_t map__fprintf_dsoname(struct map *map, FILE *fp);
121 122
122int map__load(struct map *self, symbol_filter_t filter); 123int map__load(struct map *self, symbol_filter_t filter);
123struct symbol *map__find_symbol(struct map *self, 124struct symbol *map__find_symbol(struct map *self,
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index e33554a562b3..8a8ee64e72d1 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -34,7 +34,6 @@
34 34
35#include "util.h" 35#include "util.h"
36#include "event.h" 36#include "event.h"
37#include "string.h"
38#include "strlist.h" 37#include "strlist.h"
39#include "debug.h" 38#include "debug.h"
40#include "cache.h" 39#include "cache.h"
@@ -273,10 +272,10 @@ static int add_module_to_probe_trace_events(struct probe_trace_event *tevs,
273/* Try to find perf_probe_event with debuginfo */ 272/* Try to find perf_probe_event with debuginfo */
274static int try_to_find_probe_trace_events(struct perf_probe_event *pev, 273static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
275 struct probe_trace_event **tevs, 274 struct probe_trace_event **tevs,
276 int max_tevs, const char *module) 275 int max_tevs, const char *target)
277{ 276{
278 bool need_dwarf = perf_probe_event_need_dwarf(pev); 277 bool need_dwarf = perf_probe_event_need_dwarf(pev);
279 struct debuginfo *dinfo = open_debuginfo(module); 278 struct debuginfo *dinfo = open_debuginfo(target);
280 int ntevs, ret = 0; 279 int ntevs, ret = 0;
281 280
282 if (!dinfo) { 281 if (!dinfo) {
@@ -295,9 +294,9 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
295 294
296 if (ntevs > 0) { /* Succeeded to find trace events */ 295 if (ntevs > 0) { /* Succeeded to find trace events */
297 pr_debug("find %d probe_trace_events.\n", ntevs); 296 pr_debug("find %d probe_trace_events.\n", ntevs);
298 if (module) 297 if (target)
299 ret = add_module_to_probe_trace_events(*tevs, ntevs, 298 ret = add_module_to_probe_trace_events(*tevs, ntevs,
300 module); 299 target);
301 return ret < 0 ? ret : ntevs; 300 return ret < 0 ? ret : ntevs;
302 } 301 }
303 302
@@ -1729,7 +1728,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
1729 } 1728 }
1730 1729
1731 ret = 0; 1730 ret = 0;
1732 printf("Add new event%s\n", (ntevs > 1) ? "s:" : ":"); 1731 printf("Added new event%s\n", (ntevs > 1) ? "s:" : ":");
1733 for (i = 0; i < ntevs; i++) { 1732 for (i = 0; i < ntevs; i++) {
1734 tev = &tevs[i]; 1733 tev = &tevs[i];
1735 if (pev->event) 1734 if (pev->event)
@@ -1784,7 +1783,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
1784 1783
1785 if (ret >= 0) { 1784 if (ret >= 0) {
1786 /* Show how to use the event. */ 1785 /* Show how to use the event. */
1787 printf("\nYou can now use it on all perf tools, such as:\n\n"); 1786 printf("\nYou can now use it in all perf tools, such as:\n\n");
1788 printf("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group, 1787 printf("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group,
1789 tev->event); 1788 tev->event);
1790 } 1789 }
@@ -1796,14 +1795,14 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
1796 1795
1797static int convert_to_probe_trace_events(struct perf_probe_event *pev, 1796static int convert_to_probe_trace_events(struct perf_probe_event *pev,
1798 struct probe_trace_event **tevs, 1797 struct probe_trace_event **tevs,
1799 int max_tevs, const char *module) 1798 int max_tevs, const char *target)
1800{ 1799{
1801 struct symbol *sym; 1800 struct symbol *sym;
1802 int ret = 0, i; 1801 int ret = 0, i;
1803 struct probe_trace_event *tev; 1802 struct probe_trace_event *tev;
1804 1803
1805 /* Convert perf_probe_event with debuginfo */ 1804 /* Convert perf_probe_event with debuginfo */
1806 ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, module); 1805 ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, target);
1807 if (ret != 0) 1806 if (ret != 0)
1808 return ret; /* Found in debuginfo or got an error */ 1807 return ret; /* Found in debuginfo or got an error */
1809 1808
@@ -1819,8 +1818,8 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
1819 goto error; 1818 goto error;
1820 } 1819 }
1821 1820
1822 if (module) { 1821 if (target) {
1823 tev->point.module = strdup(module); 1822 tev->point.module = strdup(target);
1824 if (tev->point.module == NULL) { 1823 if (tev->point.module == NULL) {
1825 ret = -ENOMEM; 1824 ret = -ENOMEM;
1826 goto error; 1825 goto error;
@@ -1890,7 +1889,7 @@ struct __event_package {
1890}; 1889};
1891 1890
1892int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, 1891int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
1893 int max_tevs, const char *module, bool force_add) 1892 int max_tevs, const char *target, bool force_add)
1894{ 1893{
1895 int i, j, ret; 1894 int i, j, ret;
1896 struct __event_package *pkgs; 1895 struct __event_package *pkgs;
@@ -1913,7 +1912,7 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
1913 ret = convert_to_probe_trace_events(pkgs[i].pev, 1912 ret = convert_to_probe_trace_events(pkgs[i].pev,
1914 &pkgs[i].tevs, 1913 &pkgs[i].tevs,
1915 max_tevs, 1914 max_tevs,
1916 module); 1915 target);
1917 if (ret < 0) 1916 if (ret < 0)
1918 goto end; 1917 goto end;
1919 pkgs[i].ntevs = ret; 1918 pkgs[i].ntevs = ret;
@@ -1965,7 +1964,7 @@ static int __del_trace_probe_event(int fd, struct str_node *ent)
1965 goto error; 1964 goto error;
1966 } 1965 }
1967 1966
1968 printf("Remove event: %s\n", ent->s); 1967 printf("Removed event: %s\n", ent->s);
1969 return 0; 1968 return 0;
1970error: 1969error:
1971 pr_warning("Failed to delete event: %s\n", strerror(-ret)); 1970 pr_warning("Failed to delete event: %s\n", strerror(-ret));
@@ -2069,7 +2068,7 @@ static int filter_available_functions(struct map *map __unused,
2069 return 1; 2068 return 1;
2070} 2069}
2071 2070
2072int show_available_funcs(const char *module, struct strfilter *_filter) 2071int show_available_funcs(const char *target, struct strfilter *_filter)
2073{ 2072{
2074 struct map *map; 2073 struct map *map;
2075 int ret; 2074 int ret;
@@ -2080,9 +2079,9 @@ int show_available_funcs(const char *module, struct strfilter *_filter)
2080 if (ret < 0) 2079 if (ret < 0)
2081 return ret; 2080 return ret;
2082 2081
2083 map = kernel_get_module_map(module); 2082 map = kernel_get_module_map(target);
2084 if (!map) { 2083 if (!map) {
2085 pr_err("Failed to find %s map.\n", (module) ? : "kernel"); 2084 pr_err("Failed to find %s map.\n", (target) ? : "kernel");
2086 return -EINVAL; 2085 return -EINVAL;
2087 } 2086 }
2088 available_func_filter = _filter; 2087 available_func_filter = _filter;
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 74bd2e63c4b4..2cc162d3b78c 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -30,7 +30,6 @@
30#include <stdlib.h> 30#include <stdlib.h>
31#include <string.h> 31#include <string.h>
32#include <stdarg.h> 32#include <stdarg.h>
33#include <ctype.h>
34#include <dwarf-regs.h> 33#include <dwarf-regs.h>
35 34
36#include <linux/bitops.h> 35#include <linux/bitops.h>
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
new file mode 100644
index 000000000000..2884e67ee625
--- /dev/null
+++ b/tools/perf/util/python-ext-sources
@@ -0,0 +1,19 @@
1#
2# List of files needed by perf python extension
3#
4# Each source file must be placed on its own line so that it can be
5# processed by Makefile and util/setup.py accordingly.
6#
7
8util/python.c
9util/ctype.c
10util/evlist.c
11util/evsel.c
12util/cpumap.c
13util/thread_map.c
14util/util.c
15util/xyarray.c
16util/cgroup.c
17util/debugfs.c
18util/strlist.c
19../../lib/rbtree.c
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 9dd47a4f2596..e03b58a48424 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -425,14 +425,14 @@ struct pyrf_thread_map {
425static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads, 425static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads,
426 PyObject *args, PyObject *kwargs) 426 PyObject *args, PyObject *kwargs)
427{ 427{
428 static char *kwlist[] = { "pid", "tid", NULL }; 428 static char *kwlist[] = { "pid", "tid", "uid", NULL };
429 int pid = -1, tid = -1; 429 int pid = -1, tid = -1, uid = UINT_MAX;
430 430
431 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", 431 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iii",
432 kwlist, &pid, &tid)) 432 kwlist, &pid, &tid, &uid))
433 return -1; 433 return -1;
434 434
435 pthreads->threads = thread_map__new(pid, tid); 435 pthreads->threads = thread_map__new(pid, tid, uid);
436 if (pthreads->threads == NULL) 436 if (pthreads->threads == NULL)
437 return -1; 437 return -1;
438 return 0; 438 return 0;
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 0b2a48783172..c2623c6f9b51 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -24,7 +24,6 @@
24#include <stdio.h> 24#include <stdio.h>
25#include <stdlib.h> 25#include <stdlib.h>
26#include <string.h> 26#include <string.h>
27#include <ctype.h>
28#include <errno.h> 27#include <errno.h>
29 28
30#include "../../perf.h" 29#include "../../perf.h"
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index b5ca2558c7bb..002ebbf59f48 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -24,7 +24,7 @@ static int perf_session__open(struct perf_session *self, bool force)
24 self->fd = STDIN_FILENO; 24 self->fd = STDIN_FILENO;
25 25
26 if (perf_session__read_header(self, self->fd) < 0) 26 if (perf_session__read_header(self, self->fd) < 0)
27 pr_err("incompatible file format"); 27 pr_err("incompatible file format (rerun with -v to learn more)");
28 28
29 return 0; 29 return 0;
30 } 30 }
@@ -56,7 +56,7 @@ static int perf_session__open(struct perf_session *self, bool force)
56 } 56 }
57 57
58 if (perf_session__read_header(self, self->fd) < 0) { 58 if (perf_session__read_header(self, self->fd) < 0) {
59 pr_err("incompatible file format"); 59 pr_err("incompatible file format (rerun with -v to learn more)");
60 goto out_close; 60 goto out_close;
61 } 61 }
62 62
@@ -229,6 +229,64 @@ static bool symbol__match_parent_regex(struct symbol *sym)
229 return 0; 229 return 0;
230} 230}
231 231
232static const u8 cpumodes[] = {
233 PERF_RECORD_MISC_USER,
234 PERF_RECORD_MISC_KERNEL,
235 PERF_RECORD_MISC_GUEST_USER,
236 PERF_RECORD_MISC_GUEST_KERNEL
237};
238#define NCPUMODES (sizeof(cpumodes)/sizeof(u8))
239
240static void ip__resolve_ams(struct machine *self, struct thread *thread,
241 struct addr_map_symbol *ams,
242 u64 ip)
243{
244 struct addr_location al;
245 size_t i;
246 u8 m;
247
248 memset(&al, 0, sizeof(al));
249
250 for (i = 0; i < NCPUMODES; i++) {
251 m = cpumodes[i];
252 /*
253 * We cannot use the header.misc hint to determine whether a
254 * branch stack address is user, kernel, guest, hypervisor.
255 * Branches may straddle the kernel/user/hypervisor boundaries.
256		 * Thus, we have to try each cpumode in turn until we find a match;
257		 * otherwise the symbol remains unknown.
258 */
259 thread__find_addr_location(thread, self, m, MAP__FUNCTION,
260 ip, &al, NULL);
261 if (al.sym)
262 goto found;
263 }
264found:
265 ams->addr = ip;
266 ams->al_addr = al.addr;
267 ams->sym = al.sym;
268 ams->map = al.map;
269}
270
271struct branch_info *machine__resolve_bstack(struct machine *self,
272 struct thread *thr,
273 struct branch_stack *bs)
274{
275 struct branch_info *bi;
276 unsigned int i;
277
278 bi = calloc(bs->nr, sizeof(struct branch_info));
279 if (!bi)
280 return NULL;
281
282 for (i = 0; i < bs->nr; i++) {
283 ip__resolve_ams(self, thr, &bi[i].to, bs->entries[i].to);
284 ip__resolve_ams(self, thr, &bi[i].from, bs->entries[i].from);
285 bi[i].flags = bs->entries[i].flags;
286 }
287 return bi;
288}
289
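A branch stack sample is just a count followed by (from, to, flags) triples; machine__resolve_bstack() resolves both ends of every triple because either side of a jump may land in a different map. A simplified sketch of the payload shape and a dump loop (flags reduced to a plain u64; the real perf_branch_entry uses bitfields):

  #include <inttypes.h>
  #include <stdint.h>
  #include <stdio.h>

  struct bs_entry { uint64_t from, to, flags; };
  struct bstack   { uint64_t nr; struct bs_entry entries[]; };

  static void dump_bstack(const struct bstack *bs)
  {
          uint64_t i;

          /* each taken branch records both ends of the jump */
          for (i = 0; i < bs->nr; i++)
                  printf("%2" PRIu64 ": %#016" PRIx64 " -> %#016" PRIx64 "\n",
                         i, bs->entries[i].from, bs->entries[i].to);
  }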
232int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, 290int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel,
233 struct thread *thread, 291 struct thread *thread,
234 struct ip_callchain *chain, 292 struct ip_callchain *chain,
@@ -697,6 +755,18 @@ static void callchain__printf(struct perf_sample *sample)
697 i, sample->callchain->ips[i]); 755 i, sample->callchain->ips[i]);
698} 756}
699 757
758static void branch_stack__printf(struct perf_sample *sample)
759{
760 uint64_t i;
761
762 printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr);
763
764 for (i = 0; i < sample->branch_stack->nr; i++)
765 printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n",
766 i, sample->branch_stack->entries[i].from,
767 sample->branch_stack->entries[i].to);
768}
769
700static void perf_session__print_tstamp(struct perf_session *session, 770static void perf_session__print_tstamp(struct perf_session *session,
701 union perf_event *event, 771 union perf_event *event,
702 struct perf_sample *sample) 772 struct perf_sample *sample)
@@ -744,6 +814,9 @@ static void dump_sample(struct perf_session *session, union perf_event *event,
744 814
745 if (session->sample_type & PERF_SAMPLE_CALLCHAIN) 815 if (session->sample_type & PERF_SAMPLE_CALLCHAIN)
746 callchain__printf(sample); 816 callchain__printf(sample);
817
818 if (session->sample_type & PERF_SAMPLE_BRANCH_STACK)
819 branch_stack__printf(sample);
747} 820}
748 821
749static struct machine * 822static struct machine *
@@ -796,6 +869,10 @@ static int perf_session_deliver_event(struct perf_session *session,
796 ++session->hists.stats.nr_unknown_id; 869 ++session->hists.stats.nr_unknown_id;
797 return -1; 870 return -1;
798 } 871 }
872 if (machine == NULL) {
873 ++session->hists.stats.nr_unprocessable_samples;
874 return -1;
875 }
799 return tool->sample(tool, event, sample, evsel, machine); 876 return tool->sample(tool, event, sample, evsel, machine);
800 case PERF_RECORD_MMAP: 877 case PERF_RECORD_MMAP:
801 return tool->mmap(tool, event, sample, machine); 878 return tool->mmap(tool, event, sample, machine);
@@ -964,6 +1041,12 @@ static void perf_session__warn_about_errors(const struct perf_session *session,
964 session->hists.stats.nr_invalid_chains, 1041 session->hists.stats.nr_invalid_chains,
965 session->hists.stats.nr_events[PERF_RECORD_SAMPLE]); 1042 session->hists.stats.nr_events[PERF_RECORD_SAMPLE]);
966 } 1043 }
1044
1045 if (session->hists.stats.nr_unprocessable_samples != 0) {
1046 ui__warning("%u unprocessable samples recorded.\n"
1047 "Do you have a KVM guest running and not using 'perf kvm'?\n",
1048 session->hists.stats.nr_unprocessable_samples);
1049 }
967} 1050}
968 1051
969#define session_done() (*(volatile int *)(&session_done)) 1052#define session_done() (*(volatile int *)(&session_done))
@@ -1293,10 +1376,9 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
1293 1376
1294void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, 1377void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
1295 struct machine *machine, struct perf_evsel *evsel, 1378 struct machine *machine, struct perf_evsel *evsel,
1296 int print_sym, int print_dso) 1379 int print_sym, int print_dso, int print_symoffset)
1297{ 1380{
1298 struct addr_location al; 1381 struct addr_location al;
1299 const char *symname, *dsoname;
1300 struct callchain_cursor *cursor = &evsel->hists.callchain_cursor; 1382 struct callchain_cursor *cursor = &evsel->hists.callchain_cursor;
1301 struct callchain_cursor_node *node; 1383 struct callchain_cursor_node *node;
1302 1384
@@ -1324,20 +1406,13 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
1324 1406
1325 printf("\t%16" PRIx64, node->ip); 1407 printf("\t%16" PRIx64, node->ip);
1326 if (print_sym) { 1408 if (print_sym) {
1327 if (node->sym && node->sym->name) 1409 printf(" ");
1328 symname = node->sym->name; 1410 symbol__fprintf_symname(node->sym, stdout);
1329 else
1330 symname = "";
1331
1332 printf(" %s", symname);
1333 } 1411 }
1334 if (print_dso) { 1412 if (print_dso) {
1335 if (node->map && node->map->dso && node->map->dso->name) 1413 printf(" (");
1336 dsoname = node->map->dso->name; 1414 map__fprintf_dsoname(al.map, stdout);
1337 else 1415 printf(")");
1338 dsoname = "";
1339
1340 printf(" (%s)", dsoname);
1341 } 1416 }
1342 printf("\n"); 1417 printf("\n");
1343 1418
@@ -1347,21 +1422,18 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
1347 } else { 1422 } else {
1348 printf("%16" PRIx64, sample->ip); 1423 printf("%16" PRIx64, sample->ip);
1349 if (print_sym) { 1424 if (print_sym) {
1350 if (al.sym && al.sym->name) 1425 printf(" ");
1351 symname = al.sym->name; 1426 if (print_symoffset)
1427 symbol__fprintf_symname_offs(al.sym, &al,
1428 stdout);
1352 else 1429 else
1353 symname = ""; 1430 symbol__fprintf_symname(al.sym, stdout);
1354
1355 printf(" %s", symname);
1356 } 1431 }
1357 1432
1358 if (print_dso) { 1433 if (print_dso) {
1359 if (al.map && al.map->dso && al.map->dso->name) 1434 printf(" (");
1360 dsoname = al.map->dso->name; 1435 map__fprintf_dsoname(al.map, stdout);
1361 else 1436 printf(")");
1362 dsoname = "";
1363
1364 printf(" (%s)", dsoname);
1365 } 1437 }
1366 } 1438 }
1367} 1439}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 37bc38381fb6..7a5434c00565 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -73,6 +73,10 @@ int perf_session__resolve_callchain(struct perf_session *self, struct perf_evsel
73 struct ip_callchain *chain, 73 struct ip_callchain *chain,
74 struct symbol **parent); 74 struct symbol **parent);
75 75
76struct branch_info *machine__resolve_bstack(struct machine *self,
77 struct thread *thread,
78 struct branch_stack *bs);
79
76bool perf_session__has_traces(struct perf_session *self, const char *msg); 80bool perf_session__has_traces(struct perf_session *self, const char *msg);
77 81
78void mem_bswap_64(void *src, int byte_size); 82void mem_bswap_64(void *src, int byte_size);
@@ -147,7 +151,7 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
147 151
148void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, 152void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
149 struct machine *machine, struct perf_evsel *evsel, 153 struct machine *machine, struct perf_evsel *evsel,
150 int print_sym, int print_dso); 154 int print_sym, int print_dso, int print_symoffset);
151 155
152int perf_session__cpu_bitmap(struct perf_session *session, 156int perf_session__cpu_bitmap(struct perf_session *session,
153 const char *cpu_list, unsigned long *cpu_bitmap); 157 const char *cpu_list, unsigned long *cpu_bitmap);
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 36d4c5619575..d0f9f29cf181 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -24,11 +24,11 @@ cflags += getenv('CFLAGS', '').split()
24build_lib = getenv('PYTHON_EXTBUILD_LIB') 24build_lib = getenv('PYTHON_EXTBUILD_LIB')
25build_tmp = getenv('PYTHON_EXTBUILD_TMP') 25build_tmp = getenv('PYTHON_EXTBUILD_TMP')
26 26
27ext_sources = [f.strip() for f in file('util/python-ext-sources')
28 if len(f.strip()) > 0 and f[0] != '#']
29
27perf = Extension('perf', 30perf = Extension('perf',
28 sources = ['util/python.c', 'util/ctype.c', 'util/evlist.c', 31 sources = ext_sources,
29 'util/evsel.c', 'util/cpumap.c', 'util/thread_map.c',
30 'util/util.c', 'util/xyarray.c', 'util/cgroup.c',
31 'util/debugfs.c'],
32 include_dirs = ['util/include'], 32 include_dirs = ['util/include'],
33 extra_compile_args = cflags, 33 extra_compile_args = cflags,
34 ) 34 )
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 076c9d4e1ea4..a27237430c5f 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -8,6 +8,7 @@ const char default_sort_order[] = "comm,dso,symbol";
8const char *sort_order = default_sort_order; 8const char *sort_order = default_sort_order;
9int sort__need_collapse = 0; 9int sort__need_collapse = 0;
10int sort__has_parent = 0; 10int sort__has_parent = 0;
11int sort__branch_mode = -1; /* -1 = not set */
11 12
12enum sort_type sort__first_dimension; 13enum sort_type sort__first_dimension;
13 14
@@ -97,6 +98,26 @@ static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf,
97 return repsep_snprintf(bf, size, "%*s", width, self->thread->comm); 98 return repsep_snprintf(bf, size, "%*s", width, self->thread->comm);
98} 99}
99 100
101static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r)
102{
103 struct dso *dso_l = map_l ? map_l->dso : NULL;
104 struct dso *dso_r = map_r ? map_r->dso : NULL;
105 const char *dso_name_l, *dso_name_r;
106
107 if (!dso_l || !dso_r)
108 return cmp_null(dso_l, dso_r);
109
110 if (verbose) {
111 dso_name_l = dso_l->long_name;
112 dso_name_r = dso_r->long_name;
113 } else {
114 dso_name_l = dso_l->short_name;
115 dso_name_r = dso_r->short_name;
116 }
117
118 return strcmp(dso_name_l, dso_name_r);
119}
120
100struct sort_entry sort_comm = { 121struct sort_entry sort_comm = {
101 .se_header = "Command", 122 .se_header = "Command",
102 .se_cmp = sort__comm_cmp, 123 .se_cmp = sort__comm_cmp,
@@ -110,36 +131,74 @@ struct sort_entry sort_comm = {
110static int64_t 131static int64_t
111sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) 132sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
112{ 133{
113 struct dso *dso_l = left->ms.map ? left->ms.map->dso : NULL; 134 return _sort__dso_cmp(left->ms.map, right->ms.map);
114 struct dso *dso_r = right->ms.map ? right->ms.map->dso : NULL; 135}
115 const char *dso_name_l, *dso_name_r;
116 136
117 if (!dso_l || !dso_r)
118 return cmp_null(dso_l, dso_r);
119 137
120 if (verbose) { 138static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r,
121 dso_name_l = dso_l->long_name; 139 u64 ip_l, u64 ip_r)
122 dso_name_r = dso_r->long_name; 140{
123 } else { 141 if (!sym_l || !sym_r)
124 dso_name_l = dso_l->short_name; 142 return cmp_null(sym_l, sym_r);
125 dso_name_r = dso_r->short_name; 143
144 if (sym_l == sym_r)
145 return 0;
146
147 if (sym_l)
148 ip_l = sym_l->start;
149 if (sym_r)
150 ip_r = sym_r->start;
151
152 return (int64_t)(ip_r - ip_l);
153}
154
155static int _hist_entry__dso_snprintf(struct map *map, char *bf,
156 size_t size, unsigned int width)
157{
158 if (map && map->dso) {
159 const char *dso_name = !verbose ? map->dso->short_name :
160 map->dso->long_name;
161 return repsep_snprintf(bf, size, "%-*s", width, dso_name);
126 } 162 }
127 163
128 return strcmp(dso_name_l, dso_name_r); 164 return repsep_snprintf(bf, size, "%-*s", width, "[unknown]");
129} 165}
130 166
131static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf, 167static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf,
132 size_t size, unsigned int width) 168 size_t size, unsigned int width)
133{ 169{
134 if (self->ms.map && self->ms.map->dso) { 170 return _hist_entry__dso_snprintf(self->ms.map, bf, size, width);
135 const char *dso_name = !verbose ? self->ms.map->dso->short_name : 171}
136 self->ms.map->dso->long_name; 172
137 return repsep_snprintf(bf, size, "%-*s", width, dso_name); 173static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
174 u64 ip, char level, char *bf, size_t size,
175 unsigned int width __used)
176{
177 size_t ret = 0;
178
179 if (verbose) {
180 char o = map ? dso__symtab_origin(map->dso) : '!';
181 ret += repsep_snprintf(bf, size, "%-#*llx %c ",
182 BITS_PER_LONG / 4, ip, o);
138 } 183 }
139 184
140 return repsep_snprintf(bf, size, "%-*s", width, "[unknown]"); 185 ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level);
186 if (sym)
187 ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
188 width - ret,
189 sym->name);
190 else {
191 size_t len = BITS_PER_LONG / 4;
192 ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx",
193 len, ip);
194 ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
195 width - ret, "");
196 }
197
198 return ret;
141} 199}
142 200
201
143struct sort_entry sort_dso = { 202struct sort_entry sort_dso = {
144 .se_header = "Shared Object", 203 .se_header = "Shared Object",
145 .se_cmp = sort__dso_cmp, 204 .se_cmp = sort__dso_cmp,
@@ -147,8 +206,14 @@ struct sort_entry sort_dso = {
147 .se_width_idx = HISTC_DSO, 206 .se_width_idx = HISTC_DSO,
148}; 207};
149 208
150/* --sort symbol */ 209static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
210 size_t size, unsigned int width __used)
211{
212 return _hist_entry__sym_snprintf(self->ms.map, self->ms.sym, self->ip,
213 self->level, bf, size, width);
214}
151 215
216/* --sort symbol */
152static int64_t 217static int64_t
153sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) 218sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
154{ 219{
@@ -166,31 +231,7 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
166 ip_l = left->ms.sym->start; 231 ip_l = left->ms.sym->start;
167 ip_r = right->ms.sym->start; 232 ip_r = right->ms.sym->start;
168 233
169 return (int64_t)(ip_r - ip_l); 234 return _sort__sym_cmp(left->ms.sym, right->ms.sym, ip_l, ip_r);
170}
171
172static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
173 size_t size, unsigned int width __used)
174{
175 size_t ret = 0;
176
177 if (verbose) {
178 char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!';
179 ret += repsep_snprintf(bf, size, "%-#*llx %c ",
180 BITS_PER_LONG / 4, self->ip, o);
181 }
182
183 if (!sort_dso.elide)
184 ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", self->level);
185
186 if (self->ms.sym)
187 ret += repsep_snprintf(bf + ret, size - ret, "%s",
188 self->ms.sym->name);
189 else
190 ret += repsep_snprintf(bf + ret, size - ret, "%-#*llx",
191 BITS_PER_LONG / 4, self->ip);
192
193 return ret;
194} 235}
195 236
196struct sort_entry sort_sym = { 237struct sort_entry sort_sym = {
@@ -249,19 +290,155 @@ struct sort_entry sort_cpu = {
249 .se_width_idx = HISTC_CPU, 290 .se_width_idx = HISTC_CPU,
250}; 291};
251 292
293static int64_t
294sort__dso_from_cmp(struct hist_entry *left, struct hist_entry *right)
295{
296 return _sort__dso_cmp(left->branch_info->from.map,
297 right->branch_info->from.map);
298}
299
300static int hist_entry__dso_from_snprintf(struct hist_entry *self, char *bf,
301 size_t size, unsigned int width)
302{
303 return _hist_entry__dso_snprintf(self->branch_info->from.map,
304 bf, size, width);
305}
306
307struct sort_entry sort_dso_from = {
308 .se_header = "Source Shared Object",
309 .se_cmp = sort__dso_from_cmp,
310 .se_snprintf = hist_entry__dso_from_snprintf,
311 .se_width_idx = HISTC_DSO_FROM,
312};
313
314static int64_t
315sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right)
316{
317 return _sort__dso_cmp(left->branch_info->to.map,
318 right->branch_info->to.map);
319}
320
321static int hist_entry__dso_to_snprintf(struct hist_entry *self, char *bf,
322 size_t size, unsigned int width)
323{
324 return _hist_entry__dso_snprintf(self->branch_info->to.map,
325 bf, size, width);
326}
327
328static int64_t
329sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right)
330{
331 struct addr_map_symbol *from_l = &left->branch_info->from;
332 struct addr_map_symbol *from_r = &right->branch_info->from;
333
334 if (!from_l->sym && !from_r->sym)
335 return right->level - left->level;
336
337 return _sort__sym_cmp(from_l->sym, from_r->sym, from_l->addr,
338 from_r->addr);
339}
340
341static int64_t
342sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right)
343{
344 struct addr_map_symbol *to_l = &left->branch_info->to;
345 struct addr_map_symbol *to_r = &right->branch_info->to;
346
347 if (!to_l->sym && !to_r->sym)
348 return right->level - left->level;
349
350 return _sort__sym_cmp(to_l->sym, to_r->sym, to_l->addr, to_r->addr);
351}
352
353static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf,
354 size_t size, unsigned int width __used)
355{
356 struct addr_map_symbol *from = &self->branch_info->from;
357 return _hist_entry__sym_snprintf(from->map, from->sym, from->addr,
358 self->level, bf, size, width);
359
360}
361
362static int hist_entry__sym_to_snprintf(struct hist_entry *self, char *bf,
363 size_t size, unsigned int width __used)
364{
365 struct addr_map_symbol *to = &self->branch_info->to;
366 return _hist_entry__sym_snprintf(to->map, to->sym, to->addr,
367 self->level, bf, size, width);
368
369}
370
371struct sort_entry sort_dso_to = {
372 .se_header = "Target Shared Object",
373 .se_cmp = sort__dso_to_cmp,
374 .se_snprintf = hist_entry__dso_to_snprintf,
375 .se_width_idx = HISTC_DSO_TO,
376};
377
378struct sort_entry sort_sym_from = {
379 .se_header = "Source Symbol",
380 .se_cmp = sort__sym_from_cmp,
381 .se_snprintf = hist_entry__sym_from_snprintf,
382 .se_width_idx = HISTC_SYMBOL_FROM,
383};
384
385struct sort_entry sort_sym_to = {
386 .se_header = "Target Symbol",
387 .se_cmp = sort__sym_to_cmp,
388 .se_snprintf = hist_entry__sym_to_snprintf,
389 .se_width_idx = HISTC_SYMBOL_TO,
390};
391
392static int64_t
393sort__mispredict_cmp(struct hist_entry *left, struct hist_entry *right)
394{
395 const unsigned char mp = left->branch_info->flags.mispred !=
396 right->branch_info->flags.mispred;
397 const unsigned char p = left->branch_info->flags.predicted !=
398 right->branch_info->flags.predicted;
399
400 return mp || p;
401}
402
403static int hist_entry__mispredict_snprintf(struct hist_entry *self, char *bf,
404 size_t size, unsigned int width){
405 static const char *out = "N/A";
406
407 if (self->branch_info->flags.predicted)
408 out = "N";
409 else if (self->branch_info->flags.mispred)
410 out = "Y";
411
412 return repsep_snprintf(bf, size, "%-*s", width, out);
413}
414
415struct sort_entry sort_mispredict = {
416 .se_header = "Branch Mispredicted",
417 .se_cmp = sort__mispredict_cmp,
418 .se_snprintf = hist_entry__mispredict_snprintf,
419 .se_width_idx = HISTC_MISPREDICT,
420};
421
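struct sort_entry is a small per-column vtable: a header string for the UI, a comparator for ordering, a formatter for rendering, and a width slot; adding the branch columns above is just adding more table entries. A minimal sketch of the pattern outside perf (hypothetical item/column types):

  #include <stdio.h>
  #include <stddef.h>

  struct item { const char *comm; int cpu; };

  /* one column = one comparator plus one formatter, like struct sort_entry */
  struct column {
          const char *header;
          int (*cmp)(const struct item *l, const struct item *r);
          int (*print)(const struct item *it, char *bf, size_t size);
  };

  static int cpu_cmp(const struct item *l, const struct item *r)
  {
          return r->cpu - l->cpu;
  }

  static int cpu_print(const struct item *it, char *bf, size_t size)
  {
          return snprintf(bf, size, "%3d", it->cpu);
  }

  static const struct column col_cpu = { "CPU", cpu_cmp, cpu_print };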
252struct sort_dimension { 422struct sort_dimension {
253 const char *name; 423 const char *name;
254 struct sort_entry *entry; 424 struct sort_entry *entry;
255 int taken; 425 int taken;
256}; 426};
257 427
428#define DIM(d, n, func) [d] = { .name = n, .entry = &(func) }
429
258static struct sort_dimension sort_dimensions[] = { 430static struct sort_dimension sort_dimensions[] = {
259 { .name = "pid", .entry = &sort_thread, }, 431 DIM(SORT_PID, "pid", sort_thread),
260 { .name = "comm", .entry = &sort_comm, }, 432 DIM(SORT_COMM, "comm", sort_comm),
261 { .name = "dso", .entry = &sort_dso, }, 433 DIM(SORT_DSO, "dso", sort_dso),
262 { .name = "symbol", .entry = &sort_sym, }, 434 DIM(SORT_DSO_FROM, "dso_from", sort_dso_from),
263 { .name = "parent", .entry = &sort_parent, }, 435 DIM(SORT_DSO_TO, "dso_to", sort_dso_to),
264 { .name = "cpu", .entry = &sort_cpu, }, 436 DIM(SORT_SYM, "symbol", sort_sym),
437	DIM(SORT_SYM_FROM, "symbol_from", sort_sym_from),
438	DIM(SORT_SYM_TO, "symbol_to", sort_sym_to),
439 DIM(SORT_PARENT, "parent", sort_parent),
440 DIM(SORT_CPU, "cpu", sort_cpu),
441 DIM(SORT_MISPREDICT, "mispredict", sort_mispredict),
265}; 442};
266 443
267int sort_dimension__add(const char *tok) 444int sort_dimension__add(const char *tok)
@@ -273,7 +450,6 @@ int sort_dimension__add(const char *tok)
273 450
274 if (strncasecmp(tok, sd->name, strlen(tok))) 451 if (strncasecmp(tok, sd->name, strlen(tok)))
275 continue; 452 continue;
276
277 if (sd->entry == &sort_parent) { 453 if (sd->entry == &sort_parent) {
278 int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); 454 int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
279 if (ret) { 455 if (ret) {
@@ -305,6 +481,16 @@ int sort_dimension__add(const char *tok)
305 sort__first_dimension = SORT_PARENT; 481 sort__first_dimension = SORT_PARENT;
306 else if (!strcmp(sd->name, "cpu")) 482 else if (!strcmp(sd->name, "cpu"))
307 sort__first_dimension = SORT_CPU; 483 sort__first_dimension = SORT_CPU;
484 else if (!strcmp(sd->name, "symbol_from"))
485 sort__first_dimension = SORT_SYM_FROM;
486 else if (!strcmp(sd->name, "symbol_to"))
487 sort__first_dimension = SORT_SYM_TO;
488 else if (!strcmp(sd->name, "dso_from"))
489 sort__first_dimension = SORT_DSO_FROM;
490 else if (!strcmp(sd->name, "dso_to"))
491 sort__first_dimension = SORT_DSO_TO;
492 else if (!strcmp(sd->name, "mispredict"))
493 sort__first_dimension = SORT_MISPREDICT;
308 } 494 }
309 495
310 list_add_tail(&sd->entry->list, &hist_entry__sort_list); 496 list_add_tail(&sd->entry->list, &hist_entry__sort_list);
@@ -312,7 +498,6 @@ int sort_dimension__add(const char *tok)
312 498
313 return 0; 499 return 0;
314 } 500 }
315
316 return -ESRCH; 501 return -ESRCH;
317} 502}
318 503
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 3f67ae395752..472aa5a63a58 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -31,11 +31,16 @@ extern const char *parent_pattern;
31extern const char default_sort_order[]; 31extern const char default_sort_order[];
32extern int sort__need_collapse; 32extern int sort__need_collapse;
33extern int sort__has_parent; 33extern int sort__has_parent;
34extern int sort__branch_mode;
34extern char *field_sep; 35extern char *field_sep;
35extern struct sort_entry sort_comm; 36extern struct sort_entry sort_comm;
36extern struct sort_entry sort_dso; 37extern struct sort_entry sort_dso;
37extern struct sort_entry sort_sym; 38extern struct sort_entry sort_sym;
38extern struct sort_entry sort_parent; 39extern struct sort_entry sort_parent;
40extern struct sort_entry sort_dso_from;
41extern struct sort_entry sort_dso_to;
42extern struct sort_entry sort_sym_from;
43extern struct sort_entry sort_sym_to;
39extern enum sort_type sort__first_dimension; 44extern enum sort_type sort__first_dimension;
40 45
41/** 46/**
@@ -72,6 +77,7 @@ struct hist_entry {
72 struct hist_entry *pair; 77 struct hist_entry *pair;
73 struct rb_root sorted_chain; 78 struct rb_root sorted_chain;
74 }; 79 };
80 struct branch_info *branch_info;
75 struct callchain_root callchain[0]; 81 struct callchain_root callchain[0];
76}; 82};
77 83
@@ -82,6 +88,11 @@ enum sort_type {
82 SORT_SYM, 88 SORT_SYM,
83 SORT_PARENT, 89 SORT_PARENT,
84 SORT_CPU, 90 SORT_CPU,
91 SORT_DSO_FROM,
92 SORT_DSO_TO,
93 SORT_SYM_FROM,
94 SORT_SYM_TO,
95 SORT_MISPREDICT,
85}; 96};
86 97
87/* 98/*
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 0975438c3e72..5dd83c3e2c0c 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1,4 +1,3 @@
1#include <ctype.h>
2#include <dirent.h> 1#include <dirent.h>
3#include <errno.h> 2#include <errno.h>
4#include <libgen.h> 3#include <libgen.h>
@@ -12,6 +11,7 @@
12#include <unistd.h> 11#include <unistd.h>
13#include <inttypes.h> 12#include <inttypes.h>
14#include "build-id.h" 13#include "build-id.h"
14#include "util.h"
15#include "debug.h" 15#include "debug.h"
16#include "symbol.h" 16#include "symbol.h"
17#include "strlist.h" 17#include "strlist.h"
@@ -263,6 +263,28 @@ static size_t symbol__fprintf(struct symbol *sym, FILE *fp)
263 sym->name); 263 sym->name);
264} 264}
265 265
266size_t symbol__fprintf_symname_offs(const struct symbol *sym,
267 const struct addr_location *al, FILE *fp)
268{
269 unsigned long offset;
270 size_t length;
271
272 if (sym && sym->name) {
273 length = fprintf(fp, "%s", sym->name);
274 if (al) {
275 offset = al->addr - sym->start;
276 length += fprintf(fp, "+0x%lx", offset);
277 }
278 return length;
279 } else
280 return fprintf(fp, "[unknown]");
281}
282
283size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp)
284{
285 return symbol__fprintf_symname_offs(sym, NULL, fp);
286}
287
266void dso__set_long_name(struct dso *dso, char *name) 288void dso__set_long_name(struct dso *dso, char *name)
267{ 289{
268 if (name == NULL) 290 if (name == NULL)
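The two helpers added above centralize the "symbol+0xoffset" rendering that the branch-stack output uses for its from/to columns. A minimal sketch of the intended use, assuming al was already resolved (for example by perf_event__preprocess_sample()); print_sample_location() is a hypothetical wrapper:

	#include <stdio.h>
	#include "util/symbol.h"

	/* hypothetical helper: print where an already-resolved sample landed */
	static size_t print_sample_location(struct addr_location *al, FILE *fp)
	{
		/* emits "name+0xoffset", or "[unknown]" when the sample
		 * did not resolve to a symbol */
		return symbol__fprintf_symname_offs(al->sym, al, fp);
	}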
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 123c2e14353e..ac49ef208a5f 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -5,6 +5,7 @@
5#include <stdbool.h> 5#include <stdbool.h>
6#include <stdint.h> 6#include <stdint.h>
7#include "map.h" 7#include "map.h"
8#include "../perf.h"
8#include <linux/list.h> 9#include <linux/list.h>
9#include <linux/rbtree.h> 10#include <linux/rbtree.h>
10#include <stdio.h> 11#include <stdio.h>
@@ -70,6 +71,7 @@ struct symbol_conf {
70 unsigned short priv_size; 71 unsigned short priv_size;
71 unsigned short nr_events; 72 unsigned short nr_events;
72 bool try_vmlinux_path, 73 bool try_vmlinux_path,
74 show_kernel_path,
73 use_modules, 75 use_modules,
74 sort_by_name, 76 sort_by_name,
75 show_nr_samples, 77 show_nr_samples,
@@ -95,7 +97,11 @@ struct symbol_conf {
95 *col_width_list_str; 97 *col_width_list_str;
96 struct strlist *dso_list, 98 struct strlist *dso_list,
97 *comm_list, 99 *comm_list,
98 *sym_list; 100 *sym_list,
101 *dso_from_list,
102 *dso_to_list,
103 *sym_from_list,
104 *sym_to_list;
99 const char *symfs; 105 const char *symfs;
100}; 106};
101 107
@@ -119,6 +125,19 @@ struct map_symbol {
119 bool has_children; 125 bool has_children;
120}; 126};
121 127
128struct addr_map_symbol {
129 struct map *map;
130 struct symbol *sym;
131 u64 addr;
132 u64 al_addr;
133};
134
135struct branch_info {
136 struct addr_map_symbol from;
137 struct addr_map_symbol to;
138 struct branch_flags flags;
139};
140
122struct addr_location { 141struct addr_location {
123 struct thread *thread; 142 struct thread *thread;
124 struct map *map; 143 struct map *map;
@@ -241,6 +260,9 @@ void machines__destroy_guest_kernel_maps(struct rb_root *machines);
241 260
242int symbol__init(void); 261int symbol__init(void);
243void symbol__exit(void); 262void symbol__exit(void);
263size_t symbol__fprintf_symname_offs(const struct symbol *sym,
264 const struct addr_location *al, FILE *fp);
265size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp);
244bool symbol_type__is_a(char symbol_type, enum map_type map_type); 266bool symbol_type__is_a(char symbol_type, enum map_type map_type);
245 267
246size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp); 268size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp);
diff --git a/tools/perf/util/sysfs.c b/tools/perf/util/sysfs.c
new file mode 100644
index 000000000000..48c6902e749f
--- /dev/null
+++ b/tools/perf/util/sysfs.c
@@ -0,0 +1,60 @@
1
2#include "util.h"
3#include "sysfs.h"
4
5static const char * const sysfs_known_mountpoints[] = {
6 "/sys",
7 0,
8};
9
10static int sysfs_found;
11char sysfs_mountpoint[PATH_MAX];
12
13static int sysfs_valid_mountpoint(const char *sysfs)
14{
15 struct statfs st_fs;
16
17 if (statfs(sysfs, &st_fs) < 0)
18 return -ENOENT;
19 else if (st_fs.f_type != (long) SYSFS_MAGIC)
20 return -ENOENT;
21
22 return 0;
23}
24
25const char *sysfs_find_mountpoint(void)
26{
27 const char * const *ptr;
28 char type[100];
29 FILE *fp;
30
31 if (sysfs_found)
32 return (const char *) sysfs_mountpoint;
33
34 ptr = sysfs_known_mountpoints;
35 while (*ptr) {
36 if (sysfs_valid_mountpoint(*ptr) == 0) {
37 sysfs_found = 1;
38 strcpy(sysfs_mountpoint, *ptr);
39 return sysfs_mountpoint;
40 }
41 ptr++;
42 }
43
44 /* give up and parse /proc/mounts */
45 fp = fopen("/proc/mounts", "r");
46 if (fp == NULL)
47 return NULL;
48
49 while (!sysfs_found &&
50 fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n",
51 sysfs_mountpoint, type) == 2) {
52
53 if (strcmp(type, "sysfs") == 0)
54 sysfs_found = 1;
55 }
56
57 fclose(fp);
58
59 return sysfs_found ? sysfs_mountpoint : NULL;
60}
diff --git a/tools/perf/util/sysfs.h b/tools/perf/util/sysfs.h
new file mode 100644
index 000000000000..a813b7203938
--- /dev/null
+++ b/tools/perf/util/sysfs.h
@@ -0,0 +1,6 @@
1#ifndef __SYSFS_H__
2#define __SYSFS_H__
3
4const char *sysfs_find_mountpoint(void);
5
6#endif /* __SYSFS_H__ */
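sysfs_find_mountpoint() checks the well-known /sys location first and only falls back to scanning /proc/mounts, caching whatever it finds for later calls. A minimal sketch of a consumer; print_pmu_dir() and the event_source path are illustrative only:

	#include <limits.h>
	#include <stdio.h>
	#include "util/sysfs.h"

	/* hypothetical helper; any sysfs-relative path works the same way */
	static int print_pmu_dir(void)
	{
		char path[PATH_MAX];
		const char *mnt = sysfs_find_mountpoint();	/* result is cached */

		if (mnt == NULL)
			return -1;	/* nothing in the known list or /proc/mounts */

		snprintf(path, sizeof(path), "%s/bus/event_source/devices", mnt);
		printf("PMUs live under %s\n", path);
		return 0;
	}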
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index a5df131b77c3..84d9bd782004 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -1,6 +1,13 @@
1#include <dirent.h> 1#include <dirent.h>
2#include <limits.h>
3#include <stdbool.h>
2#include <stdlib.h> 4#include <stdlib.h>
3#include <stdio.h> 5#include <stdio.h>
6#include <sys/types.h>
7#include <sys/stat.h>
8#include <unistd.h>
9#include "strlist.h"
10#include <string.h>
4#include "thread_map.h" 11#include "thread_map.h"
5 12
6/* Skip "." and ".." directories */ 13/* Skip "." and ".." directories */
@@ -23,7 +30,7 @@ struct thread_map *thread_map__new_by_pid(pid_t pid)
23 sprintf(name, "/proc/%d/task", pid); 30 sprintf(name, "/proc/%d/task", pid);
24 items = scandir(name, &namelist, filter, NULL); 31 items = scandir(name, &namelist, filter, NULL);
25 if (items <= 0) 32 if (items <= 0)
26 return NULL; 33 return NULL;
27 34
28 threads = malloc(sizeof(*threads) + sizeof(pid_t) * items); 35 threads = malloc(sizeof(*threads) + sizeof(pid_t) * items);
29 if (threads != NULL) { 36 if (threads != NULL) {
@@ -51,14 +58,240 @@ struct thread_map *thread_map__new_by_tid(pid_t tid)
51 return threads; 58 return threads;
52} 59}
53 60
54struct thread_map *thread_map__new(pid_t pid, pid_t tid) 61struct thread_map *thread_map__new_by_uid(uid_t uid)
62{
63 DIR *proc;
64 int max_threads = 32, items, i;
65 char path[256];
66 struct dirent dirent, *next, **namelist = NULL;
67 struct thread_map *threads = malloc(sizeof(*threads) +
68 max_threads * sizeof(pid_t));
69 if (threads == NULL)
70 goto out;
71
72 proc = opendir("/proc");
73 if (proc == NULL)
74 goto out_free_threads;
75
76 threads->nr = 0;
77
78 while (!readdir_r(proc, &dirent, &next) && next) {
79 char *end;
80 bool grow = false;
81 struct stat st;
82 pid_t pid = strtol(dirent.d_name, &end, 10);
83
84 if (*end) /* only interested in proper numerical dirents */
85 continue;
86
87 snprintf(path, sizeof(path), "/proc/%s", dirent.d_name);
88
89 if (stat(path, &st) != 0)
90 continue;
91
92 if (st.st_uid != uid)
93 continue;
94
95 snprintf(path, sizeof(path), "/proc/%d/task", pid);
96 items = scandir(path, &namelist, filter, NULL);
97 if (items <= 0)
98 goto out_free_closedir;
99
100 while (threads->nr + items >= max_threads) {
101 max_threads *= 2;
102 grow = true;
103 }
104
105 if (grow) {
106 struct thread_map *tmp;
107
108 tmp = realloc(threads, (sizeof(*threads) +
109 max_threads * sizeof(pid_t)));
110 if (tmp == NULL)
111 goto out_free_namelist;
112
113 threads = tmp;
114 }
115
116 for (i = 0; i < items; i++)
117 threads->map[threads->nr + i] = atoi(namelist[i]->d_name);
118
119 for (i = 0; i < items; i++)
120 free(namelist[i]);
121 free(namelist);
122
123 threads->nr += items;
124 }
125
126out_closedir:
127 closedir(proc);
128out:
129 return threads;
130
131out_free_threads:
132 free(threads);
133 return NULL;
134
135out_free_namelist:
136 for (i = 0; i < items; i++)
137 free(namelist[i]);
138 free(namelist);
139
140out_free_closedir:
141 free(threads);
142 threads = NULL;
143 goto out_closedir;
144}
145
146struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid)
55{ 147{
56 if (pid != -1) 148 if (pid != -1)
57 return thread_map__new_by_pid(pid); 149 return thread_map__new_by_pid(pid);
150
151 if (tid == -1 && uid != UINT_MAX)
152 return thread_map__new_by_uid(uid);
153
58 return thread_map__new_by_tid(tid); 154 return thread_map__new_by_tid(tid);
59} 155}
60 156
157static struct thread_map *thread_map__new_by_pid_str(const char *pid_str)
158{
159 struct thread_map *threads = NULL, *nt;
160 char name[256];
161 int items, total_tasks = 0;
162 struct dirent **namelist = NULL;
163 int i, j = 0;
164 pid_t pid, prev_pid = INT_MAX;
165 char *end_ptr;
166 struct str_node *pos;
167 struct strlist *slist = strlist__new(false, pid_str);
168
169 if (!slist)
170 return NULL;
171
172 strlist__for_each(pos, slist) {
173 pid = strtol(pos->s, &end_ptr, 10);
174
175 if (pid == INT_MIN || pid == INT_MAX ||
176 (*end_ptr != '\0' && *end_ptr != ','))
177 goto out_free_threads;
178
179 if (pid == prev_pid)
180 continue;
181
182 sprintf(name, "/proc/%d/task", pid);
183 items = scandir(name, &namelist, filter, NULL);
184 if (items <= 0)
185 goto out_free_threads;
186
187 total_tasks += items;
188 nt = realloc(threads, (sizeof(*threads) +
189 sizeof(pid_t) * total_tasks));
190 if (nt == NULL)
191 goto out_free_threads;
192
193 threads = nt;
194
195 if (threads) {
196 for (i = 0; i < items; i++)
197 threads->map[j++] = atoi(namelist[i]->d_name);
198 threads->nr = total_tasks;
199 }
200
201 for (i = 0; i < items; i++)
202 free(namelist[i]);
203 free(namelist);
204
205 if (!threads)
206 break;
207 }
208
209out:
210 strlist__delete(slist);
211 return threads;
212
213out_free_threads:
214 free(threads);
215 threads = NULL;
216 goto out;
217}
218
219static struct thread_map *thread_map__new_by_tid_str(const char *tid_str)
220{
221 struct thread_map *threads = NULL, *nt;
222 int ntasks = 0;
223 pid_t tid, prev_tid = INT_MAX;
224 char *end_ptr;
225 struct str_node *pos;
226 struct strlist *slist;
227
228 /* perf-stat expects threads to be generated even if tid not given */
229 if (!tid_str) {
230 threads = malloc(sizeof(*threads) + sizeof(pid_t));
231 if (threads != NULL) {
232 threads->map[0] = -1;
233 threads->nr = 1;
234 }
235 return threads;
236 }
237
238 slist = strlist__new(false, tid_str);
239 if (!slist)
240 return NULL;
241
242 strlist__for_each(pos, slist) {
243 tid = strtol(pos->s, &end_ptr, 10);
244
245 if (tid == INT_MIN || tid == INT_MAX ||
246 (*end_ptr != '\0' && *end_ptr != ','))
247 goto out_free_threads;
248
249 if (tid == prev_tid)
250 continue;
251
252 ntasks++;
253 nt = realloc(threads, sizeof(*threads) + sizeof(pid_t) * ntasks);
254
255 if (nt == NULL)
256 goto out_free_threads;
257
258 threads = nt;
259 threads->map[ntasks - 1] = tid;
260 threads->nr = ntasks;
261 }
262out:
263 return threads;
264
265out_free_threads:
266 free(threads);
267 threads = NULL;
268 goto out;
269}
270
271struct thread_map *thread_map__new_str(const char *pid, const char *tid,
272 uid_t uid)
273{
274 if (pid)
275 return thread_map__new_by_pid_str(pid);
276
277 if (!tid && uid != UINT_MAX)
278 return thread_map__new_by_uid(uid);
279
280 return thread_map__new_by_tid_str(tid);
281}
282
61void thread_map__delete(struct thread_map *threads) 283void thread_map__delete(struct thread_map *threads)
62{ 284{
63 free(threads); 285 free(threads);
64} 286}
287
288size_t thread_map__fprintf(struct thread_map *threads, FILE *fp)
289{
290 int i;
291 size_t printed = fprintf(fp, "%d thread%s: ",
292 threads->nr, threads->nr > 1 ? "s" : "");
293 for (i = 0; i < threads->nr; ++i)
294 printed += fprintf(fp, "%s%d", i ? ", " : "", threads->map[i]);
295
296 return printed + fprintf(fp, "\n");
297}
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
index 3cb907311409..7da80f14418b 100644
--- a/tools/perf/util/thread_map.h
+++ b/tools/perf/util/thread_map.h
@@ -2,6 +2,7 @@
2#define __PERF_THREAD_MAP_H 2#define __PERF_THREAD_MAP_H
3 3
4#include <sys/types.h> 4#include <sys/types.h>
5#include <stdio.h>
5 6
6struct thread_map { 7struct thread_map {
7 int nr; 8 int nr;
@@ -10,6 +11,14 @@ struct thread_map {
10 11
11struct thread_map *thread_map__new_by_pid(pid_t pid); 12struct thread_map *thread_map__new_by_pid(pid_t pid);
12struct thread_map *thread_map__new_by_tid(pid_t tid); 13struct thread_map *thread_map__new_by_tid(pid_t tid);
13struct thread_map *thread_map__new(pid_t pid, pid_t tid); 14struct thread_map *thread_map__new_by_uid(uid_t uid);
15struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid);
16
17struct thread_map *thread_map__new_str(const char *pid,
18 const char *tid, uid_t uid);
19
14void thread_map__delete(struct thread_map *threads); 20void thread_map__delete(struct thread_map *threads);
21
22size_t thread_map__fprintf(struct thread_map *threads, FILE *fp);
23
15#endif /* __PERF_THREAD_MAP_H */ 24#endif /* __PERF_THREAD_MAP_H */
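thread_map__new_str() is the single entry point behind the new comma-separated -p/-t lists and the --uid filter: a pid string wins over everything, a uid (UINT_MAX means unset) wins over a tid string. A sketch with a hypothetical report_threads() helper:

	#include <limits.h>
	#include <stdio.h>
	#include "util/thread_map.h"

	/* hypothetical helper for a "-p 21483,21485" style argument */
	static int report_threads(const char *pid_str)
	{
		/* no tid list, no uid filter (UINT_MAX means "unset") */
		struct thread_map *threads = thread_map__new_str(pid_str, NULL, UINT_MAX);

		if (threads == NULL)
			return -1;

		thread_map__fprintf(threads, stdout);	/* e.g. "2 threads: 21483, 21485" */
		thread_map__delete(threads);
		return 0;
	}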
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
index 500471dffa4f..09fe579ccafb 100644
--- a/tools/perf/util/top.c
+++ b/tools/perf/util/top.c
@@ -69,12 +69,15 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
69 69
70 ret += SNPRINTF(bf + ret, size - ret, "], "); 70 ret += SNPRINTF(bf + ret, size - ret, "], ");
71 71
72 if (top->target_pid != -1) 72 if (top->target_pid)
73 ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %d", 73 ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %s",
74 top->target_pid); 74 top->target_pid);
75 else if (top->target_tid != -1) 75 else if (top->target_tid)
76 ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %d", 76 ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %s",
77 top->target_tid); 77 top->target_tid);
78 else if (top->uid_str != NULL)
79 ret += SNPRINTF(bf + ret, size - ret, " (uid: %s",
80 top->uid_str);
78 else 81 else
79 ret += SNPRINTF(bf + ret, size - ret, " (all"); 82 ret += SNPRINTF(bf + ret, size - ret, " (all");
80 83
@@ -82,7 +85,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
82 ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)", 85 ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)",
83 top->evlist->cpus->nr > 1 ? "s" : "", top->cpu_list); 86 top->evlist->cpus->nr > 1 ? "s" : "", top->cpu_list);
84 else { 87 else {
85 if (top->target_tid != -1) 88 if (top->target_tid)
86 ret += SNPRINTF(bf + ret, size - ret, ")"); 89 ret += SNPRINTF(bf + ret, size - ret, ")");
87 else 90 else
88 ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)", 91 ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)",
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index f2eab81435ae..ce61cb2d1acf 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -23,7 +23,8 @@ struct perf_top {
23 u64 guest_us_samples, guest_kernel_samples; 23 u64 guest_us_samples, guest_kernel_samples;
24 int print_entries, count_filter, delay_secs; 24 int print_entries, count_filter, delay_secs;
25 int freq; 25 int freq;
26 pid_t target_pid, target_tid; 26 const char *target_pid, *target_tid;
27 uid_t uid;
27 bool hide_kernel_symbols, hide_user_symbols, zero; 28 bool hide_kernel_symbols, hide_user_symbols, zero;
28 bool system_wide; 29 bool system_wide;
29 bool use_tui, use_stdio; 30 bool use_tui, use_stdio;
@@ -33,7 +34,7 @@ struct perf_top {
33 bool vmlinux_warned; 34 bool vmlinux_warned;
34 bool inherit; 35 bool inherit;
35 bool group; 36 bool group;
36 bool sample_id_all_avail; 37 bool sample_id_all_missing;
37 bool exclude_guest_missing; 38 bool exclude_guest_missing;
38 bool dump_symtab; 39 bool dump_symtab;
39 const char *cpu_list; 40 const char *cpu_list;
@@ -46,6 +47,7 @@ struct perf_top {
46 int realtime_prio; 47 int realtime_prio;
47 int sym_pcnt_filter; 48 int sym_pcnt_filter;
48 const char *sym_filter; 49 const char *sym_filter;
50 const char *uid_str;
49}; 51};
50 52
51size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size); 53size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size);
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 1a8d4dc4f386..a4088ced1e64 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -25,7 +25,6 @@
25#include <stdio.h> 25#include <stdio.h>
26#include <stdlib.h> 26#include <stdlib.h>
27#include <string.h> 27#include <string.h>
28#include <ctype.h>
29#include <errno.h> 28#include <errno.h>
30 29
31#include "../perf.h" 30#include "../perf.h"
@@ -1424,6 +1423,11 @@ static long long arg_num_eval(struct print_arg *arg)
1424 die("unknown op '%s'", arg->op.op); 1423 die("unknown op '%s'", arg->op.op);
1425 } 1424 }
1426 break; 1425 break;
1426 case '+':
1427 left = arg_num_eval(arg->op.left);
1428 right = arg_num_eval(arg->op.right);
1429 val = left + right;
1430 break;
1427 default: 1431 default:
1428 die("unknown op '%s'", arg->op.op); 1432 die("unknown op '%s'", arg->op.op);
1429 } 1433 }
@@ -1484,6 +1488,13 @@ process_fields(struct event *event, struct print_flag_sym **list, char **tok)
1484 1488
1485 free_token(token); 1489 free_token(token);
1486 type = process_arg(event, arg, &token); 1490 type = process_arg(event, arg, &token);
1491
1492 if (type == EVENT_OP)
1493 type = process_op(event, arg, &token);
1494
1495 if (type == EVENT_ERROR)
1496 goto out_free;
1497
1487 if (test_type_token(type, token, EVENT_DELIM, ",")) 1498 if (test_type_token(type, token, EVENT_DELIM, ","))
1488 goto out_free; 1499 goto out_free;
1489 1500
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index f55cc3a765a1..b9592e0de8d7 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -33,7 +33,6 @@
33#include <pthread.h> 33#include <pthread.h>
34#include <fcntl.h> 34#include <fcntl.h>
35#include <unistd.h> 35#include <unistd.h>
36#include <ctype.h>
37#include <errno.h> 36#include <errno.h>
38 37
39#include "../perf.h" 38#include "../perf.h"
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
index a3fdf55f317b..18ae6c1831d3 100644
--- a/tools/perf/util/trace-event-scripting.c
+++ b/tools/perf/util/trace-event-scripting.c
@@ -22,7 +22,6 @@
22#include <stdio.h> 22#include <stdio.h>
23#include <stdlib.h> 23#include <stdlib.h>
24#include <string.h> 24#include <string.h>
25#include <ctype.h>
26#include <errno.h> 25#include <errno.h>
27 26
28#include "../perf.h" 27#include "../perf.h"
diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c
index 295a9c93f945..57a4c6ef3fd2 100644
--- a/tools/perf/util/ui/browsers/annotate.c
+++ b/tools/perf/util/ui/browsers/annotate.c
@@ -69,14 +69,17 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro
69 if (!self->navkeypressed) 69 if (!self->navkeypressed)
70 width += 1; 70 width += 1;
71 71
72 if (!ab->hide_src_code && ol->offset != -1)
73 if (!current_entry || (self->use_navkeypressed &&
74 !self->navkeypressed))
75 ui_browser__set_color(self, HE_COLORSET_CODE);
76
72 if (!*ol->line) 77 if (!*ol->line)
73 slsmg_write_nstring(" ", width - 18); 78 slsmg_write_nstring(" ", width - 18);
74 else 79 else
75 slsmg_write_nstring(ol->line, width - 18); 80 slsmg_write_nstring(ol->line, width - 18);
76 81
77 if (!current_entry) 82 if (current_entry)
78 ui_browser__set_color(self, HE_COLORSET_CODE);
79 else
80 ab->selection = ol; 83 ab->selection = ol;
81} 84}
82 85
@@ -230,9 +233,9 @@ static int annotate_browser__run(struct annotate_browser *self, int evidx,
230 struct rb_node *nd = NULL; 233 struct rb_node *nd = NULL;
231 struct map_symbol *ms = self->b.priv; 234 struct map_symbol *ms = self->b.priv;
232 struct symbol *sym = ms->sym; 235 struct symbol *sym = ms->sym;
233 const char *help = "<-, ESC: exit, TAB/shift+TAB: cycle hottest lines, " 236 const char *help = "<-/ESC: Exit, TAB/shift+TAB: Cycle hot lines, "
234 "H: Hottest, -> Line action, S -> Toggle source " 237 "H: Go to hottest line, ->/ENTER: Line action, "
235 "code view"; 238 "S: Toggle source code view";
236 int key; 239 int key;
237 240
238 if (ui_browser__show(&self->b, sym->name, help) < 0) 241 if (ui_browser__show(&self->b, sym->name, help) < 0)
@@ -284,9 +287,11 @@ static int annotate_browser__run(struct annotate_browser *self, int evidx,
284 nd = self->curr_hot; 287 nd = self->curr_hot;
285 break; 288 break;
286 case 'H': 289 case 'H':
290 case 'h':
287 nd = self->curr_hot; 291 nd = self->curr_hot;
288 break; 292 break;
289 case 'S': 293 case 'S':
294 case 's':
290 if (annotate_browser__toggle_source(self)) 295 if (annotate_browser__toggle_source(self))
291 ui_helpline__puts(help); 296 ui_helpline__puts(help);
292 continue; 297 continue;
@@ -338,6 +343,7 @@ static int annotate_browser__run(struct annotate_browser *self, int evidx,
338 pthread_mutex_unlock(&notes->lock); 343 pthread_mutex_unlock(&notes->lock);
339 symbol__tui_annotate(target, ms->map, evidx, 344 symbol__tui_annotate(target, ms->map, evidx,
340 timer, arg, delay_secs); 345 timer, arg, delay_secs);
346 ui_browser__show_title(&self->b, sym->name);
341 } 347 }
342 continue; 348 continue;
343 case K_LEFT: 349 case K_LEFT:
diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c
index bb9197c9c4a4..fa530fcc764a 100644
--- a/tools/perf/util/ui/browsers/hists.c
+++ b/tools/perf/util/ui/browsers/hists.c
@@ -805,8 +805,11 @@ static struct hist_browser *hist_browser__new(struct hists *hists)
805 self->hists = hists; 805 self->hists = hists;
806 self->b.refresh = hist_browser__refresh; 806 self->b.refresh = hist_browser__refresh;
807 self->b.seek = ui_browser__hists_seek; 807 self->b.seek = ui_browser__hists_seek;
808 self->b.use_navkeypressed = true, 808 self->b.use_navkeypressed = true;
809 self->has_symbols = sort_sym.list.next != NULL; 809 if (sort__branch_mode == 1)
810 self->has_symbols = sort_sym_from.list.next != NULL;
811 else
812 self->has_symbols = sort_sym.list.next != NULL;
810 } 813 }
811 814
812 return self; 815 return self;
@@ -839,6 +842,9 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size,
839 nr_events = convert_unit(nr_events, &unit); 842 nr_events = convert_unit(nr_events, &unit);
840 printed = scnprintf(bf, size, "Events: %lu%c %s", nr_events, unit, ev_name); 843 printed = scnprintf(bf, size, "Events: %lu%c %s", nr_events, unit, ev_name);
841 844
845 if (self->uid_filter_str)
846 printed += snprintf(bf + printed, size - printed,
847 ", UID: %s", self->uid_filter_str);
842 if (thread) 848 if (thread)
843 printed += scnprintf(bf + printed, size - printed, 849 printed += scnprintf(bf + printed, size - printed,
844 ", Thread: %s(%d)", 850 ", Thread: %s(%d)",
@@ -850,6 +856,16 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size,
850 return printed; 856 return printed;
851} 857}
852 858
859static inline void free_popup_options(char **options, int n)
860{
861 int i;
862
863 for (i = 0; i < n; ++i) {
864 free(options[i]);
865 options[i] = NULL;
866 }
867}
868
853static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, 869static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
854 const char *helpline, const char *ev_name, 870 const char *helpline, const char *ev_name,
855 bool left_exits, 871 bool left_exits,
@@ -858,7 +874,10 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
858{ 874{
859 struct hists *self = &evsel->hists; 875 struct hists *self = &evsel->hists;
860 struct hist_browser *browser = hist_browser__new(self); 876 struct hist_browser *browser = hist_browser__new(self);
877 struct branch_info *bi;
861 struct pstack *fstack; 878 struct pstack *fstack;
879 char *options[16];
880 int nr_options = 0;
862 int key = -1; 881 int key = -1;
863 882
864 if (browser == NULL) 883 if (browser == NULL)
@@ -870,13 +889,16 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
870 889
871 ui_helpline__push(helpline); 890 ui_helpline__push(helpline);
872 891
892 memset(options, 0, sizeof(options));
893
873 while (1) { 894 while (1) {
874 const struct thread *thread = NULL; 895 const struct thread *thread = NULL;
875 const struct dso *dso = NULL; 896 const struct dso *dso = NULL;
876 char *options[16]; 897 int choice = 0,
877 int nr_options = 0, choice = 0, i,
878 annotate = -2, zoom_dso = -2, zoom_thread = -2, 898 annotate = -2, zoom_dso = -2, zoom_thread = -2,
879 browse_map = -2; 899 annotate_f = -2, annotate_t = -2, browse_map = -2;
900
901 nr_options = 0;
880 902
881 key = hist_browser__run(browser, ev_name, timer, arg, delay_secs); 903 key = hist_browser__run(browser, ev_name, timer, arg, delay_secs);
882 904
@@ -884,7 +906,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
884 thread = hist_browser__selected_thread(browser); 906 thread = hist_browser__selected_thread(browser);
885 dso = browser->selection->map ? browser->selection->map->dso : NULL; 907 dso = browser->selection->map ? browser->selection->map->dso : NULL;
886 } 908 }
887
888 switch (key) { 909 switch (key) {
889 case K_TAB: 910 case K_TAB:
890 case K_UNTAB: 911 case K_UNTAB:
@@ -899,7 +920,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
899 if (!browser->has_symbols) { 920 if (!browser->has_symbols) {
900 ui_browser__warning(&browser->b, delay_secs * 2, 921 ui_browser__warning(&browser->b, delay_secs * 2,
901 "Annotation is only available for symbolic views, " 922 "Annotation is only available for symbolic views, "
902 "include \"sym\" in --sort to use it."); 923 "include \"sym*\" in --sort to use it.");
903 continue; 924 continue;
904 } 925 }
905 926
@@ -969,12 +990,34 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
969 if (!browser->has_symbols) 990 if (!browser->has_symbols)
970 goto add_exit_option; 991 goto add_exit_option;
971 992
972 if (browser->selection != NULL && 993 if (sort__branch_mode == 1) {
973 browser->selection->sym != NULL && 994 bi = browser->he_selection->branch_info;
974 !browser->selection->map->dso->annotate_warned && 995 if (browser->selection != NULL &&
975 asprintf(&options[nr_options], "Annotate %s", 996 bi &&
976 browser->selection->sym->name) > 0) 997 bi->from.sym != NULL &&
977 annotate = nr_options++; 998 !bi->from.map->dso->annotate_warned &&
999 asprintf(&options[nr_options], "Annotate %s",
1000 bi->from.sym->name) > 0)
1001 annotate_f = nr_options++;
1002
1003 if (browser->selection != NULL &&
1004 bi &&
1005 bi->to.sym != NULL &&
1006 !bi->to.map->dso->annotate_warned &&
1007 (bi->to.sym != bi->from.sym ||
1008 bi->to.map->dso != bi->from.map->dso) &&
1009 asprintf(&options[nr_options], "Annotate %s",
1010 bi->to.sym->name) > 0)
1011 annotate_t = nr_options++;
1012 } else {
1013
1014 if (browser->selection != NULL &&
1015 browser->selection->sym != NULL &&
1016 !browser->selection->map->dso->annotate_warned &&
1017 asprintf(&options[nr_options], "Annotate %s",
1018 browser->selection->sym->name) > 0)
1019 annotate = nr_options++;
1020 }
978 1021
979 if (thread != NULL && 1022 if (thread != NULL &&
980 asprintf(&options[nr_options], "Zoom %s %s(%d) thread", 1023 asprintf(&options[nr_options], "Zoom %s %s(%d) thread",
@@ -995,25 +1038,39 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
995 browse_map = nr_options++; 1038 browse_map = nr_options++;
996add_exit_option: 1039add_exit_option:
997 options[nr_options++] = (char *)"Exit"; 1040 options[nr_options++] = (char *)"Exit";
998 1041retry_popup_menu:
999 choice = ui__popup_menu(nr_options, options); 1042 choice = ui__popup_menu(nr_options, options);
1000 1043
1001 for (i = 0; i < nr_options - 1; ++i)
1002 free(options[i]);
1003
1004 if (choice == nr_options - 1) 1044 if (choice == nr_options - 1)
1005 break; 1045 break;
1006 1046
1007 if (choice == -1) 1047 if (choice == -1) {
1048 free_popup_options(options, nr_options - 1);
1008 continue; 1049 continue;
1050 }
1009 1051
1010 if (choice == annotate) { 1052 if (choice == annotate || choice == annotate_t || choice == annotate_f) {
1011 struct hist_entry *he; 1053 struct hist_entry *he;
1012 int err; 1054 int err;
1013do_annotate: 1055do_annotate:
1014 he = hist_browser__selected_entry(browser); 1056 he = hist_browser__selected_entry(browser);
1015 if (he == NULL) 1057 if (he == NULL)
1016 continue; 1058 continue;
1059
1060 /*
1061 * we stash the branch_info symbol + map into the
1062 * ms so we don't have to rewrite all the annotation
1063 * code to use branch_info.
1064 * in branch mode, the ms struct is not used
1065 */
1066 if (choice == annotate_f) {
1067 he->ms.sym = he->branch_info->from.sym;
1068 he->ms.map = he->branch_info->from.map;
1069 } else if (choice == annotate_t) {
1070 he->ms.sym = he->branch_info->to.sym;
1071 he->ms.map = he->branch_info->to.map;
1072 }
1073
1017 /* 1074 /*
1018 * Don't let this be freed, say, by hists__decay_entry. 1075 * Don't let this be freed, say, by hists__decay_entry.
1019 */ 1076 */
@@ -1021,9 +1078,18 @@ do_annotate:
1021 err = hist_entry__tui_annotate(he, evsel->idx, 1078 err = hist_entry__tui_annotate(he, evsel->idx,
1022 timer, arg, delay_secs); 1079 timer, arg, delay_secs);
1023 he->used = false; 1080 he->used = false;
1081 /*
1082 * offer the option to annotate the other branch source or
1083 * target (if it exists) when returning from annotate
1084 */
1085 if ((err == 'q' || err == CTRL('c'))
1086 && annotate_t != -2 && annotate_f != -2)
1087 goto retry_popup_menu;
1088
1024 ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries); 1089 ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries);
1025 if (err) 1090 if (err)
1026 ui_browser__handle_resize(&browser->b); 1091 ui_browser__handle_resize(&browser->b);
1092
1027 } else if (choice == browse_map) 1093 } else if (choice == browse_map)
1028 map__browse(browser->selection->map); 1094 map__browse(browser->selection->map);
1029 else if (choice == zoom_dso) { 1095 else if (choice == zoom_dso) {
@@ -1069,6 +1135,7 @@ out_free_stack:
1069 pstack__delete(fstack); 1135 pstack__delete(fstack);
1070out: 1136out:
1071 hist_browser__delete(browser); 1137 hist_browser__delete(browser);
1138 free_popup_options(options, nr_options - 1);
1072 return key; 1139 return key;
1073} 1140}
1074 1141
diff --git a/tools/perf/util/ui/browsers/map.c b/tools/perf/util/ui/browsers/map.c
index 6905bcc8be2d..eca6575abfd0 100644
--- a/tools/perf/util/ui/browsers/map.c
+++ b/tools/perf/util/ui/browsers/map.c
@@ -3,9 +3,9 @@
3#include <newt.h> 3#include <newt.h>
4#include <inttypes.h> 4#include <inttypes.h>
5#include <sys/ttydefaults.h> 5#include <sys/ttydefaults.h>
6#include <ctype.h>
7#include <string.h> 6#include <string.h>
8#include <linux/bitops.h> 7#include <linux/bitops.h>
8#include "../../util.h"
9#include "../../debug.h" 9#include "../../debug.h"
10#include "../../symbol.h" 10#include "../../symbol.h"
11#include "../browser.h" 11#include "../browser.h"
diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c
index d76d1c0ff98f..52bb07c6442a 100644
--- a/tools/perf/util/usage.c
+++ b/tools/perf/util/usage.c
@@ -7,6 +7,7 @@
7 * Copyright (C) Linus Torvalds, 2005 7 * Copyright (C) Linus Torvalds, 2005
8 */ 8 */
9#include "util.h" 9#include "util.h"
10#include "debug.h"
10 11
11static void report(const char *prefix, const char *err, va_list params) 12static void report(const char *prefix, const char *err, va_list params)
12{ 13{
@@ -81,3 +82,41 @@ void warning(const char *warn, ...)
81 warn_routine(warn, params); 82 warn_routine(warn, params);
82 va_end(params); 83 va_end(params);
83} 84}
85
86uid_t parse_target_uid(const char *str, const char *tid, const char *pid)
87{
88 struct passwd pwd, *result;
89 char buf[1024];
90
91 if (str == NULL)
92 return UINT_MAX;
93
94 /* UID and PID are mutually exclusive */
95 if (tid || pid) {
96 ui__warning("PID/TID switch overriding UID\n");
97 sleep(1);
98 return UINT_MAX;
99 }
100
101 getpwnam_r(str, &pwd, buf, sizeof(buf), &result);
102
103 if (result == NULL) {
104 char *endptr;
105 int uid = strtol(str, &endptr, 10);
106
107 if (*endptr != '\0') {
108 ui__error("Invalid user %s\n", str);
109 return UINT_MAX - 1;
110 }
111
112 getpwuid_r(uid, &pwd, buf, sizeof(buf), &result);
113
114 if (result == NULL) {
115 ui__error("Problems obtaining information for user %s\n",
116 str);
117 return UINT_MAX - 1;
118 }
119 }
120
121 return result->pw_uid;
122}
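parse_target_uid() accepts either a user name or a numeric id, treats --uid as mutually exclusive with -p/-t, and distinguishes "not given" (UINT_MAX) from "lookup failed" (UINT_MAX - 1). A sketch of the glue code it was written for; resolve_targets() is a hypothetical helper:

	#include <limits.h>
	#include "util/thread_map.h"
	#include "util/util.h"

	/* hypothetical glue: the three strings come straight from option parsing */
	static struct thread_map *resolve_targets(const char *uid_str,
						  const char *pid_str,
						  const char *tid_str)
	{
		uid_t uid = parse_target_uid(uid_str, tid_str, pid_str);

		if (uid == UINT_MAX - 1)	/* name/id did not resolve */
			return NULL;

		return thread_map__new_str(pid_str, tid_str, uid);
	}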
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index fb25d1329218..8109a907841e 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -14,6 +14,8 @@ void event_attr_init(struct perf_event_attr *attr)
14 attr->exclude_host = 1; 14 attr->exclude_host = 1;
15 if (!perf_guest) 15 if (!perf_guest)
16 attr->exclude_guest = 1; 16 attr->exclude_guest = 1;
17 /* to capture ABI version */
18 attr->size = sizeof(*attr);
17} 19}
18 20
19int mkdir_p(char *path, mode_t mode) 21int mkdir_p(char *path, mode_t mode)
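Recording sizeof(*attr) in attr->size tells the kernel which revision of the perf_event_attr ABI the tool was built against, so mismatched kernels can apply the right compatibility rules instead of guessing. A sketch of the usual open path; open_cycles_counter() is a hypothetical helper and sys_perf_event_open() is perf's own syscall wrapper from perf.h:

	#include <linux/perf_event.h>
	#include "perf.h"
	#include "util/util.h"

	/* hypothetical helper: open a cycles counter on one pid/cpu */
	static int open_cycles_counter(pid_t pid, int cpu)
	{
		struct perf_event_attr attr = {
			.type	= PERF_TYPE_HARDWARE,
			.config	= PERF_COUNT_HW_CPU_CYCLES,
		};

		/* sets the exclude_guest/exclude_host defaults and attr.size */
		event_attr_init(&attr);

		return sys_perf_event_open(&attr, pid, cpu, -1, 0);
	}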
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index ecf9898169c8..0f99f394d8e0 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -199,6 +199,8 @@ static inline int has_extension(const char *filename, const char *ext)
199#undef isalpha 199#undef isalpha
200#undef isprint 200#undef isprint
201#undef isalnum 201#undef isalnum
202#undef islower
203#undef isupper
202#undef tolower 204#undef tolower
203#undef toupper 205#undef toupper
204 206
@@ -219,6 +221,8 @@ extern unsigned char sane_ctype[256];
219#define isalpha(x) sane_istest(x,GIT_ALPHA) 221#define isalpha(x) sane_istest(x,GIT_ALPHA)
220#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) 222#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
221#define isprint(x) sane_istest(x,GIT_PRINT) 223#define isprint(x) sane_istest(x,GIT_PRINT)
224#define islower(x) (sane_istest(x,GIT_ALPHA) && ((x) & 0x20))
225#define isupper(x) (sane_istest(x,GIT_ALPHA) && !((x) & 0x20))
222#define tolower(x) sane_case((unsigned char)(x), 0x20) 226#define tolower(x) sane_case((unsigned char)(x), 0x20)
223#define toupper(x) sane_case((unsigned char)(x), 0) 227#define toupper(x) sane_case((unsigned char)(x), 0)
224 228
@@ -245,6 +249,8 @@ struct perf_event_attr;
245 249
246void event_attr_init(struct perf_event_attr *attr); 250void event_attr_init(struct perf_event_attr *attr);
247 251
252uid_t parse_target_uid(const char *str, const char *tid, const char *pid);
253
248#define _STR(x) #x 254#define _STR(x) #x
249#define STR(x) _STR(x) 255#define STR(x) _STR(x)
250 256