author	Linus Torvalds <torvalds@linux-foundation.org>	2013-07-02 19:15:23 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-07-02 19:15:23 -0400
commit	f0bb4c0ab064a8aeeffbda1cee380151a594eaab (patch)
tree	14d55a89c5db455aa10ff9a96ca14c474a9c4d55
parent	a4883ef6af5e513a1e8c2ab9aab721604aa3a4f5 (diff)
parent	983433b5812c5cf33a9008fa38c6f9b407fedb76 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:

 "Kernel improvements:

   - watchdog driver improvements by Li Zefan
   - Power7 CPI stack events related improvements by Sukadev Bhattiprolu
   - event multiplexing via hrtimers and other improvements by Stephane
     Eranian
   - kernel stack use optimization by Andrew Hunter
   - AMD IOMMU uncore PMU support by Suravee Suthikulpanit
   - NMI handling rate-limits by Dave Hansen
   - various hw_breakpoint fixes by Oleg Nesterov
   - hw_breakpoint overflow period sampling and related signal handling
     fixes by Jiri Olsa
   - Intel Haswell PMU support by Andi Kleen

  Tooling improvements:

   - Reset SIGTERM handler in workload child process, fix from David Ahern.
   - Makefile reorganization, prep work for Kconfig patches, from Jiri Olsa.
   - Add automated make test suite, from Jiri Olsa.
   - Add --percent-limit option to 'top' and 'report', from Namhyung Kim.
   - Sorting improvements, from Namhyung Kim.
   - Expand definition of sysfs format attribute, from Michael Ellerman.

  Tooling fixes:

   - 'perf tests' fixes from Jiri Olsa.
   - Make Power7 CPI stack events available in sysfs, from Sukadev
     Bhattiprolu.
   - Handle death by SIGTERM in 'perf record', fix from David Ahern.
   - Fix printing of perf_event_paranoid message, from David Ahern.
   - Handle realloc failures in 'perf kvm', from David Ahern.
   - Fix divide by 0 in variance, from David Ahern.
   - Save parent pid in thread struct, from David Ahern.
   - Handle JITed code in shared memory, from Andi Kleen.
   - Fixes for 'perf diff', from Jiri Olsa.
   - Remove some unused struct members, from Jiri Olsa.
   - Add missing liblk.a dependency for python/perf.so, fix from Jiri Olsa.
   - Respect CROSS_COMPILE in liblk.a, from Rabin Vincent.
   - No need to do locking when adding hists in perf report, only 'top'
     needs that, from Namhyung Kim.
   - Fix alignment of symbol column in the hists browser (top, report)
     when -v is given, from Namhyung Kim.
   - Fix 'perf top' -E option behavior, from Namhyung Kim.
   - Fix bug in isupper() and islower(), from Sukadev Bhattiprolu.
   - Fix compile errors in bp_signal 'perf test', from Sukadev Bhattiprolu.

  ... and more things"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (102 commits)
  perf/x86: Disable PEBS-LL in intel_pmu_pebs_disable()
  perf/x86: Fix shared register mutual exclusion enforcement
  perf/x86/intel: Support full width counting
  x86: Add NMI duration tracepoints
  perf: Drop sample rate when sampling is too slow
  x86: Warn when NMI handlers take large amounts of time
  hw_breakpoint: Introduce "struct bp_cpuinfo"
  hw_breakpoint: Simplify *register_wide_hw_breakpoint()
  hw_breakpoint: Introduce cpumask_of_bp()
  hw_breakpoint: Simplify the "weight" usage in toggle_bp_slot() paths
  hw_breakpoint: Simplify list/idx mess in toggle_bp_slot() paths
  perf/x86/intel: Add mem-loads/stores support for Haswell
  perf/x86/intel: Support Haswell/v4 LBR format
  perf/x86/intel: Move NMI clearing to end of PMI handler
  perf/x86/intel: Add Haswell PEBS support
  perf/x86/intel: Add simple Haswell PMU support
  perf/x86/intel: Add Haswell PEBS record support
  perf/x86/intel: Fix sparse warning
  perf/x86/amd: AMD IOMMU Performance Counter PERF uncore PMU implementation
  perf/x86/amd: Add IOMMU Performance Counter resource management
  ...
-rw-r--r--	Documentation/ABI/testing/sysfs-bus-event_source-devices-events	32
-rw-r--r--	Documentation/ABI/testing/sysfs-bus-event_source-devices-format	6
-rw-r--r--	Documentation/sysctl/kernel.txt	50
-rw-r--r--	Documentation/trace/events-nmi.txt	43
-rw-r--r--	arch/metag/kernel/perf/perf_event.c	2
-rw-r--r--	arch/powerpc/perf/power7-pmu.c	73
-rw-r--r--	arch/x86/ia32/ia32_signal.c	2
-rw-r--r--	arch/x86/include/asm/perf_event.h	3
-rw-r--r--	arch/x86/include/asm/sighandling.h	4
-rw-r--r--	arch/x86/include/uapi/asm/msr-index.h	3
-rw-r--r--	arch/x86/kernel/cpu/Makefile	4
-rw-r--r--	arch/x86/kernel/cpu/perf_event.c	69
-rw-r--r--	arch/x86/kernel/cpu/perf_event.h	24
-rw-r--r--	arch/x86/kernel/cpu/perf_event_amd.c	34
-rw-r--r--	arch/x86/kernel/cpu/perf_event_amd_iommu.c	504
-rw-r--r--	arch/x86/kernel/cpu/perf_event_amd_iommu.h	40
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel.c	136
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel_ds.c	178
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel_lbr.c	69
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel_uncore.c	16
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel_uncore.h	4
-rw-r--r--	arch/x86/kernel/nmi.c	36
-rw-r--r--	arch/x86/kernel/signal.c	16
-rw-r--r--	drivers/iommu/amd_iommu_init.c	140
-rw-r--r--	drivers/iommu/amd_iommu_proto.h	7
-rw-r--r--	drivers/iommu/amd_iommu_types.h	15
-rw-r--r--	include/linux/perf_event.h	28
-rw-r--r--	include/trace/events/nmi.h	37
-rw-r--r--	include/uapi/linux/perf_event.h	5
-rw-r--r--	init/main.c	2
-rw-r--r--	kernel/events/core.c	278
-rw-r--r--	kernel/events/hw_breakpoint.c	191
-rw-r--r--	kernel/sysctl.c	12
-rw-r--r--	tools/lib/lk/Makefile	3
-rw-r--r--	tools/perf/Documentation/perf-archive.txt	2
-rw-r--r--	tools/perf/Documentation/perf-report.txt	4
-rw-r--r--	tools/perf/Documentation/perf-top.txt	4
-rw-r--r--	tools/perf/Makefile	630
-rw-r--r--	tools/perf/builtin-diff.c	19
-rw-r--r--	tools/perf/builtin-kvm.c	3
-rw-r--r--	tools/perf/builtin-record.c	2
-rw-r--r--	tools/perf/builtin-report.c	102
-rw-r--r--	tools/perf/builtin-top.c	74
-rw-r--r--	tools/perf/config/Makefile	477
-rw-r--r--	tools/perf/tests/attr/base-record	4
-rw-r--r--	tools/perf/tests/attr/base-stat	4
-rw-r--r--	tools/perf/tests/attr/test-record-data	5
-rw-r--r--	tools/perf/tests/bp_signal.c	6
-rw-r--r--	tools/perf/tests/bp_signal_overflow.c	6
-rw-r--r--	tools/perf/tests/builtin-test.c	2
-rw-r--r--	tools/perf/tests/make	138
-rw-r--r--	tools/perf/ui/browsers/hists.c	106
-rw-r--r--	tools/perf/ui/gtk/hists.c	13
-rw-r--r--	tools/perf/ui/stdio/hist.c	7
-rw-r--r--	tools/perf/util/evlist.c	2
-rw-r--r--	tools/perf/util/evsel.c	2
-rw-r--r--	tools/perf/util/header.c	2
-rw-r--r--	tools/perf/util/header.h	1
-rw-r--r--	tools/perf/util/hist.c	96
-rw-r--r--	tools/perf/util/hist.h	16
-rw-r--r--	tools/perf/util/map.c	1
-rw-r--r--	tools/perf/util/session.h	1
-rw-r--r--	tools/perf/util/setup.py	5
-rw-r--r--	tools/perf/util/sort.c	128
-rw-r--r--	tools/perf/util/sort.h	36
-rw-r--r--	tools/perf/util/stat.c	2
-rw-r--r--	tools/perf/util/thread.c	4
-rw-r--r--	tools/perf/util/thread.h	1
-rw-r--r--	tools/perf/util/top.c	23
-rw-r--r--	tools/perf/util/top.h	2
-rw-r--r--	tools/perf/util/util.h	4
71 files changed, 2947 insertions, 1053 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
index 0adeb524c0d4..8b25ffb42562 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
@@ -27,14 +27,36 @@ Description: Generic performance monitoring events
27 "basename". 27 "basename".
28 28
29 29
30What: /sys/devices/cpu/events/PM_LD_MISS_L1 30What: /sys/devices/cpu/events/PM_1PLUS_PPC_CMPL
31 /sys/devices/cpu/events/PM_LD_REF_L1
32 /sys/devices/cpu/events/PM_CYC
33 /sys/devices/cpu/events/PM_BRU_FIN 31 /sys/devices/cpu/events/PM_BRU_FIN
34 /sys/devices/cpu/events/PM_GCT_NOSLOT_CYC
35 /sys/devices/cpu/events/PM_BRU_MPRED 32 /sys/devices/cpu/events/PM_BRU_MPRED
36 /sys/devices/cpu/events/PM_INST_CMPL
37 /sys/devices/cpu/events/PM_CMPLU_STALL 33 /sys/devices/cpu/events/PM_CMPLU_STALL
34 /sys/devices/cpu/events/PM_CMPLU_STALL_BRU
35 /sys/devices/cpu/events/PM_CMPLU_STALL_DCACHE_MISS
36 /sys/devices/cpu/events/PM_CMPLU_STALL_DFU
37 /sys/devices/cpu/events/PM_CMPLU_STALL_DIV
38 /sys/devices/cpu/events/PM_CMPLU_STALL_ERAT_MISS
39 /sys/devices/cpu/events/PM_CMPLU_STALL_FXU
40 /sys/devices/cpu/events/PM_CMPLU_STALL_IFU
41 /sys/devices/cpu/events/PM_CMPLU_STALL_LSU
42 /sys/devices/cpu/events/PM_CMPLU_STALL_REJECT
43 /sys/devices/cpu/events/PM_CMPLU_STALL_SCALAR
44 /sys/devices/cpu/events/PM_CMPLU_STALL_SCALAR_LONG
45 /sys/devices/cpu/events/PM_CMPLU_STALL_STORE
46 /sys/devices/cpu/events/PM_CMPLU_STALL_THRD
47 /sys/devices/cpu/events/PM_CMPLU_STALL_VECTOR
48 /sys/devices/cpu/events/PM_CMPLU_STALL_VECTOR_LONG
49 /sys/devices/cpu/events/PM_CYC
50 /sys/devices/cpu/events/PM_GCT_NOSLOT_BR_MPRED
51 /sys/devices/cpu/events/PM_GCT_NOSLOT_BR_MPRED_IC_MISS
52 /sys/devices/cpu/events/PM_GCT_NOSLOT_CYC
53 /sys/devices/cpu/events/PM_GCT_NOSLOT_IC_MISS
54 /sys/devices/cpu/events/PM_GRP_CMPL
55 /sys/devices/cpu/events/PM_INST_CMPL
56 /sys/devices/cpu/events/PM_LD_MISS_L1
57 /sys/devices/cpu/events/PM_LD_REF_L1
58 /sys/devices/cpu/events/PM_RUN_CYC
59 /sys/devices/cpu/events/PM_RUN_INST_CMPL
38 60
39Date: 2013/01/08 61Date: 2013/01/08
40 62
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-format b/Documentation/ABI/testing/sysfs-bus-event_source-devices-format
index 079afc71363d..77f47ff5ee02 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-format
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-format
@@ -9,6 +9,12 @@ Description:
 		we want to export, so that userspace can deal with sane
 		name/value pairs.
 
+		Userspace must be prepared for the possibility that attributes
+		define overlapping bit ranges. For example:
+			attr1 = 'config:0-23'
+			attr2 = 'config:0-7'
+			attr3 = 'config:12-35'
+
 		Example: 'config1:1,6-10,44'
 		Defines contents of attribute that occupies bits 1,6-10,44 of
 		perf_event_attr::config1.
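
The overlapping-ranges caveat above matters when a tool composes perf_event_attr::config from several format attributes. A minimal sketch of the composition step, assuming a hypothetical apply_format() helper (the parsing of the 'configN:LO-HI' string itself is omitted):

	#include <stdint.h>

	/*
	 * Place 'value' into 'config' at bits lo..hi (inclusive), as a
	 * 'configN:lo-hi' format attribute describes.  With overlapping
	 * attributes such as attr1/attr2/attr3 above, a later call can
	 * rewrite bits set by an earlier one, so the caller must apply
	 * them in a deliberate order.
	 */
	static inline uint64_t apply_format(uint64_t config, int lo, int hi,
					    uint64_t value)
	{
		int width = hi - lo + 1;
		uint64_t mask = (width == 64) ? ~0ULL
					      : ((1ULL << width) - 1) << lo;

		return (config & ~mask) | ((value << lo) & mask);
	}
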
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index ccd42589e124..ab7d16efa96b 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -70,12 +70,12 @@ show up in /proc/sys/kernel:
 - shmall
 - shmmax			[ sysv ipc ]
 - shmmni
-- softlockup_thresh
 - stop-a			[ SPARC only ]
 - sysrq			==> Documentation/sysrq.txt
 - tainted
 - threads-max
 - unknown_nmi_panic
+- watchdog_thresh
 - version
@@ -427,6 +427,32 @@ This file shows up if CONFIG_DEBUG_STACKOVERFLOW is enabled.
 
 ==============================================================
 
+perf_cpu_time_max_percent:
+
+Hints to the kernel how much CPU time it should be allowed to
+use to handle perf sampling events.  If the perf subsystem
+is informed that its samples are exceeding this limit, it
+will drop its sampling frequency to attempt to reduce its CPU
+usage.
+
+Some perf sampling happens in NMIs.  If these samples
+unexpectedly take too long to execute, the NMIs can become
+stacked up next to each other so much that nothing else is
+allowed to execute.
+
+0: disable the mechanism.  Do not monitor or correct perf's
+   sampling rate no matter how much CPU time it takes.
+
+1-100: attempt to throttle perf's sample rate to this
+   percentage of CPU.  Note: the kernel calculates an
+   "expected" length of each sample event.  100 here means
+   100% of that expected length.  Even if this is set to
+   100, you may still see sample throttling if this
+   length is exceeded.  Set to 0 if you truly do not care
+   how much CPU is consumed.
+
+==============================================================
+
 
 pid_max:
 
@@ -604,15 +630,6 @@ without users and with a dead originative process will be destroyed.
 
 ==============================================================
 
-softlockup_thresh:
-
-This value can be used to lower the softlockup tolerance threshold.  The
-default threshold is 60 seconds.  If a cpu is locked up for 60 seconds,
-the kernel complains.  Valid values are 1-60 seconds.  Setting this
-tunable to zero will disable the softlockup detection altogether.
-
-==============================================================
-
 tainted:
 
 Non-zero if the kernel has been tainted.  Numeric values, which
@@ -648,3 +665,16 @@ that time, kernel debugging information is displayed on console.
 
 NMI switch that most IA32 servers have fires unknown NMI up, for
 example.  If a system hangs up, try pressing the NMI switch.
+
+==============================================================
+
+watchdog_thresh:
+
+This value can be used to control the frequency of hrtimer and NMI
+events and the soft and hard lockup thresholds. The default threshold
+is 10 seconds.
+
+The softlockup threshold is (2 * watchdog_thresh). Setting this
+tunable to zero will disable lockup detection altogether.
+
+==============================================================
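
A quick usage sketch of the two tunables added above, in the same style as the tracing example later in this merge (the values are illustrative, not recommendations):

	# allow perf sampling to use at most 25% of CPU time
	echo 25 > /proc/sys/kernel/perf_cpu_time_max_percent

	# 10 second watchdog threshold; softlockup threshold becomes 20s
	echo 10 > /proc/sys/kernel/watchdog_thresh
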
diff --git a/Documentation/trace/events-nmi.txt b/Documentation/trace/events-nmi.txt
new file mode 100644
index 000000000000..c03c8c89f08d
--- /dev/null
+++ b/Documentation/trace/events-nmi.txt
@@ -0,0 +1,43 @@
+NMI Trace Events
+
+These events normally show up here:
+
+	/sys/kernel/debug/tracing/events/nmi
+
+--
+
+nmi_handler:
+
+You might want to use this tracepoint if you suspect that your
+NMI handlers are hogging large amounts of CPU time.  The kernel
+will warn if it sees long-running handlers:
+
+	INFO: NMI handler took too long to run: 9.207 msecs
+
+and this tracepoint will allow you to drill down and get some
+more details.
+
+Let's say you suspect that perf_event_nmi_handler() is causing
+you some problems and you only want to trace that handler
+specifically.  You need to find its address:
+
+	$ grep perf_event_nmi_handler /proc/kallsyms
+	ffffffff81625600 t perf_event_nmi_handler
+
+Let's also say you are only interested in when that function is
+really hogging a lot of CPU time, like a millisecond at a time.
+Note that the kernel's output is in milliseconds, but the input
+to the filter is in nanoseconds!  You can filter on 'delta_ns':
+
+cd /sys/kernel/debug/tracing/events/nmi/nmi_handler
+echo 'handler==0xffffffff81625600 && delta_ns>1000000' > filter
+echo 1 > enable
+
+Your output would then look like:
+
+$ cat /sys/kernel/debug/tracing/trace_pipe
+<idle>-0     [000] d.h3   505.397558: nmi_handler: perf_event_nmi_handler() delta_ns: 3236765 handled: 1
+<idle>-0     [000] d.h3   505.805893: nmi_handler: perf_event_nmi_handler() delta_ns: 3174234 handled: 1
+<idle>-0     [000] d.h3   506.158206: nmi_handler: perf_event_nmi_handler() delta_ns: 3084642 handled: 1
+<idle>-0     [000] d.h3   506.334346: nmi_handler: perf_event_nmi_handler() delta_ns: 3080351 handled: 1
+
diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c
index 366569425c52..5b18888ee364 100644
--- a/arch/metag/kernel/perf/perf_event.c
+++ b/arch/metag/kernel/perf/perf_event.c
@@ -882,7 +882,7 @@ static int __init init_hw_perf_events(void)
 	}
 
 	register_cpu_notifier(&metag_pmu_notifier);
-	ret = perf_pmu_register(&pmu, (char *)metag_pmu->name, PERF_TYPE_RAW);
+	ret = perf_pmu_register(&pmu, metag_pmu->name, PERF_TYPE_RAW);
 out:
 	return ret;
 }
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
index 3c475d6267c7..13c3f0e547a2 100644
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -62,6 +62,29 @@
 #define PME_PM_BRU_FIN			0x10068
 #define PME_PM_BRU_MPRED		0x400f6
 
+#define PME_PM_CMPLU_STALL_FXU			0x20014
+#define PME_PM_CMPLU_STALL_DIV			0x40014
+#define PME_PM_CMPLU_STALL_SCALAR		0x40012
+#define PME_PM_CMPLU_STALL_SCALAR_LONG		0x20018
+#define PME_PM_CMPLU_STALL_VECTOR		0x2001c
+#define PME_PM_CMPLU_STALL_VECTOR_LONG		0x4004a
+#define PME_PM_CMPLU_STALL_LSU			0x20012
+#define PME_PM_CMPLU_STALL_REJECT		0x40016
+#define PME_PM_CMPLU_STALL_ERAT_MISS		0x40018
+#define PME_PM_CMPLU_STALL_DCACHE_MISS		0x20016
+#define PME_PM_CMPLU_STALL_STORE		0x2004a
+#define PME_PM_CMPLU_STALL_THRD			0x1001c
+#define PME_PM_CMPLU_STALL_IFU			0x4004c
+#define PME_PM_CMPLU_STALL_BRU			0x4004e
+#define PME_PM_GCT_NOSLOT_IC_MISS		0x2001a
+#define PME_PM_GCT_NOSLOT_BR_MPRED		0x4001a
+#define PME_PM_GCT_NOSLOT_BR_MPRED_IC_MISS	0x4001c
+#define PME_PM_GRP_CMPL				0x30004
+#define PME_PM_1PLUS_PPC_CMPL			0x100f2
+#define PME_PM_CMPLU_STALL_DFU			0x2003c
+#define PME_PM_RUN_CYC				0x200f4
+#define PME_PM_RUN_INST_CMPL			0x400fa
+
 /*
  * Layout of constraint bits:
  * 6666555555555544444444443333333333222222222211111111110000000000
@@ -393,6 +416,31 @@ POWER_EVENT_ATTR(LD_MISS_L1, LD_MISS_L1);
 POWER_EVENT_ATTR(BRU_FIN,	BRU_FIN)
 POWER_EVENT_ATTR(BRU_MPRED,	BRU_MPRED);
 
+POWER_EVENT_ATTR(CMPLU_STALL_FXU,		CMPLU_STALL_FXU);
+POWER_EVENT_ATTR(CMPLU_STALL_DIV,		CMPLU_STALL_DIV);
+POWER_EVENT_ATTR(CMPLU_STALL_SCALAR,		CMPLU_STALL_SCALAR);
+POWER_EVENT_ATTR(CMPLU_STALL_SCALAR_LONG,	CMPLU_STALL_SCALAR_LONG);
+POWER_EVENT_ATTR(CMPLU_STALL_VECTOR,		CMPLU_STALL_VECTOR);
+POWER_EVENT_ATTR(CMPLU_STALL_VECTOR_LONG,	CMPLU_STALL_VECTOR_LONG);
+POWER_EVENT_ATTR(CMPLU_STALL_LSU,		CMPLU_STALL_LSU);
+POWER_EVENT_ATTR(CMPLU_STALL_REJECT,		CMPLU_STALL_REJECT);
+
+POWER_EVENT_ATTR(CMPLU_STALL_ERAT_MISS,		CMPLU_STALL_ERAT_MISS);
+POWER_EVENT_ATTR(CMPLU_STALL_DCACHE_MISS,	CMPLU_STALL_DCACHE_MISS);
+POWER_EVENT_ATTR(CMPLU_STALL_STORE,		CMPLU_STALL_STORE);
+POWER_EVENT_ATTR(CMPLU_STALL_THRD,		CMPLU_STALL_THRD);
+POWER_EVENT_ATTR(CMPLU_STALL_IFU,		CMPLU_STALL_IFU);
+POWER_EVENT_ATTR(CMPLU_STALL_BRU,		CMPLU_STALL_BRU);
+POWER_EVENT_ATTR(GCT_NOSLOT_IC_MISS,		GCT_NOSLOT_IC_MISS);
+
+POWER_EVENT_ATTR(GCT_NOSLOT_BR_MPRED,		GCT_NOSLOT_BR_MPRED);
+POWER_EVENT_ATTR(GCT_NOSLOT_BR_MPRED_IC_MISS,	GCT_NOSLOT_BR_MPRED_IC_MISS);
+POWER_EVENT_ATTR(GRP_CMPL,			GRP_CMPL);
+POWER_EVENT_ATTR(1PLUS_PPC_CMPL,		1PLUS_PPC_CMPL);
+POWER_EVENT_ATTR(CMPLU_STALL_DFU,		CMPLU_STALL_DFU);
+POWER_EVENT_ATTR(RUN_CYC,			RUN_CYC);
+POWER_EVENT_ATTR(RUN_INST_CMPL,			RUN_INST_CMPL);
+
 static struct attribute *power7_events_attr[] = {
 	GENERIC_EVENT_PTR(CYC),
 	GENERIC_EVENT_PTR(GCT_NOSLOT_CYC),
@@ -411,6 +459,31 @@ static struct attribute *power7_events_attr[] = {
 	POWER_EVENT_PTR(LD_MISS_L1),
 	POWER_EVENT_PTR(BRU_FIN),
 	POWER_EVENT_PTR(BRU_MPRED),
+
+	POWER_EVENT_PTR(CMPLU_STALL_FXU),
+	POWER_EVENT_PTR(CMPLU_STALL_DIV),
+	POWER_EVENT_PTR(CMPLU_STALL_SCALAR),
+	POWER_EVENT_PTR(CMPLU_STALL_SCALAR_LONG),
+	POWER_EVENT_PTR(CMPLU_STALL_VECTOR),
+	POWER_EVENT_PTR(CMPLU_STALL_VECTOR_LONG),
+	POWER_EVENT_PTR(CMPLU_STALL_LSU),
+	POWER_EVENT_PTR(CMPLU_STALL_REJECT),
+
+	POWER_EVENT_PTR(CMPLU_STALL_ERAT_MISS),
+	POWER_EVENT_PTR(CMPLU_STALL_DCACHE_MISS),
+	POWER_EVENT_PTR(CMPLU_STALL_STORE),
+	POWER_EVENT_PTR(CMPLU_STALL_THRD),
+	POWER_EVENT_PTR(CMPLU_STALL_IFU),
+	POWER_EVENT_PTR(CMPLU_STALL_BRU),
+	POWER_EVENT_PTR(GCT_NOSLOT_IC_MISS),
+	POWER_EVENT_PTR(GCT_NOSLOT_BR_MPRED),
+
+	POWER_EVENT_PTR(GCT_NOSLOT_BR_MPRED_IC_MISS),
+	POWER_EVENT_PTR(GRP_CMPL),
+	POWER_EVENT_PTR(1PLUS_PPC_CMPL),
+	POWER_EVENT_PTR(CMPLU_STALL_DFU),
+	POWER_EVENT_PTR(RUN_CYC),
+	POWER_EVENT_PTR(RUN_INST_CMPL),
 	NULL
 };
 
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index cf1a471a18a2..bccfca68430e 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -34,8 +34,6 @@
34#include <asm/sys_ia32.h> 34#include <asm/sys_ia32.h>
35#include <asm/smap.h> 35#include <asm/smap.h>
36 36
37#define FIX_EFLAGS __FIX_EFLAGS
38
39int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) 37int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
40{ 38{
41 int err = 0; 39 int err = 0;
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 57cb63402213..8249df45d2f2 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -29,6 +29,9 @@
29#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) 29#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23)
30#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL 30#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL
31 31
32#define HSW_IN_TX (1ULL << 32)
33#define HSW_IN_TX_CHECKPOINTED (1ULL << 33)
34
32#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36) 35#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36)
33#define AMD64_EVENTSEL_GUESTONLY (1ULL << 40) 36#define AMD64_EVENTSEL_GUESTONLY (1ULL << 40)
34#define AMD64_EVENTSEL_HOSTONLY (1ULL << 41) 37#define AMD64_EVENTSEL_HOSTONLY (1ULL << 41)
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index beff97f7df37..7a958164088c 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -7,10 +7,10 @@
7 7
8#include <asm/processor-flags.h> 8#include <asm/processor-flags.h>
9 9
10#define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ 10#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \
11 X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \ 11 X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \
12 X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ 12 X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
13 X86_EFLAGS_CF) 13 X86_EFLAGS_CF | X86_EFLAGS_RF)
14 14
15void signal_fault(struct pt_regs *regs, void __user *frame, char *where); 15void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
16 16
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 2af848dfa754..bb0465090ae5 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -170,6 +170,9 @@
170#define MSR_KNC_EVNTSEL0 0x00000028 170#define MSR_KNC_EVNTSEL0 0x00000028
171#define MSR_KNC_EVNTSEL1 0x00000029 171#define MSR_KNC_EVNTSEL1 0x00000029
172 172
173/* Alternative perfctr range with full access. */
174#define MSR_IA32_PMC0 0x000004c1
175
173/* AMD64 MSRs. Not complete. See the architecture manual for a more 176/* AMD64 MSRs. Not complete. See the architecture manual for a more
174 complete list. */ 177 complete list. */
175 178
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index b0684e4a73aa..47b56a7e99cb 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -31,11 +31,15 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
 
 ifdef CONFIG_PERF_EVENTS
 obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_amd.o perf_event_amd_uncore.o
+ifdef CONFIG_AMD_IOMMU
+obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_amd_iommu.o
+endif
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_knc.o perf_event_p4.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_uncore.o
 endif
 
+
 obj-$(CONFIG_X86_MCE)			+= mcheck/
 obj-$(CONFIG_MTRR)			+= mtrr/
 
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1025f3c99d20..9e581c5cf6d0 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -403,7 +403,8 @@ int x86_pmu_hw_config(struct perf_event *event)
 	 * check that PEBS LBR correction does not conflict with
 	 * whatever the user is asking with attr->branch_sample_type
 	 */
-	if (event->attr.precise_ip > 1) {
+	if (event->attr.precise_ip > 1 &&
+	    x86_pmu.intel_cap.pebs_format < 2) {
 		u64 *br_type = &event->attr.branch_sample_type;
 
 		if (has_branch_stack(event)) {
@@ -568,7 +569,7 @@ struct sched_state {
 struct perf_sched {
 	int			max_weight;
 	int			max_events;
-	struct event_constraint	**constraints;
+	struct perf_event	**events;
 	struct sched_state	state;
 	int			saved_states;
 	struct sched_state	saved[SCHED_STATES_MAX];
@@ -577,7 +578,7 @@ struct perf_sched {
 /*
  * Initialize iterator that runs through all events and counters.
  */
-static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
+static void perf_sched_init(struct perf_sched *sched, struct perf_event **events,
 			    int num, int wmin, int wmax)
 {
 	int idx;
@@ -585,10 +586,10 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint **
 	memset(sched, 0, sizeof(*sched));
 	sched->max_events	= num;
 	sched->max_weight	= wmax;
-	sched->constraints	= c;
+	sched->events		= events;
 
 	for (idx = 0; idx < num; idx++) {
-		if (c[idx]->weight == wmin)
+		if (events[idx]->hw.constraint->weight == wmin)
 			break;
 	}
 
@@ -635,8 +636,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
 	if (sched->state.event >= sched->max_events)
 		return false;
 
-	c = sched->constraints[sched->state.event];
-
+	c = sched->events[sched->state.event]->hw.constraint;
 	/* Prefer fixed purpose counters */
 	if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
 		idx = INTEL_PMC_IDX_FIXED;
@@ -694,7 +694,7 @@ static bool perf_sched_next_event(struct perf_sched *sched)
 			if (sched->state.weight > sched->max_weight)
 				return false;
 		}
-		c = sched->constraints[sched->state.event];
+		c = sched->events[sched->state.event]->hw.constraint;
 	} while (c->weight != sched->state.weight);
 
 	sched->state.counter = 0;	/* start with first counter */
@@ -705,12 +705,12 @@ static bool perf_sched_next_event(struct perf_sched *sched)
 /*
  * Assign a counter for each event.
  */
-int perf_assign_events(struct event_constraint **constraints, int n,
+int perf_assign_events(struct perf_event **events, int n,
 			int wmin, int wmax, int *assign)
 {
 	struct perf_sched sched;
 
-	perf_sched_init(&sched, constraints, n, wmin, wmax);
+	perf_sched_init(&sched, events, n, wmin, wmax);
 
 	do {
 		if (!perf_sched_find_counter(&sched))
@@ -724,16 +724,19 @@ int perf_assign_events(struct event_constraint **constraints, int n,
 
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
-	struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
+	struct event_constraint *c;
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	struct perf_event *e;
 	int i, wmin, wmax, num = 0;
 	struct hw_perf_event *hwc;
 
 	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 
 	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
+		hwc = &cpuc->event_list[i]->hw;
 		c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
-		constraints[i] = c;
+		hwc->constraint = c;
+
 		wmin = min(wmin, c->weight);
 		wmax = max(wmax, c->weight);
 	}
@@ -743,7 +746,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	 */
 	for (i = 0; i < n; i++) {
 		hwc = &cpuc->event_list[i]->hw;
-		c = constraints[i];
+		c = hwc->constraint;
 
 		/* never assigned */
 		if (hwc->idx == -1)
@@ -764,16 +767,35 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 
 	/* slow path */
 	if (i != n)
-		num = perf_assign_events(constraints, n, wmin, wmax, assign);
+		num = perf_assign_events(cpuc->event_list, n, wmin,
+				wmax, assign);
 
 	/*
+	 * Mark the event as committed, so we do not put_constraint()
+	 * in case new events are added and fail scheduling.
+	 */
+	if (!num && assign) {
+		for (i = 0; i < n; i++) {
+			e = cpuc->event_list[i];
+			e->hw.flags |= PERF_X86_EVENT_COMMITTED;
+		}
+	}
+	/*
 	 * scheduling failed or is just a simulation,
 	 * free resources if necessary
 	 */
 	if (!assign || num) {
 		for (i = 0; i < n; i++) {
+			e = cpuc->event_list[i];
+			/*
+			 * do not put_constraint() on committed events,
+			 * because they are good to go
+			 */
+			if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
+				continue;
+
 			if (x86_pmu.put_event_constraints)
-				x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
+				x86_pmu.put_event_constraints(cpuc, e);
 		}
 	}
 	return num ? -EINVAL : 0;
@@ -1153,6 +1175,11 @@ static void x86_pmu_del(struct perf_event *event, int flags)
 	int i;
 
 	/*
+	 * event is descheduled
+	 */
+	event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
+
+	/*
 	 * If we're called during a txn, we don't need to do anything.
 	 * The events never got scheduled and ->cancel_txn will truncate
 	 * the event_list.
@@ -1249,10 +1276,20 @@ void perf_events_lapic_init(void)
 static int __kprobes
 perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 {
+	int ret;
+	u64 start_clock;
+	u64 finish_clock;
+
 	if (!atomic_read(&active_events))
 		return NMI_DONE;
 
-	return x86_pmu.handle_irq(regs);
+	start_clock = local_clock();
+	ret = x86_pmu.handle_irq(regs);
+	finish_clock = local_clock();
+
+	perf_sample_event_took(finish_clock - start_clock);
+
+	return ret;
 }
 
 struct event_constraint emptyconstraint;
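
The local_clock() bracketing added to perf_event_nmi_handler() above is what feeds perf_sample_event_took(), and through it the perf_cpu_time_max_percent throttling documented earlier in this pull. The shape of that mechanism, as a sketch of the idea only (the names below are illustrative, not the kernel/events/core.c implementation):

	#include <linux/types.h>

	static void lower_max_sample_rate(void);	/* hypothetical helper */

	static u64 avg_sample_ns;	/* decaying average of sample cost */

	static void sample_event_took(u64 sample_ns, u64 allowed_ns)
	{
		/* fold the new sample into the average (weight is arbitrary) */
		avg_sample_ns += (s64)(sample_ns - avg_sample_ns) / 8;

		/* over the budget implied by perf_cpu_time_max_percent? */
		if (allowed_ns && avg_sample_ns > allowed_ns)
			lower_max_sample_rate();
	}
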
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index ba9aadfa683b..97e557bc4c91 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -63,10 +63,12 @@ struct event_constraint {
 	int	flags;
 };
 /*
- * struct event_constraint flags
+ * struct hw_perf_event.flags flags
  */
 #define PERF_X86_EVENT_PEBS_LDLAT	0x1 /* ld+ldlat data address sampling */
 #define PERF_X86_EVENT_PEBS_ST		0x2 /* st data address sampling */
+#define PERF_X86_EVENT_PEBS_ST_HSW	0x4 /* haswell style st data sampling */
+#define PERF_X86_EVENT_COMMITTED	0x8 /* event passed commit_txn */
 
 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
@@ -227,11 +229,14 @@ struct cpu_hw_events {
 	 *  - inv
 	 *  - edge
 	 *  - cnt-mask
+	 *  - in_tx
+	 *  - in_tx_checkpointed
 	 *  The other filters are supported by fixed counters.
 	 *  The any-thread option is supported starting with v3.
 	 */
+#define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)
 #define FIXED_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
+	EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)
 
 /*
  * Constraint on the Event code + UMask
@@ -247,6 +252,11 @@ struct cpu_hw_events {
 	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
 			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
 
+/* DataLA version of store sampling without extra enable bit. */
+#define INTEL_PST_HSW_CONSTRAINT(c, n)	\
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
+
 #define EVENT_CONSTRAINT_END		\
 	EVENT_CONSTRAINT(0, 0, 0)
 
@@ -301,6 +311,11 @@ union perf_capabilities {
 		u64	pebs_arch_reg:1;
 		u64	pebs_format:4;
 		u64	smm_freeze:1;
+		/*
+		 * PMU supports separate counter range for writing
+		 * values > 32bit.
+		 */
+		u64	full_width_write:1;
 	};
 	u64	capabilities;
 };
@@ -375,6 +390,7 @@ struct x86_pmu {
 	struct event_constraint *event_constraints;
 	struct x86_pmu_quirk *quirks;
 	int		perfctr_second_write;
+	bool		late_ack;
 
 	/*
 	 * sysfs attrs
@@ -528,7 +544,7 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
 
 void x86_pmu_enable_all(int added);
 
-int perf_assign_events(struct event_constraint **constraints, int n,
+int perf_assign_events(struct perf_event **events, int n,
 			int wmin, int wmax, int *assign);
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
 
@@ -633,6 +649,8 @@ extern struct event_constraint intel_snb_pebs_event_constraints[];
 
 extern struct event_constraint intel_ivb_pebs_event_constraints[];
 
+extern struct event_constraint intel_hsw_pebs_event_constraints[];
+
 struct event_constraint *intel_pebs_constraints(struct perf_event *event);
 
 void intel_pmu_pebs_enable(struct perf_event *event);
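
Why FIXED_EVENT_FLAGS exists: a fixed-counter constraint matches an event by comparing its config under a mask, and the old mask ignored bits above 31. With Haswell's in_tx/in_tx_checkpointed qualifiers living in config bits 32 and 33, an event carrying them could still match a fixed-counter encoding that cannot honor them. A toy illustration (RAW_EVENT_MASK below is an illustrative stand-in for X86_RAW_EVENT_MASK, and the helper is not kernel code):

	#include <stdint.h>

	#define HSW_IN_TX		(1ULL << 32)
	#define HSW_IN_TX_CHECKPOINTED	(1ULL << 33)
	#define RAW_EVENT_MASK		0xFFFFFFFFULL	/* illustrative */
	#define FIXED_EVENT_FLAGS	\
		(RAW_EVENT_MASK | HSW_IN_TX | HSW_IN_TX_CHECKPOINTED)

	/* does 'config' match the fixed-counter code under 'mask'? */
	static int constraint_matches(uint64_t config, uint64_t code,
				      uint64_t mask)
	{
		return (config & mask) == code;
	}

	/*
	 * 0x00c0 | HSW_IN_TX matches INST_RETIRED.ANY (0x00c0) under the
	 * old 32-bit mask, but not under FIXED_EVENT_FLAGS, so such an
	 * event falls through to a general-purpose counter instead.
	 */
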
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 7e28d9467bb4..4cbe03287b08 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -648,48 +648,48 @@ static __initconst const struct x86_pmu amd_pmu = {
 	.cpu_dead		= amd_pmu_cpu_dead,
 };
 
-static int setup_event_constraints(void)
+static int __init amd_core_pmu_init(void)
 {
-	if (boot_cpu_data.x86 == 0x15)
+	if (!cpu_has_perfctr_core)
+		return 0;
+
+	switch (boot_cpu_data.x86) {
+	case 0x15:
+		pr_cont("Fam15h ");
 		x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
-	return 0;
-}
+		break;
 
-static int setup_perfctr_core(void)
-{
-	if (!cpu_has_perfctr_core) {
-		WARN(x86_pmu.get_event_constraints == amd_get_event_constraints_f15h,
-		     KERN_ERR "Odd, counter constraints enabled but no core perfctrs detected!");
+	default:
+		pr_err("core perfctr but no constraints; unknown hardware!\n");
 		return -ENODEV;
 	}
 
-	WARN(x86_pmu.get_event_constraints == amd_get_event_constraints,
-	     KERN_ERR "hw perf events core counters need constraints handler!");
-
 	/*
 	 * If core performance counter extensions exists, we must use
 	 * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
-	 * x86_pmu_addr_offset().
+	 * amd_pmu_addr_offset().
 	 */
 	x86_pmu.eventsel	= MSR_F15H_PERF_CTL;
 	x86_pmu.perfctr		= MSR_F15H_PERF_CTR;
 	x86_pmu.num_counters	= AMD64_NUM_COUNTERS_CORE;
 
-	printk(KERN_INFO "perf: AMD core performance counters detected\n");
-
+	pr_cont("core perfctr, ");
 	return 0;
 }
 
 __init int amd_pmu_init(void)
 {
+	int ret;
+
 	/* Performance-monitoring supported from K7 and later: */
 	if (boot_cpu_data.x86 < 6)
 		return -ENODEV;
 
 	x86_pmu = amd_pmu;
 
-	setup_event_constraints();
-	setup_perfctr_core();
+	ret = amd_core_pmu_init();
+	if (ret)
+		return ret;
 
 	/* Events are common for all AMDs */
 	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.c b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
new file mode 100644
index 000000000000..0db655ef3918
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
@@ -0,0 +1,504 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Steven Kinney <Steven.Kinney@amd.com>
+ * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
+ *
+ * Perf: amd_iommu - AMD IOMMU Performance Counter PMU implementation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/module.h>
+#include <linux/cpumask.h>
+#include <linux/slab.h>
+
+#include "perf_event.h"
+#include "perf_event_amd_iommu.h"
+
+#define COUNTER_SHIFT		16
+
+#define _GET_BANK(ev)       ((u8)(ev->hw.extra_reg.reg >> 8))
+#define _GET_CNTR(ev)       ((u8)(ev->hw.extra_reg.reg))
+
+/* iommu pmu config masks */
+#define _GET_CSOURCE(ev)    ((ev->hw.config & 0xFFULL))
+#define _GET_DEVID(ev)      ((ev->hw.config >> 8)  & 0xFFFFULL)
+#define _GET_PASID(ev)      ((ev->hw.config >> 24) & 0xFFFFULL)
+#define _GET_DOMID(ev)      ((ev->hw.config >> 40) & 0xFFFFULL)
+#define _GET_DEVID_MASK(ev) ((ev->hw.extra_reg.config)  & 0xFFFFULL)
+#define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL)
+#define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL)
+
+static struct perf_amd_iommu __perf_iommu;
+
+struct perf_amd_iommu {
+	struct pmu pmu;
+	u8 max_banks;
+	u8 max_counters;
+	u64 cntr_assign_mask;
+	raw_spinlock_t lock;
+	const struct attribute_group *attr_groups[4];
+};
+
+#define format_group	attr_groups[0]
+#define cpumask_group	attr_groups[1]
+#define events_group	attr_groups[2]
+#define null_group	attr_groups[3]
+
+/*---------------------------------------------
+ * sysfs format attributes
+ *---------------------------------------------*/
+PMU_FORMAT_ATTR(csource,    "config:0-7");
+PMU_FORMAT_ATTR(devid,      "config:8-23");
+PMU_FORMAT_ATTR(pasid,      "config:24-39");
+PMU_FORMAT_ATTR(domid,      "config:40-55");
+PMU_FORMAT_ATTR(devid_mask, "config1:0-15");
+PMU_FORMAT_ATTR(pasid_mask, "config1:16-31");
+PMU_FORMAT_ATTR(domid_mask, "config1:32-47");
+
+static struct attribute *iommu_format_attrs[] = {
+	&format_attr_csource.attr,
+	&format_attr_devid.attr,
+	&format_attr_pasid.attr,
+	&format_attr_domid.attr,
+	&format_attr_devid_mask.attr,
+	&format_attr_pasid_mask.attr,
+	&format_attr_domid_mask.attr,
+	NULL,
+};
+
+static struct attribute_group amd_iommu_format_group = {
+	.name = "format",
+	.attrs = iommu_format_attrs,
+};
+
+/*---------------------------------------------
+ * sysfs events attributes
+ *---------------------------------------------*/
+struct amd_iommu_event_desc {
+	struct kobj_attribute attr;
+	const char *event;
+};
+
+static ssize_t _iommu_event_show(struct kobject *kobj,
+				struct kobj_attribute *attr, char *buf)
+{
+	struct amd_iommu_event_desc *event =
+		container_of(attr, struct amd_iommu_event_desc, attr);
+	return sprintf(buf, "%s\n", event->event);
+}
+
+#define AMD_IOMMU_EVENT_DESC(_name, _event)			\
+{								\
+	.attr  = __ATTR(_name, 0444, _iommu_event_show, NULL),	\
+	.event = _event,					\
+}
+
+static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = {
+	AMD_IOMMU_EVENT_DESC(mem_pass_untrans,        "csource=0x01"),
+	AMD_IOMMU_EVENT_DESC(mem_pass_pretrans,       "csource=0x02"),
+	AMD_IOMMU_EVENT_DESC(mem_pass_excl,           "csource=0x03"),
+	AMD_IOMMU_EVENT_DESC(mem_target_abort,        "csource=0x04"),
+	AMD_IOMMU_EVENT_DESC(mem_trans_total,         "csource=0x05"),
+	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_hit,   "csource=0x06"),
+	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_mis,   "csource=0x07"),
+	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_hit,   "csource=0x08"),
+	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_mis,   "csource=0x09"),
+	AMD_IOMMU_EVENT_DESC(mem_dte_hit,             "csource=0x0a"),
+	AMD_IOMMU_EVENT_DESC(mem_dte_mis,             "csource=0x0b"),
+	AMD_IOMMU_EVENT_DESC(page_tbl_read_tot,       "csource=0x0c"),
+	AMD_IOMMU_EVENT_DESC(page_tbl_read_nst,       "csource=0x0d"),
+	AMD_IOMMU_EVENT_DESC(page_tbl_read_gst,       "csource=0x0e"),
+	AMD_IOMMU_EVENT_DESC(int_dte_hit,             "csource=0x0f"),
+	AMD_IOMMU_EVENT_DESC(int_dte_mis,             "csource=0x10"),
+	AMD_IOMMU_EVENT_DESC(cmd_processed,           "csource=0x11"),
+	AMD_IOMMU_EVENT_DESC(cmd_processed_inv,       "csource=0x12"),
+	AMD_IOMMU_EVENT_DESC(tlb_inv,                 "csource=0x13"),
+	{ /* end: all zeroes */ },
+};
+
+/*---------------------------------------------
+ * sysfs cpumask attributes
+ *---------------------------------------------*/
+static cpumask_t iommu_cpumask;
+
+static ssize_t _iommu_cpumask_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &iommu_cpumask);
+	buf[n++] = '\n';
+	buf[n] = '\0';
+	return n;
+}
+static DEVICE_ATTR(cpumask, S_IRUGO, _iommu_cpumask_show, NULL);
+
+static struct attribute *iommu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static struct attribute_group amd_iommu_cpumask_group = {
+	.attrs = iommu_cpumask_attrs,
+};
+
+/*---------------------------------------------*/
+
+static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu)
+{
+	unsigned long flags;
+	int shift, bank, cntr, retval;
+	int max_banks = perf_iommu->max_banks;
+	int max_cntrs = perf_iommu->max_counters;
+
+	raw_spin_lock_irqsave(&perf_iommu->lock, flags);
+
+	for (bank = 0, shift = 0; bank < max_banks; bank++) {
+		for (cntr = 0; cntr < max_cntrs; cntr++) {
+			shift = bank + (bank*3) + cntr;
+			if (perf_iommu->cntr_assign_mask & (1ULL<<shift)) {
+				continue;
+			} else {
+				perf_iommu->cntr_assign_mask |= (1ULL<<shift);
+				retval = ((u16)((u16)bank<<8) | (u8)(cntr));
+				goto out;
+			}
+		}
+	}
+	retval = -ENOSPC;
+out:
+	raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
+	return retval;
+}
+
+static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu,
+					u8 bank, u8 cntr)
+{
+	unsigned long flags;
+	int max_banks, max_cntrs;
+	int shift = 0;
+
+	max_banks = perf_iommu->max_banks;
+	max_cntrs = perf_iommu->max_counters;
+
+	if ((bank > max_banks) || (cntr > max_cntrs))
+		return -EINVAL;
+
+	shift = bank + cntr + (bank*3);
+
+	raw_spin_lock_irqsave(&perf_iommu->lock, flags);
+	perf_iommu->cntr_assign_mask &= ~(1ULL<<shift);
+	raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
+
+	return 0;
+}
+
+static int perf_iommu_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_amd_iommu *perf_iommu;
+	u64 config, config1;
+
+	/* test the event attr type check for PMU enumeration */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/*
+	 * IOMMU counters are shared across all cores.
+	 * Therefore, it does not support per-process mode.
+	 * Also, it does not support event sampling mode.
+	 */
+	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+		return -EINVAL;
+
+	/* IOMMU counters do not have usr/os/guest/host bits */
+	if (event->attr.exclude_user || event->attr.exclude_kernel ||
+	    event->attr.exclude_host || event->attr.exclude_guest)
+		return -EINVAL;
+
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	perf_iommu = &__perf_iommu;
+
+	if (event->pmu != &perf_iommu->pmu)
+		return -ENOENT;
+
+	if (perf_iommu) {
+		config = event->attr.config;
+		config1 = event->attr.config1;
+	} else {
+		return -EINVAL;
+	}
+
+	/* integrate with iommu base devid (0000), assume one iommu */
+	perf_iommu->max_banks =
+		amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID);
+	perf_iommu->max_counters =
+		amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID);
+	if ((perf_iommu->max_banks == 0) || (perf_iommu->max_counters == 0))
+		return -EINVAL;
+
+	/* update the hw_perf_event struct with the iommu config data */
+	hwc->config = config;
+	hwc->extra_reg.config = config1;
+
+	return 0;
+}
+
+static void perf_iommu_enable_event(struct perf_event *ev)
+{
+	u8 csource = _GET_CSOURCE(ev);
+	u16 devid = _GET_DEVID(ev);
+	u64 reg = 0ULL;
+
+	reg = csource;
+	amd_iommu_pc_get_set_reg_val(devid,
+			_GET_BANK(ev), _GET_CNTR(ev) ,
+			 IOMMU_PC_COUNTER_SRC_REG, &reg, true);
+
+	reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32);
+	if (reg)
+		reg |= (1UL << 31);
+	amd_iommu_pc_get_set_reg_val(devid,
+			_GET_BANK(ev), _GET_CNTR(ev) ,
+			 IOMMU_PC_DEVID_MATCH_REG, &reg, true);
+
+	reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
+	if (reg)
+		reg |= (1UL << 31);
+	amd_iommu_pc_get_set_reg_val(devid,
+			_GET_BANK(ev), _GET_CNTR(ev) ,
+			 IOMMU_PC_PASID_MATCH_REG, &reg, true);
+
+	reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
+	if (reg)
+		reg |= (1UL << 31);
+	amd_iommu_pc_get_set_reg_val(devid,
+			_GET_BANK(ev), _GET_CNTR(ev) ,
+			 IOMMU_PC_DOMID_MATCH_REG, &reg, true);
+}
+
+static void perf_iommu_disable_event(struct perf_event *event)
+{
+	u64 reg = 0ULL;
+
+	amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+			_GET_BANK(event), _GET_CNTR(event),
+			IOMMU_PC_COUNTER_SRC_REG, &reg, true);
+}
+
+static void perf_iommu_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	pr_debug("perf: amd_iommu:perf_iommu_start\n");
+	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+		return;
+
+	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+	hwc->state = 0;
+
+	if (flags & PERF_EF_RELOAD) {
+		u64 prev_raw_count =  local64_read(&hwc->prev_count);
+		amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+				_GET_BANK(event), _GET_CNTR(event),
+				IOMMU_PC_COUNTER_REG, &prev_raw_count, true);
+	}
+
+	perf_iommu_enable_event(event);
+	perf_event_update_userpage(event);
+
+}
+
+static void perf_iommu_read(struct perf_event *event)
+{
+	u64 count = 0ULL;
+	u64 prev_raw_count = 0ULL;
+	u64 delta = 0ULL;
+	struct hw_perf_event *hwc = &event->hw;
+	pr_debug("perf: amd_iommu:perf_iommu_read\n");
+
+	amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+				_GET_BANK(event), _GET_CNTR(event),
+				IOMMU_PC_COUNTER_REG, &count, false);
+
+	/* IOMMU pc counter register is only 48 bits */
+	count &= 0xFFFFFFFFFFFFULL;
+
+	prev_raw_count =  local64_read(&hwc->prev_count);
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+					count) != prev_raw_count)
+		return;
+
+	/* Handling 48-bit counter overflowing */
+	delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT);
+	delta >>= COUNTER_SHIFT;
+	local64_add(delta, &event->count);
+
+}
+
+static void perf_iommu_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 config;
+
+	pr_debug("perf: amd_iommu:perf_iommu_stop\n");
+
+	if (hwc->state & PERF_HES_UPTODATE)
+		return;
+
+	perf_iommu_disable_event(event);
+	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+	hwc->state |= PERF_HES_STOPPED;
+
+	if (hwc->state & PERF_HES_UPTODATE)
+		return;
+
+	config = hwc->config;
+	perf_iommu_read(event);
+	hwc->state |= PERF_HES_UPTODATE;
+}
+
+static int perf_iommu_add(struct perf_event *event, int flags)
+{
+	int retval;
+	struct perf_amd_iommu *perf_iommu =
+			container_of(event->pmu, struct perf_amd_iommu, pmu);
+
+	pr_debug("perf: amd_iommu:perf_iommu_add\n");
+	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	/* request an iommu bank/counter */
+	retval = get_next_avail_iommu_bnk_cntr(perf_iommu);
+	if (retval != -ENOSPC)
+		event->hw.extra_reg.reg = (u16)retval;
+	else
+		return retval;
+
+	if (flags & PERF_EF_START)
+		perf_iommu_start(event, PERF_EF_RELOAD);
+
+	return 0;
+}
+
+static void perf_iommu_del(struct perf_event *event, int flags)
+{
+	struct perf_amd_iommu *perf_iommu =
+			container_of(event->pmu, struct perf_amd_iommu, pmu);
+
+	pr_debug("perf: amd_iommu:perf_iommu_del\n");
+	perf_iommu_stop(event, PERF_EF_UPDATE);
+
+	/* clear the assigned iommu bank/counter */
+	clear_avail_iommu_bnk_cntr(perf_iommu,
+				     _GET_BANK(event),
+				     _GET_CNTR(event));
+
+	perf_event_update_userpage(event);
+}
+
+static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu)
+{
+	struct attribute **attrs;
+	struct attribute_group *attr_group;
+	int i = 0, j;
+
+	while (amd_iommu_v2_event_descs[i].attr.attr.name)
+		i++;
+
+	attr_group = kzalloc(sizeof(struct attribute *)
+		* (i + 1) + sizeof(*attr_group), GFP_KERNEL);
+	if (!attr_group)
+		return -ENOMEM;
+
+	attrs = (struct attribute **)(attr_group + 1);
+	for (j = 0; j < i; j++)
+		attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr;
+
+	attr_group->name = "events";
+	attr_group->attrs = attrs;
+	perf_iommu->events_group = attr_group;
+
+	return 0;
+}
+
+static __init void amd_iommu_pc_exit(void)
+{
+	if (__perf_iommu.events_group != NULL) {
+		kfree(__perf_iommu.events_group);
+		__perf_iommu.events_group = NULL;
+	}
+}
+
+static __init int _init_perf_amd_iommu(
+	struct perf_amd_iommu *perf_iommu, char *name)
+{
+	int ret;
+
+	raw_spin_lock_init(&perf_iommu->lock);
+
+	/* Init format attributes */
+	perf_iommu->format_group = &amd_iommu_format_group;
+
+	/* Init cpumask attributes to only core 0 */
+	cpumask_set_cpu(0, &iommu_cpumask);
+	perf_iommu->cpumask_group = &amd_iommu_cpumask_group;
+
+	/* Init events attributes */
+	if (_init_events_attrs(perf_iommu) != 0)
+		pr_err("perf: amd_iommu: Only support raw events.\n");
+
+	/* Init null attributes */
+	perf_iommu->null_group = NULL;
+	perf_iommu->pmu.attr_groups = perf_iommu->attr_groups;
+
+	ret = perf_pmu_register(&perf_iommu->pmu, name, -1);
+	if (ret) {
+		pr_err("perf: amd_iommu: Failed to initialized.\n");
+		amd_iommu_pc_exit();
+	} else {
+		pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n",
+			amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID),
+			amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID));
+	}
+
+	return ret;
+}
+
+static struct perf_amd_iommu __perf_iommu = {
+	.pmu = {
+		.event_init	= perf_iommu_event_init,
+		.add		= perf_iommu_add,
+		.del		= perf_iommu_del,
+		.start		= perf_iommu_start,
+		.stop		= perf_iommu_stop,
+		.read		= perf_iommu_read,
+	},
+	.max_banks		= 0x00,
+	.max_counters		= 0x00,
+	.cntr_assign_mask	= 0ULL,
+	.format_group		= NULL,
+	.cpumask_group		= NULL,
+	.events_group		= NULL,
+	.null_group		= NULL,
+};
+
+static __init int amd_iommu_pc_init(void)
+{
+	/* Make sure the IOMMU PC resource is available */
+	if (!amd_iommu_pc_supported()) {
+		pr_err("perf: amd_iommu PMU not installed. No support!\n");
+		return -ENODEV;
+	}
+
+	_init_perf_amd_iommu(&__perf_iommu, "amd_iommu");
+
+	return 0;
+}
+
+device_initcall(amd_iommu_pc_init);
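
Two arithmetic details in the new PMU are easy to miss. get_next_avail_iommu_bnk_cntr() packs the chosen bank/counter pair into hw.extra_reg.reg as (bank << 8) | cntr, and the bit it claims in cntr_assign_mask is shift = bank + (bank*3) + cntr, i.e. 4*bank + cntr, which presumes at most four counters per bank. perf_iommu_read() then reconciles the 48-bit hardware counter with a 64-bit delta. A standalone sketch of both (helper names here are illustrative, not kernel code):

	#include <stdint.h>

	#define COUNTER_SHIFT	16	/* 64 minus the 48-bit counter width */

	/* bank/counter pair as stored in hw.extra_reg.reg */
	static inline uint16_t iommu_pack(uint8_t bank, uint8_t cntr)
	{
		return ((uint16_t)bank << 8) | cntr;
	}

	/* slot inside cntr_assign_mask: bank + bank*3 + cntr == 4*bank + cntr */
	static inline int iommu_mask_bit(uint8_t bank, uint8_t cntr)
	{
		return 4 * bank + cntr;
	}

	/*
	 * Shifting both samples up by 16 puts the counter's top bit at
	 * bit 63, so a 48-bit rollover becomes an ordinary 64-bit
	 * wraparound; the logical shift back down yields the true delta.
	 */
	static inline uint64_t iommu_delta(uint64_t prev, uint64_t now)
	{
		return ((now << COUNTER_SHIFT) - (prev << COUNTER_SHIFT))
			>> COUNTER_SHIFT;
	}

For example, iommu_pack(2, 3) yields 0x0203 and iommu_mask_bit(2, 3) yields bit 11; note the 64-bit assign mask caps usable slots at 4*bank + cntr < 64 even though the header below allows PC_MAX_SPEC_BNKS of 64.
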
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.h b/arch/x86/kernel/cpu/perf_event_amd_iommu.h
new file mode 100644
index 000000000000..845d173278e3
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_iommu.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Steven Kinney <Steven.Kinney@amd.com>
+ * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _PERF_EVENT_AMD_IOMMU_H_
+#define _PERF_EVENT_AMD_IOMMU_H_
+
+/* iommu pc mmio region register indexes */
+#define IOMMU_PC_COUNTER_REG			0x00
+#define IOMMU_PC_COUNTER_SRC_REG		0x08
+#define IOMMU_PC_PASID_MATCH_REG		0x10
+#define IOMMU_PC_DOMID_MATCH_REG		0x18
+#define IOMMU_PC_DEVID_MATCH_REG		0x20
+#define IOMMU_PC_COUNTER_REPORT_REG		0x28
+
+/* maximum specified bank/counters */
+#define PC_MAX_SPEC_BNKS			64
+#define PC_MAX_SPEC_CNTRS			16
+
+/* iommu pc reg masks */
+#define IOMMU_BASE_DEVID			0x0000
+
+/* amd_iommu_init.c external support functions */
+extern bool amd_iommu_pc_supported(void);
+
+extern u8 amd_iommu_pc_get_max_banks(u16 devid);
+
+extern u8 amd_iommu_pc_get_max_counters(u16 devid);
+
+extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr,
+			u8 fxn, u64 *value, bool is_write);
+
+#endif /*_PERF_EVENT_AMD_IOMMU_H_*/
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index a9e22073bd56..fbc9210b45bc 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -13,6 +13,7 @@
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/export.h> 14#include <linux/export.h>
15 15
16#include <asm/cpufeature.h>
16#include <asm/hardirq.h> 17#include <asm/hardirq.h>
17#include <asm/apic.h> 18#include <asm/apic.h>
18 19
@@ -190,6 +191,22 @@ struct attribute *snb_events_attrs[] = {
190 NULL, 191 NULL,
191}; 192};
192 193
194static struct event_constraint intel_hsw_event_constraints[] = {
195 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
196 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
197 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
198 INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.* */
199 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
200 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
201 /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
202 INTEL_EVENT_CONSTRAINT(0x08a3, 0x4),
203 /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
204 INTEL_EVENT_CONSTRAINT(0x0ca3, 0x4),
205 /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
206 INTEL_EVENT_CONSTRAINT(0x04a3, 0xf),
207 EVENT_CONSTRAINT_END
208};
209
193static u64 intel_pmu_event_map(int hw_event) 210static u64 intel_pmu_event_map(int hw_event)
194{ 211{
195 return intel_perfmon_event_map[hw_event]; 212 return intel_perfmon_event_map[hw_event];
@@ -872,7 +889,8 @@ static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
872 return true; 889 return true;
873 890
874 /* implicit branch sampling to correct PEBS skid */ 891 /* implicit branch sampling to correct PEBS skid */
875 if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) 892 if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 &&
893 x86_pmu.intel_cap.pebs_format < 2)
876 return true; 894 return true;
877 895
878 return false; 896 return false;
@@ -1167,15 +1185,11 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
1167 cpuc = &__get_cpu_var(cpu_hw_events); 1185 cpuc = &__get_cpu_var(cpu_hw_events);
1168 1186
1169 /* 1187 /*
1170 * Some chipsets need to unmask the LVTPC in a particular spot 1188 * No known reason to not always do late ACK,
1171 * inside the nmi handler. As a result, the unmasking was pushed 1189 * but just in case do it opt-in.
1172 * into all the nmi handlers.
1173 *
1174 * This handler doesn't seem to have any issues with the unmasking
1175 * so it was left at the top.
1176 */ 1190 */
1177 apic_write(APIC_LVTPC, APIC_DM_NMI); 1191 if (!x86_pmu.late_ack)
1178 1192 apic_write(APIC_LVTPC, APIC_DM_NMI);
1179 intel_pmu_disable_all(); 1193 intel_pmu_disable_all();
1180 handled = intel_pmu_drain_bts_buffer(); 1194 handled = intel_pmu_drain_bts_buffer();
1181 status = intel_pmu_get_status(); 1195 status = intel_pmu_get_status();
@@ -1188,8 +1202,12 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
1188again: 1202again:
1189 intel_pmu_ack_status(status); 1203 intel_pmu_ack_status(status);
1190 if (++loops > 100) { 1204 if (++loops > 100) {
1191 WARN_ONCE(1, "perfevents: irq loop stuck!\n"); 1205 static bool warned = false;
1192 perf_event_print_debug(); 1206 if (!warned) {
1207 WARN(1, "perfevents: irq loop stuck!\n");
1208 perf_event_print_debug();
1209 warned = true;
1210 }
1193 intel_pmu_reset(); 1211 intel_pmu_reset();
1194 goto done; 1212 goto done;
1195 } 1213 }
@@ -1235,6 +1253,13 @@ again:
1235 1253
1236done: 1254done:
1237 intel_pmu_enable_all(0); 1255 intel_pmu_enable_all(0);
1256 /*
1257 * Only unmask the NMI after the overflow counters
1258 * have been reset. This avoids spurious NMIs on
1259 * Haswell CPUs.
1260 */
1261 if (x86_pmu.late_ack)
1262 apic_write(APIC_LVTPC, APIC_DM_NMI);
1238 return handled; 1263 return handled;
1239} 1264}
1240 1265
@@ -1425,7 +1450,6 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
1425 if (x86_pmu.event_constraints) { 1450 if (x86_pmu.event_constraints) {
1426 for_each_event_constraint(c, x86_pmu.event_constraints) { 1451 for_each_event_constraint(c, x86_pmu.event_constraints) {
1427 if ((event->hw.config & c->cmask) == c->code) { 1452 if ((event->hw.config & c->cmask) == c->code) {
1428 /* hw.flags zeroed at initialization */
1429 event->hw.flags |= c->flags; 1453 event->hw.flags |= c->flags;
1430 return c; 1454 return c;
1431 } 1455 }
@@ -1473,7 +1497,6 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
1473static void intel_put_event_constraints(struct cpu_hw_events *cpuc, 1497static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
1474 struct perf_event *event) 1498 struct perf_event *event)
1475{ 1499{
1476 event->hw.flags = 0;
1477 intel_put_shared_regs_event_constraints(cpuc, event); 1500 intel_put_shared_regs_event_constraints(cpuc, event);
1478} 1501}
1479 1502
@@ -1646,6 +1669,47 @@ static void core_pmu_enable_all(int added)
1646 } 1669 }
1647} 1670}
1648 1671
1672static int hsw_hw_config(struct perf_event *event)
1673{
1674 int ret = intel_pmu_hw_config(event);
1675
1676 if (ret)
1677 return ret;
1678 if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
1679 return 0;
1680 event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
1681
1682 /*
1683 * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with
1684	 * PEBS or in ANY thread mode. Since the results are nonsensical, forbid
1685 * this combination.
1686 */
1687 if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) &&
1688 ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
1689 event->attr.precise_ip > 0))
1690 return -EOPNOTSUPP;
1691
1692 return 0;
1693}
1694
1695static struct event_constraint counter2_constraint =
1696 EVENT_CONSTRAINT(0, 0x4, 0);
1697
1698static struct event_constraint *
1699hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
1700{
1701 struct event_constraint *c = intel_get_event_constraints(cpuc, event);
1702
1703 /* Handle special quirk on in_tx_checkpointed only in counter 2 */
1704 if (event->hw.config & HSW_IN_TX_CHECKPOINTED) {
1705 if (c->idxmsk64 & (1U << 2))
1706 return &counter2_constraint;
1707 return &emptyconstraint;
1708 }
1709
1710 return c;
1711}
1712
1649PMU_FORMAT_ATTR(event, "config:0-7" ); 1713PMU_FORMAT_ATTR(event, "config:0-7" );
1650PMU_FORMAT_ATTR(umask, "config:8-15" ); 1714PMU_FORMAT_ATTR(umask, "config:8-15" );
1651PMU_FORMAT_ATTR(edge, "config:18" ); 1715PMU_FORMAT_ATTR(edge, "config:18" );
@@ -1653,6 +1717,8 @@ PMU_FORMAT_ATTR(pc, "config:19" );
1653PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */ 1717PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */
1654PMU_FORMAT_ATTR(inv, "config:23" ); 1718PMU_FORMAT_ATTR(inv, "config:23" );
1655PMU_FORMAT_ATTR(cmask, "config:24-31" ); 1719PMU_FORMAT_ATTR(cmask, "config:24-31" );
1720PMU_FORMAT_ATTR(in_tx, "config:32");
1721PMU_FORMAT_ATTR(in_tx_cp, "config:33");
1656 1722
1657static struct attribute *intel_arch_formats_attr[] = { 1723static struct attribute *intel_arch_formats_attr[] = {
1658 &format_attr_event.attr, 1724 &format_attr_event.attr,
@@ -1807,6 +1873,8 @@ static struct attribute *intel_arch3_formats_attr[] = {
1807 &format_attr_any.attr, 1873 &format_attr_any.attr,
1808 &format_attr_inv.attr, 1874 &format_attr_inv.attr,
1809 &format_attr_cmask.attr, 1875 &format_attr_cmask.attr,
1876 &format_attr_in_tx.attr,
1877 &format_attr_in_tx_cp.attr,
1810 1878
1811 &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */ 1879 &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
1812 &format_attr_ldlat.attr, /* PEBS load latency */ 1880 &format_attr_ldlat.attr, /* PEBS load latency */
@@ -1966,6 +2034,15 @@ static __init void intel_nehalem_quirk(void)
1966 } 2034 }
1967} 2035}
1968 2036
2037EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3");
2038EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82");
2039
2040static struct attribute *hsw_events_attrs[] = {
2041 EVENT_PTR(mem_ld_hsw),
2042 EVENT_PTR(mem_st_hsw),
2043 NULL
2044};
2045
1969__init int intel_pmu_init(void) 2046__init int intel_pmu_init(void)
1970{ 2047{
1971 union cpuid10_edx edx; 2048 union cpuid10_edx edx;
@@ -2189,6 +2266,30 @@ __init int intel_pmu_init(void)
2189 break; 2266 break;
2190 2267
2191 2268
2269 case 60: /* Haswell Client */
2270 case 70:
2271 case 71:
2272 case 63:
2273 x86_pmu.late_ack = true;
2274 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids));
2275 memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
2276
2277 intel_pmu_lbr_init_snb();
2278
2279 x86_pmu.event_constraints = intel_hsw_event_constraints;
2280 x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
2281 x86_pmu.extra_regs = intel_snb_extra_regs;
2282 x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
2283 /* all extra regs are per-cpu when HT is on */
2284 x86_pmu.er_flags |= ERF_HAS_RSP_1;
2285 x86_pmu.er_flags |= ERF_NO_HT_SHARING;
2286
2287 x86_pmu.hw_config = hsw_hw_config;
2288 x86_pmu.get_event_constraints = hsw_get_event_constraints;
2289 x86_pmu.cpu_events = hsw_events_attrs;
2290 pr_cont("Haswell events, ");
2291 break;
2292
2192 default: 2293 default:
2193 switch (x86_pmu.version) { 2294 switch (x86_pmu.version) {
2194 case 1: 2295 case 1:
@@ -2227,7 +2328,7 @@ __init int intel_pmu_init(void)
2227 * counter, so do not extend mask to generic counters 2328 * counter, so do not extend mask to generic counters
2228 */ 2329 */
2229 for_each_event_constraint(c, x86_pmu.event_constraints) { 2330 for_each_event_constraint(c, x86_pmu.event_constraints) {
2230 if (c->cmask != X86_RAW_EVENT_MASK 2331 if (c->cmask != FIXED_EVENT_FLAGS
2231 || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) { 2332 || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
2232 continue; 2333 continue;
2233 } 2334 }
@@ -2237,5 +2338,12 @@ __init int intel_pmu_init(void)
2237 } 2338 }
2238 } 2339 }
2239 2340
2341 /* Support full width counters using alternative MSR range */
2342 if (x86_pmu.intel_cap.full_width_write) {
2343 x86_pmu.max_period = x86_pmu.cntval_mask;
2344 x86_pmu.perfctr = MSR_IA32_PMC0;
2345 pr_cont("full-width counters, ");
2346 }
2347
2240 return 0; 2348 return 0;
2241} 2349}
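The Haswell block above ties the new pieces together: in_tx/in_tx_cp occupy raw config bits 32 and 33 (the "config:32"/"config:33" format strings), hsw_hw_config() merges them into the hardware config only when RTM or HLE is present, and it returns -EOPNOTSUPP when they are combined with the ANY-thread bit or with PEBS. A hedged sketch of assembling such a config word; the 0xc0/0x00 event/umask pair is an illustrative choice, not taken from this patch.

#include <stdio.h>
#include <stdint.h>

#define HSW_IN_TX			(1ULL << 32)
#define HSW_IN_TX_CHECKPOINTED		(1ULL << 33)
#define ARCH_PERFMON_EVENTSEL_ANY	(1ULL << 21)

int main(void)
{
	/* event | (umask << 8), an assumed INST_RETIRED-style encoding */
	uint64_t config = 0xc0 | (0x00 << 8);

	config |= HSW_IN_TX;	/* count only inside transactions */

	/*
	 * Mirror of the hsw_hw_config() rule: the TX filters cannot be
	 * combined with ANY-thread mode (or with PEBS, not modelled here).
	 */
	if ((config & (HSW_IN_TX | HSW_IN_TX_CHECKPOINTED)) &&
	    (config & ARCH_PERFMON_EVENTSEL_ANY))
		printf("invalid: IN_TX with ANY-thread\n");
	else
		printf("config = %#018llx\n", (unsigned long long)config);
	return 0;
}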
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 60250f687052..3065c57a63c1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -107,6 +107,19 @@ static u64 precise_store_data(u64 status)
107 return val; 107 return val;
108} 108}
109 109
110static u64 precise_store_data_hsw(u64 status)
111{
112 union perf_mem_data_src dse;
113
114 dse.val = 0;
115 dse.mem_op = PERF_MEM_OP_STORE;
116 dse.mem_lvl = PERF_MEM_LVL_NA;
117 if (status & 1)
118 dse.mem_lvl = PERF_MEM_LVL_L1;
119 /* Nothing else supported. Sorry. */
120 return dse.val;
121}
122
110static u64 load_latency_data(u64 status) 123static u64 load_latency_data(u64 status)
111{ 124{
112 union intel_x86_pebs_dse dse; 125 union intel_x86_pebs_dse dse;
@@ -165,6 +178,22 @@ struct pebs_record_nhm {
165 u64 status, dla, dse, lat; 178 u64 status, dla, dse, lat;
166}; 179};
167 180
181/*
182 * Same as pebs_record_nhm, with two additional fields.
183 */
184struct pebs_record_hsw {
185 struct pebs_record_nhm nhm;
186 /*
187 * Real IP of the event. In the Intel documentation this
188 * is called eventingrip.
189 */
190 u64 real_ip;
191 /*
192 * TSX tuning information field: abort cycles and abort flags.
193 */
194 u64 tsx_tuning;
195};
196
168void init_debug_store_on_cpu(int cpu) 197void init_debug_store_on_cpu(int cpu)
169{ 198{
170 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; 199 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -548,6 +577,42 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
548 EVENT_CONSTRAINT_END 577 EVENT_CONSTRAINT_END
549}; 578};
550 579
580struct event_constraint intel_hsw_pebs_event_constraints[] = {
581 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
582 INTEL_PST_HSW_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
583 INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
584 INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
585 INTEL_UEVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
586 INTEL_UEVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
587 INTEL_UEVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.NEAR_TAKEN */
588 INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.* */
589 /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
590 INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf),
591 /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
592 INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf),
593 INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
594 INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
595 /* MEM_UOPS_RETIRED.SPLIT_STORES */
596 INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf),
597 INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
598 INTEL_PST_HSW_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
599 INTEL_UEVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
600 INTEL_UEVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
601 INTEL_UEVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L3_HIT */
602 /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */
603 INTEL_UEVENT_CONSTRAINT(0x40d1, 0xf),
604 /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */
605 INTEL_UEVENT_CONSTRAINT(0x01d2, 0xf),
606 /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */
607 INTEL_UEVENT_CONSTRAINT(0x02d2, 0xf),
608 /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM */
609 INTEL_UEVENT_CONSTRAINT(0x01d3, 0xf),
610 INTEL_UEVENT_CONSTRAINT(0x04c8, 0xf), /* HLE_RETIRED.Abort */
611 INTEL_UEVENT_CONSTRAINT(0x04c9, 0xf), /* RTM_RETIRED.Abort */
612
613 EVENT_CONSTRAINT_END
614};
615
551struct event_constraint *intel_pebs_constraints(struct perf_event *event) 616struct event_constraint *intel_pebs_constraints(struct perf_event *event)
552{ 617{
553 struct event_constraint *c; 618 struct event_constraint *c;
@@ -588,6 +653,12 @@ void intel_pmu_pebs_disable(struct perf_event *event)
588 struct hw_perf_event *hwc = &event->hw; 653 struct hw_perf_event *hwc = &event->hw;
589 654
590 cpuc->pebs_enabled &= ~(1ULL << hwc->idx); 655 cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
656
657 if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_LDLAT)
658 cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
659 else if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_ST)
660 cpuc->pebs_enabled &= ~(1ULL << 63);
661
591 if (cpuc->enabled) 662 if (cpuc->enabled)
592 wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); 663 wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
593 664
@@ -697,6 +768,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
697 */ 768 */
698 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 769 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
699 struct pebs_record_nhm *pebs = __pebs; 770 struct pebs_record_nhm *pebs = __pebs;
771 struct pebs_record_hsw *pebs_hsw = __pebs;
700 struct perf_sample_data data; 772 struct perf_sample_data data;
701 struct pt_regs regs; 773 struct pt_regs regs;
702 u64 sample_type; 774 u64 sample_type;
@@ -706,7 +778,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
706 return; 778 return;
707 779
708 fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT; 780 fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
709 fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST; 781 fst = event->hw.flags & (PERF_X86_EVENT_PEBS_ST |
782 PERF_X86_EVENT_PEBS_ST_HSW);
710 783
711 perf_sample_data_init(&data, 0, event->hw.last_period); 784 perf_sample_data_init(&data, 0, event->hw.last_period);
712 785
@@ -717,9 +790,6 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
717 * if PEBS-LL or PreciseStore 790 * if PEBS-LL or PreciseStore
718 */ 791 */
719 if (fll || fst) { 792 if (fll || fst) {
720 if (sample_type & PERF_SAMPLE_ADDR)
721 data.addr = pebs->dla;
722
723 /* 793 /*
724 * Use latency for weight (only avail with PEBS-LL) 794 * Use latency for weight (only avail with PEBS-LL)
725 */ 795 */
@@ -732,6 +802,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
732 if (sample_type & PERF_SAMPLE_DATA_SRC) { 802 if (sample_type & PERF_SAMPLE_DATA_SRC) {
733 if (fll) 803 if (fll)
734 data.data_src.val = load_latency_data(pebs->dse); 804 data.data_src.val = load_latency_data(pebs->dse);
805 else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
806 data.data_src.val =
807 precise_store_data_hsw(pebs->dse);
735 else 808 else
736 data.data_src.val = precise_store_data(pebs->dse); 809 data.data_src.val = precise_store_data(pebs->dse);
737 } 810 }
@@ -753,11 +826,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
753 regs.bp = pebs->bp; 826 regs.bp = pebs->bp;
754 regs.sp = pebs->sp; 827 regs.sp = pebs->sp;
755 828
756 if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs)) 829 if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
830 regs.ip = pebs_hsw->real_ip;
831 regs.flags |= PERF_EFLAGS_EXACT;
832 } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
757 regs.flags |= PERF_EFLAGS_EXACT; 833 regs.flags |= PERF_EFLAGS_EXACT;
758 else 834 else
759 regs.flags &= ~PERF_EFLAGS_EXACT; 835 regs.flags &= ~PERF_EFLAGS_EXACT;
760 836
837 if ((event->attr.sample_type & PERF_SAMPLE_ADDR) &&
838 x86_pmu.intel_cap.pebs_format >= 1)
839 data.addr = pebs->dla;
840
761 if (has_branch_stack(event)) 841 if (has_branch_stack(event))
762 data.br_stack = &cpuc->lbr_stack; 842 data.br_stack = &cpuc->lbr_stack;
763 843
@@ -806,35 +886,22 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
806 __intel_pmu_pebs_event(event, iregs, at); 886 __intel_pmu_pebs_event(event, iregs, at);
807} 887}
808 888
809static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) 889static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
890 void *top)
810{ 891{
811 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 892 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
812 struct debug_store *ds = cpuc->ds; 893 struct debug_store *ds = cpuc->ds;
813 struct pebs_record_nhm *at, *top;
814 struct perf_event *event = NULL; 894 struct perf_event *event = NULL;
815 u64 status = 0; 895 u64 status = 0;
816 int bit, n; 896 int bit;
817
818 if (!x86_pmu.pebs_active)
819 return;
820
821 at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
822 top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
823 897
824 ds->pebs_index = ds->pebs_buffer_base; 898 ds->pebs_index = ds->pebs_buffer_base;
825 899
826 n = top - at; 900 for (; at < top; at += x86_pmu.pebs_record_size) {
827 if (n <= 0) 901 struct pebs_record_nhm *p = at;
828 return;
829
830 /*
831 * Should not happen, we program the threshold at 1 and do not
832 * set a reset value.
833 */
834 WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n);
835 902
836 for ( ; at < top; at++) { 903 for_each_set_bit(bit, (unsigned long *)&p->status,
837 for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) { 904 x86_pmu.max_pebs_events) {
838 event = cpuc->events[bit]; 905 event = cpuc->events[bit];
839 if (!test_bit(bit, cpuc->active_mask)) 906 if (!test_bit(bit, cpuc->active_mask))
840 continue; 907 continue;
@@ -857,6 +924,61 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
857 } 924 }
858} 925}
859 926
927static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
928{
929 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
930 struct debug_store *ds = cpuc->ds;
931 struct pebs_record_nhm *at, *top;
932 int n;
933
934 if (!x86_pmu.pebs_active)
935 return;
936
937 at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
938 top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
939
940 ds->pebs_index = ds->pebs_buffer_base;
941
942 n = top - at;
943 if (n <= 0)
944 return;
945
946 /*
947 * Should not happen, we program the threshold at 1 and do not
948 * set a reset value.
949 */
950 WARN_ONCE(n > x86_pmu.max_pebs_events,
951 "Unexpected number of pebs records %d\n", n);
952
953 return __intel_pmu_drain_pebs_nhm(iregs, at, top);
954}
955
956static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs)
957{
958 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
959 struct debug_store *ds = cpuc->ds;
960 struct pebs_record_hsw *at, *top;
961 int n;
962
963 if (!x86_pmu.pebs_active)
964 return;
965
966 at = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base;
967 top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index;
968
969 n = top - at;
970 if (n <= 0)
971 return;
972 /*
973 * Should not happen, we program the threshold at 1 and do not
974 * set a reset value.
975 */
976 WARN_ONCE(n > x86_pmu.max_pebs_events,
977 "Unexpected number of pebs records %d\n", n);
978
979 return __intel_pmu_drain_pebs_nhm(iregs, at, top);
980}
981
860/* 982/*
861 * BTS, PEBS probe and setup 983 * BTS, PEBS probe and setup
862 */ 984 */
@@ -888,6 +1010,12 @@ void intel_ds_init(void)
888 x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; 1010 x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
889 break; 1011 break;
890 1012
1013 case 2:
1014 pr_cont("PEBS fmt2%c, ", pebs_type);
1015 x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
1016 x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw;
1017 break;
1018
891 default: 1019 default:
892 printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); 1020 printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
893 x86_pmu.pebs = 0; 1021 x86_pmu.pebs = 0;
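The split into __intel_pmu_drain_pebs_nhm() works because pebs_record_hsw embeds pebs_record_nhm as its leading fields and the walker advances by x86_pmu.pebs_record_size rather than a fixed struct size. A standalone sketch of that pattern over a fabricated buffer (field values are made up; the nhm register fields follow the kernel struct):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct pebs_record_nhm {
	uint64_t flags, ip;
	uint64_t ax, bx, cx, dx;
	uint64_t si, di, bp, sp;
	uint64_t r8, r9, r10, r11;
	uint64_t r12, r13, r14, r15;
	uint64_t status, dla, dse, lat;
};

struct pebs_record_hsw {
	struct pebs_record_nhm nhm;	/* common prefix, as in the patch */
	uint64_t real_ip;		/* "eventingrip" */
	uint64_t tsx_tuning;
};

/* Walk records by size, touching only the shared leading fields. */
static void drain(void *buf, void *end, size_t record_size)
{
	char *at = buf, *top = end;

	for (; at < top; at += record_size) {
		struct pebs_record_nhm *p = (struct pebs_record_nhm *)at;

		printf("ip=%#llx status=%#llx\n",
		       (unsigned long long)p->ip,
		       (unsigned long long)p->status);
	}
}

int main(void)
{
	struct pebs_record_hsw buf[2] = {
		{ .nhm = { .ip = 0x1000, .status = 0x1 }, .real_ip = 0xffe },
		{ .nhm = { .ip = 0x2000, .status = 0x2 }, .real_ip = 0x1ffe },
	};

	drain(buf, buf + 2, sizeof(buf[0]));
	return 0;
}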
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index d978353c939b..d5be06a5005e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -12,6 +12,16 @@ enum {
12 LBR_FORMAT_LIP = 0x01, 12 LBR_FORMAT_LIP = 0x01,
13 LBR_FORMAT_EIP = 0x02, 13 LBR_FORMAT_EIP = 0x02,
14 LBR_FORMAT_EIP_FLAGS = 0x03, 14 LBR_FORMAT_EIP_FLAGS = 0x03,
15 LBR_FORMAT_EIP_FLAGS2 = 0x04,
16 LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_EIP_FLAGS2,
17};
18
19static enum {
20 LBR_EIP_FLAGS = 1,
21 LBR_TSX = 2,
22} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
23 [LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS,
24 [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
15}; 25};
16 26
17/* 27/*
@@ -56,6 +66,8 @@ enum {
56 LBR_FAR) 66 LBR_FAR)
57 67
58#define LBR_FROM_FLAG_MISPRED (1ULL << 63) 68#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
69#define LBR_FROM_FLAG_IN_TX (1ULL << 62)
70#define LBR_FROM_FLAG_ABORT (1ULL << 61)
59 71
60#define for_each_branch_sample_type(x) \ 72#define for_each_branch_sample_type(x) \
61 for ((x) = PERF_SAMPLE_BRANCH_USER; \ 73 for ((x) = PERF_SAMPLE_BRANCH_USER; \
@@ -81,9 +93,13 @@ enum {
81 X86_BR_JMP = 1 << 9, /* jump */ 93 X86_BR_JMP = 1 << 9, /* jump */
82 X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */ 94 X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */
83 X86_BR_IND_CALL = 1 << 11,/* indirect calls */ 95 X86_BR_IND_CALL = 1 << 11,/* indirect calls */
96 X86_BR_ABORT = 1 << 12,/* transaction abort */
97 X86_BR_IN_TX = 1 << 13,/* in transaction */
98 X86_BR_NO_TX = 1 << 14,/* not in transaction */
84}; 99};
85 100
86#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL) 101#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
102#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
87 103
88#define X86_BR_ANY \ 104#define X86_BR_ANY \
89 (X86_BR_CALL |\ 105 (X86_BR_CALL |\
@@ -95,6 +111,7 @@ enum {
95 X86_BR_JCC |\ 111 X86_BR_JCC |\
96 X86_BR_JMP |\ 112 X86_BR_JMP |\
97 X86_BR_IRQ |\ 113 X86_BR_IRQ |\
114 X86_BR_ABORT |\
98 X86_BR_IND_CALL) 115 X86_BR_IND_CALL)
99 116
100#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY) 117#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
@@ -270,21 +287,31 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
270 287
271 for (i = 0; i < x86_pmu.lbr_nr; i++) { 288 for (i = 0; i < x86_pmu.lbr_nr; i++) {
272 unsigned long lbr_idx = (tos - i) & mask; 289 unsigned long lbr_idx = (tos - i) & mask;
273 u64 from, to, mis = 0, pred = 0; 290 u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
291 int skip = 0;
292 int lbr_flags = lbr_desc[lbr_format];
274 293
275 rdmsrl(x86_pmu.lbr_from + lbr_idx, from); 294 rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
276 rdmsrl(x86_pmu.lbr_to + lbr_idx, to); 295 rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
277 296
278 if (lbr_format == LBR_FORMAT_EIP_FLAGS) { 297 if (lbr_flags & LBR_EIP_FLAGS) {
279 mis = !!(from & LBR_FROM_FLAG_MISPRED); 298 mis = !!(from & LBR_FROM_FLAG_MISPRED);
280 pred = !mis; 299 pred = !mis;
281 from = (u64)((((s64)from) << 1) >> 1); 300 skip = 1;
301 }
302 if (lbr_flags & LBR_TSX) {
303 in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
304 abort = !!(from & LBR_FROM_FLAG_ABORT);
305 skip = 3;
282 } 306 }
307 from = (u64)((((s64)from) << skip) >> skip);
283 308
284 cpuc->lbr_entries[i].from = from; 309 cpuc->lbr_entries[i].from = from;
285 cpuc->lbr_entries[i].to = to; 310 cpuc->lbr_entries[i].to = to;
286 cpuc->lbr_entries[i].mispred = mis; 311 cpuc->lbr_entries[i].mispred = mis;
287 cpuc->lbr_entries[i].predicted = pred; 312 cpuc->lbr_entries[i].predicted = pred;
313 cpuc->lbr_entries[i].in_tx = in_tx;
314 cpuc->lbr_entries[i].abort = abort;
288 cpuc->lbr_entries[i].reserved = 0; 315 cpuc->lbr_entries[i].reserved = 0;
289 } 316 }
290 cpuc->lbr_stack.nr = i; 317 cpuc->lbr_stack.nr = i;
@@ -310,7 +337,7 @@ void intel_pmu_lbr_read(void)
310 * - in case there is no HW filter 337 * - in case there is no HW filter
311 * - in case the HW filter has errata or limitations 338 * - in case the HW filter has errata or limitations
312 */ 339 */
313static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event) 340static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
314{ 341{
315 u64 br_type = event->attr.branch_sample_type; 342 u64 br_type = event->attr.branch_sample_type;
316 int mask = 0; 343 int mask = 0;
@@ -318,11 +345,8 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
318 if (br_type & PERF_SAMPLE_BRANCH_USER) 345 if (br_type & PERF_SAMPLE_BRANCH_USER)
319 mask |= X86_BR_USER; 346 mask |= X86_BR_USER;
320 347
321 if (br_type & PERF_SAMPLE_BRANCH_KERNEL) { 348 if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
322 if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
323 return -EACCES;
324 mask |= X86_BR_KERNEL; 349 mask |= X86_BR_KERNEL;
325 }
326 350
327 /* we ignore BRANCH_HV here */ 351 /* we ignore BRANCH_HV here */
328 352
@@ -337,13 +361,21 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
337 361
338 if (br_type & PERF_SAMPLE_BRANCH_IND_CALL) 362 if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
339 mask |= X86_BR_IND_CALL; 363 mask |= X86_BR_IND_CALL;
364
365 if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
366 mask |= X86_BR_ABORT;
367
368 if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
369 mask |= X86_BR_IN_TX;
370
371 if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
372 mask |= X86_BR_NO_TX;
373
340 /* 374 /*
341 * stash actual user request into reg, it may 375 * stash actual user request into reg, it may
342 * be used by fixup code for some CPU 376 * be used by fixup code for some CPU
343 */ 377 */
344 event->hw.branch_reg.reg = mask; 378 event->hw.branch_reg.reg = mask;
345
346 return 0;
347} 379}
348 380
349/* 381/*
@@ -391,9 +423,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
391 /* 423 /*
392 * setup SW LBR filter 424 * setup SW LBR filter
393 */ 425 */
394 ret = intel_pmu_setup_sw_lbr_filter(event); 426 intel_pmu_setup_sw_lbr_filter(event);
395 if (ret)
396 return ret;
397 427
398 /* 428 /*
399 * setup HW LBR filter, if any 429 * setup HW LBR filter, if any
@@ -415,7 +445,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
415 * decoded (e.g., text page not present), then X86_BR_NONE is 445 * decoded (e.g., text page not present), then X86_BR_NONE is
416 * returned. 446 * returned.
417 */ 447 */
418static int branch_type(unsigned long from, unsigned long to) 448static int branch_type(unsigned long from, unsigned long to, int abort)
419{ 449{
420 struct insn insn; 450 struct insn insn;
421 void *addr; 451 void *addr;
@@ -435,6 +465,9 @@ static int branch_type(unsigned long from, unsigned long to)
435 if (from == 0 || to == 0) 465 if (from == 0 || to == 0)
436 return X86_BR_NONE; 466 return X86_BR_NONE;
437 467
468 if (abort)
469 return X86_BR_ABORT | to_plm;
470
438 if (from_plm == X86_BR_USER) { 471 if (from_plm == X86_BR_USER) {
439 /* 472 /*
440 * can happen if measuring at the user level only 473 * can happen if measuring at the user level only
@@ -581,7 +614,13 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
581 from = cpuc->lbr_entries[i].from; 614 from = cpuc->lbr_entries[i].from;
582 to = cpuc->lbr_entries[i].to; 615 to = cpuc->lbr_entries[i].to;
583 616
584 type = branch_type(from, to); 617 type = branch_type(from, to, cpuc->lbr_entries[i].abort);
618 if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
619 if (cpuc->lbr_entries[i].in_tx)
620 type |= X86_BR_IN_TX;
621 else
622 type |= X86_BR_NO_TX;
623 }
585 624
586 /* if type does not correspond, then discard */ 625 /* if type does not correspond, then discard */
587 if (type == X86_BR_NONE || (br_sel & type) != type) { 626 if (type == X86_BR_NONE || (br_sel & type) != type) {
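The widened read path above packs up to three flag bits into the top of the LBR from address: MISPRED in bit 63 for LBR_FORMAT_EIP_FLAGS, plus IN_TX (bit 62) and ABORT (bit 61) for LBR_FORMAT_EIP_FLAGS2, hence skip = 1 or 3. The arithmetic shift pair strips the flags while sign-extending kernel addresses back to canonical form. A sketch with a made-up user-space address:

#include <stdio.h>
#include <stdint.h>

static uint64_t strip_lbr_flags(uint64_t from, int skip)
{
	/*
	 * Shift the flag bits out, then shift back arithmetically so a
	 * kernel address (top bits all ones) is sign-extended rather
	 * than zeroed.
	 */
	return (uint64_t)(((int64_t)from << skip) >> skip);
}

int main(void)
{
	uint64_t addr = 0x00007f0012345678ULL;	/* made-up user-space IP */
	uint64_t fmt1 = addr | (1ULL << 63);	/* MISPRED */
	uint64_t fmt2 = addr | (7ULL << 61);	/* MISPRED|IN_TX|ABORT */

	printf("EIP_FLAGS  -> %#llx\n",
	       (unsigned long long)strip_lbr_flags(fmt1, 1));
	printf("EIP_FLAGS2 -> %#llx\n",
	       (unsigned long long)strip_lbr_flags(fmt2, 3));
	return 0;
}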
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 52441a2af538..9dd99751ccf9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -536,7 +536,7 @@ __snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *eve
536 if (!uncore_box_is_fake(box)) 536 if (!uncore_box_is_fake(box))
537 reg1->alloc |= alloc; 537 reg1->alloc |= alloc;
538 538
539 return 0; 539 return NULL;
540fail: 540fail:
541 for (; i >= 0; i--) { 541 for (; i >= 0; i--) {
542 if (alloc & (0x1 << i)) 542 if (alloc & (0x1 << i))
@@ -644,7 +644,7 @@ snbep_pcu_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
644 (!uncore_box_is_fake(box) && reg1->alloc)) 644 (!uncore_box_is_fake(box) && reg1->alloc))
645 return NULL; 645 return NULL;
646again: 646again:
647 mask = 0xff << (idx * 8); 647 mask = 0xffULL << (idx * 8);
648 raw_spin_lock_irqsave(&er->lock, flags); 648 raw_spin_lock_irqsave(&er->lock, flags);
649 if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) || 649 if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) ||
650 !((config1 ^ er->config) & mask)) { 650 !((config1 ^ er->config) & mask)) {
@@ -1923,7 +1923,7 @@ static u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modif
1923{ 1923{
1924 struct hw_perf_event *hwc = &event->hw; 1924 struct hw_perf_event *hwc = &event->hw;
1925 struct hw_perf_event_extra *reg1 = &hwc->extra_reg; 1925 struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
1926 int idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8); 1926 u64 idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8);
1927 u64 config = reg1->config; 1927 u64 config = reg1->config;
1928 1928
1929 /* get the non-shared control bits and shift them */ 1929 /* get the non-shared control bits and shift them */
@@ -2723,15 +2723,16 @@ static void uncore_put_event_constraint(struct intel_uncore_box *box, struct per
2723static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n) 2723static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
2724{ 2724{
2725 unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; 2725 unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
2726 struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX]; 2726 struct event_constraint *c;
2727 int i, wmin, wmax, ret = 0; 2727 int i, wmin, wmax, ret = 0;
2728 struct hw_perf_event *hwc; 2728 struct hw_perf_event *hwc;
2729 2729
2730 bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX); 2730 bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
2731 2731
2732 for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) { 2732 for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
2733 hwc = &box->event_list[i]->hw;
2733 c = uncore_get_event_constraint(box, box->event_list[i]); 2734 c = uncore_get_event_constraint(box, box->event_list[i]);
2734 constraints[i] = c; 2735 hwc->constraint = c;
2735 wmin = min(wmin, c->weight); 2736 wmin = min(wmin, c->weight);
2736 wmax = max(wmax, c->weight); 2737 wmax = max(wmax, c->weight);
2737 } 2738 }
@@ -2739,7 +2740,7 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
2739 /* fastpath, try to reuse previous register */ 2740 /* fastpath, try to reuse previous register */
2740 for (i = 0; i < n; i++) { 2741 for (i = 0; i < n; i++) {
2741 hwc = &box->event_list[i]->hw; 2742 hwc = &box->event_list[i]->hw;
2742 c = constraints[i]; 2743 c = hwc->constraint;
2743 2744
2744 /* never assigned */ 2745 /* never assigned */
2745 if (hwc->idx == -1) 2746 if (hwc->idx == -1)
@@ -2759,7 +2760,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
2759 } 2760 }
2760 /* slow path */ 2761 /* slow path */
2761 if (i != n) 2762 if (i != n)
2762 ret = perf_assign_events(constraints, n, wmin, wmax, assign); 2763 ret = perf_assign_events(box->event_list, n,
2764 wmin, wmax, assign);
2763 2765
2764 if (!assign || ret) { 2766 if (!assign || ret) {
2765 for (i = 0; i < n; i++) 2767 for (i = 0; i < n; i++)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index f9528917f6e8..47b3d00c9d89 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -337,10 +337,10 @@
337 NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK) 337 NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK)
338 338
339#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 11) - 1) | (1 << 23)) 339#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 11) - 1) | (1 << 23))
340#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7 << (11 + 3 * (n))) 340#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (11 + 3 * (n)))
341 341
342#define WSMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 12) - 1) | (1 << 24)) 342#define WSMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 12) - 1) | (1 << 24))
343#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7 << (12 + 3 * (n))) 343#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (12 + 3 * (n)))
344 344
345/* 345/*
346 * use bits 9~13 to select the event if the 7th bit is not set, 346 * use bits 9~13 to select the event if the 7th bit is not set,
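The ULL suffixes in the two FVC event masks matter because the mask covers bits shift..shift+2 with shift = 11 + 3*n (12 + 3*n on Westmere-EX), so the upper event slots reach bit 31 and beyond, which a plain 32-bit int constant cannot express without overflow. A sketch using the Nehalem-EX form:

#include <stdio.h>

#define FVC_EVENT_MASK_U64(n)	(0x7ULL << (11 + 3 * (n)))

int main(void)
{
	int n;

	for (n = 0; n <= 7; n++) {
		int shift = 11 + 3 * n;

		printf("n=%d shift=%2d mask=%#018llx%s\n", n, shift,
		       (unsigned long long)FVC_EVENT_MASK_U64(n),
		       shift + 2 >= 31 ? "  (needs the ULL form)" : "");
	}
	return 0;
}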
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 60308053fdb2..0920212e6159 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -14,6 +14,7 @@
14#include <linux/kprobes.h> 14#include <linux/kprobes.h>
15#include <linux/kdebug.h> 15#include <linux/kdebug.h>
16#include <linux/nmi.h> 16#include <linux/nmi.h>
17#include <linux/debugfs.h>
17#include <linux/delay.h> 18#include <linux/delay.h>
18#include <linux/hardirq.h> 19#include <linux/hardirq.h>
19#include <linux/slab.h> 20#include <linux/slab.h>
@@ -29,6 +30,9 @@
29#include <asm/nmi.h> 30#include <asm/nmi.h>
30#include <asm/x86_init.h> 31#include <asm/x86_init.h>
31 32
33#define CREATE_TRACE_POINTS
34#include <trace/events/nmi.h>
35
32struct nmi_desc { 36struct nmi_desc {
33 spinlock_t lock; 37 spinlock_t lock;
34 struct list_head head; 38 struct list_head head;
@@ -82,6 +86,15 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
82 86
83#define nmi_to_desc(type) (&nmi_desc[type]) 87#define nmi_to_desc(type) (&nmi_desc[type])
84 88
89static u64 nmi_longest_ns = 1 * NSEC_PER_MSEC;
90static int __init nmi_warning_debugfs(void)
91{
92 debugfs_create_u64("nmi_longest_ns", 0644,
93 arch_debugfs_dir, &nmi_longest_ns);
94 return 0;
95}
96fs_initcall(nmi_warning_debugfs);
97
85static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) 98static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
86{ 99{
87 struct nmi_desc *desc = nmi_to_desc(type); 100 struct nmi_desc *desc = nmi_to_desc(type);
@@ -96,8 +109,27 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2
96 * can be latched at any given time. Walk the whole list 109 * can be latched at any given time. Walk the whole list
97 * to handle those situations. 110 * to handle those situations.
98 */ 111 */
99 list_for_each_entry_rcu(a, &desc->head, list) 112 list_for_each_entry_rcu(a, &desc->head, list) {
100 handled += a->handler(type, regs); 113 u64 before, delta, whole_msecs;
114 int decimal_msecs, thishandled;
115
116 before = local_clock();
117 thishandled = a->handler(type, regs);
118 handled += thishandled;
119 delta = local_clock() - before;
120 trace_nmi_handler(a->handler, (int)delta, thishandled);
121
122 if (delta < nmi_longest_ns)
123 continue;
124
125 nmi_longest_ns = delta;
126 whole_msecs = do_div(delta, (1000 * 1000));
127 decimal_msecs = do_div(delta, 1000) % 1000;
128 printk_ratelimited(KERN_INFO
129 "INFO: NMI handler (%ps) took too long to run: "
130 "%lld.%03d msecs\n", a->handler, whole_msecs,
131 decimal_msecs);
132 }
101 133
102 rcu_read_unlock(); 134 rcu_read_unlock();
103 135
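The new timing path prints a handler's runtime as whole and fractional milliseconds. Below is a userspace analogue of that conversion using plain division; in the kernel, do_div() divides its 64-bit first argument in place and returns the remainder, so quotient and remainder come from a single call and the order of operations matters:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t delta_ns = 2345678;	/* example handler runtime in ns */

	uint64_t whole_msecs = delta_ns / 1000000;		/* 2   */
	unsigned int decimal_msecs = (delta_ns / 1000) % 1000;	/* 345 */

	printf("INFO: NMI handler took too long to run: %llu.%03u msecs\n",
	       (unsigned long long)whole_msecs, decimal_msecs);
	return 0;
}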
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 69562992e457..cf913587d4dd 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -43,12 +43,6 @@
43 43
44#include <asm/sigframe.h> 44#include <asm/sigframe.h>
45 45
46#ifdef CONFIG_X86_32
47# define FIX_EFLAGS (__FIX_EFLAGS | X86_EFLAGS_RF)
48#else
49# define FIX_EFLAGS __FIX_EFLAGS
50#endif
51
52#define COPY(x) do { \ 46#define COPY(x) do { \
53 get_user_ex(regs->x, &sc->x); \ 47 get_user_ex(regs->x, &sc->x); \
54} while (0) 48} while (0)
@@ -668,15 +662,17 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
668 if (!failed) { 662 if (!failed) {
669 /* 663 /*
670 * Clear the direction flag as per the ABI for function entry. 664 * Clear the direction flag as per the ABI for function entry.
671 */ 665 *
672 regs->flags &= ~X86_EFLAGS_DF; 666 * Clear RF when entering the signal handler, because
673 /* 667 * it might disable possible debug exception from the
668 * signal handler.
669 *
674 * Clear TF when entering the signal handler, but 670 * Clear TF when entering the signal handler, but
675 * notify any tracer that was single-stepping it. 671 * notify any tracer that was single-stepping it.
676 * The tracer may want to single-step inside the 672 * The tracer may want to single-step inside the
677 * handler too. 673 * handler too.
678 */ 674 */
679 regs->flags &= ~X86_EFLAGS_TF; 675 regs->flags &= ~(X86_EFLAGS_DF|X86_EFLAGS_RF|X86_EFLAGS_TF);
680 } 676 }
681 signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP)); 677 signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP));
682} 678}
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index bf51abb78dee..7acbf351e9af 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -99,7 +99,7 @@ struct ivhd_header {
99 u64 mmio_phys; 99 u64 mmio_phys;
100 u16 pci_seg; 100 u16 pci_seg;
101 u16 info; 101 u16 info;
102 u32 reserved; 102 u32 efr;
103} __attribute__((packed)); 103} __attribute__((packed));
104 104
105/* 105/*
@@ -154,6 +154,7 @@ bool amd_iommu_iotlb_sup __read_mostly = true;
154u32 amd_iommu_max_pasids __read_mostly = ~0; 154u32 amd_iommu_max_pasids __read_mostly = ~0;
155 155
156bool amd_iommu_v2_present __read_mostly; 156bool amd_iommu_v2_present __read_mostly;
157bool amd_iommu_pc_present __read_mostly;
157 158
158bool amd_iommu_force_isolation __read_mostly; 159bool amd_iommu_force_isolation __read_mostly;
159 160
@@ -369,23 +370,23 @@ static void iommu_disable(struct amd_iommu *iommu)
369 * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in 370 * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
370 * the system has one. 371 * the system has one.
371 */ 372 */
372static u8 __iomem * __init iommu_map_mmio_space(u64 address) 373static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end)
373{ 374{
374 if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu")) { 375 if (!request_mem_region(address, end, "amd_iommu")) {
375 pr_err("AMD-Vi: Can not reserve memory region %llx for mmio\n", 376 pr_err("AMD-Vi: Can not reserve memory region %llx-%llx for mmio\n",
376 address); 377 address, end);
377 pr_err("AMD-Vi: This is a BIOS bug. Please contact your hardware vendor\n"); 378 pr_err("AMD-Vi: This is a BIOS bug. Please contact your hardware vendor\n");
378 return NULL; 379 return NULL;
379 } 380 }
380 381
381 return (u8 __iomem *)ioremap_nocache(address, MMIO_REGION_LENGTH); 382 return (u8 __iomem *)ioremap_nocache(address, end);
382} 383}
383 384
384static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) 385static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
385{ 386{
386 if (iommu->mmio_base) 387 if (iommu->mmio_base)
387 iounmap(iommu->mmio_base); 388 iounmap(iommu->mmio_base);
388 release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH); 389 release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end);
389} 390}
390 391
391/**************************************************************************** 392/****************************************************************************
@@ -1085,7 +1086,18 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
1085 iommu->cap_ptr = h->cap_ptr; 1086 iommu->cap_ptr = h->cap_ptr;
1086 iommu->pci_seg = h->pci_seg; 1087 iommu->pci_seg = h->pci_seg;
1087 iommu->mmio_phys = h->mmio_phys; 1088 iommu->mmio_phys = h->mmio_phys;
1088 iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys); 1089
1090 /* Check if IVHD EFR contains proper max banks/counters */
1091 if ((h->efr != 0) &&
1092 ((h->efr & (0xF << 13)) != 0) &&
1093 ((h->efr & (0x3F << 17)) != 0)) {
1094 iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1095 } else {
1096 iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1097 }
1098
1099 iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys,
1100 iommu->mmio_phys_end);
1089 if (!iommu->mmio_base) 1101 if (!iommu->mmio_base)
1090 return -ENOMEM; 1102 return -ENOMEM;
1091 1103
@@ -1160,6 +1172,33 @@ static int __init init_iommu_all(struct acpi_table_header *table)
1160 return 0; 1172 return 0;
1161} 1173}
1162 1174
1175
1176static void init_iommu_perf_ctr(struct amd_iommu *iommu)
1177{
1178 u64 val = 0xabcd, val2 = 0;
1179
1180 if (!iommu_feature(iommu, FEATURE_PC))
1181 return;
1182
1183 amd_iommu_pc_present = true;
1184
1185 /* Check if the performance counters can be written to */
1186 if ((0 != amd_iommu_pc_get_set_reg_val(0, 0, 0, 0, &val, true)) ||
1187 (0 != amd_iommu_pc_get_set_reg_val(0, 0, 0, 0, &val2, false)) ||
1188 (val != val2)) {
1189 pr_err("AMD-Vi: Unable to write to IOMMU perf counter.\n");
1190 amd_iommu_pc_present = false;
1191 return;
1192 }
1193
1194 pr_info("AMD-Vi: IOMMU performance counters supported\n");
1195
1196 val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET);
1197 iommu->max_banks = (u8) ((val >> 12) & 0x3f);
1198 iommu->max_counters = (u8) ((val >> 7) & 0xf);
1199}
1200
1201
1163static int iommu_init_pci(struct amd_iommu *iommu) 1202static int iommu_init_pci(struct amd_iommu *iommu)
1164{ 1203{
1165 int cap_ptr = iommu->cap_ptr; 1204 int cap_ptr = iommu->cap_ptr;
@@ -1226,6 +1265,8 @@ static int iommu_init_pci(struct amd_iommu *iommu)
1226 if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) 1265 if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
1227 amd_iommu_np_cache = true; 1266 amd_iommu_np_cache = true;
1228 1267
1268 init_iommu_perf_ctr(iommu);
1269
1229 if (is_rd890_iommu(iommu->dev)) { 1270 if (is_rd890_iommu(iommu->dev)) {
1230 int i, j; 1271 int i, j;
1231 1272
@@ -1278,7 +1319,7 @@ static void print_iommu_info(void)
1278 if (iommu_feature(iommu, (1ULL << i))) 1319 if (iommu_feature(iommu, (1ULL << i)))
1279 pr_cont(" %s", feat_str[i]); 1320 pr_cont(" %s", feat_str[i]);
1280 } 1321 }
1281 pr_cont("\n"); 1322 pr_cont("\n");
1282 } 1323 }
1283 } 1324 }
1284 if (irq_remapping_enabled) 1325 if (irq_remapping_enabled)
@@ -2232,3 +2273,84 @@ bool amd_iommu_v2_supported(void)
2232 return amd_iommu_v2_present; 2273 return amd_iommu_v2_present;
2233} 2274}
2234EXPORT_SYMBOL(amd_iommu_v2_supported); 2275EXPORT_SYMBOL(amd_iommu_v2_supported);
2276
2277/****************************************************************************
2278 *
2279 * IOMMU EFR Performance Counter support. This code provides
2280 * access to the IOMMU PC functionality.
2281 *
2282 ****************************************************************************/
2283
2284u8 amd_iommu_pc_get_max_banks(u16 devid)
2285{
2286 struct amd_iommu *iommu;
2287 u8 ret = 0;
2288
2289 /* locate the iommu governing the devid */
2290 iommu = amd_iommu_rlookup_table[devid];
2291 if (iommu)
2292 ret = iommu->max_banks;
2293
2294 return ret;
2295}
2296EXPORT_SYMBOL(amd_iommu_pc_get_max_banks);
2297
2298bool amd_iommu_pc_supported(void)
2299{
2300 return amd_iommu_pc_present;
2301}
2302EXPORT_SYMBOL(amd_iommu_pc_supported);
2303
2304u8 amd_iommu_pc_get_max_counters(u16 devid)
2305{
2306 struct amd_iommu *iommu;
2307 u8 ret = 0;
2308
2309 /* locate the iommu governing the devid */
2310 iommu = amd_iommu_rlookup_table[devid];
2311 if (iommu)
2312 ret = iommu->max_counters;
2313
2314 return ret;
2315}
2316EXPORT_SYMBOL(amd_iommu_pc_get_max_counters);
2317
2318int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn,
2319 u64 *value, bool is_write)
2320{
2321 struct amd_iommu *iommu;
2322 u32 offset;
2323 u32 max_offset_lim;
2324
2325 /* Make sure the IOMMU PC resource is available */
2326 if (!amd_iommu_pc_present)
2327 return -ENODEV;
2328
2329 /* Locate the iommu associated with the device ID */
2330 iommu = amd_iommu_rlookup_table[devid];
2331
2332 /* Check for valid iommu and pc register indexing */
2333 if (WARN_ON((iommu == NULL) || (fxn > 0x28) || (fxn & 7)))
2334 return -ENODEV;
2335
2336 offset = (u32)(((0x40|bank) << 12) | (cntr << 8) | fxn);
2337
2338 /* Limit the offset to the hw defined mmio region aperture */
2339 max_offset_lim = (u32)(((0x40|iommu->max_banks) << 12) |
2340 (iommu->max_counters << 8) | 0x28);
2341 if ((offset < MMIO_CNTR_REG_OFFSET) ||
2342 (offset > max_offset_lim))
2343 return -EINVAL;
2344
2345 if (is_write) {
2346 writel((u32)*value, iommu->mmio_base + offset);
2347 writel((*value >> 32), iommu->mmio_base + offset + 4);
2348 } else {
2349 *value = readl(iommu->mmio_base + offset + 4);
2350 *value <<= 32;
2351 *value = readl(iommu->mmio_base + offset);
2352 }
2353
2354 return 0;
2355}
2356EXPORT_SYMBOL(amd_iommu_pc_get_set_reg_val);
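amd_iommu_pc_get_set_reg_val() reaches each 64-bit performance counter register through two 32-bit MMIO accesses, low half at offset and high half at offset + 4. A hedged sketch of that split access pattern, with ordinary memory standing in for the MMIO aperture:

#include <stdio.h>
#include <stdint.h>

static void write64_split(volatile uint32_t *reg, uint64_t val)
{
	reg[0] = (uint32_t)val;		/* low half at offset      */
	reg[1] = (uint32_t)(val >> 32);	/* high half at offset + 4 */
}

static uint64_t read64_split(const volatile uint32_t *reg)
{
	uint64_t val = (uint64_t)reg[1] << 32;	/* high half first */

	val |= reg[0];		/* then OR in the low half */
	return val;
}

int main(void)
{
	uint32_t fake_mmio[2];	/* stands in for iommu->mmio_base + offset */

	write64_split(fake_mmio, 0x123456789abcdef0ULL);
	printf("read back: %#llx\n",
	       (unsigned long long)read64_split(fake_mmio));
	return 0;
}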
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index c294961bdd36..95ed6deae47f 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -56,6 +56,13 @@ extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
56extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid); 56extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid);
57extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev); 57extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev);
58 58
59/* IOMMU Performance Counter functions */
60extern bool amd_iommu_pc_supported(void);
61extern u8 amd_iommu_pc_get_max_banks(u16 devid);
62extern u8 amd_iommu_pc_get_max_counters(u16 devid);
63extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn,
64 u64 *value, bool is_write);
65
59#define PPR_SUCCESS 0x0 66#define PPR_SUCCESS 0x0
60#define PPR_INVALID 0x1 67#define PPR_INVALID 0x1
61#define PPR_FAILURE 0xf 68#define PPR_FAILURE 0xf
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 0285a215df16..e400fbe411de 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -38,9 +38,6 @@
38#define ALIAS_TABLE_ENTRY_SIZE 2 38#define ALIAS_TABLE_ENTRY_SIZE 2
39#define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *)) 39#define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *))
40 40
41/* Length of the MMIO region for the AMD IOMMU */
42#define MMIO_REGION_LENGTH 0x4000
43
44/* Capability offsets used by the driver */ 41/* Capability offsets used by the driver */
45#define MMIO_CAP_HDR_OFFSET 0x00 42#define MMIO_CAP_HDR_OFFSET 0x00
46#define MMIO_RANGE_OFFSET 0x0c 43#define MMIO_RANGE_OFFSET 0x0c
@@ -78,6 +75,10 @@
78#define MMIO_STATUS_OFFSET 0x2020 75#define MMIO_STATUS_OFFSET 0x2020
79#define MMIO_PPR_HEAD_OFFSET 0x2030 76#define MMIO_PPR_HEAD_OFFSET 0x2030
80#define MMIO_PPR_TAIL_OFFSET 0x2038 77#define MMIO_PPR_TAIL_OFFSET 0x2038
78#define MMIO_CNTR_CONF_OFFSET 0x4000
79#define MMIO_CNTR_REG_OFFSET 0x40000
80#define MMIO_REG_END_OFFSET 0x80000
81
81 82
82 83
83/* Extended Feature Bits */ 84/* Extended Feature Bits */
@@ -507,6 +508,10 @@ struct amd_iommu {
507 508
508 /* physical address of MMIO space */ 509 /* physical address of MMIO space */
509 u64 mmio_phys; 510 u64 mmio_phys;
511
512 /* physical end address of MMIO space */
513 u64 mmio_phys_end;
514
510 /* virtual address of MMIO space */ 515 /* virtual address of MMIO space */
511 u8 __iomem *mmio_base; 516 u8 __iomem *mmio_base;
512 517
@@ -584,6 +589,10 @@ struct amd_iommu {
584 589
585 /* The l2 indirect registers */ 590 /* The l2 indirect registers */
586 u32 stored_l2[0x83]; 591 u32 stored_l2[0x83];
592
593 /* The maximum PC banks and counters/bank (PCSup=1) */
594 u8 max_banks;
595 u8 max_counters;
587}; 596};
588 597
589struct devid_map { 598struct devid_map {
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c5b6dbf9c2fc..50b3efd14d29 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -73,13 +73,18 @@ struct perf_raw_record {
73 * 73 *
74 * support for mispred, predicted is optional. In case it 74 * support for mispred, predicted is optional. In case it
75 * is not supported mispred = predicted = 0. 75 * is not supported mispred = predicted = 0.
76 *
77 * in_tx: running in a hardware transaction
78 * abort: aborting a hardware transaction
76 */ 79 */
77struct perf_branch_entry { 80struct perf_branch_entry {
78 __u64 from; 81 __u64 from;
79 __u64 to; 82 __u64 to;
80 __u64 mispred:1, /* target mispredicted */ 83 __u64 mispred:1, /* target mispredicted */
81 predicted:1,/* target predicted */ 84 predicted:1,/* target predicted */
82 reserved:62; 85 in_tx:1, /* in transaction */
86 abort:1, /* transaction abort */
87 reserved:60;
83}; 88};
84 89
85/* 90/*
@@ -113,6 +118,8 @@ struct hw_perf_event_extra {
113 int idx; /* index in shared_regs->regs[] */ 118 int idx; /* index in shared_regs->regs[] */
114}; 119};
115 120
121struct event_constraint;
122
116/** 123/**
117 * struct hw_perf_event - performance event hardware details: 124 * struct hw_perf_event - performance event hardware details:
118 */ 125 */
@@ -131,6 +138,8 @@ struct hw_perf_event {
131 138
132 struct hw_perf_event_extra extra_reg; 139 struct hw_perf_event_extra extra_reg;
133 struct hw_perf_event_extra branch_reg; 140 struct hw_perf_event_extra branch_reg;
141
142 struct event_constraint *constraint;
134 }; 143 };
135 struct { /* software */ 144 struct { /* software */
136 struct hrtimer hrtimer; 145 struct hrtimer hrtimer;
@@ -188,12 +197,13 @@ struct pmu {
188 197
189 struct device *dev; 198 struct device *dev;
190 const struct attribute_group **attr_groups; 199 const struct attribute_group **attr_groups;
191 char *name; 200 const char *name;
192 int type; 201 int type;
193 202
194 int * __percpu pmu_disable_count; 203 int * __percpu pmu_disable_count;
195 struct perf_cpu_context * __percpu pmu_cpu_context; 204 struct perf_cpu_context * __percpu pmu_cpu_context;
196 int task_ctx_nr; 205 int task_ctx_nr;
206 int hrtimer_interval_ms;
197 207
198 /* 208 /*
199 * Fully disable/enable this PMU, can be used to protect from the PMI 209 * Fully disable/enable this PMU, can be used to protect from the PMI
@@ -500,8 +510,9 @@ struct perf_cpu_context {
500 struct perf_event_context *task_ctx; 510 struct perf_event_context *task_ctx;
501 int active_oncpu; 511 int active_oncpu;
502 int exclusive; 512 int exclusive;
513 struct hrtimer hrtimer;
514 ktime_t hrtimer_interval;
503 struct list_head rotation_list; 515 struct list_head rotation_list;
504 int jiffies_interval;
505 struct pmu *unique_pmu; 516 struct pmu *unique_pmu;
506 struct perf_cgroup *cgrp; 517 struct perf_cgroup *cgrp;
507}; 518};
@@ -517,7 +528,7 @@ struct perf_output_handle {
517 528
518#ifdef CONFIG_PERF_EVENTS 529#ifdef CONFIG_PERF_EVENTS
519 530
520extern int perf_pmu_register(struct pmu *pmu, char *name, int type); 531extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
521extern void perf_pmu_unregister(struct pmu *pmu); 532extern void perf_pmu_unregister(struct pmu *pmu);
522 533
523extern int perf_num_counters(void); 534extern int perf_num_counters(void);
@@ -695,10 +706,17 @@ static inline void perf_callchain_store(struct perf_callchain_entry *entry, u64
695extern int sysctl_perf_event_paranoid; 706extern int sysctl_perf_event_paranoid;
696extern int sysctl_perf_event_mlock; 707extern int sysctl_perf_event_mlock;
697extern int sysctl_perf_event_sample_rate; 708extern int sysctl_perf_event_sample_rate;
709extern int sysctl_perf_cpu_time_max_percent;
710
711extern void perf_sample_event_took(u64 sample_len_ns);
698 712
699extern int perf_proc_update_handler(struct ctl_table *table, int write, 713extern int perf_proc_update_handler(struct ctl_table *table, int write,
700 void __user *buffer, size_t *lenp, 714 void __user *buffer, size_t *lenp,
701 loff_t *ppos); 715 loff_t *ppos);
716extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
717 void __user *buffer, size_t *lenp,
718 loff_t *ppos);
719
702 720
703static inline bool perf_paranoid_tracepoint_raw(void) 721static inline bool perf_paranoid_tracepoint_raw(void)
704{ 722{
@@ -742,6 +760,7 @@ extern unsigned int perf_output_skip(struct perf_output_handle *handle,
742 unsigned int len); 760 unsigned int len);
743extern int perf_swevent_get_recursion_context(void); 761extern int perf_swevent_get_recursion_context(void);
744extern void perf_swevent_put_recursion_context(int rctx); 762extern void perf_swevent_put_recursion_context(int rctx);
763extern u64 perf_swevent_set_period(struct perf_event *event);
745extern void perf_event_enable(struct perf_event *event); 764extern void perf_event_enable(struct perf_event *event);
746extern void perf_event_disable(struct perf_event *event); 765extern void perf_event_disable(struct perf_event *event);
747extern int __perf_event_disable(void *info); 766extern int __perf_event_disable(void *info);
@@ -781,6 +800,7 @@ static inline void perf_event_fork(struct task_struct *tsk) { }
781static inline void perf_event_init(void) { } 800static inline void perf_event_init(void) { }
782static inline int perf_swevent_get_recursion_context(void) { return -1; } 801static inline int perf_swevent_get_recursion_context(void) { return -1; }
783static inline void perf_swevent_put_recursion_context(int rctx) { } 802static inline void perf_swevent_put_recursion_context(int rctx) { }
803static inline u64 perf_swevent_set_period(struct perf_event *event) { return 0; }
784static inline void perf_event_enable(struct perf_event *event) { } 804static inline void perf_event_enable(struct perf_event *event) { }
785static inline void perf_event_disable(struct perf_event *event) { } 805static inline void perf_event_disable(struct perf_event *event) { }
786static inline int __perf_event_disable(void *info) { return -1; } 806static inline int __perf_event_disable(void *info) { return -1; }
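The perf_branch_entry change spends two of the formerly reserved flag bits on in_tx/abort while keeping the record at three 64-bit words, so existing consumers see the same size. A standalone sketch of the layout (the field order mirrors the header; addresses are made up):

#include <stdio.h>
#include <stdint.h>
#include <assert.h>

struct branch_entry {
	uint64_t from;
	uint64_t to;
	uint64_t mispred:1,	/* target mispredicted */
		 predicted:1,	/* target predicted */
		 in_tx:1,	/* in transaction */
		 abort:1,	/* transaction abort */
		 reserved:60;
};

int main(void)
{
	struct branch_entry e = {
		.from = 0x400123, .to = 0x400456,
		.mispred = 1, .in_tx = 1,
	};

	/* Still three 64-bit words, as before the new bits. */
	assert(sizeof(e) == 3 * sizeof(uint64_t));
	printf("%#llx -> %#llx mispred=%u in_tx=%u abort=%u\n",
	       (unsigned long long)e.from, (unsigned long long)e.to,
	       (unsigned)e.mispred, (unsigned)e.in_tx, (unsigned)e.abort);
	return 0;
}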
diff --git a/include/trace/events/nmi.h b/include/trace/events/nmi.h
new file mode 100644
index 000000000000..da3ee96b8d03
--- /dev/null
+++ b/include/trace/events/nmi.h
@@ -0,0 +1,37 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM nmi
3
4#if !defined(_TRACE_NMI_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_NMI_H
6
7#include <linux/ktime.h>
8#include <linux/tracepoint.h>
9
10TRACE_EVENT(nmi_handler,
11
12 TP_PROTO(void *handler, s64 delta_ns, int handled),
13
14 TP_ARGS(handler, delta_ns, handled),
15
16 TP_STRUCT__entry(
17 __field( void *, handler )
18 __field( s64, delta_ns)
19 __field( int, handled )
20 ),
21
22 TP_fast_assign(
23 __entry->handler = handler;
24 __entry->delta_ns = delta_ns;
25 __entry->handled = handled;
26 ),
27
28 TP_printk("%ps() delta_ns: %lld handled: %d",
29 __entry->handler,
30 __entry->delta_ns,
31 __entry->handled)
32);
33
34#endif /* _TRACE_NMI_H */
35
36/* This part must be outside protection */
37#include <trace/define_trace.h>
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index fb104e51496e..0b1df41691e8 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -157,8 +157,11 @@ enum perf_branch_sample_type {
157 PERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4, /* any call branch */ 157 PERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4, /* any call branch */
158 PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5, /* any return branch */ 158 PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5, /* any return branch */
159 PERF_SAMPLE_BRANCH_IND_CALL = 1U << 6, /* indirect calls */ 159 PERF_SAMPLE_BRANCH_IND_CALL = 1U << 6, /* indirect calls */
160 PERF_SAMPLE_BRANCH_ABORT_TX = 1U << 7, /* transaction aborts */
161 PERF_SAMPLE_BRANCH_IN_TX = 1U << 8, /* in transaction */
162 PERF_SAMPLE_BRANCH_NO_TX = 1U << 9, /* not in transaction */
160 163
161 PERF_SAMPLE_BRANCH_MAX = 1U << 7, /* non-ABI */ 164 PERF_SAMPLE_BRANCH_MAX = 1U << 10, /* non-ABI */
162}; 165};
163 166
164#define PERF_SAMPLE_BRANCH_PLM_ALL \ 167#define PERF_SAMPLE_BRANCH_PLM_ALL \
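User space opts into the new filters by OR-ing them into perf_event_attr.branch_sample_type next to the existing PERF_SAMPLE_BRANCH_* selectors; because they sit below the moved PERF_SAMPLE_BRANCH_MAX, older kernels reject the unknown bits. A hedged sketch of requesting only branches executed inside a transaction (the helper name is illustrative; pass the attr to perf_event_open(2) as usual):

#include <linux/perf_event.h>
#include <string.h>

/* sketch: sample branch records taken inside TSX transactions */
static void setup_tsx_branch_attr(struct perf_event_attr *attr)
{
	memset(attr, 0, sizeof(*attr));
	attr->size = sizeof(*attr);
	attr->type = PERF_TYPE_HARDWARE;
	attr->config = PERF_COUNT_HW_CPU_CYCLES;
	attr->sample_period = 100000;
	attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
	attr->branch_sample_type = PERF_SAMPLE_BRANCH_ANY |
				   PERF_SAMPLE_BRANCH_IN_TX;
}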
diff --git a/init/main.c b/init/main.c
index 9484f4ba88d0..ec549581d732 100644
--- a/init/main.c
+++ b/init/main.c
@@ -542,7 +542,6 @@ asmlinkage void __init start_kernel(void)
542 if (WARN(!irqs_disabled(), "Interrupts were enabled *very* early, fixing it\n")) 542 if (WARN(!irqs_disabled(), "Interrupts were enabled *very* early, fixing it\n"))
543 local_irq_disable(); 543 local_irq_disable();
544 idr_init_cache(); 544 idr_init_cache();
545 perf_event_init();
546 rcu_init(); 545 rcu_init();
547 tick_nohz_init(); 546 tick_nohz_init();
548 radix_tree_init(); 547 radix_tree_init();
@@ -555,6 +554,7 @@ asmlinkage void __init start_kernel(void)
555 softirq_init(); 554 softirq_init();
556 timekeeping_init(); 555 timekeeping_init();
557 time_init(); 556 time_init();
557 perf_event_init();
558 profile_init(); 558 profile_init();
559 call_function_init(); 559 call_function_init();
560 WARN(!irqs_disabled(), "Interrupts were enabled early\n"); 560 WARN(!irqs_disabled(), "Interrupts were enabled early\n");
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b391907d5352..1db3af933704 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -165,10 +165,28 @@ int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free'
165/* 165/*
166 * max perf event sample rate 166 * max perf event sample rate
167 */ 167 */
168#define DEFAULT_MAX_SAMPLE_RATE 100000 168#define DEFAULT_MAX_SAMPLE_RATE 100000
169int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE; 169#define DEFAULT_SAMPLE_PERIOD_NS (NSEC_PER_SEC / DEFAULT_MAX_SAMPLE_RATE)
170static int max_samples_per_tick __read_mostly = 170#define DEFAULT_CPU_TIME_MAX_PERCENT 25
171 DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ); 171
172int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE;
173
174static int max_samples_per_tick __read_mostly = DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ);
175static int perf_sample_period_ns __read_mostly = DEFAULT_SAMPLE_PERIOD_NS;
176
177static atomic_t perf_sample_allowed_ns __read_mostly =
178 ATOMIC_INIT(DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100);
179
180void update_perf_cpu_limits(void)
181{
182 u64 tmp = perf_sample_period_ns;
183
184 tmp *= sysctl_perf_cpu_time_max_percent;
185 do_div(tmp, 100);
186 atomic_set(&perf_sample_allowed_ns, tmp);
187}
188
189static int perf_rotate_context(struct perf_cpu_context *cpuctx);
172 190
173int perf_proc_update_handler(struct ctl_table *table, int write, 191int perf_proc_update_handler(struct ctl_table *table, int write,
174 void __user *buffer, size_t *lenp, 192 void __user *buffer, size_t *lenp,
@@ -180,10 +198,78 @@ int perf_proc_update_handler(struct ctl_table *table, int write,
180 return ret; 198 return ret;
181 199
182 max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ); 200 max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ);
201 perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
202 update_perf_cpu_limits();
183 203
184 return 0; 204 return 0;
185} 205}
186 206
207int sysctl_perf_cpu_time_max_percent __read_mostly = DEFAULT_CPU_TIME_MAX_PERCENT;
208
209int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
210 void __user *buffer, size_t *lenp,
211 loff_t *ppos)
212{
213 int ret = proc_dointvec(table, write, buffer, lenp, ppos);
214
215 if (ret || !write)
216 return ret;
217
218 update_perf_cpu_limits();
219
220 return 0;
221}
222
223/*
224 * perf samples are done in some very critical code paths (NMIs).
225 * If they take too much CPU time, the system can lock up and not
226 * get any real work done. This will drop the sample rate when
227 * we detect that events are taking too long.
228 */
229#define NR_ACCUMULATED_SAMPLES 128
230DEFINE_PER_CPU(u64, running_sample_length);
231
232void perf_sample_event_took(u64 sample_len_ns)
233{
234 u64 avg_local_sample_len;
235 u64 local_samples_len = __get_cpu_var(running_sample_length);
236
237 if (atomic_read(&perf_sample_allowed_ns) == 0)
238 return;
239
240 /* decay the counter by 1 average sample */
241 local_samples_len = __get_cpu_var(running_sample_length);
242 local_samples_len -= local_samples_len/NR_ACCUMULATED_SAMPLES;
243 local_samples_len += sample_len_ns;
244 __get_cpu_var(running_sample_length) = local_samples_len;
245
246 /*
247 * note: this will be biased artificially low until we have
248 * seen NR_ACCUMULATED_SAMPLES. Doing it this way keeps us
249 * from having to maintain a count.
250 */
251 avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES;
252
253 if (avg_local_sample_len <= atomic_read(&perf_sample_allowed_ns))
254 return;
255
256 if (max_samples_per_tick <= 1)
257 return;
258
259 max_samples_per_tick = DIV_ROUND_UP(max_samples_per_tick, 2);
260 sysctl_perf_event_sample_rate = max_samples_per_tick * HZ;
261 perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
262
263 printk_ratelimited(KERN_WARNING
264 "perf samples too long (%lld > %d), lowering "
265 "kernel.perf_event_max_sample_rate to %d\n",
266 avg_local_sample_len,
267 atomic_read(&perf_sample_allowed_ns),
268 sysctl_perf_event_sample_rate);
269
270 update_perf_cpu_limits();
271}
272
187static atomic64_t perf_event_id; 273static atomic64_t perf_event_id;
188 274
189static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx, 275static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
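perf_sample_event_took() keeps a per-cpu, exponentially decayed sum: each call subtracts 1/NR_ACCUMULATED_SAMPLES of the running total and adds the new duration, so the total converges to about 128 times the mean sample cost, and dividing by NR_ACCUMULATED_SAMPLES yields the average with no separate event counter. A standalone user-space demo of that arithmetic (the 4000 ns per-sample cost is an arbitrary example value):

#include <stdio.h>
#include <stdint.h>

#define NR_ACCUMULATED_SAMPLES 128

int main(void)
{
	uint64_t total = 0;
	uint64_t sample_ns = 4000;	/* pretend each PMI costs 4us */
	int i;

	for (i = 0; i < 1024; i++) {
		total -= total / NR_ACCUMULATED_SAMPLES; /* decay one average sample */
		total += sample_ns;
	}
	/* biased low until ~128 samples are seen, then converges on sample_ns */
	printf("avg = %llu ns\n",
	       (unsigned long long)(total / NR_ACCUMULATED_SAMPLES));
	return 0;
}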
@@ -655,6 +741,106 @@ perf_cgroup_mark_enabled(struct perf_event *event,
655} 741}
656#endif 742#endif
657 743
744/*
745 * set default to be dependent on timer tick just
746 * like original code
747 */
748#define PERF_CPU_HRTIMER (1000 / HZ)
749/*
750 * function must be called with interrupts disabled
751 */
752static enum hrtimer_restart perf_cpu_hrtimer_handler(struct hrtimer *hr)
753{
754 struct perf_cpu_context *cpuctx;
755 enum hrtimer_restart ret = HRTIMER_NORESTART;
756 int rotations = 0;
757
758 WARN_ON(!irqs_disabled());
759
760 cpuctx = container_of(hr, struct perf_cpu_context, hrtimer);
761
762 rotations = perf_rotate_context(cpuctx);
763
764 /*
765 * arm timer if needed
766 */
767 if (rotations) {
768 hrtimer_forward_now(hr, cpuctx->hrtimer_interval);
769 ret = HRTIMER_RESTART;
770 }
771
772 return ret;
773}
774
775/* CPU is going down */
776void perf_cpu_hrtimer_cancel(int cpu)
777{
778 struct perf_cpu_context *cpuctx;
779 struct pmu *pmu;
780 unsigned long flags;
781
782 if (WARN_ON(cpu != smp_processor_id()))
783 return;
784
785 local_irq_save(flags);
786
787 rcu_read_lock();
788
789 list_for_each_entry_rcu(pmu, &pmus, entry) {
790 cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
791
792 if (pmu->task_ctx_nr == perf_sw_context)
793 continue;
794
795 hrtimer_cancel(&cpuctx->hrtimer);
796 }
797
798 rcu_read_unlock();
799
800 local_irq_restore(flags);
801}
802
803static void __perf_cpu_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
804{
805 struct hrtimer *hr = &cpuctx->hrtimer;
806 struct pmu *pmu = cpuctx->ctx.pmu;
807 int timer;
808
809 /* no multiplexing needed for SW PMU */
810 if (pmu->task_ctx_nr == perf_sw_context)
811 return;
812
813 /*
814 * check default is sane, if not set then force to
815 * default interval (1/tick)
816 */
817 timer = pmu->hrtimer_interval_ms;
818 if (timer < 1)
819 timer = pmu->hrtimer_interval_ms = PERF_CPU_HRTIMER;
820
821 cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);
822
823 hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
824 hr->function = perf_cpu_hrtimer_handler;
825}
826
827static void perf_cpu_hrtimer_restart(struct perf_cpu_context *cpuctx)
828{
829 struct hrtimer *hr = &cpuctx->hrtimer;
830 struct pmu *pmu = cpuctx->ctx.pmu;
831
832 /* not for SW PMU */
833 if (pmu->task_ctx_nr == perf_sw_context)
834 return;
835
836 if (hrtimer_active(hr))
837 return;
838
839 if (!hrtimer_callback_running(hr))
840 __hrtimer_start_range_ns(hr, cpuctx->hrtimer_interval,
841 0, HRTIMER_MODE_REL_PINNED, 0);
842}
843
658void perf_pmu_disable(struct pmu *pmu) 844void perf_pmu_disable(struct pmu *pmu)
659{ 845{
660 int *count = this_cpu_ptr(pmu->pmu_disable_count); 846 int *count = this_cpu_ptr(pmu->pmu_disable_count);
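The multiplexing interval defaults to PERF_CPU_HRTIMER = (1000 / HZ) ms, i.e. exactly one scheduler tick, which __perf_cpu_hrtimer_init() then scales to nanoseconds via NSEC_PER_MSEC. A quick standalone check of what that default works out to for common HZ configurations:

#include <stdio.h>

#define NSEC_PER_MSEC 1000000ULL

int main(void)
{
	int hz_values[] = { 100, 250, 1000 };
	int i;

	for (i = 0; i < 3; i++) {
		int timer_ms = 1000 / hz_values[i];	/* PERF_CPU_HRTIMER */

		printf("HZ=%4d -> %d ms (%llu ns) between rotations\n",
		       hz_values[i], timer_ms, NSEC_PER_MSEC * timer_ms);
	}
	return 0;
}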
@@ -1503,6 +1689,7 @@ group_sched_in(struct perf_event *group_event,
1503 1689
1504 if (event_sched_in(group_event, cpuctx, ctx)) { 1690 if (event_sched_in(group_event, cpuctx, ctx)) {
1505 pmu->cancel_txn(pmu); 1691 pmu->cancel_txn(pmu);
1692 perf_cpu_hrtimer_restart(cpuctx);
1506 return -EAGAIN; 1693 return -EAGAIN;
1507 } 1694 }
1508 1695
@@ -1549,6 +1736,8 @@ group_error:
1549 1736
1550 pmu->cancel_txn(pmu); 1737 pmu->cancel_txn(pmu);
1551 1738
1739 perf_cpu_hrtimer_restart(cpuctx);
1740
1552 return -EAGAIN; 1741 return -EAGAIN;
1553} 1742}
1554 1743
@@ -1804,8 +1993,10 @@ static int __perf_event_enable(void *info)
1804 * If this event can't go on and it's part of a 1993 * If this event can't go on and it's part of a
1805 * group, then the whole group has to come off. 1994 * group, then the whole group has to come off.
1806 */ 1995 */
1807 if (leader != event) 1996 if (leader != event) {
1808 group_sched_out(leader, cpuctx, ctx); 1997 group_sched_out(leader, cpuctx, ctx);
1998 perf_cpu_hrtimer_restart(cpuctx);
1999 }
1809 if (leader->attr.pinned) { 2000 if (leader->attr.pinned) {
1810 update_group_times(leader); 2001 update_group_times(leader);
1811 leader->state = PERF_EVENT_STATE_ERROR; 2002 leader->state = PERF_EVENT_STATE_ERROR;
@@ -2552,7 +2743,7 @@ static void rotate_ctx(struct perf_event_context *ctx)
2552 * because they're strictly cpu affine and rotate_start is called with IRQs 2743 * because they're strictly cpu affine and rotate_start is called with IRQs
2553 * disabled, while rotate_context is called from IRQ context. 2744 * disabled, while rotate_context is called from IRQ context.
2554 */ 2745 */
2555static void perf_rotate_context(struct perf_cpu_context *cpuctx) 2746static int perf_rotate_context(struct perf_cpu_context *cpuctx)
2556{ 2747{
2557 struct perf_event_context *ctx = NULL; 2748 struct perf_event_context *ctx = NULL;
2558 int rotate = 0, remove = 1; 2749 int rotate = 0, remove = 1;
@@ -2591,6 +2782,8 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
2591done: 2782done:
2592 if (remove) 2783 if (remove)
2593 list_del_init(&cpuctx->rotation_list); 2784 list_del_init(&cpuctx->rotation_list);
2785
2786 return rotate;
2594} 2787}
2595 2788
2596#ifdef CONFIG_NO_HZ_FULL 2789#ifdef CONFIG_NO_HZ_FULL
@@ -2622,10 +2815,6 @@ void perf_event_task_tick(void)
2622 ctx = cpuctx->task_ctx; 2815 ctx = cpuctx->task_ctx;
2623 if (ctx) 2816 if (ctx)
2624 perf_adjust_freq_unthr_context(ctx, throttled); 2817 perf_adjust_freq_unthr_context(ctx, throttled);
2625
2626 if (cpuctx->jiffies_interval == 1 ||
2627 !(jiffies % cpuctx->jiffies_interval))
2628 perf_rotate_context(cpuctx);
2629 } 2818 }
2630} 2819}
2631 2820
@@ -5036,7 +5225,7 @@ static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
5036 * sign as trigger. 5225 * sign as trigger.
5037 */ 5226 */
5038 5227
5039static u64 perf_swevent_set_period(struct perf_event *event) 5228u64 perf_swevent_set_period(struct perf_event *event)
5040{ 5229{
5041 struct hw_perf_event *hwc = &event->hw; 5230 struct hw_perf_event *hwc = &event->hw;
5042 u64 period = hwc->last_period; 5231 u64 period = hwc->last_period;
@@ -5979,9 +6168,56 @@ type_show(struct device *dev, struct device_attribute *attr, char *page)
5979 return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type); 6168 return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type);
5980} 6169}
5981 6170
6171static ssize_t
6172perf_event_mux_interval_ms_show(struct device *dev,
6173 struct device_attribute *attr,
6174 char *page)
6175{
6176 struct pmu *pmu = dev_get_drvdata(dev);
6177
6178 return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->hrtimer_interval_ms);
6179}
6180
6181static ssize_t
6182perf_event_mux_interval_ms_store(struct device *dev,
6183 struct device_attribute *attr,
6184 const char *buf, size_t count)
6185{
6186 struct pmu *pmu = dev_get_drvdata(dev);
6187 int timer, cpu, ret;
6188
6189 ret = kstrtoint(buf, 0, &timer);
6190 if (ret)
6191 return ret;
6192
6193 if (timer < 1)
6194 return -EINVAL;
6195
6196 /* same value, nothing to do */
6197 if (timer == pmu->hrtimer_interval_ms)
6198 return count;
6199
6200 pmu->hrtimer_interval_ms = timer;
6201
6202 /* update all cpuctx for this PMU */
6203 for_each_possible_cpu(cpu) {
6204 struct perf_cpu_context *cpuctx;
6205 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
6206 cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);
6207
6208 if (hrtimer_active(&cpuctx->hrtimer))
6209 hrtimer_forward_now(&cpuctx->hrtimer, cpuctx->hrtimer_interval);
6210 }
6211
6212 return count;
6213}
6214
6215#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)
6216
5982static struct device_attribute pmu_dev_attrs[] = { 6217static struct device_attribute pmu_dev_attrs[] = {
5983 __ATTR_RO(type), 6218 __ATTR_RO(type),
5984 __ATTR_NULL, 6219 __ATTR_RW(perf_event_mux_interval_ms),
6220 __ATTR_NULL,
5985}; 6221};
5986 6222
5987static int pmu_bus_running; 6223static int pmu_bus_running;
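Since pmu_dev_attrs hangs off each registered PMU's device, the new attribute appears as a per-PMU sysfs file; on typical systems PMU devices sit under /sys/bus/event_source/devices/, though that path is an assumption here rather than something this hunk spells out. A minimal sketch of raising the cpu PMU's multiplexing interval to 10 ms from user space:

#include <stdio.h>

int main(void)
{
	/* assumed path; substitute the PMU of interest */
	const char *path =
		"/sys/bus/event_source/devices/cpu/perf_event_mux_interval_ms";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "10\n");	/* the store handler rejects values < 1 */
	return fclose(f) ? 1 : 0;
}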
@@ -6027,7 +6263,7 @@ free_dev:
6027static struct lock_class_key cpuctx_mutex; 6263static struct lock_class_key cpuctx_mutex;
6028static struct lock_class_key cpuctx_lock; 6264static struct lock_class_key cpuctx_lock;
6029 6265
6030int perf_pmu_register(struct pmu *pmu, char *name, int type) 6266int perf_pmu_register(struct pmu *pmu, const char *name, int type)
6031{ 6267{
6032 int cpu, ret; 6268 int cpu, ret;
6033 6269
@@ -6076,7 +6312,9 @@ skip_type:
6076 lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock); 6312 lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
6077 cpuctx->ctx.type = cpu_context; 6313 cpuctx->ctx.type = cpu_context;
6078 cpuctx->ctx.pmu = pmu; 6314 cpuctx->ctx.pmu = pmu;
6079 cpuctx->jiffies_interval = 1; 6315
6316 __perf_cpu_hrtimer_init(cpuctx, cpu);
6317
6080 INIT_LIST_HEAD(&cpuctx->rotation_list); 6318 INIT_LIST_HEAD(&cpuctx->rotation_list);
6081 cpuctx->unique_pmu = pmu; 6319 cpuctx->unique_pmu = pmu;
6082 } 6320 }
@@ -6402,11 +6640,6 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
6402 if (!(mask & ~PERF_SAMPLE_BRANCH_PLM_ALL)) 6640 if (!(mask & ~PERF_SAMPLE_BRANCH_PLM_ALL))
6403 return -EINVAL; 6641 return -EINVAL;
6404 6642
6405 /* kernel level capture: check permissions */
6406 if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM)
6407 && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
6408 return -EACCES;
6409
6410 /* propagate priv level, when not set for branch */ 6643 /* propagate priv level, when not set for branch */
6411 if (!(mask & PERF_SAMPLE_BRANCH_PLM_ALL)) { 6644 if (!(mask & PERF_SAMPLE_BRANCH_PLM_ALL)) {
6412 6645
@@ -6424,6 +6657,10 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
6424 */ 6657 */
6425 attr->branch_sample_type = mask; 6658 attr->branch_sample_type = mask;
6426 } 6659 }
6660 /* privileged levels capture (kernel, hv): check permissions */
6661 if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM)
6662 && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
6663 return -EACCES;
6427 } 6664 }
6428 6665
6429 if (attr->sample_type & PERF_SAMPLE_REGS_USER) { 6666 if (attr->sample_type & PERF_SAMPLE_REGS_USER) {
@@ -7476,7 +7713,6 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
7476 case CPU_DOWN_PREPARE: 7713 case CPU_DOWN_PREPARE:
7477 perf_event_exit_cpu(cpu); 7714 perf_event_exit_cpu(cpu);
7478 break; 7715 break;
7479
7480 default: 7716 default:
7481 break; 7717 break;
7482 } 7718 }
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 20185ea64aa6..1559fb0b9296 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -46,23 +46,26 @@
46#include <linux/smp.h> 46#include <linux/smp.h>
47 47
48#include <linux/hw_breakpoint.h> 48#include <linux/hw_breakpoint.h>
49
50
51/* 49/*
52 * Constraints data 50 * Constraints data
53 */ 51 */
52struct bp_cpuinfo {
53 /* Number of pinned cpu breakpoints in a cpu */
54 unsigned int cpu_pinned;
55 /* tsk_pinned[n] is the number of tasks having n+1 breakpoints */
56 unsigned int *tsk_pinned;
57 /* Number of non-pinned cpu/task breakpoints in a cpu */
58 unsigned int flexible; /* XXX: placeholder, see fetch_this_slot() */
59};
54 60
55/* Number of pinned cpu breakpoints in a cpu */ 61static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);
56static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned[TYPE_MAX]);
57
58/* Number of pinned task breakpoints in a cpu */
59static DEFINE_PER_CPU(unsigned int *, nr_task_bp_pinned[TYPE_MAX]);
60
61/* Number of non-pinned cpu/task breakpoints in a cpu */
62static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);
63
64static int nr_slots[TYPE_MAX]; 62static int nr_slots[TYPE_MAX];
65 63
64static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type)
65{
66 return per_cpu_ptr(bp_cpuinfo + type, cpu);
67}
68
66/* Keep track of the breakpoints attached to tasks */ 69/* Keep track of the breakpoints attached to tasks */
67static LIST_HEAD(bp_task_head); 70static LIST_HEAD(bp_task_head);
68 71
@@ -96,8 +99,8 @@ static inline enum bp_type_idx find_slot_idx(struct perf_event *bp)
96 */ 99 */
97static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type) 100static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
98{ 101{
102 unsigned int *tsk_pinned = get_bp_info(cpu, type)->tsk_pinned;
99 int i; 103 int i;
100 unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
101 104
102 for (i = nr_slots[type] - 1; i >= 0; i--) { 105 for (i = nr_slots[type] - 1; i >= 0; i--) {
103 if (tsk_pinned[i] > 0) 106 if (tsk_pinned[i] > 0)
@@ -127,6 +130,13 @@ static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
127 return count; 130 return count;
128} 131}
129 132
133static const struct cpumask *cpumask_of_bp(struct perf_event *bp)
134{
135 if (bp->cpu >= 0)
136 return cpumask_of(bp->cpu);
137 return cpu_possible_mask;
138}
139
130/* 140/*
131 * Report the number of pinned/un-pinned breakpoints we have in 141 * Report the number of pinned/un-pinned breakpoints we have in
132 * a given cpu (cpu > -1) or in all of them (cpu = -1). 142 * a given cpu (cpu > -1) or in all of them (cpu = -1).
@@ -135,25 +145,15 @@ static void
135fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp, 145fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
136 enum bp_type_idx type) 146 enum bp_type_idx type)
137{ 147{
138 int cpu = bp->cpu; 148 const struct cpumask *cpumask = cpumask_of_bp(bp);
139 struct task_struct *tsk = bp->hw.bp_target; 149 int cpu;
140
141 if (cpu >= 0) {
142 slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu);
143 if (!tsk)
144 slots->pinned += max_task_bp_pinned(cpu, type);
145 else
146 slots->pinned += task_bp_pinned(cpu, bp, type);
147 slots->flexible = per_cpu(nr_bp_flexible[type], cpu);
148
149 return;
150 }
151 150
152 for_each_possible_cpu(cpu) { 151 for_each_cpu(cpu, cpumask) {
153 unsigned int nr; 152 struct bp_cpuinfo *info = get_bp_info(cpu, type);
153 int nr;
154 154
155 nr = per_cpu(nr_cpu_bp_pinned[type], cpu); 155 nr = info->cpu_pinned;
156 if (!tsk) 156 if (!bp->hw.bp_target)
157 nr += max_task_bp_pinned(cpu, type); 157 nr += max_task_bp_pinned(cpu, type);
158 else 158 else
159 nr += task_bp_pinned(cpu, bp, type); 159 nr += task_bp_pinned(cpu, bp, type);
@@ -161,8 +161,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
161 if (nr > slots->pinned) 161 if (nr > slots->pinned)
162 slots->pinned = nr; 162 slots->pinned = nr;
163 163
164 nr = per_cpu(nr_bp_flexible[type], cpu); 164 nr = info->flexible;
165
166 if (nr > slots->flexible) 165 if (nr > slots->flexible)
167 slots->flexible = nr; 166 slots->flexible = nr;
168 } 167 }
@@ -182,29 +181,19 @@ fetch_this_slot(struct bp_busy_slots *slots, int weight)
182/* 181/*
183 * Add a pinned breakpoint for the given task in our constraint table 182 * Add a pinned breakpoint for the given task in our constraint table
184 */ 183 */
185static void toggle_bp_task_slot(struct perf_event *bp, int cpu, bool enable, 184static void toggle_bp_task_slot(struct perf_event *bp, int cpu,
186 enum bp_type_idx type, int weight) 185 enum bp_type_idx type, int weight)
187{ 186{
188 unsigned int *tsk_pinned; 187 unsigned int *tsk_pinned = get_bp_info(cpu, type)->tsk_pinned;
189 int old_count = 0; 188 int old_idx, new_idx;
190 int old_idx = 0; 189
191 int idx = 0; 190 old_idx = task_bp_pinned(cpu, bp, type) - 1;
192 191 new_idx = old_idx + weight;
193 old_count = task_bp_pinned(cpu, bp, type); 192
194 old_idx = old_count - 1; 193 if (old_idx >= 0)
195 idx = old_idx + weight; 194 tsk_pinned[old_idx]--;
196 195 if (new_idx >= 0)
197 /* tsk_pinned[n] is the number of tasks having n breakpoints */ 196 tsk_pinned[new_idx]++;
198 tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
199 if (enable) {
200 tsk_pinned[idx]++;
201 if (old_count > 0)
202 tsk_pinned[old_idx]--;
203 } else {
204 tsk_pinned[idx]--;
205 if (old_count > 0)
206 tsk_pinned[old_idx]++;
207 }
208} 197}
209 198
210/* 199/*
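The rewrite leans on tsk_pinned[] being a histogram: as the new bp_cpuinfo comment puts it, tsk_pinned[n] counts tasks owning n+1 breakpoints, so moving a task from old_idx to old_idx + weight is one decrement plus one increment, and a negative index simply means the task had (or will have) no breakpoints. A standalone demo of that update rule, with move_task() as an illustrative stand-in for toggle_bp_task_slot():

#include <stdio.h>

/* mirrors toggle_bp_task_slot(): weight > 0 installs, weight < 0 releases */
static void move_task(unsigned int *tsk_pinned, int pinned_before, int weight)
{
	int old_idx = pinned_before - 1;	/* -1 when the task had none */
	int new_idx = old_idx + weight;

	if (old_idx >= 0)
		tsk_pinned[old_idx]--;
	if (new_idx >= 0)
		tsk_pinned[new_idx]++;
}

int main(void)
{
	unsigned int tsk_pinned[4] = { 0 };

	move_task(tsk_pinned, 0, 1);	/* 1st bp: tsk_pinned[0]++ */
	move_task(tsk_pinned, 1, 1);	/* 2nd bp: [0]--, [1]++    */
	move_task(tsk_pinned, 2, -1);	/* release one: [1]--, [0]++ */
	printf("%u %u %u %u\n", tsk_pinned[0], tsk_pinned[1],
	       tsk_pinned[2], tsk_pinned[3]);	/* prints: 1 0 0 0 */
	return 0;
}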
@@ -214,33 +203,26 @@ static void
214toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, 203toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
215 int weight) 204 int weight)
216{ 205{
217 int cpu = bp->cpu; 206 const struct cpumask *cpumask = cpumask_of_bp(bp);
218 struct task_struct *tsk = bp->hw.bp_target; 207 int cpu;
219 208
220 /* Pinned counter cpu profiling */ 209 if (!enable)
221 if (!tsk) { 210 weight = -weight;
222 211
223 if (enable) 212 /* Pinned counter cpu profiling */
224 per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight; 213 if (!bp->hw.bp_target) {
225 else 214 get_bp_info(bp->cpu, type)->cpu_pinned += weight;
226 per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
227 return; 215 return;
228 } 216 }
229 217
230 /* Pinned counter task profiling */ 218 /* Pinned counter task profiling */
231 219 for_each_cpu(cpu, cpumask)
232 if (!enable) 220 toggle_bp_task_slot(bp, cpu, type, weight);
233 list_del(&bp->hw.bp_list);
234
235 if (cpu >= 0) {
236 toggle_bp_task_slot(bp, cpu, enable, type, weight);
237 } else {
238 for_each_possible_cpu(cpu)
239 toggle_bp_task_slot(bp, cpu, enable, type, weight);
240 }
241 221
242 if (enable) 222 if (enable)
243 list_add_tail(&bp->hw.bp_list, &bp_task_head); 223 list_add_tail(&bp->hw.bp_list, &bp_task_head);
224 else
225 list_del(&bp->hw.bp_list);
244} 226}
245 227
246/* 228/*
@@ -261,8 +243,8 @@ __weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
261 * 243 *
262 * - If attached to a single cpu, check: 244 * - If attached to a single cpu, check:
263 * 245 *
264 * (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu) 246 * (per_cpu(info->flexible, cpu) || (per_cpu(info->cpu_pinned, cpu)
265 * + max(per_cpu(nr_task_bp_pinned, cpu)))) < HBP_NUM 247 * + max(per_cpu(info->tsk_pinned, cpu)))) < HBP_NUM
266 * 248 *
267 * -> If there are already non-pinned counters in this cpu, it means 249 * -> If there are already non-pinned counters in this cpu, it means
268 * there is already a free slot for them. 250 * there is already a free slot for them.
@@ -272,8 +254,8 @@ __weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
272 * 254 *
273 * - If attached to every cpu, check: 255 * - If attached to every cpu, check:
274 * 256 *
275 * (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *)) 257 * (per_cpu(info->flexible, *) || (max(per_cpu(info->cpu_pinned, *))
276 * + max(per_cpu(nr_task_bp_pinned, *)))) < HBP_NUM 258 * + max(per_cpu(info->tsk_pinned, *)))) < HBP_NUM
277 * 259 *
278 * -> This is roughly the same, except we check the number of per cpu 260 * -> This is roughly the same, except we check the number of per cpu
279 * bp for every cpu and we keep the max one. Same for the per tasks 261 * bp for every cpu and we keep the max one. Same for the per tasks
@@ -284,16 +266,16 @@ __weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
284 * 266 *
285 * - If attached to a single cpu, check: 267 * - If attached to a single cpu, check:
286 * 268 *
287 * ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu) 269 * ((per_cpu(info->flexible, cpu) > 1) + per_cpu(info->cpu_pinned, cpu)
288 * + max(per_cpu(nr_task_bp_pinned, cpu))) < HBP_NUM 270 * + max(per_cpu(info->tsk_pinned, cpu))) < HBP_NUM
289 * 271 *
290 * -> Same checks as before. But now the nr_bp_flexible, if any, must keep 272 * -> Same checks as before. But now the info->flexible, if any, must keep
291 * one register at least (or they will never be fed). 273 * one register at least (or they will never be fed).
292 * 274 *
293 * - If attached to every cpu, check: 275 * - If attached to every cpu, check:
294 * 276 *
295 * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *)) 277 * ((per_cpu(info->flexible, *) > 1) + max(per_cpu(info->cpu_pinned, *))
296 * + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM 278 * + max(per_cpu(info->tsk_pinned, *))) < HBP_NUM
297 */ 279 */
298static int __reserve_bp_slot(struct perf_event *bp) 280static int __reserve_bp_slot(struct perf_event *bp)
299{ 281{
@@ -518,8 +500,8 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
518 perf_overflow_handler_t triggered, 500 perf_overflow_handler_t triggered,
519 void *context) 501 void *context)
520{ 502{
521 struct perf_event * __percpu *cpu_events, **pevent, *bp; 503 struct perf_event * __percpu *cpu_events, *bp;
522 long err; 504 long err = 0;
523 int cpu; 505 int cpu;
524 506
525 cpu_events = alloc_percpu(typeof(*cpu_events)); 507 cpu_events = alloc_percpu(typeof(*cpu_events));
@@ -528,31 +510,21 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
528 510
529 get_online_cpus(); 511 get_online_cpus();
530 for_each_online_cpu(cpu) { 512 for_each_online_cpu(cpu) {
531 pevent = per_cpu_ptr(cpu_events, cpu);
532 bp = perf_event_create_kernel_counter(attr, cpu, NULL, 513 bp = perf_event_create_kernel_counter(attr, cpu, NULL,
533 triggered, context); 514 triggered, context);
534
535 *pevent = bp;
536
537 if (IS_ERR(bp)) { 515 if (IS_ERR(bp)) {
538 err = PTR_ERR(bp); 516 err = PTR_ERR(bp);
539 goto fail; 517 break;
540 } 518 }
541 }
542 put_online_cpus();
543 519
544 return cpu_events; 520 per_cpu(*cpu_events, cpu) = bp;
545
546fail:
547 for_each_online_cpu(cpu) {
548 pevent = per_cpu_ptr(cpu_events, cpu);
549 if (IS_ERR(*pevent))
550 break;
551 unregister_hw_breakpoint(*pevent);
552 } 521 }
553 put_online_cpus(); 522 put_online_cpus();
554 523
555 free_percpu(cpu_events); 524 if (likely(!err))
525 return cpu_events;
526
527 unregister_wide_hw_breakpoint(cpu_events);
556 return (void __percpu __force *)ERR_PTR(err); 528 return (void __percpu __force *)ERR_PTR(err);
557} 529}
558EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint); 530EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
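For context, the API being tidied here is normally driven the way samples/hw_breakpoint does it: initialize a perf_event_attr with hw_breakpoint_init(), register across all CPUs, and later tear down with unregister_wide_hw_breakpoint(). A hedged kernel-side sketch (the function and variable names are illustrative):

#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>

static struct perf_event * __percpu *wp;

static void wp_handler(struct perf_event *bp, struct perf_sample_data *data,
		       struct pt_regs *regs)
{
	pr_info("watched address accessed\n");
}

static int watch_address(void *addr)
{
	struct perf_event_attr attr;

	hw_breakpoint_init(&attr);
	attr.bp_addr = (unsigned long)addr;
	attr.bp_len = HW_BREAKPOINT_LEN_4;
	attr.bp_type = HW_BREAKPOINT_W;

	wp = register_wide_hw_breakpoint(&attr, wp_handler, NULL);
	if (IS_ERR((void __force *)wp))
		return PTR_ERR((void __force *)wp);
	return 0;
}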
@@ -564,12 +536,10 @@ EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
564void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events) 536void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
565{ 537{
566 int cpu; 538 int cpu;
567 struct perf_event **pevent;
568 539
569 for_each_possible_cpu(cpu) { 540 for_each_possible_cpu(cpu)
570 pevent = per_cpu_ptr(cpu_events, cpu); 541 unregister_hw_breakpoint(per_cpu(*cpu_events, cpu));
571 unregister_hw_breakpoint(*pevent); 542
572 }
573 free_percpu(cpu_events); 543 free_percpu(cpu_events);
574} 544}
575EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint); 545EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);
@@ -612,6 +582,11 @@ static int hw_breakpoint_add(struct perf_event *bp, int flags)
612 if (!(flags & PERF_EF_START)) 582 if (!(flags & PERF_EF_START))
613 bp->hw.state = PERF_HES_STOPPED; 583 bp->hw.state = PERF_HES_STOPPED;
614 584
585 if (is_sampling_event(bp)) {
586 bp->hw.last_period = bp->hw.sample_period;
587 perf_swevent_set_period(bp);
588 }
589
615 return arch_install_hw_breakpoint(bp); 590 return arch_install_hw_breakpoint(bp);
616} 591}
617 592
@@ -650,7 +625,6 @@ static struct pmu perf_breakpoint = {
650 625
651int __init init_hw_breakpoint(void) 626int __init init_hw_breakpoint(void)
652{ 627{
653 unsigned int **task_bp_pinned;
654 int cpu, err_cpu; 628 int cpu, err_cpu;
655 int i; 629 int i;
656 630
@@ -659,10 +633,11 @@ int __init init_hw_breakpoint(void)
659 633
660 for_each_possible_cpu(cpu) { 634 for_each_possible_cpu(cpu) {
661 for (i = 0; i < TYPE_MAX; i++) { 635 for (i = 0; i < TYPE_MAX; i++) {
662 task_bp_pinned = &per_cpu(nr_task_bp_pinned[i], cpu); 636 struct bp_cpuinfo *info = get_bp_info(cpu, i);
663 *task_bp_pinned = kzalloc(sizeof(int) * nr_slots[i], 637
664 GFP_KERNEL); 638 info->tsk_pinned = kcalloc(nr_slots[i], sizeof(int),
665 if (!*task_bp_pinned) 639 GFP_KERNEL);
640 if (!info->tsk_pinned)
666 goto err_alloc; 641 goto err_alloc;
667 } 642 }
668 } 643 }
@@ -676,7 +651,7 @@ int __init init_hw_breakpoint(void)
676 err_alloc: 651 err_alloc:
677 for_each_possible_cpu(err_cpu) { 652 for_each_possible_cpu(err_cpu) {
678 for (i = 0; i < TYPE_MAX; i++) 653 for (i = 0; i < TYPE_MAX; i++)
679 kfree(per_cpu(nr_task_bp_pinned[i], err_cpu)); 654 kfree(get_bp_info(err_cpu, i)->tsk_pinned);
680 if (err_cpu == cpu) 655 if (err_cpu == cpu)
681 break; 656 break;
682 } 657 }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9edcf456e0fc..4ce13c3cedb9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -120,7 +120,6 @@ extern int blk_iopoll_enabled;
120/* Constants used for minimum and maximum */ 120/* Constants used for minimum and maximum */
121#ifdef CONFIG_LOCKUP_DETECTOR 121#ifdef CONFIG_LOCKUP_DETECTOR
122static int sixty = 60; 122static int sixty = 60;
123static int neg_one = -1;
124#endif 123#endif
125 124
126static int zero; 125static int zero;
@@ -814,7 +813,7 @@ static struct ctl_table kern_table[] = {
814 .maxlen = sizeof(int), 813 .maxlen = sizeof(int),
815 .mode = 0644, 814 .mode = 0644,
816 .proc_handler = proc_dowatchdog, 815 .proc_handler = proc_dowatchdog,
817 .extra1 = &neg_one, 816 .extra1 = &zero,
818 .extra2 = &sixty, 817 .extra2 = &sixty,
819 }, 818 },
820 { 819 {
@@ -1044,6 +1043,15 @@ static struct ctl_table kern_table[] = {
1044 .mode = 0644, 1043 .mode = 0644,
1045 .proc_handler = perf_proc_update_handler, 1044 .proc_handler = perf_proc_update_handler,
1046 }, 1045 },
1046 {
1047 .procname = "perf_cpu_time_max_percent",
1048 .data = &sysctl_perf_cpu_time_max_percent,
1049 .maxlen = sizeof(sysctl_perf_cpu_time_max_percent),
1050 .mode = 0644,
1051 .proc_handler = perf_cpu_time_max_percent_handler,
1052 .extra1 = &zero,
1053 .extra2 = &one_hundred,
1054 },
1047#endif 1055#endif
1048#ifdef CONFIG_KMEMCHECK 1056#ifdef CONFIG_KMEMCHECK
1049 { 1057 {
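The new knob pairs with kernel.perf_event_max_sample_rate: the sample period implied by the rate, scaled by this percentage, is the per-sample budget that perf_sample_event_took() enforces. With the defaults established earlier in this series (100000 samples/sec and 25 percent) the budget is 2500 ns, as this standalone reproduction of update_perf_cpu_limits()'s arithmetic shows:

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

int main(void)
{
	uint64_t rate = 100000;	/* kernel.perf_event_max_sample_rate */
	uint64_t pct = 25;	/* kernel.perf_cpu_time_max_percent */
	uint64_t period_ns = NSEC_PER_SEC / rate;

	printf("allowed %llu ns per sample\n",
	       (unsigned long long)(period_ns * pct / 100));	/* 2500 */
	return 0;
}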
diff --git a/tools/lib/lk/Makefile b/tools/lib/lk/Makefile
index 926cbf3efc7f..2c5a19733357 100644
--- a/tools/lib/lk/Makefile
+++ b/tools/lib/lk/Makefile
@@ -1,5 +1,8 @@
1include ../../scripts/Makefile.include 1include ../../scripts/Makefile.include
2 2
3CC = $(CROSS_COMPILE)gcc
4AR = $(CROSS_COMPILE)ar
5
3# guard against environment variables 6# guard against environment variables
4LIB_H= 7LIB_H=
5LIB_OBJS= 8LIB_OBJS=
diff --git a/tools/perf/Documentation/perf-archive.txt b/tools/perf/Documentation/perf-archive.txt
index fae174dc7d01..5032a142853e 100644
--- a/tools/perf/Documentation/perf-archive.txt
+++ b/tools/perf/Documentation/perf-archive.txt
@@ -13,7 +13,7 @@ SYNOPSIS
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
15This command runs perf-buildid-list --with-hits, and collects the files 15This command runs perf-buildid-list --with-hits, and collects the files
16with the buildids found so that analisys of perf.data contents can be possible 16with the buildids found so that analysis of perf.data contents can be possible
17on another machine. 17on another machine.
18 18
19 19
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 7d5f4f38aa52..66dab7410c1d 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -210,6 +210,10 @@ OPTIONS
210 Demangle symbol names to human readable form. It's enabled by default, 210 Demangle symbol names to human readable form. It's enabled by default,
211 disable with --no-demangle. 211 disable with --no-demangle.
212 212
213--percent-limit::
214 Do not show entries which have an overhead under that percent.
215 (Default: 0).
216
213SEE ALSO 217SEE ALSO
214-------- 218--------
215linkperf:perf-stat[1], linkperf:perf-annotate[1] 219linkperf:perf-stat[1], linkperf:perf-annotate[1]
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 9f1a2fe54757..7fdd1909e376 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -155,6 +155,10 @@ Default is to monitor all CPUS.
155 155
156 Default: fractal,0.5,callee. 156 Default: fractal,0.5,callee.
157 157
158--percent-limit::
159 Do not show entries which have an overhead under that percent.
160 (Default: 0).
161
158INTERACTIVE PROMPTING KEYS 162INTERACTIVE PROMPTING KEYS
159-------------------------- 163--------------------------
160 164
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index b0f164b133d9..203cb0eecff2 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -51,189 +51,63 @@ include config/utilities.mak
51# Define NO_BACKTRACE if you do not want stack backtrace debug feature 51# Define NO_BACKTRACE if you do not want stack backtrace debug feature
52# 52#
53# Define NO_LIBNUMA if you do not want numa perf benchmark 53# Define NO_LIBNUMA if you do not want numa perf benchmark
54#
55# Define NO_LIBAUDIT if you do not want libaudit support
56#
57# Define NO_LIBBIONIC if you do not want bionic support
54 58
55$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE 59ifeq ($(srctree),)
56 @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) 60srctree := $(patsubst %/,%,$(dir $(shell pwd)))
57 61srctree := $(patsubst %/,%,$(dir $(srctree)))
58uname_M := $(shell uname -m 2>/dev/null || echo not) 62#$(info Determined 'srctree' to be $(srctree))
59
60ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \
61 -e s/arm.*/arm/ -e s/sa110/arm/ \
62 -e s/s390x/s390/ -e s/parisc64/parisc/ \
63 -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \
64 -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ )
65NO_PERF_REGS := 1
66
67CC = $(CROSS_COMPILE)gcc
68AR = $(CROSS_COMPILE)ar
69
70# Additional ARCH settings for x86
71ifeq ($(ARCH),i386)
72 override ARCH := x86
73 NO_PERF_REGS := 0
74 LIBUNWIND_LIBS = -lunwind -lunwind-x86
75endif
76ifeq ($(ARCH),x86_64)
77 override ARCH := x86
78 IS_X86_64 := 0
79 ifeq (, $(findstring m32,$(EXTRA_CFLAGS)))
80 IS_X86_64 := $(shell echo __x86_64__ | ${CC} -E -x c - | tail -n 1)
81 endif
82 ifeq (${IS_X86_64}, 1)
83 RAW_ARCH := x86_64
84 ARCH_CFLAGS := -DARCH_X86_64
85 ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
86 endif
87 NO_PERF_REGS := 0
88 LIBUNWIND_LIBS = -lunwind -lunwind-x86_64
89endif
90
91# Treat warnings as errors unless directed not to
92ifneq ($(WERROR),0)
93 CFLAGS_WERROR := -Werror
94endif
95
96ifeq ("$(origin DEBUG)", "command line")
97 PERF_DEBUG = $(DEBUG)
98endif
99ifndef PERF_DEBUG
100 CFLAGS_OPTIMIZE = -O6
101endif 63endif
102 64
103ifdef PARSER_DEBUG 65ifneq ($(objtree),)
104 PARSER_DEBUG_BISON := -t 66#$(info Determined 'objtree' to be $(objtree))
105 PARSER_DEBUG_FLEX := -d
106 PARSER_DEBUG_CFLAGS := -DPARSER_DEBUG
107endif 67endif
108 68
109ifdef NO_NEWT 69ifneq ($(OUTPUT),)
110 NO_SLANG=1 70#$(info Determined 'OUTPUT' to be $(OUTPUT))
111endif 71endif
112 72
113CFLAGS = -fno-omit-frame-pointer -ggdb3 -funwind-tables -Wall -Wextra -std=gnu99 $(CFLAGS_WERROR) $(CFLAGS_OPTIMIZE) $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) $(PARSER_DEBUG_CFLAGS) 73$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
114EXTLIBS = -lpthread -lrt -lelf -lm 74 @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
115ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
116ALL_LDFLAGS = $(LDFLAGS)
117STRIP ?= strip
118
119# Among the variables below, these:
120# perfexecdir
121# template_dir
122# mandir
123# infodir
124# htmldir
125# ETC_PERFCONFIG (but not sysconfdir)
126# can be specified as a relative path some/where/else;
127# this is interpreted as relative to $(prefix) and "perf" at
128# runtime figures out where they are based on the path to the executable.
129# This can help installing the suite in a relocatable way.
130
131# Make the path relative to DESTDIR, not to prefix
132ifndef DESTDIR
133prefix = $(HOME)
134endif
135bindir_relative = bin
136bindir = $(prefix)/$(bindir_relative)
137mandir = share/man
138infodir = share/info
139perfexecdir = libexec/perf-core
140sharedir = $(prefix)/share
141template_dir = share/perf-core/templates
142htmldir = share/doc/perf-doc
143ifeq ($(prefix),/usr)
144sysconfdir = /etc
145ETC_PERFCONFIG = $(sysconfdir)/perfconfig
146else
147sysconfdir = $(prefix)/etc
148ETC_PERFCONFIG = etc/perfconfig
149endif
150lib = lib
151 75
152export prefix bindir sharedir sysconfdir 76CC = $(CROSS_COMPILE)gcc
77AR = $(CROSS_COMPILE)ar
153 78
154RM = rm -f 79RM = rm -f
155MKDIR = mkdir 80MKDIR = mkdir
156FIND = find 81FIND = find
157INSTALL = install 82INSTALL = install
158FLEX = flex 83FLEX = flex
159BISON= bison 84BISON = bison
160 85STRIP = strip
161# sparse is architecture-neutral, which means that we need to tell it
162# explicitly what architecture to check for. Fix this up for yours..
163SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
164
165ifneq ($(MAKECMDGOALS),clean)
166ifneq ($(MAKECMDGOALS),tags)
167-include config/feature-tests.mak
168
169ifeq ($(call get-executable,$(FLEX)),)
170 dummy := $(error Error: $(FLEX) is missing on this system, please install it)
171endif
172
173ifeq ($(call get-executable,$(BISON)),)
174 dummy := $(error Error: $(BISON) is missing on this system, please install it)
175endif
176 86
177ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -fstack-protector-all,-fstack-protector-all),y) 87LK_DIR = $(srctree)/tools/lib/lk/
178 CFLAGS := $(CFLAGS) -fstack-protector-all 88TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
179endif
180 89
181ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -Wstack-protector,-Wstack-protector),y) 90# include config/Makefile by default and rule out
182 CFLAGS := $(CFLAGS) -Wstack-protector 91# non-config cases
183endif 92config := 1
184 93
185ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -Wvolatile-register-var,-Wvolatile-register-var),y) 94NON_CONFIG_TARGETS := clean TAGS tags cscope help
186 CFLAGS := $(CFLAGS) -Wvolatile-register-var
187endif
188 95
189ifndef PERF_DEBUG 96ifdef MAKECMDGOALS
190 ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -D_FORTIFY_SOURCE=2,-D_FORTIFY_SOURCE=2),y) 97ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),)
191 CFLAGS := $(CFLAGS) -D_FORTIFY_SOURCE=2 98 config := 0
192 endif
193endif 99endif
194
195### --- END CONFIGURATION SECTION ---
196
197ifeq ($(srctree),)
198srctree := $(patsubst %/,%,$(dir $(shell pwd)))
199srctree := $(patsubst %/,%,$(dir $(srctree)))
200#$(info Determined 'srctree' to be $(srctree))
201endif 100endif
202 101
203ifneq ($(objtree),) 102ifeq ($(config),1)
204#$(info Determined 'objtree' to be $(objtree)) 103include config/Makefile
205endif 104endif
206 105
207ifneq ($(OUTPUT),) 106export prefix bindir sharedir sysconfdir
208#$(info Determined 'OUTPUT' to be $(OUTPUT))
209endif
210 107
211BASIC_CFLAGS = \ 108# sparse is architecture-neutral, which means that we need to tell it
212 -Iutil/include \ 109# explicitly what architecture to check for. Fix this up for yours..
213 -Iarch/$(ARCH)/include \ 110SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
214 $(if $(objtree),-I$(objtree)/arch/$(ARCH)/include/generated/uapi) \
215 -I$(srctree)/arch/$(ARCH)/include/uapi \
216 -I$(srctree)/arch/$(ARCH)/include \
217 $(if $(objtree),-I$(objtree)/include/generated/uapi) \
218 -I$(srctree)/include/uapi \
219 -I$(srctree)/include \
220 -I$(OUTPUT)util \
221 -Iutil \
222 -I. \
223 -I$(TRACE_EVENT_DIR) \
224 -I../lib/ \
225 -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
226
227BASIC_LDFLAGS =
228
229ifeq ($(call try-cc,$(SOURCE_BIONIC),$(CFLAGS),bionic),y)
230 BIONIC := 1
231 EXTLIBS := $(filter-out -lrt,$(EXTLIBS))
232 EXTLIBS := $(filter-out -lpthread,$(EXTLIBS))
233 BASIC_CFLAGS += -I.
234endif
235endif # MAKECMDGOALS != tags
236endif # MAKECMDGOALS != clean
237 111
238# Guard against environment variables 112# Guard against environment variables
239BUILTIN_OBJS = 113BUILTIN_OBJS =
@@ -247,20 +121,17 @@ SCRIPT_SH += perf-archive.sh
247grep-libs = $(filter -l%,$(1)) 121grep-libs = $(filter -l%,$(1))
248strip-libs = $(filter-out -l%,$(1)) 122strip-libs = $(filter-out -l%,$(1))
249 123
250LK_DIR = ../lib/lk/
251TRACE_EVENT_DIR = ../lib/traceevent/
252
253LK_PATH=$(LK_DIR) 124LK_PATH=$(LK_DIR)
254 125
255ifneq ($(OUTPUT),) 126ifneq ($(OUTPUT),)
256 TE_PATH=$(OUTPUT) 127 TE_PATH=$(OUTPUT)
257ifneq ($(subdir),) 128ifneq ($(subdir),)
258 LK_PATH=$(OUTPUT)$(LK_DIR) 129 LK_PATH=$(OUTPUT)$(LK_DIR)
259else 130else
260 LK_PATH=$(OUTPUT) 131 LK_PATH=$(OUTPUT)
261endif 132endif
262else 133else
263 TE_PATH=$(TRACE_EVENT_DIR) 134 TE_PATH=$(TRACE_EVENT_DIR)
264endif 135endif
265 136
266LIBTRACEEVENT = $(TE_PATH)libtraceevent.a 137LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
@@ -278,10 +149,10 @@ export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
278python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so 149python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so
279 150
280PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) 151PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
281PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) 152PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBLK)
282 153
283$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) 154$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
284 $(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \ 155 $(QUIET_GEN)CFLAGS='$(CFLAGS)' $(PYTHON_WORD) util/setup.py \
285 --quiet build_ext; \ 156 --quiet build_ext; \
286 mkdir -p $(OUTPUT)python && \ 157 mkdir -p $(OUTPUT)python && \
287 cp $(PYTHON_EXTBUILD_LIB)perf.so $(OUTPUT)python/ 158 cp $(PYTHON_EXTBUILD_LIB)perf.so $(OUTPUT)python/
@@ -296,8 +167,6 @@ SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH))
296# 167#
297PROGRAMS += $(OUTPUT)perf 168PROGRAMS += $(OUTPUT)perf
298 169
299LANG_BINDINGS =
300
301# what 'all' will build and 'install' will install, in perfexecdir 170# what 'all' will build and 'install' will install, in perfexecdir
302ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) 171ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
303 172
@@ -306,10 +175,10 @@ OTHER_PROGRAMS = $(OUTPUT)perf
306 175
307# Set paths to tools early so that they can be used for version tests. 176# Set paths to tools early so that they can be used for version tests.
308ifndef SHELL_PATH 177ifndef SHELL_PATH
309 SHELL_PATH = /bin/sh 178 SHELL_PATH = /bin/sh
310endif 179endif
311ifndef PERL_PATH 180ifndef PERL_PATH
312 PERL_PATH = /usr/bin/perl 181 PERL_PATH = /usr/bin/perl
313endif 182endif
314 183
315export PERL_PATH 184export PERL_PATH
@@ -557,79 +426,14 @@ BUILTIN_OBJS += $(OUTPUT)builtin-mem.o
557 426
558PERFLIBS = $(LIB_FILE) $(LIBLK) $(LIBTRACEEVENT) 427PERFLIBS = $(LIB_FILE) $(LIBLK) $(LIBTRACEEVENT)
559 428
560#
561# Platform specific tweaks
562#
563ifneq ($(MAKECMDGOALS),clean)
564ifneq ($(MAKECMDGOALS),tags)
565
566# We choose to avoid "if .. else if .. else .. endif endif" 429# We choose to avoid "if .. else if .. else .. endif endif"
567# because maintaining the nesting to match is a pain. If 430# because maintaining the nesting to match is a pain. If
568# we had "elif" things would have been much nicer... 431# we had "elif" things would have been much nicer...
569 432
570ifdef NO_LIBELF
571 NO_DWARF := 1
572 NO_DEMANGLE := 1
573 NO_LIBUNWIND := 1
574else
575FLAGS_LIBELF=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS)
576ifneq ($(call try-cc,$(SOURCE_LIBELF),$(FLAGS_LIBELF),libelf),y)
577 FLAGS_GLIBC=$(ALL_CFLAGS) $(ALL_LDFLAGS)
578 ifeq ($(call try-cc,$(SOURCE_GLIBC),$(FLAGS_GLIBC),glibc),y)
579 LIBC_SUPPORT := 1
580 endif
581 ifeq ($(BIONIC),1)
582 LIBC_SUPPORT := 1
583 endif
584 ifeq ($(LIBC_SUPPORT),1)
585 msg := $(warning No libelf found, disables 'probe' tool, please install elfutils-libelf-devel/libelf-dev);
586
587 NO_LIBELF := 1
588 NO_DWARF := 1
589 NO_DEMANGLE := 1
590 else
591 msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static);
592 endif
593else
594 # for linking with debug library, run like:
595 # make DEBUG=1 LIBDW_DIR=/opt/libdw/
596 ifdef LIBDW_DIR
597 LIBDW_CFLAGS := -I$(LIBDW_DIR)/include
598 LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib
599 endif
600
601 FLAGS_DWARF=$(ALL_CFLAGS) $(LIBDW_CFLAGS) -ldw -lelf $(LIBDW_LDFLAGS) $(ALL_LDFLAGS) $(EXTLIBS)
602 ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF),libdw),y)
603 msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev);
604 NO_DWARF := 1
605 endif # Dwarf support
606endif # SOURCE_LIBELF
607endif # NO_LIBELF
608
609# There's only x86 (both 32 and 64) support for CFI unwind so far
610ifneq ($(ARCH),x86)
611 NO_LIBUNWIND := 1
612endif
613
614ifndef NO_LIBUNWIND
615# for linking with debug library, run like:
616# make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/
617ifdef LIBUNWIND_DIR
618 LIBUNWIND_CFLAGS := -I$(LIBUNWIND_DIR)/include
619 LIBUNWIND_LDFLAGS := -L$(LIBUNWIND_DIR)/lib
620endif
621
622FLAGS_UNWIND=$(LIBUNWIND_CFLAGS) $(ALL_CFLAGS) $(LIBUNWIND_LDFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) $(LIBUNWIND_LIBS)
623ifneq ($(call try-cc,$(SOURCE_LIBUNWIND),$(FLAGS_UNWIND),libunwind),y)
624 msg := $(warning No libunwind found, disabling post unwind support. Please install libunwind-dev[el] >= 0.99);
625 NO_LIBUNWIND := 1
626endif # Libunwind support
627endif # NO_LIBUNWIND
628
629-include arch/$(ARCH)/Makefile 433-include arch/$(ARCH)/Makefile
630 434
631ifneq ($(OUTPUT),) 435ifneq ($(OUTPUT),)
632 BASIC_CFLAGS += -I$(OUTPUT) 436 CFLAGS += -I$(OUTPUT)
633endif 437endif
634 438
635ifdef NO_LIBELF 439ifdef NO_LIBELF
@@ -647,281 +451,74 @@ BUILTIN_OBJS := $(filter-out $(OUTPUT)builtin-probe.o,$(BUILTIN_OBJS))
647LIB_OBJS += $(OUTPUT)util/symbol-minimal.o 451LIB_OBJS += $(OUTPUT)util/symbol-minimal.o
648 452
649else # NO_LIBELF 453else # NO_LIBELF
650BASIC_CFLAGS += -DLIBELF_SUPPORT
651
652FLAGS_LIBELF=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS)
653ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_LIBELF),-DLIBELF_MMAP),y)
654 BASIC_CFLAGS += -DLIBELF_MMAP
655endif
656
657ifndef NO_DWARF 454ifndef NO_DWARF
658ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined) 455 LIB_OBJS += $(OUTPUT)util/probe-finder.o
659 msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled); 456 LIB_OBJS += $(OUTPUT)util/dwarf-aux.o
660else
661 BASIC_CFLAGS := -DDWARF_SUPPORT $(LIBDW_CFLAGS) $(BASIC_CFLAGS)
662 BASIC_LDFLAGS := $(LIBDW_LDFLAGS) $(BASIC_LDFLAGS)
663 EXTLIBS += -lelf -ldw
664 LIB_OBJS += $(OUTPUT)util/probe-finder.o
665 LIB_OBJS += $(OUTPUT)util/dwarf-aux.o
666endif # PERF_HAVE_DWARF_REGS
667endif # NO_DWARF 457endif # NO_DWARF
668endif # NO_LIBELF 458endif # NO_LIBELF
669 459
670ifndef NO_LIBUNWIND 460ifndef NO_LIBUNWIND
671 BASIC_CFLAGS += -DLIBUNWIND_SUPPORT 461 LIB_OBJS += $(OUTPUT)util/unwind.o
672 EXTLIBS += $(LIBUNWIND_LIBS)
673 BASIC_CFLAGS := $(LIBUNWIND_CFLAGS) $(BASIC_CFLAGS)
674 BASIC_LDFLAGS := $(LIBUNWIND_LDFLAGS) $(BASIC_LDFLAGS)
675 LIB_OBJS += $(OUTPUT)util/unwind.o
676endif 462endif
677 463
678ifndef NO_LIBAUDIT 464ifndef NO_LIBAUDIT
679 FLAGS_LIBAUDIT = $(ALL_CFLAGS) $(ALL_LDFLAGS) -laudit 465 BUILTIN_OBJS += $(OUTPUT)builtin-trace.o
680 ifneq ($(call try-cc,$(SOURCE_LIBAUDIT),$(FLAGS_LIBAUDIT),libaudit),y)
681 msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev);
682 else
683 BASIC_CFLAGS += -DLIBAUDIT_SUPPORT
684 BUILTIN_OBJS += $(OUTPUT)builtin-trace.o
685 EXTLIBS += -laudit
686 endif
687endif 466endif
688 467
689ifndef NO_SLANG 468ifndef NO_SLANG
690 FLAGS_SLANG=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -I/usr/include/slang -lslang 469 LIB_OBJS += $(OUTPUT)ui/browser.o
691 ifneq ($(call try-cc,$(SOURCE_SLANG),$(FLAGS_SLANG),libslang),y) 470 LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o
692 msg := $(warning slang not found, disables TUI support. Please install slang-devel or libslang-dev); 471 LIB_OBJS += $(OUTPUT)ui/browsers/hists.o
693 else 472 LIB_OBJS += $(OUTPUT)ui/browsers/map.o
694 # Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h 473 LIB_OBJS += $(OUTPUT)ui/browsers/scripts.o
695 BASIC_CFLAGS += -I/usr/include/slang 474 LIB_OBJS += $(OUTPUT)ui/tui/setup.o
696 BASIC_CFLAGS += -DSLANG_SUPPORT 475 LIB_OBJS += $(OUTPUT)ui/tui/util.o
697 EXTLIBS += -lslang 476 LIB_OBJS += $(OUTPUT)ui/tui/helpline.o
698 LIB_OBJS += $(OUTPUT)ui/browser.o 477 LIB_OBJS += $(OUTPUT)ui/tui/progress.o
699 LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o 478 LIB_H += ui/browser.h
700 LIB_OBJS += $(OUTPUT)ui/browsers/hists.o 479 LIB_H += ui/browsers/map.h
701 LIB_OBJS += $(OUTPUT)ui/browsers/map.o 480 LIB_H += ui/keysyms.h
702 LIB_OBJS += $(OUTPUT)ui/browsers/scripts.o 481 LIB_H += ui/libslang.h
703 LIB_OBJS += $(OUTPUT)ui/tui/setup.o
704 LIB_OBJS += $(OUTPUT)ui/tui/util.o
705 LIB_OBJS += $(OUTPUT)ui/tui/helpline.o
706 LIB_OBJS += $(OUTPUT)ui/tui/progress.o
707 LIB_H += ui/browser.h
708 LIB_H += ui/browsers/map.h
709 LIB_H += ui/keysyms.h
710 LIB_H += ui/libslang.h
711 endif
712endif 482endif
713 483
714ifndef NO_GTK2 484ifndef NO_GTK2
715 FLAGS_GTK2=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null) 485 LIB_OBJS += $(OUTPUT)ui/gtk/browser.o
716 ifneq ($(call try-cc,$(SOURCE_GTK2),$(FLAGS_GTK2),gtk2),y) 486 LIB_OBJS += $(OUTPUT)ui/gtk/hists.o
717 msg := $(warning GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev); 487 LIB_OBJS += $(OUTPUT)ui/gtk/setup.o
718 else 488 LIB_OBJS += $(OUTPUT)ui/gtk/util.o
719 ifeq ($(call try-cc,$(SOURCE_GTK2_INFOBAR),$(FLAGS_GTK2),-DHAVE_GTK_INFO_BAR),y) 489 LIB_OBJS += $(OUTPUT)ui/gtk/helpline.o
720 BASIC_CFLAGS += -DHAVE_GTK_INFO_BAR 490 LIB_OBJS += $(OUTPUT)ui/gtk/progress.o
721 endif 491 LIB_OBJS += $(OUTPUT)ui/gtk/annotate.o
722 BASIC_CFLAGS += -DGTK2_SUPPORT
723 BASIC_CFLAGS += $(shell pkg-config --cflags gtk+-2.0 2>/dev/null)
724 EXTLIBS += $(shell pkg-config --libs gtk+-2.0 2>/dev/null)
725 LIB_OBJS += $(OUTPUT)ui/gtk/browser.o
726 LIB_OBJS += $(OUTPUT)ui/gtk/hists.o
727 LIB_OBJS += $(OUTPUT)ui/gtk/setup.o
728 LIB_OBJS += $(OUTPUT)ui/gtk/util.o
729 LIB_OBJS += $(OUTPUT)ui/gtk/helpline.o
730 LIB_OBJS += $(OUTPUT)ui/gtk/progress.o
731 LIB_OBJS += $(OUTPUT)ui/gtk/annotate.o
732 endif
733endif 492endif
734 493
735ifdef NO_LIBPERL 494ifndef NO_LIBPERL
736 BASIC_CFLAGS += -DNO_LIBPERL 495 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o
737else 496 LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o
738 PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null)
739 PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
740 PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
741 PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
742 FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
743
744 ifneq ($(call try-cc,$(SOURCE_PERL_EMBED),$(FLAGS_PERL_EMBED),perl),y)
745 BASIC_CFLAGS += -DNO_LIBPERL
746 else
747 ALL_LDFLAGS += $(PERL_EMBED_LDFLAGS)
748 EXTLIBS += $(PERL_EMBED_LIBADD)
749 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o
750 LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o
751 endif
752endif 497endif
753 498
754disable-python = $(eval $(disable-python_code)) 499ifndef NO_LIBPYTHON
755define disable-python_code 500 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o
756 BASIC_CFLAGS += -DNO_LIBPYTHON 501 LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o
757 $(if $(1),$(warning No $(1) was found))
758 $(warning Python support will not be built)
759endef
760
761override PYTHON := \
762 $(call get-executable-or-default,PYTHON,python)
763
764ifndef PYTHON
765 $(call disable-python,python interpreter)
766else
767
768 PYTHON_WORD := $(call shell-wordify,$(PYTHON))
769
770 ifdef NO_LIBPYTHON
771 $(call disable-python)
772 else
773
774 override PYTHON_CONFIG := \
775 $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON)-config)
776
777 ifndef PYTHON_CONFIG
778 $(call disable-python,python-config tool)
779 else
780
781 PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
782
783 PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
784 PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
785 PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS))
786 PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
787 FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
788
789 ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED),python),y)
790 $(call disable-python,Python.h (for Python 2.x))
791 else
792
793 ifneq ($(call try-cc,$(SOURCE_PYTHON_VERSION),$(FLAGS_PYTHON_EMBED),python version),y)
794 $(warning Python 3 is not yet supported; please set)
795 $(warning PYTHON and/or PYTHON_CONFIG appropriately.)
796 $(warning If you also have Python 2 installed, then)
797 $(warning try something like:)
798 $(warning $(and ,))
799 $(warning $(and ,) make PYTHON=python2)
800 $(warning $(and ,))
801 $(warning Otherwise, disable Python support entirely:)
802 $(warning $(and ,))
803 $(warning $(and ,) make NO_LIBPYTHON=1)
804 $(warning $(and ,))
805 $(error $(and ,))
806 else
807 ALL_LDFLAGS += $(PYTHON_EMBED_LDFLAGS)
808 EXTLIBS += $(PYTHON_EMBED_LIBADD)
809 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o
810 LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o
811 LANG_BINDINGS += $(OUTPUT)python/perf.so
812 endif
813
814 endif
815 endif
816 endif
817endif
818
819ifdef NO_DEMANGLE
820 BASIC_CFLAGS += -DNO_DEMANGLE
821else
822 ifdef HAVE_CPLUS_DEMANGLE
823 EXTLIBS += -liberty
824 BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE
825 else
826 FLAGS_BFD=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -DPACKAGE='perf' -lbfd
827 has_bfd := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD),libbfd)
828 ifeq ($(has_bfd),y)
829 EXTLIBS += -lbfd
830 else
831 FLAGS_BFD_IBERTY=$(FLAGS_BFD) -liberty
832 has_bfd_iberty := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD_IBERTY),liberty)
833 ifeq ($(has_bfd_iberty),y)
834 EXTLIBS += -lbfd -liberty
835 else
836 FLAGS_BFD_IBERTY_Z=$(FLAGS_BFD_IBERTY) -lz
837 has_bfd_iberty_z := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD_IBERTY_Z),libz)
838 ifeq ($(has_bfd_iberty_z),y)
839 EXTLIBS += -lbfd -liberty -lz
840 else
841 FLAGS_CPLUS_DEMANGLE=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -liberty
842 has_cplus_demangle := $(call try-cc,$(SOURCE_CPLUS_DEMANGLE),$(FLAGS_CPLUS_DEMANGLE),demangle)
843 ifeq ($(has_cplus_demangle),y)
844 EXTLIBS += -liberty
845 BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE
846 else
847 msg := $(warning No bfd.h/libbfd found, install binutils-dev[el]/zlib-static to gain symbol demangling)
848 BASIC_CFLAGS += -DNO_DEMANGLE
849 endif
850 endif
851 endif
852 endif
853 endif
854endif 502endif
855 503
856ifeq ($(NO_PERF_REGS),0) 504ifeq ($(NO_PERF_REGS),0)
857 ifeq ($(ARCH),x86) 505 ifeq ($(ARCH),x86)
858 LIB_H += arch/x86/include/perf_regs.h 506 LIB_H += arch/x86/include/perf_regs.h
859 endif 507 endif
860 BASIC_CFLAGS += -DHAVE_PERF_REGS
861endif
862
863ifndef NO_STRLCPY
864 ifeq ($(call try-cc,$(SOURCE_STRLCPY),,-DHAVE_STRLCPY),y)
865 BASIC_CFLAGS += -DHAVE_STRLCPY
866 endif
867endif
868
869ifndef NO_ON_EXIT
870 ifeq ($(call try-cc,$(SOURCE_ON_EXIT),,-DHAVE_ON_EXIT),y)
871 BASIC_CFLAGS += -DHAVE_ON_EXIT
872 endif
873endif
874
875ifndef NO_BACKTRACE
876 ifeq ($(call try-cc,$(SOURCE_BACKTRACE),,-DBACKTRACE_SUPPORT),y)
877 BASIC_CFLAGS += -DBACKTRACE_SUPPORT
878 endif
879endif 508endif
880 509
881ifndef NO_LIBNUMA 510ifndef NO_LIBNUMA
882 FLAGS_LIBNUMA = $(ALL_CFLAGS) $(ALL_LDFLAGS) -lnuma 511 BUILTIN_OBJS += $(OUTPUT)bench/numa.o
883 ifneq ($(call try-cc,$(SOURCE_LIBNUMA),$(FLAGS_LIBNUMA),libnuma),y)
884 msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numa-libs-devel or libnuma-dev);
885 else
886 BASIC_CFLAGS += -DLIBNUMA_SUPPORT
887 BUILTIN_OBJS += $(OUTPUT)bench/numa.o
888 EXTLIBS += -lnuma
889 endif
890endif 512endif
891 513
892ifdef ASCIIDOC8 514ifdef ASCIIDOC8
893 export ASCIIDOC8 515 export ASCIIDOC8
894endif 516endif
895 517
896endif # MAKECMDGOALS != tags
897endif # MAKECMDGOALS != clean
898
899# Shell quote (do not use $(call) to accommodate ancient setups);
900
901ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG))
902
903DESTDIR_SQ = $(subst ','\'',$(DESTDIR))
904bindir_SQ = $(subst ','\'',$(bindir))
905bindir_relative_SQ = $(subst ','\'',$(bindir_relative))
906mandir_SQ = $(subst ','\'',$(mandir))
907infodir_SQ = $(subst ','\'',$(infodir))
908perfexecdir_SQ = $(subst ','\'',$(perfexecdir))
909template_dir_SQ = $(subst ','\'',$(template_dir))
910htmldir_SQ = $(subst ','\'',$(htmldir))
911prefix_SQ = $(subst ','\'',$(prefix))
912sysconfdir_SQ = $(subst ','\'',$(sysconfdir))
913
914SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
915
916LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group 518LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group
917 519
918ALL_CFLAGS += $(BASIC_CFLAGS)
919ALL_CFLAGS += $(ARCH_CFLAGS)
920ALL_LDFLAGS += $(BASIC_LDFLAGS)
921
922export INSTALL SHELL_PATH 520export INSTALL SHELL_PATH
923 521
924
925### Build rules 522### Build rules
926 523
927SHELL = $(SHELL_PATH) 524SHELL = $(SHELL_PATH)
@@ -939,20 +536,20 @@ strip: $(PROGRAMS) $(OUTPUT)perf
939$(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS 536$(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
940 $(QUIET_CC)$(CC) -include $(OUTPUT)PERF-VERSION-FILE \ 537 $(QUIET_CC)$(CC) -include $(OUTPUT)PERF-VERSION-FILE \
941 '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ 538 '-DPERF_HTML_PATH="$(htmldir_SQ)"' \
942 $(ALL_CFLAGS) -c $(filter %.c,$^) -o $@ 539 $(CFLAGS) -c $(filter %.c,$^) -o $@
943 540
944$(OUTPUT)perf: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS) 541$(OUTPUT)perf: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS)
945 $(QUIET_LINK)$(CC) $(ALL_CFLAGS) $(ALL_LDFLAGS) $(OUTPUT)perf.o \ 542 $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(OUTPUT)perf.o \
946 $(BUILTIN_OBJS) $(LIBS) -o $@ 543 $(BUILTIN_OBJS) $(LIBS) -o $@
947 544
948$(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS 545$(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
949 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \ 546 $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
950 '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ 547 '-DPERF_HTML_PATH="$(htmldir_SQ)"' \
951 '-DPERF_MAN_PATH="$(mandir_SQ)"' \ 548 '-DPERF_MAN_PATH="$(mandir_SQ)"' \
952 '-DPERF_INFO_PATH="$(infodir_SQ)"' $< 549 '-DPERF_INFO_PATH="$(infodir_SQ)"' $<
953 550
954$(OUTPUT)builtin-timechart.o: builtin-timechart.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS 551$(OUTPUT)builtin-timechart.o: builtin-timechart.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
955 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \ 552 $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
956 '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ 553 '-DPERF_HTML_PATH="$(htmldir_SQ)"' \
957 '-DPERF_MAN_PATH="$(mandir_SQ)"' \ 554 '-DPERF_MAN_PATH="$(mandir_SQ)"' \
958 '-DPERF_INFO_PATH="$(infodir_SQ)"' $< 555 '-DPERF_INFO_PATH="$(infodir_SQ)"' $<
@@ -977,77 +574,77 @@ $(OUTPUT)perf.o perf.spec \
977# over the general rule for .o 574# over the general rule for .o
978 575
979$(OUTPUT)util/%-flex.o: $(OUTPUT)util/%-flex.c $(OUTPUT)PERF-CFLAGS 576$(OUTPUT)util/%-flex.o: $(OUTPUT)util/%-flex.c $(OUTPUT)PERF-CFLAGS
980 $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(ALL_CFLAGS) -w $< 577 $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -w $<
981 578
982$(OUTPUT)util/%-bison.o: $(OUTPUT)util/%-bison.c $(OUTPUT)PERF-CFLAGS 579$(OUTPUT)util/%-bison.o: $(OUTPUT)util/%-bison.c $(OUTPUT)PERF-CFLAGS
983 $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(ALL_CFLAGS) -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $< 580 $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $<
984 581
985$(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS 582$(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS
986 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $< 583 $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $<
987$(OUTPUT)%.i: %.c $(OUTPUT)PERF-CFLAGS 584$(OUTPUT)%.i: %.c $(OUTPUT)PERF-CFLAGS
988 $(QUIET_CC)$(CC) -o $@ -E $(ALL_CFLAGS) $< 585 $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $<
989$(OUTPUT)%.s: %.c $(OUTPUT)PERF-CFLAGS 586$(OUTPUT)%.s: %.c $(OUTPUT)PERF-CFLAGS
990 $(QUIET_CC)$(CC) -o $@ -S $(ALL_CFLAGS) $< 587 $(QUIET_CC)$(CC) -o $@ -S $(CFLAGS) $<
991$(OUTPUT)%.o: %.S 588$(OUTPUT)%.o: %.S
992 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $< 589 $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $<
993$(OUTPUT)%.s: %.S 590$(OUTPUT)%.s: %.S
994 $(QUIET_CC)$(CC) -o $@ -E $(ALL_CFLAGS) $< 591 $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $<
995 592
996$(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS 593$(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS
997 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \ 594 $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
998 '-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \ 595 '-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \
999 '-DPREFIX="$(prefix_SQ)"' \ 596 '-DPREFIX="$(prefix_SQ)"' \
1000 $< 597 $<
1001 598
1002$(OUTPUT)tests/attr.o: tests/attr.c $(OUTPUT)PERF-CFLAGS 599$(OUTPUT)tests/attr.o: tests/attr.c $(OUTPUT)PERF-CFLAGS
1003 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \ 600 $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
1004 '-DBINDIR="$(bindir_SQ)"' -DPYTHON='"$(PYTHON_WORD)"' \ 601 '-DBINDIR="$(bindir_SQ)"' -DPYTHON='"$(PYTHON_WORD)"' \
1005 $< 602 $<
1006 603
1007$(OUTPUT)tests/python-use.o: tests/python-use.c $(OUTPUT)PERF-CFLAGS 604$(OUTPUT)tests/python-use.o: tests/python-use.c $(OUTPUT)PERF-CFLAGS
1008 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \ 605 $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
1009 -DPYTHONPATH='"$(OUTPUT)python"' \ 606 -DPYTHONPATH='"$(OUTPUT)python"' \
1010 -DPYTHON='"$(PYTHON_WORD)"' \ 607 -DPYTHON='"$(PYTHON_WORD)"' \
1011 $< 608 $<
1012 609
1013$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS 610$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS
1014 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< 611 $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
1015 612
1016$(OUTPUT)ui/browser.o: ui/browser.c $(OUTPUT)PERF-CFLAGS 613$(OUTPUT)ui/browser.o: ui/browser.c $(OUTPUT)PERF-CFLAGS
1017 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $< 614 $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
1018 615
1019$(OUTPUT)ui/browsers/annotate.o: ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS 616$(OUTPUT)ui/browsers/annotate.o: ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS
1020 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $< 617 $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
1021 618
1022$(OUTPUT)ui/browsers/hists.o: ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS 619$(OUTPUT)ui/browsers/hists.o: ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS
1023 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $< 620 $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
1024 621
1025$(OUTPUT)ui/browsers/map.o: ui/browsers/map.c $(OUTPUT)PERF-CFLAGS 622$(OUTPUT)ui/browsers/map.o: ui/browsers/map.c $(OUTPUT)PERF-CFLAGS
1026 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $< 623 $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
1027 624
1028$(OUTPUT)ui/browsers/scripts.o: ui/browsers/scripts.c $(OUTPUT)PERF-CFLAGS 625$(OUTPUT)ui/browsers/scripts.o: ui/browsers/scripts.c $(OUTPUT)PERF-CFLAGS
1029 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $< 626 $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
1030 627
1031$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS 628$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS
1032 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< 629 $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
1033 630
1034$(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS 631$(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS
1035 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -Wno-redundant-decls $< 632 $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-redundant-decls $<
1036 633
1037$(OUTPUT)util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c $(OUTPUT)PERF-CFLAGS 634$(OUTPUT)util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c $(OUTPUT)PERF-CFLAGS
1038 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $< 635 $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
1039 636
1040$(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS 637$(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS
1041 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $< 638 $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
1042 639
1043$(OUTPUT)util/scripting-engines/trace-event-python.o: util/scripting-engines/trace-event-python.c $(OUTPUT)PERF-CFLAGS 640$(OUTPUT)util/scripting-engines/trace-event-python.o: util/scripting-engines/trace-event-python.c $(OUTPUT)PERF-CFLAGS
1044 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $< 641 $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
1045 642
1046$(OUTPUT)scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS 643$(OUTPUT)scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS
1047 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $< 644 $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
1048 645
1049$(OUTPUT)perf-%: %.o $(PERFLIBS) 646$(OUTPUT)perf-%: %.o $(PERFLIBS)
1050 $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) 647 $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $(filter %.o,$^) $(LIBS)
1051 648
1052$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H) 649$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H)
1053$(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h) 650$(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h)
@@ -1134,7 +731,7 @@ cscope:
1134 $(FIND) . -name '*.[hcS]' -print | xargs cscope -b 731 $(FIND) . -name '*.[hcS]' -print | xargs cscope -b
1135 732
1136### Detect prefix changes 733### Detect prefix changes
1137TRACK_CFLAGS = $(subst ','\'',$(ALL_CFLAGS)):\ 734TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\
1138 $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ) 735 $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ)
1139 736
1140$(OUTPUT)PERF-CFLAGS: .FORCE-PERF-CFLAGS 737$(OUTPUT)PERF-CFLAGS: .FORCE-PERF-CFLAGS
@@ -1155,7 +752,7 @@ check: $(OUTPUT)common-cmds.h
1155 then \ 752 then \
1156 for i in *.c */*.c; \ 753 for i in *.c */*.c; \
1157 do \ 754 do \
1158 sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; \ 755 sparse $(CFLAGS) $(SPARSE_FLAGS) $$i || exit; \
1159 done; \ 756 done; \
1160 else \ 757 else \
1161 exit 1; \ 758 exit 1; \
@@ -1163,13 +760,6 @@ check: $(OUTPUT)common-cmds.h
1163 760
1164### Installation rules 761### Installation rules
1165 762
1166ifneq ($(filter /%,$(firstword $(perfexecdir))),)
1167perfexec_instdir = $(perfexecdir)
1168else
1169perfexec_instdir = $(prefix)/$(perfexecdir)
1170endif
1171perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
1172
1173install-bin: all 763install-bin: all
1174 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)' 764 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
1175 $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)' 765 $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'
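
The hunks above are the tail end of the old monolithic tools/perf/Makefile: feature
detection moves into the new tools/perf/config/Makefile (added later in this patch),
and the ALL_CFLAGS/ALL_LDFLAGS build variables become plain CFLAGS/LDFLAGS. Each
$(call try-cc,...) probe works by compiling a tiny C program with the candidate
flags and checking whether that succeeds. The real probe bodies live in
config/feature-tests.mak; the following is only an illustrative sketch of the
shape of such a probe (SOURCE_HELLO-style, not the verbatim source):

#include <stdio.h>

int main(void)
{
	/* If this compiles and links with the probed flags, try-cc answers 'y'. */
	printf("Hello World!\n");
	return 0;
}
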
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 2d0462d89a97..da8f8eb383a0 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -323,13 +323,20 @@ static void hists__baseline_only(struct hists *hists)
323 323
324static void hists__precompute(struct hists *hists) 324static void hists__precompute(struct hists *hists)
325{ 325{
326 struct rb_node *next = rb_first(&hists->entries); 326 struct rb_root *root;
327 struct rb_node *next;
328
329 if (sort__need_collapse)
330 root = &hists->entries_collapsed;
331 else
332 root = hists->entries_in;
327 333
334 next = rb_first(root);
328 while (next != NULL) { 335 while (next != NULL) {
329 struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node); 336 struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node_in);
330 struct hist_entry *pair = hist_entry__next_pair(he); 337 struct hist_entry *pair = hist_entry__next_pair(he);
331 338
332 next = rb_next(&he->rb_node); 339 next = rb_next(&he->rb_node_in);
333 if (!pair) 340 if (!pair)
334 continue; 341 continue;
335 342
@@ -457,7 +464,7 @@ static void hists__process(struct hists *old, struct hists *new)
457 hists__output_resort(new); 464 hists__output_resort(new);
458 } 465 }
459 466
460 hists__fprintf(new, true, 0, 0, stdout); 467 hists__fprintf(new, true, 0, 0, 0, stdout);
461} 468}
462 469
463static int __cmd_diff(void) 470static int __cmd_diff(void)
@@ -611,9 +618,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
611 618
612 setup_pager(); 619 setup_pager();
613 620
614 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", NULL); 621 sort__setup_elide(NULL);
615 sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", NULL);
616 sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", NULL);
617 622
618 return __cmd_diff(); 623 return __cmd_diff();
619} 624}
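
The hists__precompute() change above walks the collapsed/input tree through the
entries' rb_node_in links instead of the output tree's rb_node, so 'perf diff'
can compute deltas before output resorting. The rb_entry() step that recovers the
containing hist_entry from its embedded node is plain pointer arithmetic; below is
a self-contained sketch with stand-in types (the real ones live in util/sort.h
and the tools rbtree, so treat these definitions as illustrative):

#include <stddef.h>
#include <stdio.h>

struct rb_node { struct rb_node *left, *right; };	/* stand-in */

#define rb_entry(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct hist_entry {
	long period;
	struct rb_node rb_node_in;	/* links the entry into hists->entries_in */
};

int main(void)
{
	struct hist_entry he = { .period = 42 };
	struct rb_node *node = &he.rb_node_in;

	/* The same recovery step the new loop performs on each visited node. */
	struct hist_entry *back = rb_entry(node, struct hist_entry, rb_node_in);

	printf("period=%ld\n", back->period);
	return 0;
}
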
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 533501e2b07c..24b78aecc928 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -328,6 +328,7 @@ static int kvm_events_hash_fn(u64 key)
328static bool kvm_event_expand(struct kvm_event *event, int vcpu_id) 328static bool kvm_event_expand(struct kvm_event *event, int vcpu_id)
329{ 329{
330 int old_max_vcpu = event->max_vcpu; 330 int old_max_vcpu = event->max_vcpu;
331 void *prev;
331 332
332 if (vcpu_id < event->max_vcpu) 333 if (vcpu_id < event->max_vcpu)
333 return true; 334 return true;
@@ -335,9 +336,11 @@ static bool kvm_event_expand(struct kvm_event *event, int vcpu_id)
335 while (event->max_vcpu <= vcpu_id) 336 while (event->max_vcpu <= vcpu_id)
336 event->max_vcpu += DEFAULT_VCPU_NUM; 337 event->max_vcpu += DEFAULT_VCPU_NUM;
337 338
339 prev = event->vcpu;
338 event->vcpu = realloc(event->vcpu, 340 event->vcpu = realloc(event->vcpu,
339 event->max_vcpu * sizeof(*event->vcpu)); 341 event->max_vcpu * sizeof(*event->vcpu));
340 if (!event->vcpu) { 342 if (!event->vcpu) {
343 free(prev);
341 pr_err("Not enough memory\n"); 344 pr_err("Not enough memory\n");
342 return false; 345 return false;
343 } 346 }
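
The builtin-kvm.c fix above addresses a classic realloc() pitfall: assigning the
result directly to the only pointer that references the buffer leaks the original
allocation when realloc() fails and returns NULL. Saving the old pointer first
makes it freeable on the error path. A minimal sketch of the same pattern, using
a hypothetical grow() helper (assumes new_len >= *len):

#include <stdlib.h>
#include <string.h>

static int grow(int **vec, size_t *len, size_t new_len)
{
	int *prev = *vec;
	int *tmp = realloc(prev, new_len * sizeof(*tmp));

	if (!tmp) {
		free(prev);	/* realloc failed: the old block is still ours to free */
		*vec = NULL;
		return -1;
	}
	memset(tmp + *len, 0, (new_len - *len) * sizeof(*tmp));	/* zero the new tail */
	*vec = tmp;
	*len = new_len;
	return 0;
}

int main(void)
{
	int *vec = NULL;	/* realloc(NULL, n) acts as malloc(n) */
	size_t len = 0;
	int ret = grow(&vec, &len, 8);

	free(vec);
	return ret;
}
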
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index cdf58ecc04b1..fff985cf3852 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -198,7 +198,6 @@ static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
198 return; 198 return;
199 199
200 signal(signr, SIG_DFL); 200 signal(signr, SIG_DFL);
201 kill(getpid(), signr);
202} 201}
203 202
204static bool perf_evlist__equal(struct perf_evlist *evlist, 203static bool perf_evlist__equal(struct perf_evlist *evlist,
@@ -404,6 +403,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
404 signal(SIGCHLD, sig_handler); 403 signal(SIGCHLD, sig_handler);
405 signal(SIGINT, sig_handler); 404 signal(SIGINT, sig_handler);
406 signal(SIGUSR1, sig_handler); 405 signal(SIGUSR1, sig_handler);
406 signal(SIGTERM, sig_handler);
407 407
408 if (!output_name) { 408 if (!output_name) {
409 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode)) 409 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
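
The builtin-record.c change above routes SIGTERM through the same handler as
SIGINT and drops the self-kill from the exit handler, so a 'perf record' killed
by SIGTERM can finalize its output instead of dying with the default disposition.
A minimal sketch of the idea (not the actual perf code):

#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static volatile sig_atomic_t done;

static void sig_handler(int sig)
{
	(void)sig;
	done = 1;	/* the main loop notices and finalizes its output file */
}

int main(void)
{
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);	/* the case the patch adds */

	while (!done)
		pause();	/* returns once a handler has run */

	puts("cleaning up before exit");
	return 0;
}
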
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index bd0ca81eeaca..ca98d34cd58b 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -52,6 +52,7 @@ struct perf_report {
52 symbol_filter_t annotate_init; 52 symbol_filter_t annotate_init;
53 const char *cpu_list; 53 const char *cpu_list;
54 const char *symbol_filter_str; 54 const char *symbol_filter_str;
55 float min_percent;
55 DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); 56 DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
56}; 57};
57 58
@@ -61,6 +62,11 @@ static int perf_report_config(const char *var, const char *value, void *cb)
61 symbol_conf.event_group = perf_config_bool(var, value); 62 symbol_conf.event_group = perf_config_bool(var, value);
62 return 0; 63 return 0;
63 } 64 }
65 if (!strcmp(var, "report.percent-limit")) {
66 struct perf_report *rep = cb;
67 rep->min_percent = strtof(value, NULL);
68 return 0;
69 }
64 70
65 return perf_default_config(var, value, cb); 71 return perf_default_config(var, value, cb);
66} 72}
@@ -187,6 +193,9 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
187 for (i = 0; i < sample->branch_stack->nr; i++) { 193 for (i = 0; i < sample->branch_stack->nr; i++) {
188 if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym)) 194 if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
189 continue; 195 continue;
196
197 err = -ENOMEM;
198
190 /* 199 /*
191 * The report shows the percentage of total branches captured 200 * The report shows the percentage of total branches captured
192 * and not events sampled. Thus we use a pseudo period of 1. 201 * and not events sampled. Thus we use a pseudo period of 1.
@@ -195,7 +204,6 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
195 &bi[i], 1, 1); 204 &bi[i], 1, 1);
196 if (he) { 205 if (he) {
197 struct annotation *notes; 206 struct annotation *notes;
198 err = -ENOMEM;
199 bx = he->branch_info; 207 bx = he->branch_info;
200 if (bx->from.sym && use_browser == 1 && sort__has_sym) { 208 if (bx->from.sym && use_browser == 1 && sort__has_sym) {
201 notes = symbol__annotation(bx->from.sym); 209 notes = symbol__annotation(bx->from.sym);
@@ -226,11 +234,12 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
226 } 234 }
227 evsel->hists.stats.total_period += 1; 235 evsel->hists.stats.total_period += 1;
228 hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); 236 hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
229 err = 0;
230 } else 237 } else
231 return -ENOMEM; 238 goto out;
232 } 239 }
240 err = 0;
233out: 241out:
242 free(bi);
234 return err; 243 return err;
235} 244}
236 245
@@ -294,6 +303,7 @@ static int process_sample_event(struct perf_tool *tool,
294{ 303{
295 struct perf_report *rep = container_of(tool, struct perf_report, tool); 304 struct perf_report *rep = container_of(tool, struct perf_report, tool);
296 struct addr_location al; 305 struct addr_location al;
306 int ret;
297 307
298 if (perf_event__preprocess_sample(event, machine, &al, sample, 308 if (perf_event__preprocess_sample(event, machine, &al, sample,
299 rep->annotate_init) < 0) { 309 rep->annotate_init) < 0) {
@@ -308,28 +318,25 @@ static int process_sample_event(struct perf_tool *tool,
308 if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) 318 if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
309 return 0; 319 return 0;
310 320
311 if (sort__branch_mode == 1) { 321 if (sort__mode == SORT_MODE__BRANCH) {
312 if (perf_report__add_branch_hist_entry(tool, &al, sample, 322 ret = perf_report__add_branch_hist_entry(tool, &al, sample,
313 evsel, machine)) { 323 evsel, machine);
324 if (ret < 0)
314 pr_debug("problem adding lbr entry, skipping event\n"); 325 pr_debug("problem adding lbr entry, skipping event\n");
315 return -1;
316 }
317 } else if (rep->mem_mode == 1) { 326 } else if (rep->mem_mode == 1) {
318 if (perf_report__add_mem_hist_entry(tool, &al, sample, 327 ret = perf_report__add_mem_hist_entry(tool, &al, sample,
319 evsel, machine, event)) { 328 evsel, machine, event);
329 if (ret < 0)
320 pr_debug("problem adding mem entry, skipping event\n"); 330 pr_debug("problem adding mem entry, skipping event\n");
321 return -1;
322 }
323 } else { 331 } else {
324 if (al.map != NULL) 332 if (al.map != NULL)
325 al.map->dso->hit = 1; 333 al.map->dso->hit = 1;
326 334
327 if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) { 335 ret = perf_evsel__add_hist_entry(evsel, &al, sample, machine);
336 if (ret < 0)
328 pr_debug("problem incrementing symbol period, skipping event\n"); 337 pr_debug("problem incrementing symbol period, skipping event\n");
329 return -1;
330 }
331 } 338 }
332 return 0; 339 return ret;
333} 340}
334 341
335static int process_read_event(struct perf_tool *tool, 342static int process_read_event(struct perf_tool *tool,
@@ -384,7 +391,7 @@ static int perf_report__setup_sample_type(struct perf_report *rep)
384 } 391 }
385 } 392 }
386 393
387 if (sort__branch_mode == 1) { 394 if (sort__mode == SORT_MODE__BRANCH) {
388 if (!self->fd_pipe && 395 if (!self->fd_pipe &&
389 !(sample_type & PERF_SAMPLE_BRANCH_STACK)) { 396 !(sample_type & PERF_SAMPLE_BRANCH_STACK)) {
390 ui__error("Selected -b but no branch data. " 397 ui__error("Selected -b but no branch data. "
@@ -455,7 +462,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
455 continue; 462 continue;
456 463
457 hists__fprintf_nr_sample_events(rep, hists, evname, stdout); 464 hists__fprintf_nr_sample_events(rep, hists, evname, stdout);
458 hists__fprintf(hists, true, 0, 0, stdout); 465 hists__fprintf(hists, true, 0, 0, rep->min_percent, stdout);
459 fprintf(stdout, "\n\n"); 466 fprintf(stdout, "\n\n");
460 } 467 }
461 468
@@ -574,8 +581,8 @@ static int __cmd_report(struct perf_report *rep)
574 if (use_browser > 0) { 581 if (use_browser > 0) {
575 if (use_browser == 1) { 582 if (use_browser == 1) {
576 ret = perf_evlist__tui_browse_hists(session->evlist, 583 ret = perf_evlist__tui_browse_hists(session->evlist,
577 help, 584 help, NULL,
578 NULL, 585 rep->min_percent,
579 &session->header.env); 586 &session->header.env);
580 /* 587 /*
581 * Usually "ret" is the last pressed key, and we only 588 * Usually "ret" is the last pressed key, and we only
@@ -586,7 +593,7 @@ static int __cmd_report(struct perf_report *rep)
586 593
587 } else if (use_browser == 2) { 594 } else if (use_browser == 2) {
588 perf_evlist__gtk_browse_hists(session->evlist, help, 595 perf_evlist__gtk_browse_hists(session->evlist, help,
589 NULL); 596 NULL, rep->min_percent);
590 } 597 }
591 } else 598 } else
592 perf_evlist__tty_browse_hists(session->evlist, rep, help); 599 perf_evlist__tty_browse_hists(session->evlist, rep, help);
@@ -691,7 +698,19 @@ static int
691parse_branch_mode(const struct option *opt __maybe_unused, 698parse_branch_mode(const struct option *opt __maybe_unused,
692 const char *str __maybe_unused, int unset) 699 const char *str __maybe_unused, int unset)
693{ 700{
694 sort__branch_mode = !unset; 701 int *branch_mode = opt->value;
702
703 *branch_mode = !unset;
704 return 0;
705}
706
707static int
708parse_percent_limit(const struct option *opt, const char *str,
709 int unset __maybe_unused)
710{
711 struct perf_report *rep = opt->value;
712
713 rep->min_percent = strtof(str, NULL);
695 return 0; 714 return 0;
696} 715}
697 716
@@ -700,6 +719,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
700 struct perf_session *session; 719 struct perf_session *session;
701 struct stat st; 720 struct stat st;
702 bool has_br_stack = false; 721 bool has_br_stack = false;
722 int branch_mode = -1;
703 int ret = -1; 723 int ret = -1;
704 char callchain_default_opt[] = "fractal,0.5,callee"; 724 char callchain_default_opt[] = "fractal,0.5,callee";
705 const char * const report_usage[] = { 725 const char * const report_usage[] = {
@@ -796,17 +816,19 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
796 "Show a column with the sum of periods"), 816 "Show a column with the sum of periods"),
797 OPT_BOOLEAN(0, "group", &symbol_conf.event_group, 817 OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
798 "Show event group information together"), 818 "Show event group information together"),
799 OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "", 819 OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "",
800 "use branch records for histogram filling", parse_branch_mode), 820 "use branch records for histogram filling", parse_branch_mode),
801 OPT_STRING(0, "objdump", &objdump_path, "path", 821 OPT_STRING(0, "objdump", &objdump_path, "path",
802 "objdump binary to use for disassembly and annotations"), 822 "objdump binary to use for disassembly and annotations"),
803 OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle, 823 OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
804 "Disable symbol demangling"), 824 "Disable symbol demangling"),
805 OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"), 825 OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"),
826 OPT_CALLBACK(0, "percent-limit", &report, "percent",
827 "Don't show entries under that percent", parse_percent_limit),
806 OPT_END() 828 OPT_END()
807 }; 829 };
808 830
809 perf_config(perf_report_config, NULL); 831 perf_config(perf_report_config, &report);
810 832
811 argc = parse_options(argc, argv, options, report_usage, 0); 833 argc = parse_options(argc, argv, options, report_usage, 0);
812 834
@@ -846,11 +868,11 @@ repeat:
846 has_br_stack = perf_header__has_feat(&session->header, 868 has_br_stack = perf_header__has_feat(&session->header,
847 HEADER_BRANCH_STACK); 869 HEADER_BRANCH_STACK);
848 870
849 if (sort__branch_mode == -1 && has_br_stack) 871 if (branch_mode == -1 && has_br_stack)
850 sort__branch_mode = 1; 872 sort__mode = SORT_MODE__BRANCH;
851 873
852 /* sort__branch_mode could be 0 if --no-branch-stack */ 874 /* sort__mode could be NORMAL if --no-branch-stack */
853 if (sort__branch_mode == 1) { 875 if (sort__mode == SORT_MODE__BRANCH) {
854 /* 876 /*
855 * if no sort_order is provided, then specify 877 * if no sort_order is provided, then specify
856 * branch-mode specific order 878 * branch-mode specific order
@@ -861,10 +883,12 @@ repeat:
861 883
862 } 884 }
863 if (report.mem_mode) { 885 if (report.mem_mode) {
864 if (sort__branch_mode == 1) { 886 if (sort__mode == SORT_MODE__BRANCH) {
865 fprintf(stderr, "branch and mem mode incompatible\n"); 887 fprintf(stderr, "branch and mem mode incompatible\n");
866 goto error; 888 goto error;
867 } 889 }
890 sort__mode = SORT_MODE__MEMORY;
891
868 /* 892 /*
869 * if no sort_order is provided, then specify 893 * if no sort_order is provided, then specify
870 * branch-mode specific order 894 * branch-mode specific order
@@ -929,25 +953,7 @@ repeat:
929 report.symbol_filter_str = argv[0]; 953 report.symbol_filter_str = argv[0];
930 } 954 }
931 955
932 sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); 956 sort__setup_elide(stdout);
933
934 if (sort__branch_mode == 1) {
935 sort_entry__setup_elide(&sort_dso_from, symbol_conf.dso_from_list, "dso_from", stdout);
936 sort_entry__setup_elide(&sort_dso_to, symbol_conf.dso_to_list, "dso_to", stdout);
937 sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout);
938 sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout);
939 } else {
940 if (report.mem_mode) {
941 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "symbol_daddr", stdout);
942 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso_daddr", stdout);
943 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "mem", stdout);
944 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "local_weight", stdout);
945 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "tlb", stdout);
946 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "snoop", stdout);
947 }
948 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
949 sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);
950 }
951 957
952 ret = __cmd_report(&report); 958 ret = __cmd_report(&report);
953 if (ret == K_SWITCH_INPUT_DATA) { 959 if (ret == K_SWITCH_INPUT_DATA) {
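
The builtin-report.c changes above thread a min_percent value through the print
and browser paths, settable either with the new --percent-limit option or the
report.percent-limit config key, both converted with strtof(); they also collapse
the open-coded sort_entry__setup_elide() calls into sort__setup_elide() and turn
the add-entry error handling into a single ret path. A sketch of the percentage
parsing; the endptr and range checks are extras added here for illustration (the
perf callbacks pass NULL and do not clamp):

#include <stdio.h>
#include <stdlib.h>

static float parse_percent(const char *str)
{
	char *end;
	float val = strtof(str, &end);

	if (end == str || val < 0.0f || val > 100.0f)
		return 0.0f;	/* 0 means "no limit, show every entry" */
	return val;
}

int main(void)
{
	printf("%.2f\n", parse_percent("0.5"));	/* prints 0.50 */
	return 0;
}
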
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 67bdb9f14ad6..f036af9b6f09 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -70,10 +70,11 @@
70 70
71static volatile int done; 71static volatile int done;
72 72
73#define HEADER_LINE_NR 5
74
73static void perf_top__update_print_entries(struct perf_top *top) 75static void perf_top__update_print_entries(struct perf_top *top)
74{ 76{
75 if (top->print_entries > 9) 77 top->print_entries = top->winsize.ws_row - HEADER_LINE_NR;
76 top->print_entries -= 9;
77} 78}
78 79
79static void perf_top__sig_winch(int sig __maybe_unused, 80static void perf_top__sig_winch(int sig __maybe_unused,
@@ -82,13 +83,6 @@ static void perf_top__sig_winch(int sig __maybe_unused,
82 struct perf_top *top = arg; 83 struct perf_top *top = arg;
83 84
84 get_term_dimensions(&top->winsize); 85 get_term_dimensions(&top->winsize);
85 if (!top->print_entries
86 || (top->print_entries+4) > top->winsize.ws_row) {
87 top->print_entries = top->winsize.ws_row;
88 } else {
89 top->print_entries += 4;
90 top->winsize.ws_row = top->print_entries;
91 }
92 perf_top__update_print_entries(top); 86 perf_top__update_print_entries(top);
93} 87}
94 88
@@ -251,8 +245,11 @@ static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
251{ 245{
252 struct hist_entry *he; 246 struct hist_entry *he;
253 247
248 pthread_mutex_lock(&evsel->hists.lock);
254 he = __hists__add_entry(&evsel->hists, al, NULL, sample->period, 249 he = __hists__add_entry(&evsel->hists, al, NULL, sample->period,
255 sample->weight); 250 sample->weight);
251 pthread_mutex_unlock(&evsel->hists.lock);
252
256 if (he == NULL) 253 if (he == NULL)
257 return NULL; 254 return NULL;
258 255
@@ -290,16 +287,17 @@ static void perf_top__print_sym_table(struct perf_top *top)
290 return; 287 return;
291 } 288 }
292 289
293 hists__collapse_resort_threaded(&top->sym_evsel->hists); 290 hists__collapse_resort(&top->sym_evsel->hists);
294 hists__output_resort_threaded(&top->sym_evsel->hists); 291 hists__output_resort(&top->sym_evsel->hists);
295 hists__decay_entries_threaded(&top->sym_evsel->hists, 292 hists__decay_entries(&top->sym_evsel->hists,
296 top->hide_user_symbols, 293 top->hide_user_symbols,
297 top->hide_kernel_symbols); 294 top->hide_kernel_symbols);
298 hists__output_recalc_col_len(&top->sym_evsel->hists, 295 hists__output_recalc_col_len(&top->sym_evsel->hists,
299 top->winsize.ws_row - 3); 296 top->print_entries - printed);
300 putchar('\n'); 297 putchar('\n');
301 hists__fprintf(&top->sym_evsel->hists, false, 298 hists__fprintf(&top->sym_evsel->hists, false,
302 top->winsize.ws_row - 4 - printed, win_width, stdout); 299 top->print_entries - printed, win_width,
300 top->min_percent, stdout);
303} 301}
304 302
305static void prompt_integer(int *target, const char *msg) 303static void prompt_integer(int *target, const char *msg)
@@ -477,7 +475,6 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c)
477 perf_top__sig_winch(SIGWINCH, NULL, top); 475 perf_top__sig_winch(SIGWINCH, NULL, top);
478 sigaction(SIGWINCH, &act, NULL); 476 sigaction(SIGWINCH, &act, NULL);
479 } else { 477 } else {
480 perf_top__sig_winch(SIGWINCH, NULL, top);
481 signal(SIGWINCH, SIG_DFL); 478 signal(SIGWINCH, SIG_DFL);
482 } 479 }
483 break; 480 break;
@@ -556,11 +553,11 @@ static void perf_top__sort_new_samples(void *arg)
556 if (t->evlist->selected != NULL) 553 if (t->evlist->selected != NULL)
557 t->sym_evsel = t->evlist->selected; 554 t->sym_evsel = t->evlist->selected;
558 555
559 hists__collapse_resort_threaded(&t->sym_evsel->hists); 556 hists__collapse_resort(&t->sym_evsel->hists);
560 hists__output_resort_threaded(&t->sym_evsel->hists); 557 hists__output_resort(&t->sym_evsel->hists);
561 hists__decay_entries_threaded(&t->sym_evsel->hists, 558 hists__decay_entries(&t->sym_evsel->hists,
562 t->hide_user_symbols, 559 t->hide_user_symbols,
563 t->hide_kernel_symbols); 560 t->hide_kernel_symbols);
564} 561}
565 562
566static void *display_thread_tui(void *arg) 563static void *display_thread_tui(void *arg)
@@ -584,7 +581,7 @@ static void *display_thread_tui(void *arg)
584 list_for_each_entry(pos, &top->evlist->entries, node) 581 list_for_each_entry(pos, &top->evlist->entries, node)
585 pos->hists.uid_filter_str = top->record_opts.target.uid_str; 582 pos->hists.uid_filter_str = top->record_opts.target.uid_str;
586 583
587 perf_evlist__tui_browse_hists(top->evlist, help, &hbt, 584 perf_evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent,
588 &top->session->header.env); 585 &top->session->header.env);
589 586
590 done = 1; 587 done = 1;
@@ -794,7 +791,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
794 return; 791 return;
795 } 792 }
796 793
797 if (top->sort_has_symbols) 794 if (sort__has_sym)
798 perf_top__record_precise_ip(top, he, evsel->idx, ip); 795 perf_top__record_precise_ip(top, he, evsel->idx, ip);
799 } 796 }
800 797
@@ -912,9 +909,9 @@ out_err:
912 return -1; 909 return -1;
913} 910}
914 911
915static int perf_top__setup_sample_type(struct perf_top *top) 912static int perf_top__setup_sample_type(struct perf_top *top __maybe_unused)
916{ 913{
917 if (!top->sort_has_symbols) { 914 if (!sort__has_sym) {
918 if (symbol_conf.use_callchain) { 915 if (symbol_conf.use_callchain) {
919 ui__error("Selected -g but \"sym\" not present in --sort/-s."); 916 ui__error("Selected -g but \"sym\" not present in --sort/-s.");
920 return -EINVAL; 917 return -EINVAL;
@@ -1025,6 +1022,16 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
1025 return record_parse_callchain_opt(opt, arg, unset); 1022 return record_parse_callchain_opt(opt, arg, unset);
1026} 1023}
1027 1024
1025static int
1026parse_percent_limit(const struct option *opt, const char *arg,
1027 int unset __maybe_unused)
1028{
1029 struct perf_top *top = opt->value;
1030
1031 top->min_percent = strtof(arg, NULL);
1032 return 0;
1033}
1034
1028int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) 1035int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
1029{ 1036{
1030 int status; 1037 int status;
@@ -1110,6 +1117,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
1110 OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", 1117 OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
1111 "Specify disassembler style (e.g. -M intel for intel syntax)"), 1118 "Specify disassembler style (e.g. -M intel for intel syntax)"),
1112 OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"), 1119 OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"),
1120 OPT_CALLBACK(0, "percent-limit", &top, "percent",
1121 "Don't show entries under that percent", parse_percent_limit),
1113 OPT_END() 1122 OPT_END()
1114 }; 1123 };
1115 const char * const top_usage[] = { 1124 const char * const top_usage[] = {
@@ -1133,6 +1142,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
1133 if (setup_sorting() < 0) 1142 if (setup_sorting() < 0)
1134 usage_with_options(top_usage, options); 1143 usage_with_options(top_usage, options);
1135 1144
1145 /* display thread wants entries to be collapsed in a different tree */
1146 sort__need_collapse = 1;
1147
1136 if (top.use_stdio) 1148 if (top.use_stdio)
1137 use_browser = 0; 1149 use_browser = 0;
1138 else if (top.use_tui) 1150 else if (top.use_tui)
@@ -1200,15 +1212,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
1200 if (symbol__init() < 0) 1212 if (symbol__init() < 0)
1201 return -1; 1213 return -1;
1202 1214
1203 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); 1215 sort__setup_elide(stdout);
1204 sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);
1205 sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);
1206
1207 /*
1208 * Avoid annotation data structures overhead when symbols aren't on the
1209 * sort list.
1210 */
1211 top.sort_has_symbols = sort_sym.list.next != NULL;
1212 1216
1213 get_term_dimensions(&top.winsize); 1217 get_term_dimensions(&top.winsize);
1214 if (top.print_entries == 0) { 1218 if (top.print_entries == 0) {
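
In builtin-top.c above, the display thread now resorts with the regular
hists__collapse_resort()/hists__output_resort() pair (sort__need_collapse = 1
gives it a separate collapsed tree), so the sampling path takes the hists lock
around entry insertion. A minimal sketch of that serialization with illustrative
types (the real lock is the pthread mutex embedded in struct hists in
util/hist.h); build with -pthread:

#include <pthread.h>
#include <stdio.h>

struct hists {
	pthread_mutex_t lock;
	long nr_entries;
};

static void hists__add_entry(struct hists *hists)
{
	pthread_mutex_lock(&hists->lock);
	hists->nr_entries++;	/* stands in for __hists__add_entry() */
	pthread_mutex_unlock(&hists->lock);
}

int main(void)
{
	struct hists hists = { .lock = PTHREAD_MUTEX_INITIALIZER };

	hists__add_entry(&hists);	/* sampling-thread side */
	printf("%ld entries\n", hists.nr_entries);
	return 0;
}
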
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
new file mode 100644
index 000000000000..f139dcd2796e
--- /dev/null
+++ b/tools/perf/config/Makefile
@@ -0,0 +1,477 @@
1uname_M := $(shell uname -m 2>/dev/null || echo not)
2
3ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \
4 -e s/arm.*/arm/ -e s/sa110/arm/ \
5 -e s/s390x/s390/ -e s/parisc64/parisc/ \
6 -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \
7 -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ )
8NO_PERF_REGS := 1
9CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS)
10
11# Additional ARCH settings for x86
12ifeq ($(ARCH),i386)
13 override ARCH := x86
14 NO_PERF_REGS := 0
15 LIBUNWIND_LIBS = -lunwind -lunwind-x86
16endif
17
18ifeq ($(ARCH),x86_64)
19 override ARCH := x86
20 IS_X86_64 := 0
21 ifeq (, $(findstring m32,$(CFLAGS)))
22 IS_X86_64 := $(shell echo __x86_64__ | ${CC} -E -x c - | tail -n 1)
23 endif
24 ifeq (${IS_X86_64}, 1)
25 RAW_ARCH := x86_64
26 CFLAGS += -DARCH_X86_64
27 ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
28 endif
29 NO_PERF_REGS := 0
30 LIBUNWIND_LIBS = -lunwind -lunwind-x86_64
31endif
32
33ifeq ($(NO_PERF_REGS),0)
34 CFLAGS += -DHAVE_PERF_REGS
35endif
36
37ifeq ($(src-perf),)
38src-perf := $(srctree)/tools/perf
39endif
40
41ifeq ($(obj-perf),)
42obj-perf := $(objtree)
43endif
44
45ifneq ($(obj-perf),)
46obj-perf := $(abspath $(obj-perf))/
47endif
48
49# include ARCH specific config
50-include $(src-perf)/arch/$(ARCH)/Makefile
51
52include $(src-perf)/config/feature-tests.mak
53include $(src-perf)/config/utilities.mak
54
55ifeq ($(call get-executable,$(FLEX)),)
56 dummy := $(error Error: $(FLEX) is missing on this system, please install it)
57endif
58
59ifeq ($(call get-executable,$(BISON)),)
60 dummy := $(error Error: $(BISON) is missing on this system, please install it)
61endif
62
63# Treat warnings as errors unless directed not to
64ifneq ($(WERROR),0)
65 CFLAGS += -Werror
66endif
67
68ifeq ("$(origin DEBUG)", "command line")
69 PERF_DEBUG = $(DEBUG)
70endif
71ifndef PERF_DEBUG
72 CFLAGS += -O6
73endif
74
75ifdef PARSER_DEBUG
76 PARSER_DEBUG_BISON := -t
77 PARSER_DEBUG_FLEX := -d
78 CFLAGS += -DPARSER_DEBUG
79endif
80
81CFLAGS += -fno-omit-frame-pointer
82CFLAGS += -ggdb3
83CFLAGS += -funwind-tables
84CFLAGS += -Wall
85CFLAGS += -Wextra
86CFLAGS += -std=gnu99
87
88EXTLIBS = -lpthread -lrt -lelf -lm
89
90ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -fstack-protector-all,-fstack-protector-all),y)
91 CFLAGS += -fstack-protector-all
92endif
93
94ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -Wstack-protector,-Wstack-protector),y)
95 CFLAGS += -Wstack-protector
96endif
97
98ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -Wvolatile-register-var,-Wvolatile-register-var),y)
99 CFLAGS += -Wvolatile-register-var
100endif
101
102ifndef PERF_DEBUG
103 ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -D_FORTIFY_SOURCE=2,-D_FORTIFY_SOURCE=2),y)
104 CFLAGS += -D_FORTIFY_SOURCE=2
105 endif
106endif
107
108CFLAGS += -I$(src-perf)/util/include
109CFLAGS += -I$(src-perf)/arch/$(ARCH)/include
110CFLAGS += -I$(srctree)/arch/$(ARCH)/include/uapi
111CFLAGS += -I$(srctree)/arch/$(ARCH)/include
112CFLAGS += -I$(srctree)/include/uapi
113CFLAGS += -I$(srctree)/include
114
115# $(obj-perf) for generated common-cmds.h
116# $(obj-perf)/util for generated bison/flex headers
117ifneq ($(OUTPUT),)
118CFLAGS += -I$(obj-perf)/util
119CFLAGS += -I$(obj-perf)
120endif
121
122CFLAGS += -I$(src-perf)/util
123CFLAGS += -I$(src-perf)
124CFLAGS += -I$(TRACE_EVENT_DIR)
125CFLAGS += -I$(srctree)/tools/lib/
126
127CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
128
129ifndef NO_BIONIC
130ifeq ($(call try-cc,$(SOURCE_BIONIC),$(CFLAGS),bionic),y)
131 BIONIC := 1
132 EXTLIBS := $(filter-out -lrt,$(EXTLIBS))
133 EXTLIBS := $(filter-out -lpthread,$(EXTLIBS))
134endif
135endif # NO_BIONIC
136
137ifdef NO_LIBELF
138 NO_DWARF := 1
139 NO_DEMANGLE := 1
140 NO_LIBUNWIND := 1
141else
142FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS)
143ifneq ($(call try-cc,$(SOURCE_LIBELF),$(FLAGS_LIBELF),libelf),y)
144 FLAGS_GLIBC=$(CFLAGS) $(LDFLAGS)
145 ifeq ($(call try-cc,$(SOURCE_GLIBC),$(FLAGS_GLIBC),glibc),y)
146 LIBC_SUPPORT := 1
147 endif
148 ifeq ($(BIONIC),1)
149 LIBC_SUPPORT := 1
150 endif
151 ifeq ($(LIBC_SUPPORT),1)
152 msg := $(warning No libelf found, disables 'probe' tool, please install elfutils-libelf-devel/libelf-dev);
153
154 NO_LIBELF := 1
155 NO_DWARF := 1
156 NO_DEMANGLE := 1
157 else
158 msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static);
159 endif
160else
161 # for linking with debug library, run like:
162 # make DEBUG=1 LIBDW_DIR=/opt/libdw/
163 ifdef LIBDW_DIR
164 LIBDW_CFLAGS := -I$(LIBDW_DIR)/include
165 LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib
166 endif
167
168 FLAGS_DWARF=$(CFLAGS) $(LIBDW_CFLAGS) -ldw -lelf $(LIBDW_LDFLAGS) $(LDFLAGS) $(EXTLIBS)
169 ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF),libdw),y)
170 msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev);
171 NO_DWARF := 1
172 endif # Dwarf support
173endif # SOURCE_LIBELF
174endif # NO_LIBELF
175
176ifndef NO_LIBELF
177CFLAGS += -DLIBELF_SUPPORT
178FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS)
179ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_LIBELF),-DLIBELF_MMAP),y)
180 CFLAGS += -DLIBELF_MMAP
181endif
182
183# include ARCH specific config
184-include $(src-perf)/arch/$(ARCH)/Makefile
185
186ifndef NO_DWARF
187ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
188 msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);
189 NO_DWARF := 1
190else
191 CFLAGS += -DDWARF_SUPPORT $(LIBDW_CFLAGS)
192 LDFLAGS += $(LIBDW_LDFLAGS)
193 EXTLIBS += -lelf -ldw
194endif # PERF_HAVE_DWARF_REGS
195endif # NO_DWARF
196
197endif # NO_LIBELF
198
199ifndef NO_LIBELF
200CFLAGS += -DLIBELF_SUPPORT
201FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS)
202ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_LIBELF),-DLIBELF_MMAP),y)
203 CFLAGS += -DLIBELF_MMAP
204endif # try-cc
205endif # NO_LIBELF
206
207# There's only x86 (both 32 and 64) support for CFI unwind so far
208ifneq ($(ARCH),x86)
209 NO_LIBUNWIND := 1
210endif
211
212ifndef NO_LIBUNWIND
213# for linking with debug library, run like:
214# make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/
215ifdef LIBUNWIND_DIR
216 LIBUNWIND_CFLAGS := -I$(LIBUNWIND_DIR)/include
217 LIBUNWIND_LDFLAGS := -L$(LIBUNWIND_DIR)/lib
218endif
219
220FLAGS_UNWIND=$(LIBUNWIND_CFLAGS) $(CFLAGS) $(LIBUNWIND_LDFLAGS) $(LDFLAGS) $(EXTLIBS) $(LIBUNWIND_LIBS)
221ifneq ($(call try-cc,$(SOURCE_LIBUNWIND),$(FLAGS_UNWIND),libunwind),y)
222 msg := $(warning No libunwind found, disabling post unwind support. Please install libunwind-dev[el] >= 0.99);
223 NO_LIBUNWIND := 1
224endif # Libunwind support
225endif # NO_LIBUNWIND
226
227ifndef NO_LIBUNWIND
228 CFLAGS += -DLIBUNWIND_SUPPORT
229 EXTLIBS += $(LIBUNWIND_LIBS)
230 CFLAGS += $(LIBUNWIND_CFLAGS)
231 LDFLAGS += $(LIBUNWIND_LDFLAGS)
232endif # NO_LIBUNWIND
233
234ifndef NO_LIBAUDIT
235 FLAGS_LIBAUDIT = $(CFLAGS) $(LDFLAGS) -laudit
236 ifneq ($(call try-cc,$(SOURCE_LIBAUDIT),$(FLAGS_LIBAUDIT),libaudit),y)
237 msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev);
238 NO_LIBAUDIT := 1
239 else
240 CFLAGS += -DLIBAUDIT_SUPPORT
241 EXTLIBS += -laudit
242 endif
243endif
244
245ifdef NO_NEWT
246 NO_SLANG=1
247endif
248
249ifndef NO_SLANG
250 FLAGS_SLANG=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) -I/usr/include/slang -lslang
251 ifneq ($(call try-cc,$(SOURCE_SLANG),$(FLAGS_SLANG),libslang),y)
252 msg := $(warning slang not found, disables TUI support. Please install slang-devel or libslang-dev);
253 NO_SLANG := 1
254 else
255 # Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h
256 CFLAGS += -I/usr/include/slang
257 CFLAGS += -DSLANG_SUPPORT
258 EXTLIBS += -lslang
259 endif
260endif
261
262ifndef NO_GTK2
263 FLAGS_GTK2=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null)
264 ifneq ($(call try-cc,$(SOURCE_GTK2),$(FLAGS_GTK2),gtk2),y)
265 msg := $(warning GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev);
266 NO_GTK2 := 1
267 else
268 ifeq ($(call try-cc,$(SOURCE_GTK2_INFOBAR),$(FLAGS_GTK2),-DHAVE_GTK_INFO_BAR),y)
269 CFLAGS += -DHAVE_GTK_INFO_BAR
270 endif
271 CFLAGS += -DGTK2_SUPPORT
272 CFLAGS += $(shell pkg-config --cflags gtk+-2.0 2>/dev/null)
273 EXTLIBS += $(shell pkg-config --libs gtk+-2.0 2>/dev/null)
274 endif
275endif
276
277grep-libs = $(filter -l%,$(1))
278strip-libs = $(filter-out -l%,$(1))
279
280ifdef NO_LIBPERL
281 CFLAGS += -DNO_LIBPERL
282else
283 PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null)
284 PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
285 PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
286 PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
287 FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
288
289 ifneq ($(call try-cc,$(SOURCE_PERL_EMBED),$(FLAGS_PERL_EMBED),perl),y)
290 CFLAGS += -DNO_LIBPERL
291 NO_LIBPERL := 1
292 else
293 LDFLAGS += $(PERL_EMBED_LDFLAGS)
294 EXTLIBS += $(PERL_EMBED_LIBADD)
295 endif
296endif
297
298disable-python = $(eval $(disable-python_code))
299define disable-python_code
300 CFLAGS += -DNO_LIBPYTHON
301 $(if $(1),$(warning No $(1) was found))
302 $(warning Python support will not be built)
303 NO_LIBPYTHON := 1
304endef
305
306override PYTHON := \
307 $(call get-executable-or-default,PYTHON,python)
308
309ifndef PYTHON
310 $(call disable-python,python interpreter)
311else
312
313 PYTHON_WORD := $(call shell-wordify,$(PYTHON))
314
315 ifdef NO_LIBPYTHON
316 $(call disable-python)
317 else
318
319 override PYTHON_CONFIG := \
320 $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON)-config)
321
322 ifndef PYTHON_CONFIG
323 $(call disable-python,python-config tool)
324 else
325
326 PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
327
328 PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
329 PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
330 PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS))
331 PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
332 FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
333
334 ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED),python),y)
335 $(call disable-python,Python.h (for Python 2.x))
336 else
337
338 ifneq ($(call try-cc,$(SOURCE_PYTHON_VERSION),$(FLAGS_PYTHON_EMBED),python version),y)
339 $(warning Python 3 is not yet supported; please set)
340 $(warning PYTHON and/or PYTHON_CONFIG appropriately.)
341 $(warning If you also have Python 2 installed, then)
342 $(warning try something like:)
343 $(warning $(and ,))
344 $(warning $(and ,) make PYTHON=python2)
345 $(warning $(and ,))
346 $(warning Otherwise, disable Python support entirely:)
347 $(warning $(and ,))
348 $(warning $(and ,) make NO_LIBPYTHON=1)
349 $(warning $(and ,))
350 $(error $(and ,))
351 else
352 LDFLAGS += $(PYTHON_EMBED_LDFLAGS)
353 EXTLIBS += $(PYTHON_EMBED_LIBADD)
354 LANG_BINDINGS += $(obj-perf)python/perf.so
355 endif
356 endif
357 endif
358 endif
359endif
360
361ifdef NO_DEMANGLE
362 CFLAGS += -DNO_DEMANGLE
363else
364 ifdef HAVE_CPLUS_DEMANGLE
365 EXTLIBS += -liberty
366 CFLAGS += -DHAVE_CPLUS_DEMANGLE
367 else
368 FLAGS_BFD=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) -DPACKAGE='perf' -lbfd
369 has_bfd := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD),libbfd)
370 ifeq ($(has_bfd),y)
371 EXTLIBS += -lbfd
372 else
373 FLAGS_BFD_IBERTY=$(FLAGS_BFD) -liberty
374 has_bfd_iberty := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD_IBERTY),liberty)
375 ifeq ($(has_bfd_iberty),y)
376 EXTLIBS += -lbfd -liberty
377 else
378 FLAGS_BFD_IBERTY_Z=$(FLAGS_BFD_IBERTY) -lz
379 has_bfd_iberty_z := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD_IBERTY_Z),libz)
380 ifeq ($(has_bfd_iberty_z),y)
381 EXTLIBS += -lbfd -liberty -lz
382 else
383 FLAGS_CPLUS_DEMANGLE=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) -liberty
384 has_cplus_demangle := $(call try-cc,$(SOURCE_CPLUS_DEMANGLE),$(FLAGS_CPLUS_DEMANGLE),demangle)
385 ifeq ($(has_cplus_demangle),y)
386 EXTLIBS += -liberty
387 CFLAGS += -DHAVE_CPLUS_DEMANGLE
388 else
389 msg := $(warning No bfd.h/libbfd found, install binutils-dev[el]/zlib-static to gain symbol demangling)
390 CFLAGS += -DNO_DEMANGLE
391 endif
392 endif
393 endif
394 endif
395 endif
396endif
397
398ifndef NO_STRLCPY
399 ifeq ($(call try-cc,$(SOURCE_STRLCPY),,-DHAVE_STRLCPY),y)
400 CFLAGS += -DHAVE_STRLCPY
401 endif
402endif
403
404ifndef NO_ON_EXIT
405 ifeq ($(call try-cc,$(SOURCE_ON_EXIT),,-DHAVE_ON_EXIT),y)
406 CFLAGS += -DHAVE_ON_EXIT
407 endif
408endif
409
410ifndef NO_BACKTRACE
411 ifeq ($(call try-cc,$(SOURCE_BACKTRACE),,-DBACKTRACE_SUPPORT),y)
412 CFLAGS += -DBACKTRACE_SUPPORT
413 endif
414endif
415
416ifndef NO_LIBNUMA
417 FLAGS_LIBNUMA = $(CFLAGS) $(LDFLAGS) -lnuma
418 ifneq ($(call try-cc,$(SOURCE_LIBNUMA),$(FLAGS_LIBNUMA),libnuma),y)
419 msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numa-libs-devel or libnuma-dev);
420 NO_LIBNUMA := 1
421 else
422 CFLAGS += -DLIBNUMA_SUPPORT
423 EXTLIBS += -lnuma
424 endif
425endif
426
427# Among the variables below, these:
428# perfexecdir
429# template_dir
430# mandir
431# infodir
432# htmldir
433# ETC_PERFCONFIG (but not sysconfdir)
434# can be specified as a relative path some/where/else;
435# this is interpreted as relative to $(prefix) and "perf" at
436# runtime figures out where they are based on the path to the executable.
437# This can help installing the suite in a relocatable way.
438
439# Make the path relative to DESTDIR, not to prefix
440ifndef DESTDIR
441prefix = $(HOME)
442endif
443bindir_relative = bin
444bindir = $(prefix)/$(bindir_relative)
445mandir = share/man
446infodir = share/info
447perfexecdir = libexec/perf-core
448sharedir = $(prefix)/share
449template_dir = share/perf-core/templates
450htmldir = share/doc/perf-doc
451ifeq ($(prefix),/usr)
452sysconfdir = /etc
453ETC_PERFCONFIG = $(sysconfdir)/perfconfig
454else
455sysconfdir = $(prefix)/etc
456ETC_PERFCONFIG = etc/perfconfig
457endif
458lib = lib
459
460# Shell quote (do not use $(call) to accommodate ancient setups);
461ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG))
462DESTDIR_SQ = $(subst ','\'',$(DESTDIR))
463bindir_SQ = $(subst ','\'',$(bindir))
464mandir_SQ = $(subst ','\'',$(mandir))
465infodir_SQ = $(subst ','\'',$(infodir))
466perfexecdir_SQ = $(subst ','\'',$(perfexecdir))
467template_dir_SQ = $(subst ','\'',$(template_dir))
468htmldir_SQ = $(subst ','\'',$(htmldir))
469prefix_SQ = $(subst ','\'',$(prefix))
470sysconfdir_SQ = $(subst ','\'',$(sysconfdir))
471
472ifneq ($(filter /%,$(firstword $(perfexecdir))),)
473perfexec_instdir = $(perfexecdir)
474else
475perfexec_instdir = $(prefix)/$(perfexecdir)
476endif
477perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
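
The new config/Makefile ends with the installation-path defaults and the *_SQ
shell-quoting helpers. The $(subst ','\'',$(var)) idiom makes a value safe to
embed inside single quotes in a shell command by rewriting every ' as '\''
(close the quote, emit an escaped quote, reopen). The same transformation
rendered in C, purely as an illustration:

#include <stdio.h>

static void shell_sq(const char *s)
{
	putchar('\'');
	for (; *s; s++) {
		if (*s == '\'')
			fputs("'\\''", stdout);	/* close, escaped quote, reopen */
		else
			putchar(*s);
	}
	putchar('\'');
}

int main(void)
{
	shell_sq("it's a path");	/* prints 'it'\''s a path' */
	putchar('\n');
	return 0;
}
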
diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record
index b4fc835de607..e9bd6391f2ae 100644
--- a/tools/perf/tests/attr/base-record
+++ b/tools/perf/tests/attr/base-record
@@ -27,8 +27,8 @@ watermark=0
27precise_ip=0 27precise_ip=0
28mmap_data=0 28mmap_data=0
29sample_id_all=1 29sample_id_all=1
30exclude_host=0 30exclude_host=0|1
31exclude_guest=1 31exclude_guest=0|1
32exclude_callchain_kernel=0 32exclude_callchain_kernel=0
33exclude_callchain_user=0 33exclude_callchain_user=0
34wakeup_events=0 34wakeup_events=0
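
In the attr tests above, a value written as 0|1 means the harness accepts either
setting, since the exclude_host/exclude_guest defaults depend on the environment
perf runs in. Both are single-bit fields of perf_event_attr; a small
illustration (requires the kernel uapi headers):

#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.exclude_host = 1;	/* don't count while on the host side */
	attr.exclude_guest = 1;	/* don't count while in a KVM guest */

	printf("host=%u guest=%u\n",
	       (unsigned)attr.exclude_host, (unsigned)attr.exclude_guest);
	return 0;
}
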
diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/attr/base-stat
index 748ee949a204..91cd48b399f3 100644
--- a/tools/perf/tests/attr/base-stat
+++ b/tools/perf/tests/attr/base-stat
@@ -27,8 +27,8 @@ watermark=0
27precise_ip=0 27precise_ip=0
28mmap_data=0 28mmap_data=0
29sample_id_all=0 29sample_id_all=0
30exclude_host=0 30exclude_host=0|1
31exclude_guest=1 31exclude_guest=0|1
32exclude_callchain_kernel=0 32exclude_callchain_kernel=0
33exclude_callchain_user=0 33exclude_callchain_user=0
34wakeup_events=0 34wakeup_events=0
diff --git a/tools/perf/tests/attr/test-record-data b/tools/perf/tests/attr/test-record-data
index 6627c3e7534a..716e143b5291 100644
--- a/tools/perf/tests/attr/test-record-data
+++ b/tools/perf/tests/attr/test-record-data
@@ -4,5 +4,8 @@ args = -d kill >/dev/null 2>&1
4 4
5[event:base-record] 5[event:base-record]
6sample_period=4000 6sample_period=4000
7sample_type=271 7
8# sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME |
9# PERF_SAMPLE_ADDR | PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC
10sample_type=33039
8mmap_data=1 11mmap_data=1
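
The sample_type values in these attr tests are perf_event_attr.sample_type bit
masks, which is why the patch adds the decoded comment: the old 271 covered
IP|TID|TIME|ADDR|PERIOD, and adding PERF_SAMPLE_DATA_SRC (bit 15) gives 33039.
The arithmetic, using the PERF_SAMPLE_* bit positions from the uapi header:

#include <stdio.h>

enum {
	PERF_SAMPLE_IP		= 1U << 0,
	PERF_SAMPLE_TID		= 1U << 1,
	PERF_SAMPLE_TIME	= 1U << 2,
	PERF_SAMPLE_ADDR	= 1U << 3,
	PERF_SAMPLE_PERIOD	= 1U << 8,
	PERF_SAMPLE_DATA_SRC	= 1U << 15,
};

int main(void)
{
	unsigned int mask_old = PERF_SAMPLE_IP | PERF_SAMPLE_TID |
				PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR |
				PERF_SAMPLE_PERIOD;
	unsigned int mask_new = mask_old | PERF_SAMPLE_DATA_SRC;

	printf("%u %u\n", mask_old, mask_new);	/* 271 33039 */
	return 0;
}
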
diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c
index 68daa289e94c..aba095489193 100644
--- a/tools/perf/tests/bp_signal.c
+++ b/tools/perf/tests/bp_signal.c
@@ -4,6 +4,12 @@
4 * (git://github.com/deater/perf_event_tests) 4 * (git://github.com/deater/perf_event_tests)
5 */ 5 */
6 6
7/*
8 * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select
9 * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu.
10 */
11#define __SANE_USERSPACE_TYPES__
12
7#include <stdlib.h> 13#include <stdlib.h>
8#include <stdio.h> 14#include <stdio.h>
9#include <unistd.h> 15#include <unistd.h>
diff --git a/tools/perf/tests/bp_signal_overflow.c b/tools/perf/tests/bp_signal_overflow.c
index fe7ed28815f8..44ac82179708 100644
--- a/tools/perf/tests/bp_signal_overflow.c
+++ b/tools/perf/tests/bp_signal_overflow.c
@@ -3,6 +3,12 @@
3 * perf_event_tests (git://github.com/deater/perf_event_tests) 3 * perf_event_tests (git://github.com/deater/perf_event_tests)
4 */ 4 */
5 5
6/*
7 * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select
8 * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu.
9 */
10#define __SANE_USERSPACE_TYPES__
11
6#include <stdlib.h> 12#include <stdlib.h>
7#include <stdio.h> 13#include <stdio.h>
8#include <unistd.h> 14#include <unistd.h>
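
The two test fixes above pin down a powerpc build warning: without
__SANE_USERSPACE_TYPES__, the exported headers there typedef __u64 as unsigned
long, so printing it with %llu trips -Wformat. Defining the macro before the
first <linux/types.h> include selects the int-ll64 flavour (unsigned long long)
on every architecture. In miniature:

/* must come before any header that pulls in <linux/types.h> */
#define __SANE_USERSPACE_TYPES__

#include <linux/types.h>
#include <stdio.h>

int main(void)
{
	__u64 count = 1ULL << 40;

	printf("%llu\n", count);	/* no format warning, powerpc included */
	return 0;
}
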
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 0918ada4cc41..35b45f1466b5 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -70,7 +70,7 @@ static struct test {
70 .func = test__attr, 70 .func = test__attr,
71 }, 71 },
72 { 72 {
73 .desc = "Test matching and linking mutliple hists", 73 .desc = "Test matching and linking multiple hists",
74 .func = test__hists_link, 74 .func = test__hists_link,
75 }, 75 },
76 { 76 {
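
The typo fix above sits in perf's built-in test table, an array of struct test
entries terminated by a NULL func that 'perf test' iterates over. A reduced,
hypothetical rendering of that designated-initializer table pattern:

#include <stdio.h>

struct test {
	const char *desc;
	int (*func)(void);
};

static int test__dummy(void) { return 0; }

static struct test tests[] = {
	{ .desc = "Test matching and linking multiple hists", .func = test__dummy },
	{ .desc = NULL, .func = NULL },	/* sentinel ends the iteration */
};

int main(void)
{
	for (int i = 0; tests[i].func; i++)
		printf("%2d: %-45s: %s\n", i + 1, tests[i].desc,
		       tests[i].func() ? "FAILED!" : "Ok");
	return 0;
}
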
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
new file mode 100644
index 000000000000..c441a2875128
--- /dev/null
+++ b/tools/perf/tests/make
@@ -0,0 +1,138 @@
+PERF := .
+MK := Makefile
+
+# standard single make variable specified
+make_clean_all := clean all
+make_python_perf_so := python/perf.so
+make_debug := DEBUG=1
+make_no_libperl := NO_LIBPERL=1
+make_no_libpython := NO_LIBPYTHON=1
+make_no_scripts := NO_LIBPYTHON=1 NO_LIBPERL=1
+make_no_newt := NO_NEWT=1
+make_no_slang := NO_SLANG=1
+make_no_gtk2 := NO_GTK2=1
+make_no_ui := NO_NEWT=1 NO_SLANG=1 NO_GTK2=1
+make_no_demangle := NO_DEMANGLE=1
+make_no_libelf := NO_LIBELF=1
+make_no_libunwind := NO_LIBUNWIND=1
+make_no_backtrace := NO_BACKTRACE=1
+make_no_libnuma := NO_LIBNUMA=1
+make_no_libaudit := NO_LIBAUDIT=1
+make_no_libbionic := NO_LIBBIONIC=1
+make_tags := tags
+make_cscope := cscope
+make_help := help
+make_doc := doc
+make_perf_o := perf.o
+make_util_map_o := util/map.o
+
+# all the NO_* variable combined
+make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1
+make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1
+make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1
+
+# $(run) contains all available tests
+run := make_pure
+run += make_clean_all
+run += make_python_perf_so
+run += make_debug
+run += make_no_libperl
+run += make_no_libpython
+run += make_no_scripts
+run += make_no_newt
+run += make_no_slang
+run += make_no_gtk2
+run += make_no_ui
+run += make_no_demangle
+run += make_no_libelf
+run += make_no_libunwind
+run += make_no_backtrace
+run += make_no_libnuma
+run += make_no_libaudit
+run += make_no_libbionic
+run += make_tags
+run += make_cscope
+run += make_help
+run += make_doc
+run += make_perf_o
+run += make_util_map_o
+run += make_minimal
+
+# $(run_O) contains same portion of $(run) tests with '_O' attached
+# to distinguish O=... tests
+run_O := $(addsuffix _O,$(run))
+
+# disable some tests for O=...
+run_O := $(filter-out make_python_perf_so_O,$(run_O))
+
+# define test for each compile as 'test_NAME' variable
+# with the test itself as a value
+test_make_tags = test -f tags
+test_make_cscope = test -f cscope.out
+
+test_make_tags_O := $(test_make_tags)
+test_make_cscope_O := $(test_make_cscope)
+
+test_ok := true
+test_make_help := $(test_ok)
+test_make_doc := $(test_ok)
+test_make_help_O := $(test_ok)
+test_make_doc_O := $(test_ok)
+
+test_make_python_perf_so := test -f $(PERF)/python/perf.so
+
+test_make_perf_o := test -f $(PERF)/perf.o
+test_make_util_map_o := test -f $(PERF)/util/map.o
+
+# Kbuild tests only
+#test_make_python_perf_so_O := test -f $$TMP/tools/perf/python/perf.so
+#test_make_perf_o_O := test -f $$TMP/tools/perf/perf.o
+#test_make_util_map_o_O := test -f $$TMP/tools/perf/util/map.o
+
+test_make_perf_o_O := true
+test_make_util_map_o_O := true
+
+test_default = test -x $(PERF)/perf
+test = $(if $(test_$1),$(test_$1),$(test_default))
+
+test_default_O = test -x $$TMP/perf
+test_O = $(if $(test_$1),$(test_$1),$(test_default_O))
+
+all:
+
+ifdef DEBUG
+d := $(info run $(run))
+d := $(info run_O $(run_O))
+endif
+
+MAKEFLAGS := --no-print-directory
+
+clean := @(cd $(PERF); make -s -f $(MK) clean >/dev/null)
+
+$(run):
+	$(call clean)
+	@cmd="cd $(PERF) && make -f $(MK) $($@)"; \
+	echo "- $@: $$cmd" && echo $$cmd > $@ && \
+	( eval $$cmd ) >> $@ 2>&1; \
+	echo " test: $(call test,$@)"; \
+	$(call test,$@) && \
+	rm -f $@
+
+$(run_O):
+	$(call clean)
+	@TMP=$$(mktemp -d); \
+	cmd="cd $(PERF) && make -f $(MK) $($(patsubst %_O,%,$@)) O=$$TMP"; \
+	echo "- $@: $$cmd" && echo $$cmd > $@ && \
+	( eval $$cmd ) >> $@ 2>&1 && \
+	echo " test: $(call test_O,$@)"; \
+	$(call test_O,$@) && \
+	rm -f $@ && \
+	rm -rf $$TMP
+
+all: $(run) $(run_O)
+	@echo OK
+
+out: $(run_O)
+	@echo OK
+
+.PHONY: all $(run) $(run_O) clean
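
Note: each $(run) target above cleans the tree, runs one make invocation with
the listed variables, logs the command and its output to a file named after
the target, applies the matching test_* check (defaulting to
'test -x $(PERF)/perf'), and removes the log only on success, so a failing
combination leaves its log behind for inspection. Given PERF := ., the suite
is presumably driven from the perf source directory, e.g. 'make -f tests/make'
for the whole matrix, 'make -f tests/make make_no_libelf' for one combination,
or with DEBUG=1 set to print the $(run) and $(run_O) lists first.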
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index d88a2d0acb6d..fc0bd3843d34 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -25,7 +25,8 @@ struct hist_browser {
 	struct map_symbol *selection;
 	int print_seq;
 	bool show_dso;
-	bool has_symbols;
+	float min_pcnt;
+	u64 nr_pcnt_entries;
 };
 
 extern void hist_browser__init_hpp(void);
@@ -309,6 +310,8 @@ static void ui_browser__warn_lost_events(struct ui_browser *browser)
 		"Or reduce the sampling frequency.");
 }
 
+static void hist_browser__update_pcnt_entries(struct hist_browser *hb);
+
 static int hist_browser__run(struct hist_browser *browser, const char *ev_name,
 			     struct hist_browser_timer *hbt)
 {
@@ -318,6 +321,8 @@ static int hist_browser__run(struct hist_browser *browser, const char *ev_name,
 
 	browser->b.entries = &browser->hists->entries;
 	browser->b.nr_entries = browser->hists->nr_entries;
+	if (browser->min_pcnt)
+		browser->b.nr_entries = browser->nr_pcnt_entries;
 
 	hist_browser__refresh_dimensions(browser);
 	hists__browser_title(browser->hists, title, sizeof(title), ev_name);
@@ -330,9 +335,18 @@ static int hist_browser__run(struct hist_browser *browser, const char *ev_name,
 		key = ui_browser__run(&browser->b, delay_secs);
 
 		switch (key) {
-		case K_TIMER:
+		case K_TIMER: {
+			u64 nr_entries;
 			hbt->timer(hbt->arg);
-			ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries);
+
+			if (browser->min_pcnt) {
+				hist_browser__update_pcnt_entries(browser);
+				nr_entries = browser->nr_pcnt_entries;
+			} else {
+				nr_entries = browser->hists->nr_entries;
+			}
+
+			ui_browser__update_nr_entries(&browser->b, nr_entries);
 
 			if (browser->hists->stats.nr_lost_warned !=
 			    browser->hists->stats.nr_events[PERF_RECORD_LOST]) {
@@ -344,6 +358,7 @@ static int hist_browser__run(struct hist_browser *browser, const char *ev_name,
 			hists__browser_title(browser->hists, title, sizeof(title), ev_name);
 			ui_browser__show_title(&browser->b, title);
 			continue;
+		}
 		case 'D': { /* Debug */
 			static int seq;
 			struct hist_entry *h = rb_entry(browser->b.top,
@@ -796,10 +811,15 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser)
 
 	for (nd = browser->top; nd; nd = rb_next(nd)) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+		float percent = h->stat.period * 100.0 /
+					hb->hists->stats.total_period;
 
 		if (h->filtered)
 			continue;
 
+		if (percent < hb->min_pcnt)
+			continue;
+
 		row += hist_browser__show_entry(hb, h, row);
 		if (row == browser->height)
 			break;
@@ -808,10 +828,18 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser)
 	return row;
 }
 
-static struct rb_node *hists__filter_entries(struct rb_node *nd)
+static struct rb_node *hists__filter_entries(struct rb_node *nd,
+					     struct hists *hists,
+					     float min_pcnt)
 {
 	while (nd != NULL) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+		float percent = h->stat.period * 100.0 /
+					hists->stats.total_period;
+
+		if (percent < min_pcnt)
+			return NULL;
+
 		if (!h->filtered)
 			return nd;
 
@@ -821,11 +849,16 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd)
 	return NULL;
 }
 
-static struct rb_node *hists__filter_prev_entries(struct rb_node *nd)
+static struct rb_node *hists__filter_prev_entries(struct rb_node *nd,
+						  struct hists *hists,
+						  float min_pcnt)
 {
 	while (nd != NULL) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
-		if (!h->filtered)
+		float percent = h->stat.period * 100.0 /
+					hists->stats.total_period;
+
+		if (!h->filtered && percent >= min_pcnt)
 			return nd;
 
 		nd = rb_prev(nd);
@@ -840,6 +873,9 @@ static void ui_browser__hists_seek(struct ui_browser *browser,
 	struct hist_entry *h;
 	struct rb_node *nd;
 	bool first = true;
+	struct hist_browser *hb;
+
+	hb = container_of(browser, struct hist_browser, b);
 
 	if (browser->nr_entries == 0)
 		return;
@@ -848,13 +884,15 @@
 
 	switch (whence) {
 	case SEEK_SET:
-		nd = hists__filter_entries(rb_first(browser->entries));
+		nd = hists__filter_entries(rb_first(browser->entries),
+					   hb->hists, hb->min_pcnt);
 		break;
 	case SEEK_CUR:
 		nd = browser->top;
 		goto do_offset;
 	case SEEK_END:
-		nd = hists__filter_prev_entries(rb_last(browser->entries));
+		nd = hists__filter_prev_entries(rb_last(browser->entries),
+						hb->hists, hb->min_pcnt);
 		first = false;
 		break;
 	default:
@@ -897,7 +935,8 @@ do_offset:
 					break;
 				}
 			}
-			nd = hists__filter_entries(rb_next(nd));
+			nd = hists__filter_entries(rb_next(nd), hb->hists,
+						   hb->min_pcnt);
 			if (nd == NULL)
 				break;
 			--offset;
@@ -930,7 +969,8 @@ do_offset:
 				}
 			}
 
-			nd = hists__filter_prev_entries(rb_prev(nd));
+			nd = hists__filter_prev_entries(rb_prev(nd), hb->hists,
+							hb->min_pcnt);
 			if (nd == NULL)
 				break;
 			++offset;
@@ -1099,14 +1139,17 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser,
 
 static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp)
 {
-	struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries));
+	struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries),
+						   browser->hists,
+						   browser->min_pcnt);
 	int printed = 0;
 
 	while (nd) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 
 		printed += hist_browser__fprintf_entry(browser, h, fp);
-		nd = hists__filter_entries(rb_next(nd));
+		nd = hists__filter_entries(rb_next(nd), browser->hists,
+					   browser->min_pcnt);
 	}
 
 	return printed;
@@ -1155,10 +1198,6 @@ static struct hist_browser *hist_browser__new(struct hists *hists)
 		browser->b.refresh = hist_browser__refresh;
 		browser->b.seek = ui_browser__hists_seek;
 		browser->b.use_navkeypressed = true;
-		if (sort__branch_mode == 1)
-			browser->has_symbols = sort_sym_from.list.next != NULL;
-		else
-			browser->has_symbols = sort_sym.list.next != NULL;
 	}
 
 	return browser;
@@ -1329,11 +1368,25 @@ close_file_and_continue:
 	return ret;
 }
 
+static void hist_browser__update_pcnt_entries(struct hist_browser *hb)
+{
+	u64 nr_entries = 0;
+	struct rb_node *nd = rb_first(&hb->hists->entries);
+
+	while (nd) {
+		nr_entries++;
+		nd = hists__filter_entries(rb_next(nd), hb->hists,
+					   hb->min_pcnt);
+	}
+
+	hb->nr_pcnt_entries = nr_entries;
+}
 
 static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 				    const char *helpline, const char *ev_name,
 				    bool left_exits,
 				    struct hist_browser_timer *hbt,
+				    float min_pcnt,
 				    struct perf_session_env *env)
 {
 	struct hists *hists = &evsel->hists;
@@ -1350,6 +1403,11 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 	if (browser == NULL)
 		return -1;
 
+	if (min_pcnt) {
+		browser->min_pcnt = min_pcnt;
+		hist_browser__update_pcnt_entries(browser);
+	}
+
 	fstack = pstack__new(2);
 	if (fstack == NULL)
 		goto out;
@@ -1386,7 +1444,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 			 */
 			goto out_free_stack;
 		case 'a':
-			if (!browser->has_symbols) {
+			if (!sort__has_sym) {
 				ui_browser__warning(&browser->b, delay_secs * 2,
 			"Annotation is only available for symbolic views, "
 			"include \"sym*\" in --sort to use it.");
@@ -1485,10 +1543,10 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 			continue;
 		}
 
-		if (!browser->has_symbols)
+		if (!sort__has_sym)
 			goto add_exit_option;
 
-		if (sort__branch_mode == 1) {
+		if (sort__mode == SORT_MODE__BRANCH) {
 			bi = browser->he_selection->branch_info;
 			if (browser->selection != NULL &&
 			    bi &&
@@ -1689,6 +1747,7 @@ struct perf_evsel_menu {
 	struct ui_browser b;
 	struct perf_evsel *selection;
 	bool lost_events, lost_events_warned;
+	float min_pcnt;
 	struct perf_session_env *env;
 };
 
@@ -1782,6 +1841,7 @@ browse_hists:
 		ev_name = perf_evsel__name(pos);
 		key = perf_evsel__hists_browse(pos, nr_events, help,
 					       ev_name, true, hbt,
+					       menu->min_pcnt,
 					       menu->env);
 		ui_browser__show_title(&menu->b, title);
 		switch (key) {
@@ -1843,6 +1903,7 @@ static bool filter_group_entries(struct ui_browser *self __maybe_unused,
 static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
 					   int nr_entries, const char *help,
 					   struct hist_browser_timer *hbt,
+					   float min_pcnt,
 					   struct perf_session_env *env)
 {
 	struct perf_evsel *pos;
@@ -1856,6 +1917,7 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
 			.nr_entries = nr_entries,
 			.priv = evlist,
 		},
+		.min_pcnt = min_pcnt,
 		.env = env,
 	};
 
@@ -1874,6 +1936,7 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
 
 int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
 				  struct hist_browser_timer *hbt,
+				  float min_pcnt,
 				  struct perf_session_env *env)
 {
 	int nr_entries = evlist->nr_entries;
@@ -1885,7 +1948,8 @@ single_entry:
 		const char *ev_name = perf_evsel__name(first);
 
 		return perf_evsel__hists_browse(first, nr_entries, help,
-						ev_name, false, hbt, env);
+						ev_name, false, hbt, min_pcnt,
+						env);
 	}
 
 	if (symbol_conf.event_group) {
@@ -1901,5 +1965,5 @@ single_entry:
 	}
 
 	return __perf_evlist__tui_browse_hists(evlist, nr_entries, help,
-					       hbt, env);
+					       hbt, min_pcnt, env);
 }
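
Note: the hunks above apply one rule in three places (screen refresh, forward
iteration, backward iteration): an entry's share of the total period is
period * 100.0 / total_period, and anything below min_pcnt is treated as
invisible, with nr_pcnt_entries caching how many entries survive so the
browser's entry count stays consistent. A standalone sketch of the predicate,
using invented type and helper names:

    /* illustrative only; the real code works on struct hist_entry */
    struct entry_stat {
        unsigned long long period;
    };

    static int entry_is_visible(const struct entry_stat *st,
                                unsigned long long total_period,
                                float min_pcnt)
    {
        float percent = st->period * 100.0 / total_period;

        return percent >= min_pcnt; /* below the threshold => hidden */
    }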
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index 6f259b3d14e2..9708dd5fb8f3 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -124,7 +124,8 @@ void perf_gtk__init_hpp(void)
 				perf_gtk__hpp_color_overhead_guest_us;
 }
 
-static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists)
+static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
+				 float min_pcnt)
 {
 	struct perf_hpp_fmt *fmt;
 	GType col_types[MAX_COLUMNS];
@@ -189,10 +190,15 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists)
 	for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 		GtkTreeIter iter;
+		float percent = h->stat.period * 100.0 /
+					hists->stats.total_period;
 
 		if (h->filtered)
 			continue;
 
+		if (percent < min_pcnt)
+			continue;
+
 		gtk_list_store_append(store, &iter);
 
 		col_idx = 0;
@@ -222,7 +228,8 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists)
 
 int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
 				  const char *help,
-				  struct hist_browser_timer *hbt __maybe_unused)
+				  struct hist_browser_timer *hbt __maybe_unused,
+				  float min_pcnt)
 {
 	struct perf_evsel *pos;
 	GtkWidget *vbox;
@@ -286,7 +293,7 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
 					GTK_POLICY_AUTOMATIC,
 					GTK_POLICY_AUTOMATIC);
 
-	perf_gtk__show_hists(scrolled_window, hists);
+	perf_gtk__show_hists(scrolled_window, hists, min_pcnt);
 
 	tab_label = gtk_label_new(evname);
 
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index ff1f60cf442e..ae7a75432249 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -334,7 +334,7 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size,
 }
 
 size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
-		      int max_cols, FILE *fp)
+		      int max_cols, float min_pcnt, FILE *fp)
 {
 	struct perf_hpp_fmt *fmt;
 	struct sort_entry *se;
@@ -440,10 +440,15 @@
 print_entries:
 	for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+		float percent = h->stat.period * 100.0 /
+					hists->stats.total_period;
 
 		if (h->filtered)
 			continue;
 
+		if (percent < min_pcnt)
+			continue;
+
 		ret += hist_entry__fprintf(h, max_cols, hists, fp);
 
 		if (max_rows && ++nr_rows >= max_rows)
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index f7c727801aab..99b43dd18c57 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -776,6 +776,8 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist,
 		if (pipe_output)
 			dup2(2, 1);
 
+		signal(SIGTERM, SIG_DFL);
+
 		close(child_ready_pipe[0]);
 		close(go_pipe[1]);
 		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
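
Note: between fork() and exec() the workload child still shares the parent's
signal dispositions, including the SIGTERM handler perf installs for itself,
so the child resets SIGTERM to SIG_DFL before it execs the workload, and a
terminating signal once again behaves normally. The general pattern, as an
illustrative fragment with invented names:

    #include <signal.h>
    #include <unistd.h>

    void run_workload(char **argv)
    {
        pid_t child = fork();

        if (child == 0) {
            /* undo any handler inherited from the parent */
            signal(SIGTERM, SIG_DFL);
            execvp(argv[0], argv);
            _exit(127); /* exec failed */
        }
    }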
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 07b1a3ad3e24..63b6f8c8edf2 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1514,7 +1514,7 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel,
 	switch (err) {
 	case EPERM:
 	case EACCES:
-		return scnprintf(msg, size, "%s",
+		return scnprintf(msg, size,
 		 "You may not have permission to collect %sstats.\n"
 		 "Consider tweaking /proc/sys/kernel/perf_event_paranoid:\n"
 		 " -1 - Not paranoid at all\n"
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 326068a593a5..738d3b8d9745 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -2391,7 +2391,6 @@ out_err_write:
 	}
 	lseek(fd, header->data_offset + header->data_size, SEEK_SET);
 
-	header->frozen = 1;
 	return 0;
 }
 
@@ -2871,7 +2870,6 @@ int perf_session__read_header(struct perf_session *session, int fd)
 					    session->pevent))
 		goto out_delete_evlist;
 
-	header->frozen = 1;
 	return 0;
 out_errno:
 	return -errno;
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index c9fc55cada6d..16a3e83c584e 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -84,7 +84,6 @@ struct perf_session_env {
 };
 
 struct perf_header {
-	int frozen;
 	bool needs_swap;
 	s64 attr_offset;
 	u64 data_offset;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 6b32721f829a..b11a6cfdb414 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -70,9 +70,17 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 	int symlen;
 	u16 len;
 
-	if (h->ms.sym)
-		hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen + 4);
-	else {
+	/*
+	 * +4 accounts for '[x] ' priv level info
+	 * +2 accounts for 0x prefix on raw addresses
+	 * +3 accounts for ' y ' symtab origin info
+	 */
+	if (h->ms.sym) {
+		symlen = h->ms.sym->namelen + 4;
+		if (verbose)
+			symlen += BITS_PER_LONG / 4 + 2 + 3;
+		hists__new_col_len(hists, HISTC_SYMBOL, symlen);
+	} else {
 		symlen = unresolved_col_width + 4 + 2;
 		hists__new_col_len(hists, HISTC_SYMBOL, symlen);
 		hists__set_unres_dso_col_len(hists, HISTC_DSO);
@@ -91,12 +99,10 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 		hists__new_col_len(hists, HISTC_PARENT, h->parent->namelen);
 
 	if (h->branch_info) {
-		/*
-		 * +4 accounts for '[x] ' priv level info
-		 * +2 account of 0x prefix on raw addresses
-		 */
 		if (h->branch_info->from.sym) {
 			symlen = (int)h->branch_info->from.sym->namelen + 4;
+			if (verbose)
+				symlen += BITS_PER_LONG / 4 + 2 + 3;
 			hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen);
 
 			symlen = dso__name_len(h->branch_info->from.map->dso);
@@ -109,6 +115,8 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 
 		if (h->branch_info->to.sym) {
 			symlen = (int)h->branch_info->to.sym->namelen + 4;
+			if (verbose)
+				symlen += BITS_PER_LONG / 4 + 2 + 3;
 			hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen);
 
 			symlen = dso__name_len(h->branch_info->to.map->dso);
@@ -121,10 +129,6 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 	}
 
 	if (h->mem_info) {
-		/*
-		 * +4 accounts for '[x] ' priv level info
-		 * +2 account of 0x prefix on raw addresses
-		 */
 		if (h->mem_info->daddr.sym) {
 			symlen = (int)h->mem_info->daddr.sym->namelen + 4
 				 + unresolved_col_width + 2;
@@ -236,8 +240,7 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
 	return he->stat.period == 0;
 }
 
-static void __hists__decay_entries(struct hists *hists, bool zap_user,
-				   bool zap_kernel, bool threaded)
+void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel)
 {
 	struct rb_node *next = rb_first(&hists->entries);
 	struct hist_entry *n;
@@ -256,7 +259,7 @@ static void __hists__decay_entries(struct hists *hists, bool zap_user,
 		    !n->used) {
 			rb_erase(&n->rb_node, &hists->entries);
 
-			if (sort__need_collapse || threaded)
+			if (sort__need_collapse)
 				rb_erase(&n->rb_node_in, &hists->entries_collapsed);
 
 			hist_entry__free(n);
@@ -265,17 +268,6 @@ static void __hists__decay_entries(struct hists *hists, bool zap_user,
 	}
 }
 
-void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel)
-{
-	return __hists__decay_entries(hists, zap_user, zap_kernel, false);
-}
-
-void hists__decay_entries_threaded(struct hists *hists,
-				   bool zap_user, bool zap_kernel)
-{
-	return __hists__decay_entries(hists, zap_user, zap_kernel, true);
-}
-
 /*
  * histogram, sorted on item, collects periods
  */
@@ -292,6 +284,20 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template)
 			he->ms.map->referenced = true;
 
 		if (he->branch_info) {
+			/*
+			 * This branch info is (a part of) allocated from
+			 * machine__resolve_bstack() and will be freed after
+			 * adding new entries. So we need to save a copy.
+			 */
+			he->branch_info = malloc(sizeof(*he->branch_info));
+			if (he->branch_info == NULL) {
+				free(he);
+				return NULL;
+			}
+
+			memcpy(he->branch_info, template->branch_info,
+			       sizeof(*he->branch_info));
+
 			if (he->branch_info->from.map)
 				he->branch_info->from.map->referenced = true;
 			if (he->branch_info->to.map)
@@ -341,8 +347,6 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
 	struct hist_entry *he;
 	int cmp;
 
-	pthread_mutex_lock(&hists->lock);
-
 	p = &hists->entries_in->rb_node;
 
 	while (*p != NULL) {
@@ -360,6 +364,12 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
 		if (!cmp) {
 			he_stat__add_period(&he->stat, period, weight);
 
+			/*
+			 * This mem info was allocated from machine__resolve_mem
+			 * and will not be used anymore.
+			 */
+			free(entry->mem_info);
+
 			/* If the map of an existing hist_entry has
 			 * become out-of-date due to an exec() or
 			 * similar, update it. Otherwise we will
@@ -382,14 +392,12 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
 
 	he = hist_entry__new(entry);
 	if (!he)
-		goto out_unlock;
+		return NULL;
 
 	rb_link_node(&he->rb_node_in, parent, p);
 	rb_insert_color(&he->rb_node_in, hists->entries_in);
 out:
 	hist_entry__add_cpumode_period(he, al->cpumode, period);
-out_unlock:
-	pthread_mutex_unlock(&hists->lock);
 	return he;
 }
 
@@ -589,13 +597,13 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he)
 	hists__filter_entry_by_symbol(hists, he);
 }
 
-static void __hists__collapse_resort(struct hists *hists, bool threaded)
+void hists__collapse_resort(struct hists *hists)
 {
 	struct rb_root *root;
 	struct rb_node *next;
 	struct hist_entry *n;
 
-	if (!sort__need_collapse && !threaded)
+	if (!sort__need_collapse)
 		return;
 
 	root = hists__get_rotate_entries_in(hists);
@@ -617,16 +625,6 @@ static void __hists__collapse_resort(struct hists *hists, bool threaded)
 	}
 }
 
-void hists__collapse_resort(struct hists *hists)
-{
-	return __hists__collapse_resort(hists, false);
-}
-
-void hists__collapse_resort_threaded(struct hists *hists)
-{
-	return __hists__collapse_resort(hists, true);
-}
-
 /*
  * reverse the map, sort on period.
  */
@@ -713,7 +711,7 @@ static void __hists__insert_output_entry(struct rb_root *entries,
 	rb_insert_color(&he->rb_node, entries);
 }
 
-static void __hists__output_resort(struct hists *hists, bool threaded)
+void hists__output_resort(struct hists *hists)
 {
 	struct rb_root *root;
 	struct rb_node *next;
@@ -722,7 +720,7 @@ static void __hists__output_resort(struct hists *hists, bool threaded)
 
 	min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100);
 
-	if (sort__need_collapse || threaded)
+	if (sort__need_collapse)
 		root = &hists->entries_collapsed;
 	else
 		root = hists->entries_in;
@@ -743,16 +741,6 @@ static void __hists__output_resort(struct hists *hists, bool threaded)
 	}
 }
 
-void hists__output_resort(struct hists *hists)
-{
-	return __hists__output_resort(hists, false);
-}
-
-void hists__output_resort_threaded(struct hists *hists)
-{
-	return __hists__output_resort(hists, true);
-}
-
 static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h,
 				       enum hist_filter filter)
 {
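
Note: hist_entry__new() starts from a structure copy of the template, so a
pointer member like branch_info would otherwise keep aliasing a buffer owned
by the caller (machine__resolve_bstack()), which is freed after the entry is
added; the hunks above give the new entry its own copy, and likewise free the
caller's mem_info once an existing entry has absorbed the period. The
underlying deep-copy-on-insert pattern, sketched with invented names:

    #include <stdlib.h>
    #include <string.h>

    struct payload { int data[4]; };

    struct node {
        struct payload *payload; /* template points into caller memory */
    };

    struct node *node__new(const struct node *template)
    {
        struct node *n = malloc(sizeof(*n));

        if (n == NULL)
            return NULL;

        *n = *template; /* shallow copy first */
        if (n->payload) {
            n->payload = malloc(sizeof(*n->payload));
            if (n->payload == NULL) {
                free(n);
                return NULL;
            }
            memcpy(n->payload, template->payload, sizeof(*n->payload));
        }
        return n;
    }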
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 14c2fe20aa62..2d3790fd99bb 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -43,12 +43,12 @@ enum hist_column {
 	HISTC_COMM,
 	HISTC_PARENT,
 	HISTC_CPU,
+	HISTC_SRCLINE,
 	HISTC_MISPREDICT,
 	HISTC_SYMBOL_FROM,
 	HISTC_SYMBOL_TO,
 	HISTC_DSO_FROM,
 	HISTC_DSO_TO,
-	HISTC_SRCLINE,
 	HISTC_LOCAL_WEIGHT,
 	HISTC_GLOBAL_WEIGHT,
 	HISTC_MEM_DADDR_SYMBOL,
@@ -104,13 +104,9 @@ struct hist_entry *__hists__add_mem_entry(struct hists *self,
 			 u64 weight);
 
 void hists__output_resort(struct hists *self);
-void hists__output_resort_threaded(struct hists *hists);
 void hists__collapse_resort(struct hists *self);
-void hists__collapse_resort_threaded(struct hists *hists);
 
 void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel);
-void hists__decay_entries_threaded(struct hists *hists, bool zap_user,
-				   bool zap_kernel);
 void hists__output_recalc_col_len(struct hists *hists, int max_rows);
 
 void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h);
@@ -119,7 +115,7 @@ void events_stats__inc(struct events_stats *stats, u32 type);
 size_t events_stats__fprintf(struct events_stats *stats, FILE *fp);
 
 size_t hists__fprintf(struct hists *self, bool show_header, int max_rows,
-		      int max_cols, FILE *fp);
+		      int max_cols, float min_pcnt, FILE *fp);
 
 int hist_entry__inc_addr_samples(struct hist_entry *self, int evidx, u64 addr);
 int hist_entry__annotate(struct hist_entry *self, size_t privsize);
@@ -199,6 +195,7 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
 
 int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
 				  struct hist_browser_timer *hbt,
+				  float min_pcnt,
 				  struct perf_session_env *env);
 int script_browse(const char *script_opt);
 #else
@@ -206,6 +203,7 @@ static inline
 int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused,
 				  const char *help __maybe_unused,
 				  struct hist_browser_timer *hbt __maybe_unused,
+				  float min_pcnt __maybe_unused,
 				  struct perf_session_env *env __maybe_unused)
 {
 	return 0;
@@ -233,12 +231,14 @@ static inline int script_browse(const char *script_opt __maybe_unused)
 
 #ifdef GTK2_SUPPORT
 int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, const char *help,
-				  struct hist_browser_timer *hbt __maybe_unused);
+				  struct hist_browser_timer *hbt __maybe_unused,
+				  float min_pcnt);
 #else
 static inline
 int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist __maybe_unused,
 				  const char *help __maybe_unused,
-				  struct hist_browser_timer *hbt __maybe_unused)
+				  struct hist_browser_timer *hbt __maybe_unused,
+				  float min_pcnt __maybe_unused)
 {
 	return 0;
 }
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 6fcb9de62340..8bcdf9e54089 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -21,6 +21,7 @@ const char *map_type__name[MAP__NR_TYPES] = {
 static inline int is_anon_memory(const char *filename)
 {
 	return !strcmp(filename, "//anon") ||
+	       !strcmp(filename, "/dev/zero (deleted)") ||
 	       !strcmp(filename, "/anon_hugepage (deleted)");
 }
 
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 6b51d47acdba..f3b235ec7bf4 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -37,7 +37,6 @@ struct perf_session {
 	int fd;
 	bool fd_pipe;
 	bool repipe;
-	int cwdlen;
 	char *cwd;
 	struct ordered_samples ordered_samples;
 	char filename[1];
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 6b0ed322907e..58ea5ca6c255 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -18,8 +18,9 @@ class install_lib(_install_lib):
         self.build_dir = build_lib
 
 
-cflags = ['-fno-strict-aliasing', '-Wno-write-strings']
-cflags += getenv('CFLAGS', '').split()
+cflags = getenv('CFLAGS', '').split()
+# switch off several checks (need to be at the end of cflags list)
+cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ]
 
 build_lib = getenv('PYTHON_EXTBUILD_LIB')
 build_tmp = getenv('PYTHON_EXTBUILD_TMP')
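
Note: the reordering matters because the compiler honors the last matching
warning option on the command line; appending the -Wno-* switches after any
user-supplied CFLAGS guarantees they win, whereas the old order let CFLAGS
silently re-enable the suppressed warnings.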
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 5f52d492590c..313a5a730112 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1,5 +1,6 @@
1#include "sort.h" 1#include "sort.h"
2#include "hist.h" 2#include "hist.h"
3#include "symbol.h"
3 4
4regex_t parent_regex; 5regex_t parent_regex;
5const char default_parent_pattern[] = "^sys_|^do_page_fault"; 6const char default_parent_pattern[] = "^sys_|^do_page_fault";
@@ -9,7 +10,7 @@ const char *sort_order = default_sort_order;
9int sort__need_collapse = 0; 10int sort__need_collapse = 0;
10int sort__has_parent = 0; 11int sort__has_parent = 0;
11int sort__has_sym = 0; 12int sort__has_sym = 0;
12int sort__branch_mode = -1; /* -1 = means not set */ 13enum sort_mode sort__mode = SORT_MODE__NORMAL;
13 14
14enum sort_type sort__first_dimension; 15enum sort_type sort__first_dimension;
15 16
@@ -194,7 +195,7 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
194 if (verbose) { 195 if (verbose) {
195 char o = map ? dso__symtab_origin(map->dso) : '!'; 196 char o = map ? dso__symtab_origin(map->dso) : '!';
196 ret += repsep_snprintf(bf, size, "%-#*llx %c ", 197 ret += repsep_snprintf(bf, size, "%-#*llx %c ",
197 BITS_PER_LONG / 4, ip, o); 198 BITS_PER_LONG / 4 + 2, ip, o);
198 } 199 }
199 200
200 ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level); 201 ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level);
@@ -871,14 +872,6 @@ static struct sort_dimension common_sort_dimensions[] = {
871 DIM(SORT_PARENT, "parent", sort_parent), 872 DIM(SORT_PARENT, "parent", sort_parent),
872 DIM(SORT_CPU, "cpu", sort_cpu), 873 DIM(SORT_CPU, "cpu", sort_cpu),
873 DIM(SORT_SRCLINE, "srcline", sort_srcline), 874 DIM(SORT_SRCLINE, "srcline", sort_srcline),
874 DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight),
875 DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight),
876 DIM(SORT_MEM_DADDR_SYMBOL, "symbol_daddr", sort_mem_daddr_sym),
877 DIM(SORT_MEM_DADDR_DSO, "dso_daddr", sort_mem_daddr_dso),
878 DIM(SORT_MEM_LOCKED, "locked", sort_mem_locked),
879 DIM(SORT_MEM_TLB, "tlb", sort_mem_tlb),
880 DIM(SORT_MEM_LVL, "mem", sort_mem_lvl),
881 DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop),
882}; 875};
883 876
884#undef DIM 877#undef DIM
@@ -895,6 +888,36 @@ static struct sort_dimension bstack_sort_dimensions[] = {
895 888
896#undef DIM 889#undef DIM
897 890
891#define DIM(d, n, func) [d - __SORT_MEMORY_MODE] = { .name = n, .entry = &(func) }
892
893static struct sort_dimension memory_sort_dimensions[] = {
894 DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight),
895 DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight),
896 DIM(SORT_MEM_DADDR_SYMBOL, "symbol_daddr", sort_mem_daddr_sym),
897 DIM(SORT_MEM_DADDR_DSO, "dso_daddr", sort_mem_daddr_dso),
898 DIM(SORT_MEM_LOCKED, "locked", sort_mem_locked),
899 DIM(SORT_MEM_TLB, "tlb", sort_mem_tlb),
900 DIM(SORT_MEM_LVL, "mem", sort_mem_lvl),
901 DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop),
902};
903
904#undef DIM
905
906static void __sort_dimension__add(struct sort_dimension *sd, enum sort_type idx)
907{
908 if (sd->taken)
909 return;
910
911 if (sd->entry->se_collapse)
912 sort__need_collapse = 1;
913
914 if (list_empty(&hist_entry__sort_list))
915 sort__first_dimension = idx;
916
917 list_add_tail(&sd->entry->list, &hist_entry__sort_list);
918 sd->taken = 1;
919}
920
898int sort_dimension__add(const char *tok) 921int sort_dimension__add(const char *tok)
899{ 922{
900 unsigned int i; 923 unsigned int i;
@@ -915,25 +938,11 @@ int sort_dimension__add(const char *tok)
915 return -EINVAL; 938 return -EINVAL;
916 } 939 }
917 sort__has_parent = 1; 940 sort__has_parent = 1;
918 } else if (sd->entry == &sort_sym || 941 } else if (sd->entry == &sort_sym) {
919 sd->entry == &sort_sym_from ||
920 sd->entry == &sort_sym_to ||
921 sd->entry == &sort_mem_daddr_sym) {
922 sort__has_sym = 1; 942 sort__has_sym = 1;
923 } 943 }
924 944
925 if (sd->taken) 945 __sort_dimension__add(sd, i);
926 return 0;
927
928 if (sd->entry->se_collapse)
929 sort__need_collapse = 1;
930
931 if (list_empty(&hist_entry__sort_list))
932 sort__first_dimension = i;
933
934 list_add_tail(&sd->entry->list, &hist_entry__sort_list);
935 sd->taken = 1;
936
937 return 0; 946 return 0;
938 } 947 }
939 948
@@ -943,24 +952,29 @@ int sort_dimension__add(const char *tok)
943 if (strncasecmp(tok, sd->name, strlen(tok))) 952 if (strncasecmp(tok, sd->name, strlen(tok)))
944 continue; 953 continue;
945 954
946 if (sort__branch_mode != 1) 955 if (sort__mode != SORT_MODE__BRANCH)
947 return -EINVAL; 956 return -EINVAL;
948 957
949 if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to) 958 if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to)
950 sort__has_sym = 1; 959 sort__has_sym = 1;
951 960
952 if (sd->taken) 961 __sort_dimension__add(sd, i + __SORT_BRANCH_STACK);
953 return 0; 962 return 0;
963 }
954 964
955 if (sd->entry->se_collapse) 965 for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) {
956 sort__need_collapse = 1; 966 struct sort_dimension *sd = &memory_sort_dimensions[i];
957 967
958 if (list_empty(&hist_entry__sort_list)) 968 if (strncasecmp(tok, sd->name, strlen(tok)))
959 sort__first_dimension = i + __SORT_BRANCH_STACK; 969 continue;
960 970
961 list_add_tail(&sd->entry->list, &hist_entry__sort_list); 971 if (sort__mode != SORT_MODE__MEMORY)
962 sd->taken = 1; 972 return -EINVAL;
973
974 if (sd->entry == &sort_mem_daddr_sym)
975 sort__has_sym = 1;
963 976
977 __sort_dimension__add(sd, i + __SORT_MEMORY_MODE);
964 return 0; 978 return 0;
965 } 979 }
966 980
@@ -993,8 +1007,9 @@ int setup_sorting(void)
993 return ret; 1007 return ret;
994} 1008}
995 1009
996void sort_entry__setup_elide(struct sort_entry *self, struct strlist *list, 1010static void sort_entry__setup_elide(struct sort_entry *self,
997 const char *list_name, FILE *fp) 1011 struct strlist *list,
1012 const char *list_name, FILE *fp)
998{ 1013{
999 if (list && strlist__nr_entries(list) == 1) { 1014 if (list && strlist__nr_entries(list) == 1) {
1000 if (fp != NULL) 1015 if (fp != NULL)
@@ -1003,3 +1018,42 @@ void sort_entry__setup_elide(struct sort_entry *self, struct strlist *list,
1003 self->elide = true; 1018 self->elide = true;
1004 } 1019 }
1005} 1020}
1021
1022void sort__setup_elide(FILE *output)
1023{
1024 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
1025 "dso", output);
1026 sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list,
1027 "comm", output);
1028 sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list,
1029 "symbol", output);
1030
1031 if (sort__mode == SORT_MODE__BRANCH) {
1032 sort_entry__setup_elide(&sort_dso_from,
1033 symbol_conf.dso_from_list,
1034 "dso_from", output);
1035 sort_entry__setup_elide(&sort_dso_to,
1036 symbol_conf.dso_to_list,
1037 "dso_to", output);
1038 sort_entry__setup_elide(&sort_sym_from,
1039 symbol_conf.sym_from_list,
1040 "sym_from", output);
1041 sort_entry__setup_elide(&sort_sym_to,
1042 symbol_conf.sym_to_list,
1043 "sym_to", output);
1044 } else if (sort__mode == SORT_MODE__MEMORY) {
1045 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
1046 "symbol_daddr", output);
1047 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
1048 "dso_daddr", output);
1049 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
1050 "mem", output);
1051 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
1052 "local_weight", output);
1053 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
1054 "tlb", output);
1055 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
1056 "snoop", output);
1057 }
1058
1059}
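
Note: memory_sort_dimensions reuses the DIM() trick from the other tables.
Because the memory sort keys are declared contiguously starting at
__SORT_MEMORY_MODE (see the sort.h hunk below), subtracting that base in the
designated initializer rebases them to 0..N-1, keeping the array dense while
sort_dimension__add() recovers the enum value as i + __SORT_MEMORY_MODE. The
same idea in isolation, with toy names:

    enum key {
        KEY_A,
        KEY_B,

        __KEY_EXTRA, /* first id of the second group */
        KEY_X = __KEY_EXTRA,
        KEY_Y,
    };

    static const char *extra_names[] = {
        [KEY_X - __KEY_EXTRA] = "x", /* lands at index 0 */
        [KEY_Y - __KEY_EXTRA] = "y", /* index 1 */
    };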
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index f24bdf64238c..45ac84c1e037 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -32,7 +32,7 @@ extern const char default_sort_order[];
 extern int sort__need_collapse;
 extern int sort__has_parent;
 extern int sort__has_sym;
-extern int sort__branch_mode;
+extern enum sort_mode sort__mode;
 extern struct sort_entry sort_comm;
 extern struct sort_entry sort_dso;
 extern struct sort_entry sort_sym;
@@ -117,12 +117,18 @@ static inline struct hist_entry *hist_entry__next_pair(struct hist_entry *he)
 	return NULL;
 }
 
-static inline void hist_entry__add_pair(struct hist_entry *he,
-					struct hist_entry *pair)
+static inline void hist_entry__add_pair(struct hist_entry *pair,
+					struct hist_entry *he)
 {
-	list_add_tail(&he->pairs.head, &pair->pairs.node);
+	list_add_tail(&pair->pairs.node, &he->pairs.head);
 }
 
+enum sort_mode {
+	SORT_MODE__NORMAL,
+	SORT_MODE__BRANCH,
+	SORT_MODE__MEMORY,
+};
+
 enum sort_type {
 	/* common sort keys */
 	SORT_PID,
@@ -132,14 +138,6 @@ enum sort_type {
 	SORT_PARENT,
 	SORT_CPU,
 	SORT_SRCLINE,
-	SORT_LOCAL_WEIGHT,
-	SORT_GLOBAL_WEIGHT,
-	SORT_MEM_DADDR_SYMBOL,
-	SORT_MEM_DADDR_DSO,
-	SORT_MEM_LOCKED,
-	SORT_MEM_TLB,
-	SORT_MEM_LVL,
-	SORT_MEM_SNOOP,
 
 	/* branch stack specific sort keys */
 	__SORT_BRANCH_STACK,
@@ -148,6 +146,17 @@ enum sort_type {
 	SORT_SYM_FROM,
 	SORT_SYM_TO,
 	SORT_MISPREDICT,
+
+	/* memory mode specific sort keys */
+	__SORT_MEMORY_MODE,
+	SORT_LOCAL_WEIGHT = __SORT_MEMORY_MODE,
+	SORT_GLOBAL_WEIGHT,
+	SORT_MEM_DADDR_SYMBOL,
+	SORT_MEM_DADDR_DSO,
+	SORT_MEM_LOCKED,
+	SORT_MEM_TLB,
+	SORT_MEM_LVL,
+	SORT_MEM_SNOOP,
 };
 
 /*
@@ -172,7 +181,6 @@ extern struct list_head hist_entry__sort_list;
 
 int setup_sorting(void);
 extern int sort_dimension__add(const char *);
-void sort_entry__setup_elide(struct sort_entry *self, struct strlist *list,
-			     const char *list_name, FILE *fp);
+void sort__setup_elide(FILE *fp);
 
 #endif /* __PERF_SORT_H */
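
Note: the hist_entry__add_pair() change is more than a rename. With
kernel-style lists, list_add_tail(&elem->node, &head) queues elem on the list
anchored at head, so the old call passed the anchor and the element in the
wrong slots; swapping the parameters appends the pair's pairs.node to the
list headed at he->pairs.head, which is what the callers expect.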
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 23742126f47c..7c59c28afcc5 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -37,7 +37,7 @@ double stddev_stats(struct stats *stats)
 {
 	double variance, variance_mean;
 
-	if (!stats->n)
+	if (stats->n < 2)
 		return 0.0;
 
 	variance = stats->M2 / (stats->n - 1);
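
Note: stddev_stats() divides M2 by n - 1, which is a division by zero for a
single sample, so rejecting only n == 0 was not enough; n < 2 covers both
degenerate cases. A self-contained sketch, assuming the Welford-style running
accumulation that the M2 field suggests:

    /* illustrative running statistics; field types simplified */
    struct stats {
        double n, mean, M2;
    };

    void update_stats(struct stats *s, double val)
    {
        double delta = val - s->mean;

        s->n += 1.0;
        s->mean += delta / s->n;
        s->M2 += delta * (val - s->mean);
    }

    double variance(const struct stats *s)
    {
        if (s->n < 2)
            return 0.0; /* M2 / (n - 1) would divide by zero at n == 1 */

        return s->M2 / (s->n - 1);
    }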
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 632e40e5ceca..40399cbcca77 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -14,6 +14,7 @@ struct thread *thread__new(pid_t pid)
 	if (self != NULL) {
 		map_groups__init(&self->mg);
 		self->pid = pid;
+		self->ppid = -1;
 		self->comm = malloc(32);
 		if (self->comm)
 			snprintf(self->comm, 32, ":%d", self->pid);
@@ -82,5 +83,8 @@ int thread__fork(struct thread *self, struct thread *parent)
 	for (i = 0; i < MAP__NR_TYPES; ++i)
 		if (map_groups__clone(&self->mg, &parent->mg, i) < 0)
 			return -ENOMEM;
+
+	self->ppid = parent->pid;
+
 	return 0;
 }
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 5ad266403098..eeb7ac62b9e3 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -13,6 +13,7 @@ struct thread {
 	};
 	struct map_groups mg;
 	pid_t pid;
+	pid_t ppid;
 	char shortname[3];
 	bool comm_set;
 	char *comm;
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
index 54d37a4753c5..f857b51b6bde 100644
--- a/tools/perf/util/top.c
+++ b/tools/perf/util/top.c
@@ -23,20 +23,31 @@
 
 size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
 {
-	float samples_per_sec = top->samples / top->delay_secs;
-	float ksamples_per_sec = top->kernel_samples / top->delay_secs;
-	float esamples_percent = (100.0 * top->exact_samples) / top->samples;
+	float samples_per_sec;
+	float ksamples_per_sec;
+	float esamples_percent;
 	struct perf_record_opts *opts = &top->record_opts;
 	struct perf_target *target = &opts->target;
 	size_t ret = 0;
 
+	if (top->samples) {
+		samples_per_sec = top->samples / top->delay_secs;
+		ksamples_per_sec = top->kernel_samples / top->delay_secs;
+		esamples_percent = (100.0 * top->exact_samples) / top->samples;
+	} else {
+		samples_per_sec = ksamples_per_sec = esamples_percent = 0.0;
+	}
+
 	if (!perf_guest) {
+		float ksamples_percent = 0.0;
+
+		if (samples_per_sec)
+			ksamples_percent = (100.0 * ksamples_per_sec) /
+					   samples_per_sec;
 		ret = SNPRINTF(bf, size,
 			       " PerfTop:%8.0f irqs/sec kernel:%4.1f%%"
 			       " exact: %4.1f%% [", samples_per_sec,
-			       100.0 - (100.0 * ((samples_per_sec - ksamples_per_sec) /
-						 samples_per_sec)),
-			       esamples_percent);
+			       ksamples_percent, esamples_percent);
 	} else {
 		float us_samples_per_sec = top->us_samples / top->delay_secs;
 		float guest_kernel_samples_per_sec = top->guest_kernel_samples / top->delay_secs;
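
Note: the header arithmetic is unchanged, only guarded and simplified:
100.0 - 100.0 * ((s - k) / s) = (100s - 100s + 100k) / s = 100k / s, which is
exactly what ksamples_percent now computes directly, while the top->samples
and samples_per_sec checks keep esamples_percent and the kernel percentage
from dividing by zero when no samples have arrived yet.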
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index 7ebf357dc9e1..df46be93d902 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -26,7 +26,6 @@ struct perf_top {
 	int print_entries, count_filter, delay_secs;
 	bool hide_kernel_symbols, hide_user_symbols, zero;
 	bool use_tui, use_stdio;
-	bool sort_has_symbols;
 	bool kptr_restrict_warned;
 	bool vmlinux_warned;
 	bool dump_symtab;
@@ -37,6 +36,7 @@ struct perf_top {
 	int realtime_prio;
 	int sym_pcnt_filter;
 	const char *sym_filter;
+	float min_percent;
 };
 
 size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size);
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index a45710b70a55..7a484c97e500 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -221,8 +221,8 @@ extern unsigned char sane_ctype[256];
 #define isalpha(x) sane_istest(x,GIT_ALPHA)
 #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
 #define isprint(x) sane_istest(x,GIT_PRINT)
-#define islower(x) (sane_istest(x,GIT_ALPHA) && sane_istest(x,0x20))
-#define isupper(x) (sane_istest(x,GIT_ALPHA) && !sane_istest(x,0x20))
+#define islower(x) (sane_istest(x,GIT_ALPHA) && (x & 0x20))
+#define isupper(x) (sane_istest(x,GIT_ALPHA) && !(x & 0x20))
 #define tolower(x) sane_case((unsigned char)(x), 0x20)
 #define toupper(x) sane_case((unsigned char)(x), 0)
 
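
Note: the islower()/isupper() fix swaps a table lookup for a bit test on the
character itself. sane_istest(x, 0x20) asks whether flag 0x20 is set for x in
the sane_ctype classification table, which says nothing about case; bit 0x20
of an ASCII letter, by contrast, is exactly its case bit, the same bit that
sane_case() toggles for the tolower()/toupper() macros below. A standalone
illustration of that ASCII fact:

    #include <assert.h>

    int main(void)
    {
        assert(('A' | 0x20) == 'a');  /* setting bit 0x20 lowercases */
        assert(('a' & ~0x20) == 'A'); /* clearing it uppercases */
        assert(('a' & 0x20) && !('A' & 0x20));
        return 0;
    }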