author    Ingo Molnar <mingo@kernel.org>  2015-11-23 03:13:48 -0500
committer Ingo Molnar <mingo@kernel.org>  2015-11-23 03:45:53 -0500
commit    8c2accc8ca0be9cd8119ca439038243dfc8fcd0d (patch)
tree      ec57994aef7e5c4a63d7f36f798c3e8a2139066c
parent    90eec103b96e30401c0b846045bf8a1c7159b6da (diff)
parent    2c6caff2b26fde8f3f87183f8c97f2cebfdbcb98 (diff)
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

- Allow BPF scriptlets to specify arguments to be fetched using DWARF
  info, using a prologue generated at compile/build time (He Kuang, Wang Nan)

- Allow attaching BPF scriptlets to module symbols (Wang Nan)

- Allow attaching BPF scriptlets to userspace code using uprobe (Wang Nan)

- BPF programs now can specify 'perf probe' tunables via its section name,
  separating key=val values using semicolons (Wang Nan)

Testing some of these new BPF features:

Use case: get callchains when receiving SSL packets, filter them in the
kernel, at an arbitrary place.

  # cat ssl.bpf.c
  #define SEC(NAME) __attribute__((section(NAME), used))

  struct pt_regs;

  SEC("func=__inet_lookup_established hnum")
  int func(struct pt_regs *ctx, int err, unsigned short port)
  {
          return err == 0 && port == 443;
  }

  char _license[] SEC("license") = "GPL";
  int _version SEC("version") = LINUX_VERSION_CODE;
  #
  # perf record -a -g -e ssl.bpf.c
  ^C[ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.787 MB perf.data (3 samples) ]
  # perf script | head -30
  swapper 0 [000] 58783.268118: perf_bpf_probe:func: (ffffffff816a0f60) hnum=0x1bb
          8a0f61 __inet_lookup_established (/lib/modules/4.3.0+/build/vmlinux)
          896def ip_rcv_finish (/lib/modules/4.3.0+/build/vmlinux)
          8976c2 ip_rcv (/lib/modules/4.3.0+/build/vmlinux)
          855eba __netif_receive_skb_core (/lib/modules/4.3.0+/build/vmlinux)
          8565d8 __netif_receive_skb (/lib/modules/4.3.0+/build/vmlinux)
          8572a8 process_backlog (/lib/modules/4.3.0+/build/vmlinux)
          856b11 net_rx_action (/lib/modules/4.3.0+/build/vmlinux)
          2a284b __do_softirq (/lib/modules/4.3.0+/build/vmlinux)
          2a2ba3 irq_exit (/lib/modules/4.3.0+/build/vmlinux)
          96b7a4 do_IRQ (/lib/modules/4.3.0+/build/vmlinux)
          969807 ret_from_intr (/lib/modules/4.3.0+/build/vmlinux)
          2dede5 cpu_startup_entry (/lib/modules/4.3.0+/build/vmlinux)
          95d5bc rest_init (/lib/modules/4.3.0+/build/vmlinux)
          1163ffa start_kernel ([kernel.vmlinux].init.text)
          11634d7 x86_64_start_reservations ([kernel.vmlinux].init.text)
          1163623 x86_64_start_kernel ([kernel.vmlinux].init.text)

  qemu-system-x86 9178 [003] 58785.792417: perf_bpf_probe:func: (ffffffff816a0f60) hnum=0x1bb
          8a0f61 __inet_lookup_established (/lib/modules/4.3.0+/build/vmlinux)
          896def ip_rcv_finish (/lib/modules/4.3.0+/build/vmlinux)
          8976c2 ip_rcv (/lib/modules/4.3.0+/build/vmlinux)
          855eba __netif_receive_skb_core (/lib/modules/4.3.0+/build/vmlinux)
          8565d8 __netif_receive_skb (/lib/modules/4.3.0+/build/vmlinux)
          856660 netif_receive_skb_internal (/lib/modules/4.3.0+/build/vmlinux)
          8566ec netif_receive_skb_sk (/lib/modules/4.3.0+/build/vmlinux)
          430a br_handle_frame_finish ([bridge])
          48bc br_handle_frame ([bridge])
          855f44 __netif_receive_skb_core (/lib/modules/4.3.0+/build/vmlinux)
          8565d8 __netif_receive_skb (/lib/modules/4.3.0+/build/vmlinux)
  #

Use 'perf probe' various options to list functions, see what variables can
be collected at any given point, experiment first collecting without a
filter, then filter, use it together with 'perf trace', 'perf top', with or
without callchains, and if it explodes, please tell us!

- Introduce a new callchain mode: "folded", that will list per-line
  representations of all callchains for a given histogram entry,
  facilitating 'perf report' output processing by other tools, such as
  Brendan Gregg's flamegraph tools (Namhyung Kim)

  E.g.:

  # perf report | grep -v ^# | head
    18.37%     0.00%  swapper  [kernel.kallsyms]  [k] cpu_startup_entry
            |
            ---cpu_startup_entry
               |
               |--12.07%--start_secondary
               |
                --6.30%--rest_init
                          start_kernel
                          x86_64_start_reservations
                          x86_64_start_kernel
  #

  Becomes, in "folded" mode:

  # perf report -g folded | grep -v ^# | head -5
    18.37%     0.00%  swapper  [kernel.kallsyms]  [k] cpu_startup_entry
      12.07% cpu_startup_entry;start_secondary
       6.30% cpu_startup_entry;rest_init;start_kernel;x86_64_start_reservations;x86_64_start_kernel
    16.90%     0.00%  swapper  [kernel.kallsyms]  [k] call_cpuidle
      11.23% call_cpuidle;cpu_startup_entry;start_secondary
       5.67% call_cpuidle;cpu_startup_entry;rest_init;start_kernel;x86_64_start_reservations;x86_64_start_kernel
    16.90%     0.00%  swapper  [kernel.kallsyms]  [k] cpuidle_enter
      11.23% cpuidle_enter;call_cpuidle;cpu_startup_entry;start_secondary
       5.67% cpuidle_enter;call_cpuidle;cpu_startup_entry;rest_init;start_kernel;x86_64_start_reservations;x86_64_start_kernel
    15.12%     0.00%  swapper  [kernel.kallsyms]  [k] cpuidle_enter_state
  #

  The user can also select one of "count", "period" or "percent" as the
  first column.

Infrastructure changes:

- Fix multiple leaks found with Valgrind and a refcount debugger (Masami
  Hiramatsu)

- Add further 'perf test' entries for BPF and LLVM (Wang Nan)

- Improve 'perf test' to support subtests, so that the series of tests
  performed in the LLVM and BPF main tests appear in the default
  'perf test' output (Wang Nan)

- Move memdup() from tools/perf to tools/lib/string.c (Arnaldo Carvalho
  de Melo)

- Adopt strtobool() from the kernel into tools/lib/ (Wang Nan)

- Fix selftests_install tools/ Makefile rule (Kevin Hilman)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
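The merge message above shows only the plain 'SEC("func=<symbol> <args>")'
form; the semicolon-separated key=val tunables it mentions would look
roughly like the sketch below. This is a hypothetical illustration: the
'module=' key name is an assumption about the bpf-loader section syntax,
not something shown in this merge.

  /*
   * Hypothetical scriptlet: attach to a module symbol, passing a
   * 'perf probe' tunable through the section name. The 'module='
   * key is an assumed example of the key=val;key=val syntax the
   * merge message describes.
   */
  #define SEC(NAME) __attribute__((section(NAME), used))

  struct pt_regs;

  SEC("module=bridge;func=br_handle_frame")
  int on_bridge_frame(struct pt_regs *ctx)
  {
          return 1;       /* let every hit through the filter */
  }

  char _license[] SEC("license") = "GPL";
  int _version SEC("version") = LINUX_VERSION_CODE;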
-rw-r--r-- tools/Makefile | 2
-rw-r--r-- tools/include/linux/string.h | 11
-rw-r--r-- tools/lib/bpf/libbpf.c | 146
-rw-r--r-- tools/lib/bpf/libbpf.h | 64
-rw-r--r-- tools/lib/string.c | 62
-rw-r--r-- tools/perf/Documentation/perf-report.txt | 14
-rw-r--r-- tools/perf/MANIFEST | 2
-rw-r--r-- tools/perf/arch/x86/include/arch-tests.h | 8
-rw-r--r-- tools/perf/arch/x86/tests/insn-x86.c | 2
-rw-r--r-- tools/perf/arch/x86/tests/intel-cqm.c | 2
-rw-r--r-- tools/perf/arch/x86/tests/perf-time-to-tsc.c | 2
-rw-r--r-- tools/perf/arch/x86/tests/rdpmc.c | 2
-rw-r--r-- tools/perf/arch/x86/util/Build | 1
-rw-r--r-- tools/perf/builtin-report.c | 4
-rw-r--r-- tools/perf/config/Makefile | 12
-rw-r--r-- tools/perf/tests/.gitignore | 1
-rw-r--r-- tools/perf/tests/Build | 9
-rw-r--r-- tools/perf/tests/attr.c | 2
-rw-r--r-- tools/perf/tests/bp_signal.c | 2
-rw-r--r-- tools/perf/tests/bp_signal_overflow.c | 2
-rw-r--r-- tools/perf/tests/bpf-script-test-prologue.c | 35
-rw-r--r-- tools/perf/tests/bpf.c | 93
-rw-r--r-- tools/perf/tests/builtin-test.c | 112
-rw-r--r-- tools/perf/tests/code-reading.c | 2
-rw-r--r-- tools/perf/tests/dso-data.c | 6
-rw-r--r-- tools/perf/tests/dwarf-unwind.c | 2
-rw-r--r-- tools/perf/tests/evsel-roundtrip-name.c | 2
-rw-r--r-- tools/perf/tests/evsel-tp-sched.c | 2
-rw-r--r-- tools/perf/tests/fdarray.c | 4
-rw-r--r-- tools/perf/tests/hists_cumulate.c | 2
-rw-r--r-- tools/perf/tests/hists_filter.c | 2
-rw-r--r-- tools/perf/tests/hists_link.c | 2
-rw-r--r-- tools/perf/tests/hists_output.c | 2
-rw-r--r-- tools/perf/tests/keep-tracking.c | 2
-rw-r--r-- tools/perf/tests/kmod-path.c | 2
-rw-r--r-- tools/perf/tests/llvm.c | 75
-rw-r--r-- tools/perf/tests/llvm.h | 2
-rw-r--r-- tools/perf/tests/mmap-basic.c | 2
-rw-r--r-- tools/perf/tests/mmap-thread-lookup.c | 2
-rw-r--r-- tools/perf/tests/openat-syscall-all-cpus.c | 2
-rw-r--r-- tools/perf/tests/openat-syscall-tp-fields.c | 2
-rw-r--r-- tools/perf/tests/openat-syscall.c | 2
-rw-r--r-- tools/perf/tests/parse-events.c | 2
-rw-r--r-- tools/perf/tests/parse-no-sample-id-all.c | 2
-rw-r--r-- tools/perf/tests/perf-record.c | 2
-rw-r--r-- tools/perf/tests/pmu.c | 2
-rw-r--r-- tools/perf/tests/python-use.c | 3
-rw-r--r-- tools/perf/tests/sample-parsing.c | 2
-rw-r--r-- tools/perf/tests/sw-clock.c | 2
-rw-r--r-- tools/perf/tests/switch-tracking.c | 2
-rw-r--r-- tools/perf/tests/task-exit.c | 2
-rw-r--r-- tools/perf/tests/tests.h | 89
-rw-r--r-- tools/perf/tests/thread-map.c | 2
-rw-r--r-- tools/perf/tests/thread-mg-share.c | 2
-rw-r--r-- tools/perf/tests/topology.c | 2
-rw-r--r-- tools/perf/tests/vmlinux-kallsyms.c | 2
-rw-r--r-- tools/perf/ui/browsers/hists.c | 315
-rw-r--r-- tools/perf/ui/gtk/hists.c | 148
-rw-r--r-- tools/perf/ui/stdio/hist.c | 94
-rw-r--r-- tools/perf/util/Build | 7
-rw-r--r-- tools/perf/util/bpf-loader.c | 434
-rw-r--r-- tools/perf/util/bpf-loader.h | 4
-rw-r--r-- tools/perf/util/bpf-prologue.c | 455
-rw-r--r-- tools/perf/util/bpf-prologue.h | 34
-rw-r--r-- tools/perf/util/callchain.c | 135
-rw-r--r-- tools/perf/util/callchain.h | 28
-rw-r--r-- tools/perf/util/dso.c | 2
-rw-r--r-- tools/perf/util/exec_cmd.c | 21
-rw-r--r-- tools/perf/util/exec_cmd.h | 5
-rw-r--r-- tools/perf/util/help.c | 6
-rw-r--r-- tools/perf/util/include/linux/string.h | 3
-rw-r--r-- tools/perf/util/machine.c | 17
-rw-r--r-- tools/perf/util/probe-event.c | 7
-rw-r--r-- tools/perf/util/probe-finder.c | 9
-rw-r--r-- tools/perf/util/string.c | 16
-rw-r--r-- tools/perf/util/symbol-elf.c | 2
-rw-r--r-- tools/perf/util/util.c | 3
77 files changed, 2286 insertions, 282 deletions
diff --git a/tools/Makefile b/tools/Makefile
index 7dc820a8c1f1..0ba0df3b516f 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -96,7 +96,7 @@ cgroup_install firewire_install hv_install lguest_install perf_install usb_insta
96 $(call descend,$(@:_install=),install) 96 $(call descend,$(@:_install=),install)
97 97
98selftests_install: 98selftests_install:
99 $(call descend,testing/$(@:_clean=),install) 99 $(call descend,testing/$(@:_install=),install)
100 100
101turbostat_install x86_energy_perf_policy_install: 101turbostat_install x86_energy_perf_policy_install:
102 $(call descend,power/x86/$(@:_install=),install) 102 $(call descend,power/x86/$(@:_install=),install)
diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h
new file mode 100644
index 000000000000..2e2f736c039c
--- /dev/null
+++ b/tools/include/linux/string.h
@@ -0,0 +1,11 @@
1#ifndef _TOOLS_LINUX_STRING_H_
2#define _TOOLS_LINUX_STRING_H_
3
4
5#include <linux/types.h> /* for size_t */
6
7void *memdup(const void *src, size_t len);
8
9int strtobool(const char *s, bool *res);
10
11#endif /* _TOOLS_LINUX_STRING_H_ */
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e176bad19bcb..e3f4c3379f14 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -152,7 +152,11 @@ struct bpf_program {
152 } *reloc_desc; 152 } *reloc_desc;
153 int nr_reloc; 153 int nr_reloc;
154 154
155 int fd; 155 struct {
156 int nr;
157 int *fds;
158 } instances;
159 bpf_program_prep_t preprocessor;
156 160
157 struct bpf_object *obj; 161 struct bpf_object *obj;
158 void *priv; 162 void *priv;
@@ -206,10 +210,25 @@ struct bpf_object {
206 210
207static void bpf_program__unload(struct bpf_program *prog) 211static void bpf_program__unload(struct bpf_program *prog)
208{ 212{
213 int i;
214
209 if (!prog) 215 if (!prog)
210 return; 216 return;
211 217
212 zclose(prog->fd); 218 /*
219 * If the object is opened but the program was never loaded,
220 * it is possible that prog->instances.nr == -1.
221 */
222 if (prog->instances.nr > 0) {
223 for (i = 0; i < prog->instances.nr; i++)
224 zclose(prog->instances.fds[i]);
225 } else if (prog->instances.nr != -1) {
226 pr_warning("Internal error: instances.nr is %d\n",
227 prog->instances.nr);
228 }
229
230 prog->instances.nr = -1;
231 zfree(&prog->instances.fds);
213} 232}
214 233
215static void bpf_program__exit(struct bpf_program *prog) 234static void bpf_program__exit(struct bpf_program *prog)
@@ -260,7 +279,8 @@ bpf_program__init(void *data, size_t size, char *name, int idx,
260 memcpy(prog->insns, data, 279 memcpy(prog->insns, data,
261 prog->insns_cnt * sizeof(struct bpf_insn)); 280 prog->insns_cnt * sizeof(struct bpf_insn));
262 prog->idx = idx; 281 prog->idx = idx;
263 prog->fd = -1; 282 prog->instances.fds = NULL;
283 prog->instances.nr = -1;
264 284
265 return 0; 285 return 0;
266errout: 286errout:
@@ -860,13 +880,73 @@ static int
860bpf_program__load(struct bpf_program *prog, 880bpf_program__load(struct bpf_program *prog,
861 char *license, u32 kern_version) 881 char *license, u32 kern_version)
862{ 882{
863 int err, fd; 883 int err = 0, fd, i;
864 884
865 err = load_program(prog->insns, prog->insns_cnt, 885 if (prog->instances.nr < 0 || !prog->instances.fds) {
866 license, kern_version, &fd); 886 if (prog->preprocessor) {
867 if (!err) 887 pr_warning("Internal error: can't load program '%s'\n",
868 prog->fd = fd; 888 prog->section_name);
889 return -LIBBPF_ERRNO__INTERNAL;
890 }
869 891
892 prog->instances.fds = malloc(sizeof(int));
893 if (!prog->instances.fds) {
894 pr_warning("Not enough memory for BPF fds\n");
895 return -ENOMEM;
896 }
897 prog->instances.nr = 1;
898 prog->instances.fds[0] = -1;
899 }
900
901 if (!prog->preprocessor) {
902 if (prog->instances.nr != 1) {
903 pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n",
904 prog->section_name, prog->instances.nr);
905 }
906 err = load_program(prog->insns, prog->insns_cnt,
907 license, kern_version, &fd);
908 if (!err)
909 prog->instances.fds[0] = fd;
910 goto out;
911 }
912
913 for (i = 0; i < prog->instances.nr; i++) {
914 struct bpf_prog_prep_result result;
915 bpf_program_prep_t preprocessor = prog->preprocessor;
916
917 bzero(&result, sizeof(result));
918 err = preprocessor(prog, i, prog->insns,
919 prog->insns_cnt, &result);
920 if (err) {
921 pr_warning("Preprocessing the %dth instance of program '%s' failed\n",
922 i, prog->section_name);
923 goto out;
924 }
925
926 if (!result.new_insn_ptr || !result.new_insn_cnt) {
927 pr_debug("Skip loading the %dth instance of program '%s'\n",
928 i, prog->section_name);
929 prog->instances.fds[i] = -1;
930 if (result.pfd)
931 *result.pfd = -1;
932 continue;
933 }
934
935 err = load_program(result.new_insn_ptr,
936 result.new_insn_cnt,
937 license, kern_version, &fd);
938
939 if (err) {
940 pr_warning("Loading the %dth instance of program '%s' failed\n",
941 i, prog->section_name);
942 goto out;
943 }
944
945 if (result.pfd)
946 *result.pfd = fd;
947 prog->instances.fds[i] = fd;
948 }
949out:
870 if (err) 950 if (err)
871 pr_warning("failed to load program '%s'\n", 951 pr_warning("failed to load program '%s'\n",
872 prog->section_name); 952 prog->section_name);
@@ -1121,5 +1201,53 @@ const char *bpf_program__title(struct bpf_program *prog, bool needs_copy)
1121 1201
1122int bpf_program__fd(struct bpf_program *prog) 1202int bpf_program__fd(struct bpf_program *prog)
1123{ 1203{
1124 return prog->fd; 1204 return bpf_program__nth_fd(prog, 0);
1205}
1206
1207int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
1208 bpf_program_prep_t prep)
1209{
1210 int *instances_fds;
1211
1212 if (nr_instances <= 0 || !prep)
1213 return -EINVAL;
1214
1215 if (prog->instances.nr > 0 || prog->instances.fds) {
1216 pr_warning("Can't set pre-processor after loading\n");
1217 return -EINVAL;
1218 }
1219
1220 instances_fds = malloc(sizeof(int) * nr_instances);
1221 if (!instances_fds) {
1222 pr_warning("alloc memory failed for fds\n");
1223 return -ENOMEM;
1224 }
1225
1226 /* fill all fd with -1 */
1227 memset(instances_fds, -1, sizeof(int) * nr_instances);
1228
1229 prog->instances.nr = nr_instances;
1230 prog->instances.fds = instances_fds;
1231 prog->preprocessor = prep;
1232 return 0;
1233}
1234
1235int bpf_program__nth_fd(struct bpf_program *prog, int n)
1236{
1237 int fd;
1238
1239 if (n >= prog->instances.nr || n < 0) {
1240 pr_warning("Can't get the %dth fd from program %s: only %d instances\n",
1241 n, prog->section_name, prog->instances.nr);
1242 return -EINVAL;
1243 }
1244
1245 fd = prog->instances.fds[n];
1246 if (fd < 0) {
1247 pr_warning("%dth instance of program '%s' is invalid\n",
1248 n, prog->section_name);
1249 return -ENOENT;
1250 }
1251
1252 return fd;
1125} 1253}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index c9a9aef2806c..949df4b346cf 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -88,6 +88,70 @@ const char *bpf_program__title(struct bpf_program *prog, bool needs_copy);
88 88
89int bpf_program__fd(struct bpf_program *prog); 89int bpf_program__fd(struct bpf_program *prog);
90 90
91struct bpf_insn;
92
93/*
94 * Libbpf allows callers to adjust BPF programs before they are loaded
95 * into the kernel. One program in an object file can be transformed into
96 * multiple variants to be attached to different code.
97 *
98 * bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd
99 * are the APIs for this purpose.
100 *
101 * - bpf_program_prep_t:
102 * It defines the 'preprocessor', a caller-defined function that is
103 * passed to libbpf through bpf_program__set_prep() and is called
104 * before the program is loaded. The preprocessor should adjust
105 * the program once for each instance, according to the instance
106 * index passed to it.
107 *
108 * - bpf_program__set_prep:
109 * Attaches a preprocessor to a BPF program. The number of instances
110 * that should be created is also passed through this function.
111 *
112 * - bpf_program__nth_fd:
113 * After the program is loaded, gets the resulting fd from the BPF
114 * program for each instance.
115 *
116 * If bpf_program__set_prep() is not used, the program would be loaded
117 * without adjustment during bpf_object__load(). The program has only
118 * one instance. In this case bpf_program__fd(prog) is equal to
119 * bpf_program__nth_fd(prog, 0).
120 */
121
122struct bpf_prog_prep_result {
123 /*
124 * If not NULL, load new instruction array.
125 * If set to NULL, don't load this instance.
126 */
127 struct bpf_insn *new_insn_ptr;
128 int new_insn_cnt;
129
130 /* If not NULL, result fd is set to it */
131 int *pfd;
132};
133
134/*
135 * Parameters of bpf_program_prep_t:
136 * - prog: The bpf_program being loaded.
137 * - n: Index of instance being generated.
138 * - insns: BPF instructions array.
139 * - insns_cnt: Number of instructions in insns.
140 * - res: Output parameter, result of transformation.
141 *
142 * Return value:
143 * - Zero: pre-processing success.
144 * - Non-zero: pre-processing failed, stop loading.
145 */
146typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n,
147 struct bpf_insn *insns, int insns_cnt,
148 struct bpf_prog_prep_result *res);
149
150int bpf_program__set_prep(struct bpf_program *prog, int nr_instance,
151 bpf_program_prep_t prep);
152
153int bpf_program__nth_fd(struct bpf_program *prog, int n);
154
91/* 155/*
92 * We don't need __attribute__((packed)) now since it is 156 * We don't need __attribute__((packed)) now since it is
93 * unnecessary for 'bpf_map_def' because they are all aligned. 157 * unnecessary for 'bpf_map_def' because they are all aligned.
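A minimal caller-side sketch of the preprocessor API documented above,
using only the declarations from this header plus the pre-existing
bpf_object__load() entry point. The trivial preprocessor loads every
instance with the unmodified instructions; a real one would patch 'insns'
per instance (e.g. to inject a generated prologue):

  #include "libbpf.h"     /* tools/lib/bpf/libbpf.h, as extended above */

  static int prep_instance(struct bpf_program *prog, int n,
                           struct bpf_insn *insns, int insns_cnt,
                           struct bpf_prog_prep_result *res)
  {
          /* Trivial transformation: reuse the original instructions. */
          res->new_insn_ptr = insns;
          res->new_insn_cnt = insns_cnt;
          res->pfd = NULL;  /* fetch fds later via bpf_program__nth_fd() */
          return 0;
  }

  static int load_three_instances(struct bpf_object *obj,
                                  struct bpf_program *prog)
  {
          int err = bpf_program__set_prep(prog, 3, prep_instance);

          if (err)
                  return err;

          err = bpf_object__load(obj);  /* calls prep_instance() 3 times */
          if (err)
                  return err;

          /* fd of the 2nd instance; instance 0 == bpf_program__fd(prog) */
          return bpf_program__nth_fd(prog, 1);
  }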
diff --git a/tools/lib/string.c b/tools/lib/string.c
new file mode 100644
index 000000000000..065e54f42d8f
--- /dev/null
+++ b/tools/lib/string.c
@@ -0,0 +1,62 @@
1/*
2 * linux/tools/lib/string.c
3 *
4 * Copied from linux/lib/string.c, where it is:
5 *
6 * Copyright (C) 1991, 1992 Linus Torvalds
7 *
8 * More specifically, the first copied function was strtobool, which
9 * was introduced by:
10 *
11 * d0f1fed29e6e ("Add a strtobool function matching semantics of existing in kernel equivalents")
12 * Author: Jonathan Cameron <jic23@cam.ac.uk>
13 */
14
15#include <stdlib.h>
16#include <string.h>
17#include <errno.h>
18#include <linux/string.h>
19
20/**
21 * memdup - duplicate region of memory
22 *
23 * @src: memory region to duplicate
24 * @len: memory region length
25 */
26void *memdup(const void *src, size_t len)
27{
28 void *p = malloc(len);
29
30 if (p)
31 memcpy(p, src, len);
32
33 return p;
34}
35
36/**
37 * strtobool - convert common user inputs into boolean values
38 * @s: input string
39 * @res: result
40 *
41 * This routine returns 0 iff the first character is one of 'Yy1Nn0'.
42 * Otherwise it will return -EINVAL. Value pointed to by res is
43 * updated upon finding a match.
44 */
45int strtobool(const char *s, bool *res)
46{
47 switch (s[0]) {
48 case 'y':
49 case 'Y':
50 case '1':
51 *res = true;
52 break;
53 case 'n':
54 case 'N':
55 case '0':
56 *res = false;
57 break;
58 default:
59 return -EINVAL;
60 }
61 return 0;
62}
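A small stand-alone usage sketch for the two helpers above (a hypothetical
caller, not part of the patch):

  #include <stdio.h>
  #include <stdlib.h>
  #include <stdbool.h>
  #include <linux/string.h>       /* the tools/include copy added above */

  int main(void)
  {
          bool on = false;
          char *copy;

          if (strtobool("Yes", &on) == 0)    /* only s[0] is examined */
                  printf("on=%d\n", on);     /* prints on=1 */

          copy = memdup("perf", 5);          /* 4 chars + the NUL */
          if (copy) {
                  printf("%s\n", copy);
                  free(copy);
          }
          return 0;
  }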
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 5ce8da1e1256..dab99ed2b339 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -170,17 +170,18 @@ OPTIONS
170 Dump raw trace in ASCII. 170 Dump raw trace in ASCII.
171 171
172-g:: 172-g::
173--call-graph=<print_type,threshold[,print_limit],order,sort_key,branch>:: 173--call-graph=<print_type,threshold[,print_limit],order,sort_key[,branch],value>::
174 Display call chains using type, min percent threshold, print limit, 174 Display call chains using type, min percent threshold, print limit,
175 call order, sort key and branch. Note that ordering of parameters is not 175 call order, sort key, optional branch and value. Note that ordering of
176 fixed so any parameter can be given in an arbitrary order. One exception 176 parameters is not fixed so any parameter can be given in an arbitrary order.
177 is the print_limit which should be preceded by threshold. 177 One exception is the print_limit which should be preceded by threshold.
178 178
179 print_type can be either: 179 print_type can be either:
180 - flat: single column, linear exposure of call chains. 180 - flat: single column, linear exposure of call chains.
181 - graph: use a graph tree, displaying absolute overhead rates. (default) 181 - graph: use a graph tree, displaying absolute overhead rates. (default)
182 - fractal: like graph, but displays relative rates. Each branch of 182 - fractal: like graph, but displays relative rates. Each branch of
183 the tree is considered as a new profiled object. 183 the tree is considered as a new profiled object.
184 - folded: call chains are displayed in a line, separated by semicolons
184 - none: disable call chain display. 185 - none: disable call chain display.
185 186
186 threshold is a percentage value which specifies a minimum percent to be 187 threshold is a percentage value which specifies a minimum percent to be
@@ -204,6 +205,11 @@ OPTIONS
204 - branch: include last branch information in callgraph when available. 205 - branch: include last branch information in callgraph when available.
205 Usually more convenient to use --branch-history for this. 206 Usually more convenient to use --branch-history for this.
206 207
208 value can be:
209 - percent: display overhead percent (default)
210 - period: display event period
211 - count: display event count
212
207--children:: 213--children::
208 Accumulate callchain of children to parent entry so that then can 214 Accumulate callchain of children to parent entry so that then can
209 show up in the output. The output will have a new "Children" column 215 show up in the output. The output will have a new "Children" column
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 39c38cb45b00..2562eac6451d 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -22,6 +22,7 @@ tools/lib/api
22tools/lib/bpf 22tools/lib/bpf
23tools/lib/hweight.c 23tools/lib/hweight.c
24tools/lib/rbtree.c 24tools/lib/rbtree.c
25tools/lib/string.c
25tools/lib/symbol/kallsyms.c 26tools/lib/symbol/kallsyms.c
26tools/lib/symbol/kallsyms.h 27tools/lib/symbol/kallsyms.h
27tools/lib/util/find_next_bit.c 28tools/lib/util/find_next_bit.c
@@ -50,6 +51,7 @@ tools/include/linux/log2.h
50tools/include/linux/poison.h 51tools/include/linux/poison.h
51tools/include/linux/rbtree.h 52tools/include/linux/rbtree.h
52tools/include/linux/rbtree_augmented.h 53tools/include/linux/rbtree_augmented.h
54tools/include/linux/string.h
53tools/include/linux/types.h 55tools/include/linux/types.h
54tools/include/linux/err.h 56tools/include/linux/err.h
55include/asm-generic/bitops/arch_hweight.h 57include/asm-generic/bitops/arch_hweight.h
diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h
index 7ed00f4b0908..b48de2f5813c 100644
--- a/tools/perf/arch/x86/include/arch-tests.h
+++ b/tools/perf/arch/x86/include/arch-tests.h
@@ -2,10 +2,10 @@
2#define ARCH_TESTS_H 2#define ARCH_TESTS_H
3 3
4/* Tests */ 4/* Tests */
5int test__rdpmc(void); 5int test__rdpmc(int subtest);
6int test__perf_time_to_tsc(void); 6int test__perf_time_to_tsc(int subtest);
7int test__insn_x86(void); 7int test__insn_x86(int subtest);
8int test__intel_cqm_count_nmi_context(void); 8int test__intel_cqm_count_nmi_context(int subtest);
9 9
10#ifdef HAVE_DWARF_UNWIND_SUPPORT 10#ifdef HAVE_DWARF_UNWIND_SUPPORT
11struct thread; 11struct thread;
diff --git a/tools/perf/arch/x86/tests/insn-x86.c b/tools/perf/arch/x86/tests/insn-x86.c
index b6115dfd28f0..08d9b2bc185c 100644
--- a/tools/perf/arch/x86/tests/insn-x86.c
+++ b/tools/perf/arch/x86/tests/insn-x86.c
@@ -171,7 +171,7 @@ static int test_data_set(struct test_data *dat_set, int x86_64)
171 * verbose (-v) option to see all the instructions and whether or not they 171 * verbose (-v) option to see all the instructions and whether or not they
172 * decoded successfully. 172 * decoded successfully.
173 */ 173 */
174int test__insn_x86(void) 174int test__insn_x86(int subtest __maybe_unused)
175{ 175{
176 int ret = 0; 176 int ret = 0;
177 177
diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c
index d28c1b6a3b54..94e0cb7462f9 100644
--- a/tools/perf/arch/x86/tests/intel-cqm.c
+++ b/tools/perf/arch/x86/tests/intel-cqm.c
@@ -33,7 +33,7 @@ static pid_t spawn(void)
33 * the last read counter value to avoid triggering a WARN_ON_ONCE() in 33 * the last read counter value to avoid triggering a WARN_ON_ONCE() in
34 * smp_call_function_many() caused by sending IPIs from NMI context. 34 * smp_call_function_many() caused by sending IPIs from NMI context.
35 */ 35 */
36int test__intel_cqm_count_nmi_context(void) 36int test__intel_cqm_count_nmi_context(int subtest __maybe_unused)
37{ 37{
38 struct perf_evlist *evlist = NULL; 38 struct perf_evlist *evlist = NULL;
39 struct perf_evsel *evsel = NULL; 39 struct perf_evsel *evsel = NULL;
diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
index 658cd200af74..a289aa8a083a 100644
--- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c
+++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
@@ -35,7 +35,7 @@
35 * %0 is returned, otherwise %-1 is returned. If TSC conversion is not 35 * %0 is returned, otherwise %-1 is returned. If TSC conversion is not
36 * supported then the test passes but " (not supported)" is printed. 36 * supported then the test passes but " (not supported)" is printed.
37 */ 37 */
38int test__perf_time_to_tsc(void) 38int test__perf_time_to_tsc(int subtest __maybe_unused)
39{ 39{
40 struct record_opts opts = { 40 struct record_opts opts = {
41 .mmap_pages = UINT_MAX, 41 .mmap_pages = UINT_MAX,
diff --git a/tools/perf/arch/x86/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c
index e7688214c7cf..7bb0d13c235f 100644
--- a/tools/perf/arch/x86/tests/rdpmc.c
+++ b/tools/perf/arch/x86/tests/rdpmc.c
@@ -149,7 +149,7 @@ out_close:
149 return 0; 149 return 0;
150} 150}
151 151
152int test__rdpmc(void) 152int test__rdpmc(int subtest __maybe_unused)
153{ 153{
154 int status = 0; 154 int status = 0;
155 int wret = 0; 155 int wret = 0;
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index ff63649fa9ac..465970370f3e 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -5,6 +5,7 @@ libperf-y += kvm-stat.o
5libperf-y += perf_regs.o 5libperf-y += perf_regs.o
6 6
7libperf-$(CONFIG_DWARF) += dwarf-regs.o 7libperf-$(CONFIG_DWARF) += dwarf-regs.o
8libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
8 9
9libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o 10libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
10libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o 11libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index f256fac1e722..14428342b47b 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -625,7 +625,7 @@ parse_percent_limit(const struct option *opt, const char *str,
625 return 0; 625 return 0;
626} 626}
627 627
628#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function" 628#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent"
629 629
630const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n" 630const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n"
631 CALLCHAIN_REPORT_HELP 631 CALLCHAIN_REPORT_HELP
@@ -708,7 +708,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
708 OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, 708 OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
709 "Only display entries with parent-match"), 709 "Only display entries with parent-match"),
710 OPT_CALLBACK_DEFAULT('g', "call-graph", &report, 710 OPT_CALLBACK_DEFAULT('g', "call-graph", &report,
711 "print_type,threshold[,print_limit],order,sort_key[,branch]", 711 "print_type,threshold[,print_limit],order,sort_key[,branch],value",
712 report_callchain_help, &report_parse_callchain_opt, 712 report_callchain_help, &report_parse_callchain_opt,
713 callchain_default_opt), 713 callchain_default_opt),
714 OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, 714 OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index de89ec574361..6eb9a956a408 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -318,6 +318,18 @@ ifndef NO_LIBELF
318 CFLAGS += -DHAVE_LIBBPF_SUPPORT 318 CFLAGS += -DHAVE_LIBBPF_SUPPORT
319 $(call detected,CONFIG_LIBBPF) 319 $(call detected,CONFIG_LIBBPF)
320 endif 320 endif
321
322 ifndef NO_DWARF
323 ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
324 CFLAGS += -DHAVE_BPF_PROLOGUE
325 $(call detected,CONFIG_BPF_PROLOGUE)
326 else
327 msg := $(warning BPF prologue is not supported by architecture $(ARCH), missing regs_query_register_offset());
328 endif
329 else
330 msg := $(warning DWARF support is off, BPF prologue is disabled);
331 endif
332
321 endif # NO_LIBBPF 333 endif # NO_LIBBPF
322endif # NO_LIBELF 334endif # NO_LIBELF
323 335
diff --git a/tools/perf/tests/.gitignore b/tools/perf/tests/.gitignore
index 489fc9ffbcb0..bf016c439fbd 100644
--- a/tools/perf/tests/.gitignore
+++ b/tools/perf/tests/.gitignore
@@ -1,2 +1,3 @@
1llvm-src-base.c 1llvm-src-base.c
2llvm-src-kbuild.c 2llvm-src-kbuild.c
3llvm-src-prologue.c
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index f41ebf8849fe..0ff8a973b81c 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -31,7 +31,7 @@ perf-y += sample-parsing.o
31perf-y += parse-no-sample-id-all.o 31perf-y += parse-no-sample-id-all.o
32perf-y += kmod-path.o 32perf-y += kmod-path.o
33perf-y += thread-map.o 33perf-y += thread-map.o
34perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o 34perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o
35perf-y += bpf.o 35perf-y += bpf.o
36perf-y += topology.o 36perf-y += topology.o
37 37
@@ -49,6 +49,13 @@ $(OUTPUT)tests/llvm-src-kbuild.c: tests/bpf-script-test-kbuild.c
49 $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@ 49 $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
50 $(Q)echo ';' >> $@ 50 $(Q)echo ';' >> $@
51 51
52$(OUTPUT)tests/llvm-src-prologue.c: tests/bpf-script-test-prologue.c
53 $(call rule_mkdir)
54 $(Q)echo '#include <tests/llvm.h>' > $@
55 $(Q)echo 'const char test_llvm__bpf_test_prologue_prog[] =' >> $@
56 $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
57 $(Q)echo ';' >> $@
58
52ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64)) 59ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64))
53perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o 60perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
54endif 61endif
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index 638875a0960a..b66730eb94e3 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -153,7 +153,7 @@ static int run_dir(const char *d, const char *perf)
153 return system(cmd); 153 return system(cmd);
154} 154}
155 155
156int test__attr(void) 156int test__attr(int subtest __maybe_unused)
157{ 157{
158 struct stat st; 158 struct stat st;
159 char path_perf[PATH_MAX]; 159 char path_perf[PATH_MAX];
diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c
index a02b035fd5aa..fb80c9eb6a95 100644
--- a/tools/perf/tests/bp_signal.c
+++ b/tools/perf/tests/bp_signal.c
@@ -111,7 +111,7 @@ static long long bp_count(int fd)
111 return count; 111 return count;
112} 112}
113 113
114int test__bp_signal(void) 114int test__bp_signal(int subtest __maybe_unused)
115{ 115{
116 struct sigaction sa; 116 struct sigaction sa;
117 long long count1, count2; 117 long long count1, count2;
diff --git a/tools/perf/tests/bp_signal_overflow.c b/tools/perf/tests/bp_signal_overflow.c
index e76537724491..89f92fa67cc4 100644
--- a/tools/perf/tests/bp_signal_overflow.c
+++ b/tools/perf/tests/bp_signal_overflow.c
@@ -58,7 +58,7 @@ static long long bp_count(int fd)
58#define EXECUTIONS 10000 58#define EXECUTIONS 10000
59#define THRESHOLD 100 59#define THRESHOLD 100
60 60
61int test__bp_signal_overflow(void) 61int test__bp_signal_overflow(int subtest __maybe_unused)
62{ 62{
63 struct perf_event_attr pe; 63 struct perf_event_attr pe;
64 struct sigaction sa; 64 struct sigaction sa;
diff --git a/tools/perf/tests/bpf-script-test-prologue.c b/tools/perf/tests/bpf-script-test-prologue.c
new file mode 100644
index 000000000000..7230e62c70fc
--- /dev/null
+++ b/tools/perf/tests/bpf-script-test-prologue.c
@@ -0,0 +1,35 @@
1/*
2 * bpf-script-test-prologue.c
3 * Test BPF prologue
4 */
5#ifndef LINUX_VERSION_CODE
6# error Need LINUX_VERSION_CODE
7# error Example: for a 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200"' into the llvm section of ~/.perfconfig
8#endif
9#define SEC(NAME) __attribute__((section(NAME), used))
10
11#include <uapi/linux/fs.h>
12
13#define FMODE_READ 0x1
14#define FMODE_WRITE 0x2
15
16static void (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
17 (void *) 6;
18
19SEC("func=null_lseek file->f_mode offset orig")
20int bpf_func__null_lseek(void *ctx, int err, unsigned long f_mode,
21 unsigned long offset, unsigned long orig)
22{
23 if (err)
24 return 0;
25 if (f_mode & FMODE_WRITE)
26 return 0;
27 if (offset & 1)
28 return 0;
29 if (orig == SEEK_CUR)
30 return 0;
31 return 1;
32}
33
34char _license[] SEC("license") = "GPL";
35int _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index ec16f7812c8b..33689a0cf821 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -19,6 +19,29 @@ static int epoll_pwait_loop(void)
19 return 0; 19 return 0;
20} 20}
21 21
22#ifdef HAVE_BPF_PROLOGUE
23
24static int llseek_loop(void)
25{
26 int fds[2], i;
27
28 fds[0] = open("/dev/null", O_RDONLY);
29 fds[1] = open("/dev/null", O_RDWR);
30
31 if (fds[0] < 0 || fds[1] < 0)
32 return -1;
33
34 for (i = 0; i < NR_ITERS; i++) {
35 lseek(fds[i % 2], i, (i / 2) % 2 ? SEEK_CUR : SEEK_SET);
36 lseek(fds[(i + 1) % 2], i, (i / 2) % 2 ? SEEK_CUR : SEEK_SET);
37 }
38 close(fds[0]);
39 close(fds[1]);
40 return 0;
41}
42
43#endif
44
22static struct { 45static struct {
23 enum test_llvm__testcase prog_id; 46 enum test_llvm__testcase prog_id;
24 const char *desc; 47 const char *desc;
@@ -37,6 +60,17 @@ static struct {
37 &epoll_pwait_loop, 60 &epoll_pwait_loop,
38 (NR_ITERS + 1) / 2, 61 (NR_ITERS + 1) / 2,
39 }, 62 },
63#ifdef HAVE_BPF_PROLOGUE
64 {
65 LLVM_TESTCASE_BPF_PROLOGUE,
66 "Test BPF prologue generation",
67 "[bpf_prologue_test]",
68 "fix kbuild first",
69 "check your vmlinux setting?",
70 &llseek_loop,
71 (NR_ITERS + 1) / 4,
72 },
73#endif
40}; 74};
41 75
42static int do_test(struct bpf_object *obj, int (*func)(void), 76static int do_test(struct bpf_object *obj, int (*func)(void),
@@ -68,8 +102,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
68 err = parse_events_load_bpf_obj(&parse_evlist, &parse_evlist.list, obj); 102 err = parse_events_load_bpf_obj(&parse_evlist, &parse_evlist.list, obj);
69 if (err || list_empty(&parse_evlist.list)) { 103 if (err || list_empty(&parse_evlist.list)) {
70 pr_debug("Failed to add events selected by BPF\n"); 104 pr_debug("Failed to add events selected by BPF\n");
71 if (!err) 105 return TEST_FAIL;
72 return TEST_FAIL;
73 } 106 }
74 107
75 snprintf(pid, sizeof(pid), "%d", getpid()); 108 snprintf(pid, sizeof(pid), "%d", getpid());
@@ -123,8 +156,10 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
123 } 156 }
124 } 157 }
125 158
126 if (count != expect) 159 if (count != expect) {
127 pr_debug("BPF filter result incorrect\n"); 160 pr_debug("BPF filter result incorrect\n");
161 goto out_delete_evlist;
162 }
128 163
129 ret = TEST_OK; 164 ret = TEST_OK;
130 165
@@ -146,7 +181,7 @@ prepare_bpf(void *obj_buf, size_t obj_buf_sz, const char *name)
146 return obj; 181 return obj;
147} 182}
148 183
149static int __test__bpf(int index) 184static int __test__bpf(int idx)
150{ 185{
151 int ret; 186 int ret;
152 void *obj_buf; 187 void *obj_buf;
@@ -154,54 +189,72 @@ static int __test__bpf(int index)
154 struct bpf_object *obj; 189 struct bpf_object *obj;
155 190
156 ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz, 191 ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
157 bpf_testcase_table[index].prog_id, 192 bpf_testcase_table[idx].prog_id,
158 true); 193 true);
159 if (ret != TEST_OK || !obj_buf || !obj_buf_sz) { 194 if (ret != TEST_OK || !obj_buf || !obj_buf_sz) {
160 pr_debug("Unable to get BPF object, %s\n", 195 pr_debug("Unable to get BPF object, %s\n",
161 bpf_testcase_table[index].msg_compile_fail); 196 bpf_testcase_table[idx].msg_compile_fail);
162 if (index == 0) 197 if (idx == 0)
163 return TEST_SKIP; 198 return TEST_SKIP;
164 else 199 else
165 return TEST_FAIL; 200 return TEST_FAIL;
166 } 201 }
167 202
168 obj = prepare_bpf(obj_buf, obj_buf_sz, 203 obj = prepare_bpf(obj_buf, obj_buf_sz,
169 bpf_testcase_table[index].name); 204 bpf_testcase_table[idx].name);
170 if (!obj) { 205 if (!obj) {
171 ret = TEST_FAIL; 206 ret = TEST_FAIL;
172 goto out; 207 goto out;
173 } 208 }
174 209
175 ret = do_test(obj, 210 ret = do_test(obj,
176 bpf_testcase_table[index].target_func, 211 bpf_testcase_table[idx].target_func,
177 bpf_testcase_table[index].expect_result); 212 bpf_testcase_table[idx].expect_result);
178out: 213out:
179 bpf__clear(); 214 bpf__clear();
180 return ret; 215 return ret;
181} 216}
182 217
183int test__bpf(void) 218int test__bpf_subtest_get_nr(void)
219{
220 return (int)ARRAY_SIZE(bpf_testcase_table);
221}
222
223const char *test__bpf_subtest_get_desc(int i)
224{
225 if (i < 0 || i >= (int)ARRAY_SIZE(bpf_testcase_table))
226 return NULL;
227 return bpf_testcase_table[i].desc;
228}
229
230int test__bpf(int i)
184{ 231{
185 unsigned int i;
186 int err; 232 int err;
187 233
234 if (i < 0 || i >= (int)ARRAY_SIZE(bpf_testcase_table))
235 return TEST_FAIL;
236
188 if (geteuid() != 0) { 237 if (geteuid() != 0) {
189 pr_debug("Only root can run BPF test\n"); 238 pr_debug("Only root can run BPF test\n");
190 return TEST_SKIP; 239 return TEST_SKIP;
191 } 240 }
192 241
193 for (i = 0; i < ARRAY_SIZE(bpf_testcase_table); i++) { 242 err = __test__bpf(i);
194 err = __test__bpf(i); 243 return err;
244}
195 245
196 if (err != TEST_OK) 246#else
197 return err; 247int test__bpf_subtest_get_nr(void)
198 } 248{
249 return 0;
250}
199 251
200 return TEST_OK; 252const char *test__bpf_subtest_get_desc(int i __maybe_unused)
253{
254 return NULL;
201} 255}
202 256
203#else 257int test__bpf(int i __maybe_unused)
204int test__bpf(void)
205{ 258{
206 pr_debug("Skip BPF test because BPF support is not compiled\n"); 259 pr_debug("Skip BPF test because BPF support is not compiled\n");
207 return TEST_SKIP; 260 return TEST_SKIP;
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 80c442eab767..2b1ade1aafc3 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -160,6 +160,11 @@ static struct test generic_tests[] = {
160 { 160 {
161 .desc = "Test LLVM searching and compiling", 161 .desc = "Test LLVM searching and compiling",
162 .func = test__llvm, 162 .func = test__llvm,
163 .subtest = {
164 .skip_if_fail = true,
165 .get_nr = test__llvm_subtest_get_nr,
166 .get_desc = test__llvm_subtest_get_desc,
167 },
163 }, 168 },
164 { 169 {
165 .desc = "Test topology in session", 170 .desc = "Test topology in session",
@@ -168,6 +173,11 @@ static struct test generic_tests[] = {
168 { 173 {
169 .desc = "Test BPF filter", 174 .desc = "Test BPF filter",
170 .func = test__bpf, 175 .func = test__bpf,
176 .subtest = {
177 .skip_if_fail = true,
178 .get_nr = test__bpf_subtest_get_nr,
179 .get_desc = test__bpf_subtest_get_desc,
180 },
171 }, 181 },
172 { 182 {
173 .func = NULL, 183 .func = NULL,
@@ -203,7 +213,7 @@ static bool perf_test__matches(struct test *test, int curr, int argc, const char
203 return false; 213 return false;
204} 214}
205 215
206static int run_test(struct test *test) 216static int run_test(struct test *test, int subtest)
207{ 217{
208 int status, err = -1, child = fork(); 218 int status, err = -1, child = fork();
209 char sbuf[STRERR_BUFSIZE]; 219 char sbuf[STRERR_BUFSIZE];
@@ -216,7 +226,19 @@ static int run_test(struct test *test)
216 226
217 if (!child) { 227 if (!child) {
218 pr_debug("test child forked, pid %d\n", getpid()); 228 pr_debug("test child forked, pid %d\n", getpid());
219 err = test->func(); 229 if (!verbose) {
230 int nullfd = open("/dev/null", O_WRONLY);
231 if (nullfd >= 0) {
232 close(STDERR_FILENO);
233 close(STDOUT_FILENO);
234
235 dup2(nullfd, STDOUT_FILENO);
236 dup2(STDOUT_FILENO, STDERR_FILENO);
237 close(nullfd);
238 }
239 }
240
241 err = test->func(subtest);
220 exit(err); 242 exit(err);
221 } 243 }
222 244
@@ -237,6 +259,40 @@ static int run_test(struct test *test)
237 for (j = 0; j < ARRAY_SIZE(tests); j++) \ 259 for (j = 0; j < ARRAY_SIZE(tests); j++) \
238 for (t = &tests[j][0]; t->func; t++) 260 for (t = &tests[j][0]; t->func; t++)
239 261
262static int test_and_print(struct test *t, bool force_skip, int subtest)
263{
264 int err;
265
266 if (!force_skip) {
267 pr_debug("\n--- start ---\n");
268 err = run_test(t, subtest);
269 pr_debug("---- end ----\n");
270 } else {
271 pr_debug("\n--- force skipped ---\n");
272 err = TEST_SKIP;
273 }
274
275 if (!t->subtest.get_nr)
276 pr_debug("%s:", t->desc);
277 else
278 pr_debug("%s subtest %d:", t->desc, subtest);
279
280 switch (err) {
281 case TEST_OK:
282 pr_info(" Ok\n");
283 break;
284 case TEST_SKIP:
285 color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n");
286 break;
287 case TEST_FAIL:
288 default:
289 color_fprintf(stderr, PERF_COLOR_RED, " FAILED!\n");
290 break;
291 }
292
293 return err;
294}
295
240static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) 296static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
241{ 297{
242 struct test *t; 298 struct test *t;
@@ -264,21 +320,43 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
264 continue; 320 continue;
265 } 321 }
266 322
267 pr_debug("\n--- start ---\n"); 323 if (!t->subtest.get_nr) {
268 err = run_test(t); 324 test_and_print(t, false, -1);
269 pr_debug("---- end ----\n%s:", t->desc); 325 } else {
270 326 int subn = t->subtest.get_nr();
271 switch (err) { 327 /*
272 case TEST_OK: 328 * minus 2 to align with normal testcases.
273 pr_info(" Ok\n"); 329 * For subtest we print additional '.x' in number.
274 break; 330 * for example:
275 case TEST_SKIP: 331 *
276 color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n"); 332 * 35: Test LLVM searching and compiling :
277 break; 333 * 35.1: Basic BPF llvm compiling test : Ok
278 case TEST_FAIL: 334 */
279 default: 335 int subw = width > 2 ? width - 2 : width;
280 color_fprintf(stderr, PERF_COLOR_RED, " FAILED!\n"); 336 bool skip = false;
281 break; 337 int subi;
338
339 if (subn <= 0) {
340 color_fprintf(stderr, PERF_COLOR_YELLOW,
341 " Skip (not compiled in)\n");
342 continue;
343 }
344 pr_info("\n");
345
346 for (subi = 0; subi < subn; subi++) {
347 int len = strlen(t->subtest.get_desc(subi));
348
349 if (subw < len)
350 subw = len;
351 }
352
353 for (subi = 0; subi < subn; subi++) {
354 pr_info("%2d.%1d: %-*s:", i, subi + 1, subw,
355 t->subtest.get_desc(subi));
356 err = test_and_print(t, skip, subi);
357 if (err != TEST_OK && t->subtest.skip_if_fail)
358 skip = true;
359 }
282 } 360 }
283 } 361 }
284 362
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index a767a6400c5c..4417b6a079f0 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -601,7 +601,7 @@ out_err:
601 return err; 601 return err;
602} 602}
603 603
604int test__code_reading(void) 604int test__code_reading(int subtest __maybe_unused)
605{ 605{
606 int ret; 606 int ret;
607 607
diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c
index a218aeaf56a0..dc673ff7c437 100644
--- a/tools/perf/tests/dso-data.c
+++ b/tools/perf/tests/dso-data.c
@@ -110,7 +110,7 @@ static int dso__data_fd(struct dso *dso, struct machine *machine)
110 return fd; 110 return fd;
111} 111}
112 112
113int test__dso_data(void) 113int test__dso_data(int subtest __maybe_unused)
114{ 114{
115 struct machine machine; 115 struct machine machine;
116 struct dso *dso; 116 struct dso *dso;
@@ -245,7 +245,7 @@ static int set_fd_limit(int n)
245 return setrlimit(RLIMIT_NOFILE, &rlim); 245 return setrlimit(RLIMIT_NOFILE, &rlim);
246} 246}
247 247
248int test__dso_data_cache(void) 248int test__dso_data_cache(int subtest __maybe_unused)
249{ 249{
250 struct machine machine; 250 struct machine machine;
251 long nr_end, nr = open_files_cnt(); 251 long nr_end, nr = open_files_cnt();
@@ -302,7 +302,7 @@ int test__dso_data_cache(void)
302 return 0; 302 return 0;
303} 303}
304 304
305int test__dso_data_reopen(void) 305int test__dso_data_reopen(int subtest __maybe_unused)
306{ 306{
307 struct machine machine; 307 struct machine machine;
308 long nr_end, nr = open_files_cnt(); 308 long nr_end, nr = open_files_cnt();
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index 07221793a3ac..01f0b61de53d 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -142,7 +142,7 @@ static int krava_1(struct thread *thread)
142 return krava_2(thread); 142 return krava_2(thread);
143} 143}
144 144
145int test__dwarf_unwind(void) 145int test__dwarf_unwind(int subtest __maybe_unused)
146{ 146{
147 struct machines machines; 147 struct machines machines;
148 struct machine *machine; 148 struct machine *machine;
diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c
index 3fa715987a5e..1da92e1159ee 100644
--- a/tools/perf/tests/evsel-roundtrip-name.c
+++ b/tools/perf/tests/evsel-roundtrip-name.c
@@ -95,7 +95,7 @@ out_delete_evlist:
95#define perf_evsel__name_array_test(names) \ 95#define perf_evsel__name_array_test(names) \
96 __perf_evsel__name_array_test(names, ARRAY_SIZE(names)) 96 __perf_evsel__name_array_test(names, ARRAY_SIZE(names))
97 97
98int test__perf_evsel__roundtrip_name_test(void) 98int test__perf_evsel__roundtrip_name_test(int subtest __maybe_unused)
99{ 99{
100 int err = 0, ret = 0; 100 int err = 0, ret = 0;
101 101
diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c
index 790e413d9a1f..1984b3bbfe15 100644
--- a/tools/perf/tests/evsel-tp-sched.c
+++ b/tools/perf/tests/evsel-tp-sched.c
@@ -32,7 +32,7 @@ static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name,
32 return ret; 32 return ret;
33} 33}
34 34
35int test__perf_evsel__tp_sched_test(void) 35int test__perf_evsel__tp_sched_test(int subtest __maybe_unused)
36{ 36{
37 struct perf_evsel *evsel = perf_evsel__newtp("sched", "sched_switch"); 37 struct perf_evsel *evsel = perf_evsel__newtp("sched", "sched_switch");
38 int ret = 0; 38 int ret = 0;
diff --git a/tools/perf/tests/fdarray.c b/tools/perf/tests/fdarray.c
index d24b837951d4..c809463edbe5 100644
--- a/tools/perf/tests/fdarray.c
+++ b/tools/perf/tests/fdarray.c
@@ -25,7 +25,7 @@ static int fdarray__fprintf_prefix(struct fdarray *fda, const char *prefix, FILE
25 return printed + fdarray__fprintf(fda, fp); 25 return printed + fdarray__fprintf(fda, fp);
26} 26}
27 27
28int test__fdarray__filter(void) 28int test__fdarray__filter(int subtest __maybe_unused)
29{ 29{
30 int nr_fds, expected_fd[2], fd, err = TEST_FAIL; 30 int nr_fds, expected_fd[2], fd, err = TEST_FAIL;
31 struct fdarray *fda = fdarray__new(5, 5); 31 struct fdarray *fda = fdarray__new(5, 5);
@@ -103,7 +103,7 @@ out:
103 return err; 103 return err;
104} 104}
105 105
106int test__fdarray__add(void) 106int test__fdarray__add(int subtest __maybe_unused)
107{ 107{
108 int err = TEST_FAIL; 108 int err = TEST_FAIL;
109 struct fdarray *fda = fdarray__new(2, 2); 109 struct fdarray *fda = fdarray__new(2, 2);
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index 7ed737019de7..8292948bc5f9 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -686,7 +686,7 @@ out:
686 return err; 686 return err;
687} 687}
688 688
689int test__hists_cumulate(void) 689int test__hists_cumulate(int subtest __maybe_unused)
690{ 690{
691 int err = TEST_FAIL; 691 int err = TEST_FAIL;
692 struct machines machines; 692 struct machines machines;
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index 818acf875dd0..ccb5b4921f25 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c
@@ -104,7 +104,7 @@ out:
104 return TEST_FAIL; 104 return TEST_FAIL;
105} 105}
106 106
107int test__hists_filter(void) 107int test__hists_filter(int subtest __maybe_unused)
108{ 108{
109 int err = TEST_FAIL; 109 int err = TEST_FAIL;
110 struct machines machines; 110 struct machines machines;
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 8c102b011424..6243e2b2a245 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -274,7 +274,7 @@ static int validate_link(struct hists *leader, struct hists *other)
274 return __validate_link(leader, 0) || __validate_link(other, 1); 274 return __validate_link(leader, 0) || __validate_link(other, 1);
275} 275}
276 276
277int test__hists_link(void) 277int test__hists_link(int subtest __maybe_unused)
278{ 278{
279 int err = -1; 279 int err = -1;
280 struct hists *hists, *first_hists; 280 struct hists *hists, *first_hists;
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index adbebc852cc8..248beec1d917 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c
@@ -576,7 +576,7 @@ out:
576 return err; 576 return err;
577} 577}
578 578
579int test__hists_output(void) 579int test__hists_output(int subtest __maybe_unused)
580{ 580{
581 int err = TEST_FAIL; 581 int err = TEST_FAIL;
582 struct machines machines; 582 struct machines machines;
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
index a2e2269aa093..a337a6da1f39 100644
--- a/tools/perf/tests/keep-tracking.c
+++ b/tools/perf/tests/keep-tracking.c
@@ -49,7 +49,7 @@ static int find_comm(struct perf_evlist *evlist, const char *comm)
49 * when an event is disabled but a dummy software event is not disabled. If the 49 * when an event is disabled but a dummy software event is not disabled. If the
50 * test passes %0 is returned, otherwise %-1 is returned. 50 * test passes %0 is returned, otherwise %-1 is returned.
51 */ 51 */
52int test__keep_tracking(void) 52int test__keep_tracking(int subtest __maybe_unused)
53{ 53{
54 struct record_opts opts = { 54 struct record_opts opts = {
55 .mmap_pages = UINT_MAX, 55 .mmap_pages = UINT_MAX,
diff --git a/tools/perf/tests/kmod-path.c b/tools/perf/tests/kmod-path.c
index 08c433b4bf4f..d2af78193153 100644
--- a/tools/perf/tests/kmod-path.c
+++ b/tools/perf/tests/kmod-path.c
@@ -49,7 +49,7 @@ static int test_is_kernel_module(const char *path, int cpumode, bool expect)
49#define M(path, c, e) \ 49#define M(path, c, e) \
50 TEST_ASSERT_VAL("failed", !test_is_kernel_module(path, c, e)) 50 TEST_ASSERT_VAL("failed", !test_is_kernel_module(path, c, e))
51 51
52int test__kmod_path__parse(void) 52int test__kmod_path__parse(int subtest __maybe_unused)
53{ 53{
54 /* path alloc_name alloc_ext kmod comp name ext */ 54 /* path alloc_name alloc_ext kmod comp name ext */
55 T("/xxxx/xxxx/x-x.ko", true , true , true, false, "[x_x]", NULL); 55 T("/xxxx/xxxx/x-x.ko", true , true , true, false, "[x_x]", NULL);
diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c
index bc4cf507cde5..06f45c1d4256 100644
--- a/tools/perf/tests/llvm.c
+++ b/tools/perf/tests/llvm.c
@@ -44,13 +44,17 @@ static struct {
44 .source = test_llvm__bpf_test_kbuild_prog, 44 .source = test_llvm__bpf_test_kbuild_prog,
45 .desc = "Test kbuild searching", 45 .desc = "Test kbuild searching",
46 }, 46 },
47 [LLVM_TESTCASE_BPF_PROLOGUE] = {
48 .source = test_llvm__bpf_test_prologue_prog,
49 .desc = "Compile source for BPF prologue generation test",
50 },
47}; 51};
48 52
49 53
50int 54int
51test_llvm__fetch_bpf_obj(void **p_obj_buf, 55test_llvm__fetch_bpf_obj(void **p_obj_buf,
52 size_t *p_obj_buf_sz, 56 size_t *p_obj_buf_sz,
53 enum test_llvm__testcase index, 57 enum test_llvm__testcase idx,
54 bool force) 58 bool force)
55{ 59{
56 const char *source; 60 const char *source;
@@ -59,11 +63,11 @@ test_llvm__fetch_bpf_obj(void **p_obj_buf,
59 char *tmpl_new = NULL, *clang_opt_new = NULL; 63 char *tmpl_new = NULL, *clang_opt_new = NULL;
60 int err, old_verbose, ret = TEST_FAIL; 64 int err, old_verbose, ret = TEST_FAIL;
61 65
62 if (index >= __LLVM_TESTCASE_MAX) 66 if (idx >= __LLVM_TESTCASE_MAX)
63 return TEST_FAIL; 67 return TEST_FAIL;
64 68
65 source = bpf_source_table[index].source; 69 source = bpf_source_table[idx].source;
66 desc = bpf_source_table[index].desc; 70 desc = bpf_source_table[idx].desc;
67 71
68 perf_config(perf_config_cb, NULL); 72 perf_config(perf_config_cb, NULL);
69 73
@@ -127,44 +131,39 @@ out:
127 return ret; 131 return ret;
128} 132}
129 133
130int test__llvm(void) 134int test__llvm(int subtest)
131{ 135{
132 enum test_llvm__testcase i; 136 int ret;
137 void *obj_buf = NULL;
138 size_t obj_buf_sz = 0;
133 139
134 for (i = 0; i < __LLVM_TESTCASE_MAX; i++) { 140 if ((subtest < 0) || (subtest >= __LLVM_TESTCASE_MAX))
135 int ret; 141 return TEST_FAIL;
136 void *obj_buf = NULL;
137 size_t obj_buf_sz = 0;
138 142
139 ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz, 143 ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
140 i, false); 144 subtest, false);
141 145
142 if (ret == TEST_OK) { 146 if (ret == TEST_OK) {
143 ret = test__bpf_parsing(obj_buf, obj_buf_sz); 147 ret = test__bpf_parsing(obj_buf, obj_buf_sz);
144 if (ret != TEST_OK) 148 if (ret != TEST_OK) {
145 pr_debug("Failed to parse test case '%s'\n", 149 pr_debug("Failed to parse test case '%s'\n",
146 bpf_source_table[i].desc); 150 bpf_source_table[subtest].desc);
147 }
148 free(obj_buf);
149
150 switch (ret) {
151 case TEST_SKIP:
152 return TEST_SKIP;
153 case TEST_OK:
154 break;
155 default:
156 /*
157 * Test 0 is the basic LLVM test. If test 0
158 * fail, the basic LLVM support not functional
159 * so the whole test should fail. If other test
160 * case fail, it can be fixed by adjusting
161 * config so don't report error.
162 */
163 if (i == 0)
164 return TEST_FAIL;
165 else
166 return TEST_SKIP;
167 } 151 }
168 } 152 }
169 return TEST_OK; 153 free(obj_buf);
154
155 return ret;
156}
157
158int test__llvm_subtest_get_nr(void)
159{
160 return __LLVM_TESTCASE_MAX;
161}
162
163const char *test__llvm_subtest_get_desc(int subtest)
164{
165 if ((subtest < 0) || (subtest >= __LLVM_TESTCASE_MAX))
166 return NULL;
167
168 return bpf_source_table[subtest].desc;
170} 169}
diff --git a/tools/perf/tests/llvm.h b/tools/perf/tests/llvm.h
index d91d8f44efee..5150b4d6ef50 100644
--- a/tools/perf/tests/llvm.h
+++ b/tools/perf/tests/llvm.h
@@ -6,10 +6,12 @@
6 6
7extern const char test_llvm__bpf_base_prog[]; 7extern const char test_llvm__bpf_base_prog[];
8extern const char test_llvm__bpf_test_kbuild_prog[]; 8extern const char test_llvm__bpf_test_kbuild_prog[];
9extern const char test_llvm__bpf_test_prologue_prog[];
9 10
10enum test_llvm__testcase { 11enum test_llvm__testcase {
11 LLVM_TESTCASE_BASE, 12 LLVM_TESTCASE_BASE,
12 LLVM_TESTCASE_KBUILD, 13 LLVM_TESTCASE_KBUILD,
14 LLVM_TESTCASE_BPF_PROLOGUE,
13 __LLVM_TESTCASE_MAX, 15 __LLVM_TESTCASE_MAX,
14}; 16};
15 17
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 4495493c9431..359e98fcd94c 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -16,7 +16,7 @@
16 * Then it checks if the number of syscalls reported as perf events by 16 * Then it checks if the number of syscalls reported as perf events by
17 * the kernel corresponds to the number of syscalls made. 17 * the kernel corresponds to the number of syscalls made.
18 */ 18 */
19int test__basic_mmap(void) 19int test__basic_mmap(int subtest __maybe_unused)
20{ 20{
21 int err = -1; 21 int err = -1;
22 union perf_event *event; 22 union perf_event *event;
diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c
index 145050e2e544..6cdb97579c45 100644
--- a/tools/perf/tests/mmap-thread-lookup.c
+++ b/tools/perf/tests/mmap-thread-lookup.c
@@ -221,7 +221,7 @@ static int mmap_events(synth_cb synth)
221 * 221 *
222 * by using all thread objects. 222 * by using all thread objects.
223 */ 223 */
224int test__mmap_thread_lookup(void) 224int test__mmap_thread_lookup(int subtest __maybe_unused)
225{ 225{
226 /* perf_event__synthesize_threads synthesize */ 226 /* perf_event__synthesize_threads synthesize */
227 TEST_ASSERT_VAL("failed with synthesizing all", 227 TEST_ASSERT_VAL("failed with synthesizing all",
diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c
index 2006485a2859..53c2273e8859 100644
--- a/tools/perf/tests/openat-syscall-all-cpus.c
+++ b/tools/perf/tests/openat-syscall-all-cpus.c
@@ -7,7 +7,7 @@
7#include "debug.h" 7#include "debug.h"
8#include "stat.h" 8#include "stat.h"
9 9
10int test__openat_syscall_event_on_all_cpus(void) 10int test__openat_syscall_event_on_all_cpus(int subtest __maybe_unused)
11{ 11{
12 int err = -1, fd, cpu; 12 int err = -1, fd, cpu;
13 struct cpu_map *cpus; 13 struct cpu_map *cpus;
diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c
index 5e811cd8f1c3..eb99a105f31c 100644
--- a/tools/perf/tests/openat-syscall-tp-fields.c
+++ b/tools/perf/tests/openat-syscall-tp-fields.c
@@ -6,7 +6,7 @@
6#include "tests.h" 6#include "tests.h"
7#include "debug.h" 7#include "debug.h"
8 8
9int test__syscall_openat_tp_fields(void) 9int test__syscall_openat_tp_fields(int subtest __maybe_unused)
10{ 10{
11 struct record_opts opts = { 11 struct record_opts opts = {
12 .target = { 12 .target = {
diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c
index 033b54797b8a..1184f9ba6499 100644
--- a/tools/perf/tests/openat-syscall.c
+++ b/tools/perf/tests/openat-syscall.c
@@ -5,7 +5,7 @@
5#include "debug.h" 5#include "debug.h"
6#include "tests.h" 6#include "tests.h"
7 7
8int test__openat_syscall_event(void) 8int test__openat_syscall_event(int subtest __maybe_unused)
9{ 9{
10 int err = -1, fd; 10 int err = -1, fd;
11 struct perf_evsel *evsel; 11 struct perf_evsel *evsel;
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 636d7b42d844..abe8849d1d70 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -1765,7 +1765,7 @@ static void debug_warn(const char *warn, va_list params)
1765 fprintf(stderr, " Warning: %s\n", msg); 1765 fprintf(stderr, " Warning: %s\n", msg);
1766} 1766}
1767 1767
1768int test__parse_events(void) 1768int test__parse_events(int subtest __maybe_unused)
1769{ 1769{
1770 int ret1, ret2 = 0; 1770 int ret1, ret2 = 0;
1771 1771
diff --git a/tools/perf/tests/parse-no-sample-id-all.c b/tools/perf/tests/parse-no-sample-id-all.c
index 2c63ea658541..294c76b01b41 100644
--- a/tools/perf/tests/parse-no-sample-id-all.c
+++ b/tools/perf/tests/parse-no-sample-id-all.c
@@ -67,7 +67,7 @@ struct test_attr_event {
67 * 67 *
68 * Return: %0 on success, %-1 if the test fails. 68 * Return: %0 on success, %-1 if the test fails.
69 */ 69 */
70int test__parse_no_sample_id_all(void) 70int test__parse_no_sample_id_all(int subtest __maybe_unused)
71{ 71{
72 int err; 72 int err;
73 73
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 7a228a2a070b..9d5f0b57c4c1 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -32,7 +32,7 @@ realloc:
32 return cpu; 32 return cpu;
33} 33}
34 34
35int test__PERF_RECORD(void) 35int test__PERF_RECORD(int subtest __maybe_unused)
36{ 36{
37 struct record_opts opts = { 37 struct record_opts opts = {
38 .target = { 38 .target = {
diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c
index faa04e9d5d5f..1e2ba2602930 100644
--- a/tools/perf/tests/pmu.c
+++ b/tools/perf/tests/pmu.c
@@ -133,7 +133,7 @@ static struct list_head *test_terms_list(void)
133 return &terms; 133 return &terms;
134} 134}
135 135
136int test__pmu(void) 136int test__pmu(int subtest __maybe_unused)
137{ 137{
138 char *format = test_format_dir_get(); 138 char *format = test_format_dir_get();
139 LIST_HEAD(formats); 139 LIST_HEAD(formats);
diff --git a/tools/perf/tests/python-use.c b/tools/perf/tests/python-use.c
index 7760277c6def..7a52834ee0d0 100644
--- a/tools/perf/tests/python-use.c
+++ b/tools/perf/tests/python-use.c
@@ -4,11 +4,12 @@
4 4
5#include <stdio.h> 5#include <stdio.h>
6#include <stdlib.h> 6#include <stdlib.h>
7#include <linux/compiler.h>
7#include "tests.h" 8#include "tests.h"
8 9
9extern int verbose; 10extern int verbose;
10 11
11int test__python_use(void) 12int test__python_use(int subtest __maybe_unused)
12{ 13{
13 char *cmd; 14 char *cmd;
14 int ret; 15 int ret;
diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c
index 30c02181e78b..5f23710b9fee 100644
--- a/tools/perf/tests/sample-parsing.c
+++ b/tools/perf/tests/sample-parsing.c
@@ -290,7 +290,7 @@ out_free:
290 * checks sample format bits separately and together. If the test passes %0 is 290 * checks sample format bits separately and together. If the test passes %0 is
291 * returned, otherwise %-1 is returned. 291 * returned, otherwise %-1 is returned.
292 */ 292 */
293int test__sample_parsing(void) 293int test__sample_parsing(int subtest __maybe_unused)
294{ 294{
295 const u64 rf[] = {4, 5, 6, 7, 12, 13, 14, 15}; 295 const u64 rf[] = {4, 5, 6, 7, 12, 13, 14, 15};
296 u64 sample_type; 296 u64 sample_type;
diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c
index 5b83f56a3b6f..36e8ce1550e3 100644
--- a/tools/perf/tests/sw-clock.c
+++ b/tools/perf/tests/sw-clock.c
@@ -122,7 +122,7 @@ out_delete_evlist:
122 return err; 122 return err;
123} 123}
124 124
125int test__sw_clock_freq(void) 125int test__sw_clock_freq(int subtest __maybe_unused)
126{ 126{
127 int ret; 127 int ret;
128 128
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index a02af503100c..dfbd8d69ce89 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -305,7 +305,7 @@ out_free_nodes:
305 * evsel->system_wide and evsel->tracking flags (respectively) with other events 305 * evsel->system_wide and evsel->tracking flags (respectively) with other events
306 * sometimes enabled or disabled. 306 * sometimes enabled or disabled.
307 */ 307 */
308int test__switch_tracking(void) 308int test__switch_tracking(int subtest __maybe_unused)
309{ 309{
310 const char *sched_switch = "sched:sched_switch"; 310 const char *sched_switch = "sched:sched_switch";
311 struct switch_tracking switch_tracking = { .tids = NULL, }; 311 struct switch_tracking switch_tracking = { .tids = NULL, };
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index add16385f13e..2dfff7ac8ef3 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -31,7 +31,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused,
31 * if the number of exit event reported by the kernel is 1 or not 31 * if the number of exit event reported by the kernel is 1 or not
32 * in order to check the kernel returns correct number of event. 32 * in order to check the kernel returns correct number of event.
33 */ 33 */
34int test__task_exit(void) 34int test__task_exit(int subtest __maybe_unused)
35{ 35{
36 int err = -1; 36 int err = -1;
37 union perf_event *event; 37 union perf_event *event;
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 3c8734a3abbc..a0733aaad081 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -1,6 +1,8 @@
1#ifndef TESTS_H 1#ifndef TESTS_H
2#define TESTS_H 2#define TESTS_H
3 3
4#include <stdbool.h>
5
4#define TEST_ASSERT_VAL(text, cond) \ 6#define TEST_ASSERT_VAL(text, cond) \
5do { \ 7do { \
6 if (!(cond)) { \ 8 if (!(cond)) { \
@@ -26,48 +28,57 @@ enum {
26 28
27struct test { 29struct test {
28 const char *desc; 30 const char *desc;
29 int (*func)(void); 31 int (*func)(int subtest);
32 struct {
33 bool skip_if_fail;
34 int (*get_nr)(void);
35 const char *(*get_desc)(int subtest);
36 } subtest;
30}; 37};
31 38
32/* Tests */ 39/* Tests */
33int test__vmlinux_matches_kallsyms(void); 40int test__vmlinux_matches_kallsyms(int subtest);
34int test__openat_syscall_event(void); 41int test__openat_syscall_event(int subtest);
35int test__openat_syscall_event_on_all_cpus(void); 42int test__openat_syscall_event_on_all_cpus(int subtest);
36int test__basic_mmap(void); 43int test__basic_mmap(int subtest);
37int test__PERF_RECORD(void); 44int test__PERF_RECORD(int subtest);
38int test__perf_evsel__roundtrip_name_test(void); 45int test__perf_evsel__roundtrip_name_test(int subtest);
39int test__perf_evsel__tp_sched_test(void); 46int test__perf_evsel__tp_sched_test(int subtest);
40int test__syscall_openat_tp_fields(void); 47int test__syscall_openat_tp_fields(int subtest);
41int test__pmu(void); 48int test__pmu(int subtest);
42int test__attr(void); 49int test__attr(int subtest);
43int test__dso_data(void); 50int test__dso_data(int subtest);
44int test__dso_data_cache(void); 51int test__dso_data_cache(int subtest);
45int test__dso_data_reopen(void); 52int test__dso_data_reopen(int subtest);
46int test__parse_events(void); 53int test__parse_events(int subtest);
47int test__hists_link(void); 54int test__hists_link(int subtest);
48int test__python_use(void); 55int test__python_use(int subtest);
49int test__bp_signal(void); 56int test__bp_signal(int subtest);
50int test__bp_signal_overflow(void); 57int test__bp_signal_overflow(int subtest);
51int test__task_exit(void); 58int test__task_exit(int subtest);
52int test__sw_clock_freq(void); 59int test__sw_clock_freq(int subtest);
53int test__code_reading(void); 60int test__code_reading(int subtest);
54int test__sample_parsing(void); 61int test__sample_parsing(int subtest);
55int test__keep_tracking(void); 62int test__keep_tracking(int subtest);
56int test__parse_no_sample_id_all(void); 63int test__parse_no_sample_id_all(int subtest);
57int test__dwarf_unwind(void); 64int test__dwarf_unwind(int subtest);
58int test__hists_filter(void); 65int test__hists_filter(int subtest);
59int test__mmap_thread_lookup(void); 66int test__mmap_thread_lookup(int subtest);
60int test__thread_mg_share(void); 67int test__thread_mg_share(int subtest);
61int test__hists_output(void); 68int test__hists_output(int subtest);
62int test__hists_cumulate(void); 69int test__hists_cumulate(int subtest);
63int test__switch_tracking(void); 70int test__switch_tracking(int subtest);
64int test__fdarray__filter(void); 71int test__fdarray__filter(int subtest);
65int test__fdarray__add(void); 72int test__fdarray__add(int subtest);
66int test__kmod_path__parse(void); 73int test__kmod_path__parse(int subtest);
67int test__thread_map(void); 74int test__thread_map(int subtest);
68int test__llvm(void); 75int test__llvm(int subtest);
69int test__bpf(void); 76const char *test__llvm_subtest_get_desc(int subtest);
70int test_session_topology(void); 77int test__llvm_subtest_get_nr(void);
78int test__bpf(int subtest);
79const char *test__bpf_subtest_get_desc(int subtest);
80int test__bpf_subtest_get_nr(void);
81int test_session_topology(int subtest);
71 82
72#if defined(__arm__) || defined(__aarch64__) 83#if defined(__arm__) || defined(__aarch64__)
73#ifdef HAVE_DWARF_UNWIND_SUPPORT 84#ifdef HAVE_DWARF_UNWIND_SUPPORT
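The new subtest block in struct test lets a table-driven test declare how many subtests it provides and how to describe each one. A hypothetical table entry wiring the LLVM test to these callbacks (field names come from the struct above; the description string and the existence of such an entry are assumptions, not part of this diff):

	/* Illustrative entry only; not taken from this patch. */
	static struct test llvm_test_entry = {
		.desc = "Test LLVM searching and compiling",	/* made up */
		.func = test__llvm,
		.subtest = {
			.skip_if_fail	= true,
			.get_nr		= test__llvm_subtest_get_nr,
			.get_desc	= test__llvm_subtest_get_desc,
		},
	};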
diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c
index 138a0e3431fa..2be02d303e82 100644
--- a/tools/perf/tests/thread-map.c
+++ b/tools/perf/tests/thread-map.c
@@ -4,7 +4,7 @@
4#include "thread_map.h" 4#include "thread_map.h"
5#include "debug.h" 5#include "debug.h"
6 6
7int test__thread_map(void) 7int test__thread_map(int subtest __maybe_unused)
8{ 8{
9 struct thread_map *map; 9 struct thread_map *map;
10 10
diff --git a/tools/perf/tests/thread-mg-share.c b/tools/perf/tests/thread-mg-share.c
index 01fabb19d746..188b63140fc8 100644
--- a/tools/perf/tests/thread-mg-share.c
+++ b/tools/perf/tests/thread-mg-share.c
@@ -4,7 +4,7 @@
4#include "map.h" 4#include "map.h"
5#include "debug.h" 5#include "debug.h"
6 6
7int test__thread_mg_share(void) 7int test__thread_mg_share(int subtest __maybe_unused)
8{ 8{
9 struct machines machines; 9 struct machines machines;
10 struct machine *machine; 10 struct machine *machine;
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index f5bb096c3bd9..98fe69ac553c 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -84,7 +84,7 @@ static int check_cpu_topology(char *path, struct cpu_map *map)
84 return 0; 84 return 0;
85} 85}
86 86
87int test_session_topology(void) 87int test_session_topology(int subtest __maybe_unused)
88{ 88{
89 char path[PATH_MAX]; 89 char path[PATH_MAX];
90 struct cpu_map *map; 90 struct cpu_map *map;
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index d677e018e504..f0bfc9e8fd9f 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -18,7 +18,7 @@ static int vmlinux_matches_kallsyms_filter(struct map *map __maybe_unused,
18 18
19#define UM(x) kallsyms_map->unmap_ip(kallsyms_map, (x)) 19#define UM(x) kallsyms_map->unmap_ip(kallsyms_map, (x))
20 20
21int test__vmlinux_matches_kallsyms(void) 21int test__vmlinux_matches_kallsyms(int subtest __maybe_unused)
22{ 22{
23 int err = -1; 23 int err = -1;
24 struct rb_node *nd; 24 struct rb_node *nd;
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index fa9eb92c9e24..a211b7b6a81e 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -178,12 +178,51 @@ static int callchain_node__count_rows_rb_tree(struct callchain_node *node)
178 return n; 178 return n;
179} 179}
180 180
181static int callchain_node__count_flat_rows(struct callchain_node *node)
182{
183 struct callchain_list *chain;
184 char folded_sign = 0;
185 int n = 0;
186
187 list_for_each_entry(chain, &node->parent_val, list) {
188 if (!folded_sign) {
189 /* only check first chain list entry */
190 folded_sign = callchain_list__folded(chain);
191 if (folded_sign == '+')
192 return 1;
193 }
194 n++;
195 }
196
197 list_for_each_entry(chain, &node->val, list) {
198 if (!folded_sign) {
199 /* node->parent_val list might be empty */
200 folded_sign = callchain_list__folded(chain);
201 if (folded_sign == '+')
202 return 1;
203 }
204 n++;
205 }
206
207 return n;
208}
209
210static int callchain_node__count_folded_rows(struct callchain_node *node __maybe_unused)
211{
212 return 1;
213}
214
181static int callchain_node__count_rows(struct callchain_node *node) 215static int callchain_node__count_rows(struct callchain_node *node)
182{ 216{
183 struct callchain_list *chain; 217 struct callchain_list *chain;
184 bool unfolded = false; 218 bool unfolded = false;
185 int n = 0; 219 int n = 0;
186 220
221 if (callchain_param.mode == CHAIN_FLAT)
222 return callchain_node__count_flat_rows(node);
223 else if (callchain_param.mode == CHAIN_FOLDED)
224 return callchain_node__count_folded_rows(node);
225
187 list_for_each_entry(chain, &node->val, list) { 226 list_for_each_entry(chain, &node->val, list) {
188 ++n; 227 ++n;
189 unfolded = chain->unfolded; 228 unfolded = chain->unfolded;
@@ -263,7 +302,7 @@ static void callchain_node__init_have_children(struct callchain_node *node,
263 chain = list_entry(node->val.next, struct callchain_list, list); 302 chain = list_entry(node->val.next, struct callchain_list, list);
264 chain->has_children = has_sibling; 303 chain->has_children = has_sibling;
265 304
266 if (!list_empty(&node->val)) { 305 if (node->val.next != node->val.prev) {
267 chain = list_entry(node->val.prev, struct callchain_list, list); 306 chain = list_entry(node->val.prev, struct callchain_list, list);
268 chain->has_children = !RB_EMPTY_ROOT(&node->rb_root); 307 chain->has_children = !RB_EMPTY_ROOT(&node->rb_root);
269 } 308 }
@@ -279,6 +318,9 @@ static void callchain__init_have_children(struct rb_root *root)
279 for (nd = rb_first(root); nd; nd = rb_next(nd)) { 318 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
280 struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node); 319 struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node);
281 callchain_node__init_have_children(node, has_sibling); 320 callchain_node__init_have_children(node, has_sibling);
321 if (callchain_param.mode == CHAIN_FLAT ||
322 callchain_param.mode == CHAIN_FOLDED)
323 callchain_node__make_parent_list(node);
282 } 324 }
283} 325}
284 326
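Both new counting helpers rely on the parent_val list that callchain_node__make_parent_list() (called just above for CHAIN_FLAT and CHAIN_FOLDED) attaches to each node: it records the node's path from the root, so a chain split across several tree nodes can be rendered at a single level. As a sketch of the intent: if the chain a -> b -> c is stored as a node holding {a, b} with a child holding {c}, the child's parent_val lists a and b, letting the flat and folded renderers print the full a;b;c chain from the leaf alone.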
@@ -574,6 +616,231 @@ static bool hist_browser__check_dump_full(struct hist_browser *browser __maybe_u
574 616
575#define LEVEL_OFFSET_STEP 3 617#define LEVEL_OFFSET_STEP 3
576 618
619static int hist_browser__show_callchain_list(struct hist_browser *browser,
620 struct callchain_node *node,
621 struct callchain_list *chain,
622 unsigned short row, u64 total,
623 bool need_percent, int offset,
624 print_callchain_entry_fn print,
625 struct callchain_print_arg *arg)
626{
627 char bf[1024], *alloc_str;
628 const char *str;
629
630 if (arg->row_offset != 0) {
631 arg->row_offset--;
632 return 0;
633 }
634
635 alloc_str = NULL;
636 str = callchain_list__sym_name(chain, bf, sizeof(bf),
637 browser->show_dso);
638
639 if (need_percent) {
640 char buf[64];
641
642 callchain_node__scnprintf_value(node, buf, sizeof(buf),
643 total);
644
645 if (asprintf(&alloc_str, "%s %s", buf, str) < 0)
646 str = "Not enough memory!";
647 else
648 str = alloc_str;
649 }
650
651 print(browser, chain, str, offset, row, arg);
652
653 free(alloc_str);
654 return 1;
655}
656
657static int hist_browser__show_callchain_flat(struct hist_browser *browser,
658 struct rb_root *root,
659 unsigned short row, u64 total,
660 print_callchain_entry_fn print,
661 struct callchain_print_arg *arg,
662 check_output_full_fn is_output_full)
663{
664 struct rb_node *node;
665 int first_row = row, offset = LEVEL_OFFSET_STEP;
666 bool need_percent;
667
668 node = rb_first(root);
669 need_percent = node && rb_next(node);
670
671 while (node) {
672 struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
673 struct rb_node *next = rb_next(node);
674 struct callchain_list *chain;
675 char folded_sign = ' ';
676 int first = true;
677 int extra_offset = 0;
678
679 list_for_each_entry(chain, &child->parent_val, list) {
680 bool was_first = first;
681
682 if (first)
683 first = false;
684 else if (need_percent)
685 extra_offset = LEVEL_OFFSET_STEP;
686
687 folded_sign = callchain_list__folded(chain);
688
689 row += hist_browser__show_callchain_list(browser, child,
690 chain, row, total,
691 was_first && need_percent,
692 offset + extra_offset,
693 print, arg);
694
695 if (is_output_full(browser, row))
696 goto out;
697
698 if (folded_sign == '+')
699 goto next;
700 }
701
702 list_for_each_entry(chain, &child->val, list) {
703 bool was_first = first;
704
705 if (first)
706 first = false;
707 else if (need_percent)
708 extra_offset = LEVEL_OFFSET_STEP;
709
710 folded_sign = callchain_list__folded(chain);
711
712 row += hist_browser__show_callchain_list(browser, child,
713 chain, row, total,
714 was_first && need_percent,
715 offset + extra_offset,
716 print, arg);
717
718 if (is_output_full(browser, row))
719 goto out;
720
721 if (folded_sign == '+')
722 break;
723 }
724
725next:
726 if (is_output_full(browser, row))
727 break;
728 node = next;
729 }
730out:
731 return row - first_row;
732}
733
734static char *hist_browser__folded_callchain_str(struct hist_browser *browser,
735 struct callchain_list *chain,
736 char *value_str, char *old_str)
737{
738 char bf[1024];
739 const char *str;
740 char *new;
741
742 str = callchain_list__sym_name(chain, bf, sizeof(bf),
743 browser->show_dso);
744 if (old_str) {
745 if (asprintf(&new, "%s%s%s", old_str,
746 symbol_conf.field_sep ?: ";", str) < 0)
747 new = NULL;
748 } else {
749 if (value_str) {
750 if (asprintf(&new, "%s %s", value_str, str) < 0)
751 new = NULL;
752 } else {
753 if (asprintf(&new, "%s", str) < 0)
754 new = NULL;
755 }
756 }
757 return new;
758}
759
760static int hist_browser__show_callchain_folded(struct hist_browser *browser,
761 struct rb_root *root,
762 unsigned short row, u64 total,
763 print_callchain_entry_fn print,
764 struct callchain_print_arg *arg,
765 check_output_full_fn is_output_full)
766{
767 struct rb_node *node;
768 int first_row = row, offset = LEVEL_OFFSET_STEP;
769 bool need_percent;
770
771 node = rb_first(root);
772 need_percent = node && rb_next(node);
773
774 while (node) {
775 struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
776 struct rb_node *next = rb_next(node);
777 struct callchain_list *chain, *first_chain = NULL;
778 int first = true;
779 char *value_str = NULL, *value_str_alloc = NULL;
780 char *chain_str = NULL, *chain_str_alloc = NULL;
781
782 if (arg->row_offset != 0) {
783 arg->row_offset--;
784 goto next;
785 }
786
787 if (need_percent) {
788 char buf[64];
789
790 callchain_node__scnprintf_value(child, buf, sizeof(buf), total);
791 if (asprintf(&value_str, "%s", buf) < 0) {
792 value_str = (char *)"<...>";
793 goto do_print;
794 }
795 value_str_alloc = value_str;
796 }
797
798 list_for_each_entry(chain, &child->parent_val, list) {
799 chain_str = hist_browser__folded_callchain_str(browser,
800 chain, value_str, chain_str);
801 if (first) {
802 first = false;
803 first_chain = chain;
804 }
805
806 if (chain_str == NULL) {
807 chain_str = (char *)"Not enough memory!";
808 goto do_print;
809 }
810
811 chain_str_alloc = chain_str;
812 }
813
814 list_for_each_entry(chain, &child->val, list) {
815 chain_str = hist_browser__folded_callchain_str(browser,
816 chain, value_str, chain_str);
817 if (first) {
818 first = false;
819 first_chain = chain;
820 }
821
822 if (chain_str == NULL) {
823 chain_str = (char *)"Not enough memory!";
824 goto do_print;
825 }
826
827 chain_str_alloc = chain_str;
828 }
829
830do_print:
831 print(browser, first_chain, chain_str, offset, row++, arg);
832 free(value_str_alloc);
833 free(chain_str_alloc);
834
835next:
836 if (is_output_full(browser, row))
837 break;
838 node = next;
839 }
840
841 return row - first_row;
842}
843
577static int hist_browser__show_callchain(struct hist_browser *browser, 844static int hist_browser__show_callchain(struct hist_browser *browser,
578 struct rb_root *root, int level, 845 struct rb_root *root, int level,
579 unsigned short row, u64 total, 846 unsigned short row, u64 total,
@@ -592,15 +859,12 @@ static int hist_browser__show_callchain(struct hist_browser *browser,
592 while (node) { 859 while (node) {
593 struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node); 860 struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
594 struct rb_node *next = rb_next(node); 861 struct rb_node *next = rb_next(node);
595 u64 cumul = callchain_cumul_hits(child);
596 struct callchain_list *chain; 862 struct callchain_list *chain;
597 char folded_sign = ' '; 863 char folded_sign = ' ';
598 int first = true; 864 int first = true;
599 int extra_offset = 0; 865 int extra_offset = 0;
600 866
601 list_for_each_entry(chain, &child->val, list) { 867 list_for_each_entry(chain, &child->val, list) {
602 char bf[1024], *alloc_str;
603 const char *str;
604 bool was_first = first; 868 bool was_first = first;
605 869
606 if (first) 870 if (first)
@@ -609,31 +873,16 @@ static int hist_browser__show_callchain(struct hist_browser *browser,
609 extra_offset = LEVEL_OFFSET_STEP; 873 extra_offset = LEVEL_OFFSET_STEP;
610 874
611 folded_sign = callchain_list__folded(chain); 875 folded_sign = callchain_list__folded(chain);
612 if (arg->row_offset != 0) {
613 arg->row_offset--;
614 goto do_next;
615 }
616
617 alloc_str = NULL;
618 str = callchain_list__sym_name(chain, bf, sizeof(bf),
619 browser->show_dso);
620
621 if (was_first && need_percent) {
622 double percent = cumul * 100.0 / total;
623 876
624 if (asprintf(&alloc_str, "%2.2f%% %s", percent, str) < 0) 877 row += hist_browser__show_callchain_list(browser, child,
625 str = "Not enough memory!"; 878 chain, row, total,
626 else 879 was_first && need_percent,
627 str = alloc_str; 880 offset + extra_offset,
628 } 881 print, arg);
629 882
630 print(browser, chain, str, offset + extra_offset, row, arg); 883 if (is_output_full(browser, row))
631
632 free(alloc_str);
633
634 if (is_output_full(browser, ++row))
635 goto out; 884 goto out;
636do_next: 885
637 if (folded_sign == '+') 886 if (folded_sign == '+')
638 break; 887 break;
639 } 888 }
@@ -844,10 +1093,22 @@ static int hist_browser__show_entry(struct hist_browser *browser,
844 total = entry->stat.period; 1093 total = entry->stat.period;
845 } 1094 }
846 1095
847 printed += hist_browser__show_callchain(browser, 1096 if (callchain_param.mode == CHAIN_FLAT) {
1097 printed += hist_browser__show_callchain_flat(browser,
1098 &entry->sorted_chain, row, total,
1099 hist_browser__show_callchain_entry, &arg,
1100 hist_browser__check_output_full);
1101 } else if (callchain_param.mode == CHAIN_FOLDED) {
1102 printed += hist_browser__show_callchain_folded(browser,
1103 &entry->sorted_chain, row, total,
1104 hist_browser__show_callchain_entry, &arg,
1105 hist_browser__check_output_full);
1106 } else {
1107 printed += hist_browser__show_callchain(browser,
848 &entry->sorted_chain, 1, row, total, 1108 &entry->sorted_chain, 1, row, total,
849 hist_browser__show_callchain_entry, &arg, 1109 hist_browser__show_callchain_entry, &arg,
850 hist_browser__check_output_full); 1110 hist_browser__check_output_full);
1111 }
851 1112
852 if (arg.is_current_entry) 1113 if (arg.is_current_entry)
853 browser->he_selection = entry; 1114 browser->he_selection = entry;
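The browser now dispatches on callchain_param.mode: CHAIN_FLAT gives every complete chain its own top-level list, CHAIN_FOLDED collapses a chain into a single row whose symbols are joined by symbol_conf.field_sep (';' by default, see hist_browser__folded_callchain_str() above), and all other modes keep the existing graph rendering. A made-up folded row, for illustration only:

	 2.33% __libc_start_main;main;cmd_record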
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index 4b3585eed1e8..467717276ab6 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -89,8 +89,8 @@ void perf_gtk__init_hpp(void)
89 perf_gtk__hpp_color_overhead_acc; 89 perf_gtk__hpp_color_overhead_acc;
90} 90}
91 91
92static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store, 92static void perf_gtk__add_callchain_flat(struct rb_root *root, GtkTreeStore *store,
93 GtkTreeIter *parent, int col, u64 total) 93 GtkTreeIter *parent, int col, u64 total)
94{ 94{
95 struct rb_node *nd; 95 struct rb_node *nd;
96 bool has_single_node = (rb_first(root) == rb_last(root)); 96 bool has_single_node = (rb_first(root) == rb_last(root));
@@ -100,13 +100,132 @@ static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
100 struct callchain_list *chain; 100 struct callchain_list *chain;
101 GtkTreeIter iter, new_parent; 101 GtkTreeIter iter, new_parent;
102 bool need_new_parent; 102 bool need_new_parent;
103 double percent;
104 u64 hits, child_total;
105 103
106 node = rb_entry(nd, struct callchain_node, rb_node); 104 node = rb_entry(nd, struct callchain_node, rb_node);
107 105
108 hits = callchain_cumul_hits(node); 106 new_parent = *parent;
109 percent = 100.0 * hits / total; 107 need_new_parent = !has_single_node;
108
109 callchain_node__make_parent_list(node);
110
111 list_for_each_entry(chain, &node->parent_val, list) {
112 char buf[128];
113
114 gtk_tree_store_append(store, &iter, &new_parent);
115
116 callchain_node__scnprintf_value(node, buf, sizeof(buf), total);
117 gtk_tree_store_set(store, &iter, 0, buf, -1);
118
119 callchain_list__sym_name(chain, buf, sizeof(buf), false);
120 gtk_tree_store_set(store, &iter, col, buf, -1);
121
122 if (need_new_parent) {
123 /*
124 * Only show the top-most symbol in a callchain
125 * if it's not the only callchain.
126 */
127 new_parent = iter;
128 need_new_parent = false;
129 }
130 }
131
132 list_for_each_entry(chain, &node->val, list) {
133 char buf[128];
134
135 gtk_tree_store_append(store, &iter, &new_parent);
136
137 callchain_node__scnprintf_value(node, buf, sizeof(buf), total);
138 gtk_tree_store_set(store, &iter, 0, buf, -1);
139
140 callchain_list__sym_name(chain, buf, sizeof(buf), false);
141 gtk_tree_store_set(store, &iter, col, buf, -1);
142
143 if (need_new_parent) {
144 /*
145 * Only show the top-most symbol in a callchain
146 * if it's not the only callchain.
147 */
148 new_parent = iter;
149 need_new_parent = false;
150 }
151 }
152 }
153}
154
155static void perf_gtk__add_callchain_folded(struct rb_root *root, GtkTreeStore *store,
156 GtkTreeIter *parent, int col, u64 total)
157{
158 struct rb_node *nd;
159
160 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
161 struct callchain_node *node;
162 struct callchain_list *chain;
163 GtkTreeIter iter;
164 char buf[64];
165 char *str, *str_alloc = NULL;
166 bool first = true;
167
168 node = rb_entry(nd, struct callchain_node, rb_node);
169
170 callchain_node__make_parent_list(node);
171
172 list_for_each_entry(chain, &node->parent_val, list) {
173 char name[1024];
174
175 callchain_list__sym_name(chain, name, sizeof(name), false);
176
177 if (asprintf(&str, "%s%s%s",
178 first ? "" : str_alloc,
179 first ? "" : symbol_conf.field_sep ?: "; ",
180 name) < 0)
181 return;
182
183 first = false;
184 free(str_alloc);
185 str_alloc = str;
186 }
187
188 list_for_each_entry(chain, &node->val, list) {
189 char name[1024];
190
191 callchain_list__sym_name(chain, name, sizeof(name), false);
192
193 if (asprintf(&str, "%s%s%s",
194 first ? "" : str_alloc,
195 first ? "" : symbol_conf.field_sep ?: "; ",
196 name) < 0)
197 return;
198
199 first = false;
200 free(str_alloc);
201 str_alloc = str;
202 }
203
204 gtk_tree_store_append(store, &iter, parent);
205
206 callchain_node__scnprintf_value(node, buf, sizeof(buf), total);
207 gtk_tree_store_set(store, &iter, 0, buf, -1);
208
209 gtk_tree_store_set(store, &iter, col, str, -1);
210
211 free(str_alloc);
212 }
213}
214
215static void perf_gtk__add_callchain_graph(struct rb_root *root, GtkTreeStore *store,
216 GtkTreeIter *parent, int col, u64 total)
217{
218 struct rb_node *nd;
219 bool has_single_node = (rb_first(root) == rb_last(root));
220
221 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
222 struct callchain_node *node;
223 struct callchain_list *chain;
224 GtkTreeIter iter, new_parent;
225 bool need_new_parent;
226 u64 child_total;
227
228 node = rb_entry(nd, struct callchain_node, rb_node);
110 229
111 new_parent = *parent; 230 new_parent = *parent;
112 need_new_parent = !has_single_node && (node->val_nr > 1); 231 need_new_parent = !has_single_node && (node->val_nr > 1);
@@ -116,7 +235,7 @@ static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
116 235
117 gtk_tree_store_append(store, &iter, &new_parent); 236 gtk_tree_store_append(store, &iter, &new_parent);
118 237
119 scnprintf(buf, sizeof(buf), "%5.2f%%", percent); 238 callchain_node__scnprintf_value(node, buf, sizeof(buf), total);
120 gtk_tree_store_set(store, &iter, 0, buf, -1); 239 gtk_tree_store_set(store, &iter, 0, buf, -1);
121 240
122 callchain_list__sym_name(chain, buf, sizeof(buf), false); 241 callchain_list__sym_name(chain, buf, sizeof(buf), false);
@@ -138,11 +257,22 @@ static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
138 child_total = total; 257 child_total = total;
139 258
140 /* Now 'iter' contains info of the last callchain_list */ 259 /* Now 'iter' contains info of the last callchain_list */
141 perf_gtk__add_callchain(&node->rb_root, store, &iter, col, 260 perf_gtk__add_callchain_graph(&node->rb_root, store, &iter, col,
142 child_total); 261 child_total);
143 } 262 }
144} 263}
145 264
265static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
266 GtkTreeIter *parent, int col, u64 total)
267{
268 if (callchain_param.mode == CHAIN_FLAT)
269 perf_gtk__add_callchain_flat(root, store, parent, col, total);
270 else if (callchain_param.mode == CHAIN_FOLDED)
271 perf_gtk__add_callchain_folded(root, store, parent, col, total);
272 else
273 perf_gtk__add_callchain_graph(root, store, parent, col, total);
274}
275
146static void on_row_activated(GtkTreeView *view, GtkTreePath *path, 276static void on_row_activated(GtkTreeView *view, GtkTreePath *path,
147 GtkTreeViewColumn *col __maybe_unused, 277 GtkTreeViewColumn *col __maybe_unused,
148 gpointer user_data __maybe_unused) 278 gpointer user_data __maybe_unused)
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index dfcbc90146ef..7ebc661be267 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -34,10 +34,10 @@ static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask,
34 return ret; 34 return ret;
35} 35}
36 36
37static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, 37static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node,
38 struct callchain_list *chain,
38 int depth, int depth_mask, int period, 39 int depth, int depth_mask, int period,
39 u64 total_samples, u64 hits, 40 u64 total_samples, int left_margin)
40 int left_margin)
41{ 41{
42 int i; 42 int i;
43 size_t ret = 0; 43 size_t ret = 0;
@@ -50,10 +50,9 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain,
50 else 50 else
51 ret += fprintf(fp, " "); 51 ret += fprintf(fp, " ");
52 if (!period && i == depth - 1) { 52 if (!period && i == depth - 1) {
53 double percent; 53 ret += fprintf(fp, "--");
54 54 ret += callchain_node__fprintf_value(node, fp, total_samples);
55 percent = hits * 100.0 / total_samples; 55 ret += fprintf(fp, "--");
56 ret += percent_color_fprintf(fp, "--%2.2f%%-- ", percent);
57 } else 56 } else
58 ret += fprintf(fp, "%s", " "); 57 ret += fprintf(fp, "%s", " ");
59 } 58 }
@@ -82,13 +81,14 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root,
82 int depth_mask, int left_margin) 81 int depth_mask, int left_margin)
83{ 82{
84 struct rb_node *node, *next; 83 struct rb_node *node, *next;
85 struct callchain_node *child; 84 struct callchain_node *child = NULL;
86 struct callchain_list *chain; 85 struct callchain_list *chain;
87 int new_depth_mask = depth_mask; 86 int new_depth_mask = depth_mask;
88 u64 remaining; 87 u64 remaining;
89 size_t ret = 0; 88 size_t ret = 0;
90 int i; 89 int i;
91 uint entries_printed = 0; 90 uint entries_printed = 0;
91 int cumul_count = 0;
92 92
93 remaining = total_samples; 93 remaining = total_samples;
94 94
@@ -100,6 +100,7 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root,
100 child = rb_entry(node, struct callchain_node, rb_node); 100 child = rb_entry(node, struct callchain_node, rb_node);
101 cumul = callchain_cumul_hits(child); 101 cumul = callchain_cumul_hits(child);
102 remaining -= cumul; 102 remaining -= cumul;
103 cumul_count += callchain_cumul_counts(child);
103 104
104 /* 105 /*
105 * The depth mask manages the output of pipes that show 106 * The depth mask manages the output of pipes that show
@@ -120,10 +121,9 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root,
120 left_margin); 121 left_margin);
121 i = 0; 122 i = 0;
122 list_for_each_entry(chain, &child->val, list) { 123 list_for_each_entry(chain, &child->val, list) {
123 ret += ipchain__fprintf_graph(fp, chain, depth, 124 ret += ipchain__fprintf_graph(fp, child, chain, depth,
124 new_depth_mask, i++, 125 new_depth_mask, i++,
125 total_samples, 126 total_samples,
126 cumul,
127 left_margin); 127 left_margin);
128 } 128 }
129 129
@@ -143,14 +143,23 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root,
143 143
144 if (callchain_param.mode == CHAIN_GRAPH_REL && 144 if (callchain_param.mode == CHAIN_GRAPH_REL &&
145 remaining && remaining != total_samples) { 145 remaining && remaining != total_samples) {
146 struct callchain_node rem_node = {
147 .hit = remaining,
148 };
146 149
147 if (!rem_sq_bracket) 150 if (!rem_sq_bracket)
148 return ret; 151 return ret;
149 152
153 if (callchain_param.value == CCVAL_COUNT && child && child->parent) {
154 rem_node.count = child->parent->children_count - cumul_count;
155 if (rem_node.count <= 0)
156 return ret;
157 }
158
150 new_depth_mask &= ~(1 << (depth - 1)); 159 new_depth_mask &= ~(1 << (depth - 1));
151 ret += ipchain__fprintf_graph(fp, &rem_hits, depth, 160 ret += ipchain__fprintf_graph(fp, &rem_node, &rem_hits, depth,
152 new_depth_mask, 0, total_samples, 161 new_depth_mask, 0, total_samples,
153 remaining, left_margin); 162 left_margin);
154 } 163 }
155 164
156 return ret; 165 return ret;
@@ -243,12 +252,11 @@ static size_t callchain__fprintf_flat(FILE *fp, struct rb_root *tree,
243 struct rb_node *rb_node = rb_first(tree); 252 struct rb_node *rb_node = rb_first(tree);
244 253
245 while (rb_node) { 254 while (rb_node) {
246 double percent;
247
248 chain = rb_entry(rb_node, struct callchain_node, rb_node); 255 chain = rb_entry(rb_node, struct callchain_node, rb_node);
249 percent = chain->hit * 100.0 / total_samples;
250 256
251 ret = percent_color_fprintf(fp, " %6.2f%%\n", percent); 257 ret += fprintf(fp, " ");
258 ret += callchain_node__fprintf_value(chain, fp, total_samples);
259 ret += fprintf(fp, "\n");
252 ret += __callchain__fprintf_flat(fp, chain, total_samples); 260 ret += __callchain__fprintf_flat(fp, chain, total_samples);
253 ret += fprintf(fp, "\n"); 261 ret += fprintf(fp, "\n");
254 if (++entries_printed == callchain_param.print_limit) 262 if (++entries_printed == callchain_param.print_limit)
@@ -260,6 +268,57 @@ static size_t callchain__fprintf_flat(FILE *fp, struct rb_root *tree,
260 return ret; 268 return ret;
261} 269}
262 270
271static size_t __callchain__fprintf_folded(FILE *fp, struct callchain_node *node)
272{
273 const char *sep = symbol_conf.field_sep ?: ";";
274 struct callchain_list *chain;
275 size_t ret = 0;
276 char bf[1024];
277 bool first;
278
279 if (!node)
280 return 0;
281
282 ret += __callchain__fprintf_folded(fp, node->parent);
283
284 first = (ret == 0);
285 list_for_each_entry(chain, &node->val, list) {
286 if (chain->ip >= PERF_CONTEXT_MAX)
287 continue;
288 ret += fprintf(fp, "%s%s", first ? "" : sep,
289 callchain_list__sym_name(chain,
290 bf, sizeof(bf), false));
291 first = false;
292 }
293
294 return ret;
295}
296
297static size_t callchain__fprintf_folded(FILE *fp, struct rb_root *tree,
298 u64 total_samples)
299{
300 size_t ret = 0;
301 u32 entries_printed = 0;
302 struct callchain_node *chain;
303 struct rb_node *rb_node = rb_first(tree);
304
305 while (rb_node) {
306
307 chain = rb_entry(rb_node, struct callchain_node, rb_node);
308
309 ret += callchain_node__fprintf_value(chain, fp, total_samples);
310 ret += fprintf(fp, " ");
311 ret += __callchain__fprintf_folded(fp, chain);
312 ret += fprintf(fp, "\n");
313 if (++entries_printed == callchain_param.print_limit)
314 break;
315
316 rb_node = rb_next(rb_node);
317 }
318
319 return ret;
320}
321
263static size_t hist_entry_callchain__fprintf(struct hist_entry *he, 322static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
264 u64 total_samples, int left_margin, 323 u64 total_samples, int left_margin,
265 FILE *fp) 324 FILE *fp)
@@ -278,6 +337,9 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
278 case CHAIN_FLAT: 337 case CHAIN_FLAT:
279 return callchain__fprintf_flat(fp, &he->sorted_chain, total_samples); 338 return callchain__fprintf_flat(fp, &he->sorted_chain, total_samples);
280 break; 339 break;
340 case CHAIN_FOLDED:
341 return callchain__fprintf_folded(fp, &he->sorted_chain, total_samples);
342 break;
281 case CHAIN_NONE: 343 case CHAIN_NONE:
282 break; 344 break;
283 default: 345 default:
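The stdio CHAIN_FOLDED case mirrors the browser: one line per chain, starting with the chain's value as printed by callchain_node__fprintf_value() (a percentage, period or count, depending on callchain_param.value), then the symbols from the root down joined by symbol_conf.field_sep or ';'. That one-line-per-chain shape is convenient for "folded stack" post-processors such as flame graph scripts. A made-up sample line:

	30.00% start_kernel;rest_init;cpu_startup_entry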
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 591b3fe3ed49..0513dd525d87 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -21,6 +21,7 @@ libperf-y += parse-events.o
21libperf-y += perf_regs.o 21libperf-y += perf_regs.o
22libperf-y += path.o 22libperf-y += path.o
23libperf-y += rbtree.o 23libperf-y += rbtree.o
24libperf-y += libstring.o
24libperf-y += bitmap.o 25libperf-y += bitmap.o
25libperf-y += hweight.o 26libperf-y += hweight.o
26libperf-y += run-command.o 27libperf-y += run-command.o
@@ -88,6 +89,7 @@ libperf-y += parse-branch-options.o
88libperf-y += parse-regs-options.o 89libperf-y += parse-regs-options.o
89 90
90libperf-$(CONFIG_LIBBPF) += bpf-loader.o 91libperf-$(CONFIG_LIBBPF) += bpf-loader.o
92libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
91libperf-$(CONFIG_LIBELF) += symbol-elf.o 93libperf-$(CONFIG_LIBELF) += symbol-elf.o
92libperf-$(CONFIG_LIBELF) += probe-file.o 94libperf-$(CONFIG_LIBELF) += probe-file.o
93libperf-$(CONFIG_LIBELF) += probe-event.o 95libperf-$(CONFIG_LIBELF) += probe-event.o
@@ -138,6 +140,7 @@ $(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
138 140
139CFLAGS_find_next_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" 141CFLAGS_find_next_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
140CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" 142CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
143CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
141CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" 144CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
142CFLAGS_parse-events.o += -Wno-redundant-decls 145CFLAGS_parse-events.o += -Wno-redundant-decls
143 146
@@ -153,6 +156,10 @@ $(OUTPUT)util/rbtree.o: ../lib/rbtree.c FORCE
153 $(call rule_mkdir) 156 $(call rule_mkdir)
154 $(call if_changed_dep,cc_o_c) 157 $(call if_changed_dep,cc_o_c)
155 158
159$(OUTPUT)util/libstring.o: ../lib/string.c FORCE
160 $(call rule_mkdir)
161 $(call if_changed_dep,cc_o_c)
162
156$(OUTPUT)util/hweight.o: ../lib/hweight.c FORCE 163$(OUTPUT)util/hweight.o: ../lib/hweight.c FORCE
157 $(call rule_mkdir) 164 $(call rule_mkdir)
158 $(call if_changed_dep,cc_o_c) 165 $(call if_changed_dep,cc_o_c)
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 4c50411371db..36544e5ece43 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -5,11 +5,15 @@
5 * Copyright (C) 2015 Huawei Inc. 5 * Copyright (C) 2015 Huawei Inc.
6 */ 6 */
7 7
8#include <linux/bpf.h>
8#include <bpf/libbpf.h> 9#include <bpf/libbpf.h>
9#include <linux/err.h> 10#include <linux/err.h>
11#include <linux/string.h>
10#include "perf.h" 12#include "perf.h"
11#include "debug.h" 13#include "debug.h"
12#include "bpf-loader.h" 14#include "bpf-loader.h"
15#include "bpf-prologue.h"
16#include "llvm-utils.h"
13#include "probe-event.h" 17#include "probe-event.h"
14#include "probe-finder.h" // for MAX_PROBES 18#include "probe-finder.h" // for MAX_PROBES
15#include "llvm-utils.h" 19#include "llvm-utils.h"
@@ -32,6 +36,10 @@ DEFINE_PRINT_FN(debug, 1)
32 36
33struct bpf_prog_priv { 37struct bpf_prog_priv {
34 struct perf_probe_event pev; 38 struct perf_probe_event pev;
39 bool need_prologue;
40 struct bpf_insn *insns_buf;
41 int nr_types;
42 int *type_mapping;
35}; 43};
36 44
37static bool libbpf_initialized; 45static bool libbpf_initialized;
@@ -106,10 +114,179 @@ bpf_prog_priv__clear(struct bpf_program *prog __maybe_unused,
106 struct bpf_prog_priv *priv = _priv; 114 struct bpf_prog_priv *priv = _priv;
107 115
108 cleanup_perf_probe_events(&priv->pev, 1); 116 cleanup_perf_probe_events(&priv->pev, 1);
117 zfree(&priv->insns_buf);
118 zfree(&priv->type_mapping);
109 free(priv); 119 free(priv);
110} 120}
111 121
112static int 122static int
123config__exec(const char *value, struct perf_probe_event *pev)
124{
125 pev->uprobes = true;
126 pev->target = strdup(value);
127 if (!pev->target)
128 return -ENOMEM;
129 return 0;
130}
131
132static int
133config__module(const char *value, struct perf_probe_event *pev)
134{
135 pev->uprobes = false;
136 pev->target = strdup(value);
137 if (!pev->target)
138 return -ENOMEM;
139 return 0;
140}
141
142static int
143config__bool(const char *value,
144 bool *pbool, bool invert)
145{
146 int err;
147 bool bool_value;
148
149 if (!pbool)
150 return -EINVAL;
151
152 err = strtobool(value, &bool_value);
153 if (err)
154 return err;
155
156 *pbool = invert ? !bool_value : bool_value;
157 return 0;
158}
159
160static int
161config__inlines(const char *value,
162 struct perf_probe_event *pev __maybe_unused)
163{
164 return config__bool(value, &probe_conf.no_inlines, true);
165}
166
167static int
168config__force(const char *value,
169 struct perf_probe_event *pev __maybe_unused)
170{
171 return config__bool(value, &probe_conf.force_add, false);
172}
173
174static struct {
175 const char *key;
176 const char *usage;
177 const char *desc;
178 int (*func)(const char *, struct perf_probe_event *);
179} bpf_config_terms[] = {
180 {
181 .key = "exec",
182 .usage = "exec=<full path of file>",
183 .desc = "Set uprobe target",
184 .func = config__exec,
185 },
186 {
187 .key = "module",
188 .usage = "module=<module name> ",
189 .desc = "Set kprobe module",
190 .func = config__module,
191 },
192 {
193 .key = "inlines",
194 .usage = "inlines=[yes|no] ",
195 .desc = "Probe at inline symbol",
196 .func = config__inlines,
197 },
198 {
199 .key = "force",
200 .usage = "force=[yes|no] ",
201 .desc = "Forcibly add events with existing name",
202 .func = config__force,
203 },
204};
205
206static int
207do_config(const char *key, const char *value,
208 struct perf_probe_event *pev)
209{
210 unsigned int i;
211
212 pr_debug("config bpf program: %s=%s\n", key, value);
213 for (i = 0; i < ARRAY_SIZE(bpf_config_terms); i++)
214 if (strcmp(key, bpf_config_terms[i].key) == 0)
215 return bpf_config_terms[i].func(value, pev);
216
217 pr_debug("BPF: ERROR: invalid config option in object: %s=%s\n",
218 key, value);
219
220 pr_debug("\nHint: Currently valid options are:\n");
221 for (i = 0; i < ARRAY_SIZE(bpf_config_terms); i++)
222 pr_debug("\t%s:\t%s\n", bpf_config_terms[i].usage,
223 bpf_config_terms[i].desc);
224 pr_debug("\n");
225
226 return -BPF_LOADER_ERRNO__CONFIG_TERM;
227}
228
229static const char *
230parse_config_kvpair(const char *config_str, struct perf_probe_event *pev)
231{
232 char *text = strdup(config_str);
233 char *sep, *line;
234 const char *main_str = NULL;
235 int err = 0;
236
237 if (!text) {
238 pr_debug("Not enough memory: dup config_str failed\n");
239 return ERR_PTR(-ENOMEM);
240 }
241
242 line = text;
243 while ((sep = strchr(line, ';'))) {
244 char *equ;
245
246 *sep = '\0';
247 equ = strchr(line, '=');
248 if (!equ) {
249 pr_warning("WARNING: invalid config in BPF object: %s\n",
250 line);
251 pr_warning("\tShould be 'key=value'.\n");
252 goto nextline;
253 }
254 *equ = '\0';
255
256 err = do_config(line, equ + 1, pev);
257 if (err)
258 break;
259nextline:
260 line = sep + 1;
261 }
262
263 if (!err)
264 main_str = config_str + (line - text);
265 free(text);
266
267 return err ? ERR_PTR(err) : main_str;
268}
269
270static int
271parse_config(const char *config_str, struct perf_probe_event *pev)
272{
273 int err;
274 const char *main_str = parse_config_kvpair(config_str, pev);
275
276 if (IS_ERR(main_str))
277 return PTR_ERR(main_str);
278
279 err = parse_perf_probe_command(main_str, pev);
280 if (err < 0) {
281 pr_debug("bpf: '%s' is not a valid config string\n",
282 config_str);
283 /* parse failed, no need to clear pev. */
284 return -BPF_LOADER_ERRNO__CONFIG;
285 }
286 return 0;
287}
288
289static int
113config_bpf_program(struct bpf_program *prog) 290config_bpf_program(struct bpf_program *prog)
114{ 291{
115 struct perf_probe_event *pev = NULL; 292 struct perf_probe_event *pev = NULL;
@@ -117,6 +294,10 @@ config_bpf_program(struct bpf_program *prog)
117 const char *config_str; 294 const char *config_str;
118 int err; 295 int err;
119 296
297 /* Initialize per-program probing setting */
298 probe_conf.no_inlines = false;
299 probe_conf.force_add = false;
300
120 config_str = bpf_program__title(prog, false); 301 config_str = bpf_program__title(prog, false);
121 if (IS_ERR(config_str)) { 302 if (IS_ERR(config_str)) {
122 pr_debug("bpf: unable to get title for program\n"); 303 pr_debug("bpf: unable to get title for program\n");
@@ -131,13 +312,9 @@ config_bpf_program(struct bpf_program *prog)
131 pev = &priv->pev; 312 pev = &priv->pev;
132 313
133 pr_debug("bpf: config program '%s'\n", config_str); 314 pr_debug("bpf: config program '%s'\n", config_str);
134 err = parse_perf_probe_command(config_str, pev); 315 err = parse_config(config_str, pev);
135 if (err < 0) { 316 if (err)
136 pr_debug("bpf: '%s' is not a valid config string\n",
137 config_str);
138 err = -BPF_LOADER_ERRNO__CONFIG;
139 goto errout; 317 goto errout;
140 }
141 318
142 if (pev->group && strcmp(pev->group, PERF_BPF_PROBE_GROUP)) { 319 if (pev->group && strcmp(pev->group, PERF_BPF_PROBE_GROUP)) {
143 pr_debug("bpf: '%s': group for event is set and not '%s'.\n", 320 pr_debug("bpf: '%s': group for event is set and not '%s'.\n",
@@ -197,6 +374,220 @@ static int bpf__prepare_probe(void)
197 return err; 374 return err;
198} 375}
199 376
377static int
378preproc_gen_prologue(struct bpf_program *prog, int n,
379 struct bpf_insn *orig_insns, int orig_insns_cnt,
380 struct bpf_prog_prep_result *res)
381{
382 struct probe_trace_event *tev;
383 struct perf_probe_event *pev;
384 struct bpf_prog_priv *priv;
385 struct bpf_insn *buf;
386 size_t prologue_cnt = 0;
387 int i, err;
388
389 err = bpf_program__get_private(prog, (void **)&priv);
390 if (err || !priv)
391 goto errout;
392
393 pev = &priv->pev;
394
395 if (n < 0 || n >= priv->nr_types)
396 goto errout;
397
398 /* Find a tev that belongs to that type */
399 for (i = 0; i < pev->ntevs; i++) {
400 if (priv->type_mapping[i] == n)
401 break;
402 }
403
404 if (i >= pev->ntevs) {
405 pr_debug("Internal error: prologue type %d not found\n", n);
406 return -BPF_LOADER_ERRNO__PROLOGUE;
407 }
408
409 tev = &pev->tevs[i];
410
411 buf = priv->insns_buf;
412 err = bpf__gen_prologue(tev->args, tev->nargs,
413 buf, &prologue_cnt,
414 BPF_MAXINSNS - orig_insns_cnt);
415 if (err) {
416 const char *title;
417
418 title = bpf_program__title(prog, false);
419 if (!title)
420 title = "[unknown]";
421
422 pr_debug("Failed to generate prologue for program %s\n",
423 title);
424 return err;
425 }
426
427 memcpy(&buf[prologue_cnt], orig_insns,
428 sizeof(struct bpf_insn) * orig_insns_cnt);
429
430 res->new_insn_ptr = buf;
431 res->new_insn_cnt = prologue_cnt + orig_insns_cnt;
432 res->pfd = NULL;
433 return 0;
434
435errout:
436 pr_debug("Internal error in preproc_gen_prologue\n");
437 return -BPF_LOADER_ERRNO__PROLOGUE;
438}
439
440/*
441 * compare_tev_args is reflexive, transitive and antisymmetric.
442 * I can prove it but this margin is too narrow to contain.
443 */
444static int compare_tev_args(const void *ptev1, const void *ptev2)
445{
446 int i, ret;
447 const struct probe_trace_event *tev1 =
448 *(const struct probe_trace_event **)ptev1;
449 const struct probe_trace_event *tev2 =
450 *(const struct probe_trace_event **)ptev2;
451
452 ret = tev2->nargs - tev1->nargs;
453 if (ret)
454 return ret;
455
456 for (i = 0; i < tev1->nargs; i++) {
457 struct probe_trace_arg *arg1, *arg2;
458 struct probe_trace_arg_ref *ref1, *ref2;
459
460 arg1 = &tev1->args[i];
461 arg2 = &tev2->args[i];
462
463 ret = strcmp(arg1->value, arg2->value);
464 if (ret)
465 return ret;
466
467 ref1 = arg1->ref;
468 ref2 = arg2->ref;
469
470 while (ref1 && ref2) {
471 ret = ref2->offset - ref1->offset;
472 if (ret)
473 return ret;
474
475 ref1 = ref1->next;
476 ref2 = ref2->next;
477 }
478
479 if (ref1 || ref2)
480 return ref2 ? 1 : -1;
481 }
482
483 return 0;
484}
485
486/*
487 * Assign a type number to each tev in a pev.
488 * mapping is an array with the same number of slots as tevs in that pev.
489 * nr_types will be set to the number of types.
490 */
491static int map_prologue(struct perf_probe_event *pev, int *mapping,
492 int *nr_types)
493{
494 int i, type = 0;
495 struct probe_trace_event **ptevs;
496
497 size_t array_sz = sizeof(*ptevs) * pev->ntevs;
498
499 ptevs = malloc(array_sz);
500 if (!ptevs) {
501 pr_debug("Not enough memory: alloc ptevs failed\n");
502 return -ENOMEM;
503 }
504
505 pr_debug("In map_prologue, ntevs=%d\n", pev->ntevs);
506 for (i = 0; i < pev->ntevs; i++)
507 ptevs[i] = &pev->tevs[i];
508
509 qsort(ptevs, pev->ntevs, sizeof(*ptevs),
510 compare_tev_args);
511
512 for (i = 0; i < pev->ntevs; i++) {
513 int n;
514
515 n = ptevs[i] - pev->tevs;
516 if (i == 0) {
517 mapping[n] = type;
518 pr_debug("mapping[%d]=%d\n", n, type);
519 continue;
520 }
521
522 if (compare_tev_args(ptevs + i, ptevs + i - 1) == 0)
523 mapping[n] = type;
524 else
525 mapping[n] = ++type;
526
527 pr_debug("mapping[%d]=%d\n", n, mapping[n]);
528 }
529 free(ptevs);
530 *nr_types = type + 1;
531
532 return 0;
533}
534
535static int hook_load_preprocessor(struct bpf_program *prog)
536{
537 struct perf_probe_event *pev;
538 struct bpf_prog_priv *priv;
539 bool need_prologue = false;
540 int err, i;
541
542 err = bpf_program__get_private(prog, (void **)&priv);
543 if (err || !priv) {
544 pr_debug("Internal error when hooking preprocessor\n");
545 return -BPF_LOADER_ERRNO__INTERNAL;
546 }
547
548 pev = &priv->pev;
549 for (i = 0; i < pev->ntevs; i++) {
550 struct probe_trace_event *tev = &pev->tevs[i];
551
552 if (tev->nargs > 0) {
553 need_prologue = true;
554 break;
555 }
556 }
557
558 /*
559 * Since no tev has an argument, we don't need to generate a
560 * prologue.
561 */
562 if (!need_prologue) {
563 priv->need_prologue = false;
564 return 0;
565 }
566
567 priv->need_prologue = true;
568 priv->insns_buf = malloc(sizeof(struct bpf_insn) * BPF_MAXINSNS);
569 if (!priv->insns_buf) {
570 pr_debug("Not enough memory: alloc insns_buf failed\n");
571 return -ENOMEM;
572 }
573
574 priv->type_mapping = malloc(sizeof(int) * pev->ntevs);
575 if (!priv->type_mapping) {
576 pr_debug("Not enough memory: alloc type_mapping failed\n");
577 return -ENOMEM;
578 }
579 memset(priv->type_mapping, -1,
580 sizeof(int) * pev->ntevs);
581
582 err = map_prologue(pev, priv->type_mapping, &priv->nr_types);
583 if (err)
584 return err;
585
586 err = bpf_program__set_prep(prog, priv->nr_types,
587 preproc_gen_prologue);
588 return err;
589}
590
200int bpf__probe(struct bpf_object *obj) 591int bpf__probe(struct bpf_object *obj)
201{ 592{
202 int err = 0; 593 int err = 0;
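To make the prologue machinery concrete: map_prologue() sorts the expanded probe_trace_events with compare_tev_args() and gives tevs with identical argument lists the same type number, so nr_types ends up as the number of distinct prologues needed. For example (hypothetical), if a probe expands to three tevs where tev[0] and tev[2] fetch exactly the same registers/offsets but tev[1] differs, the result would be type_mapping = {0, 1, 0} and nr_types = 2 (exact numbering depends on sort order); bpf_program__set_prep() then asks libbpf for two instances of the program, and preproc_gen_prologue() is called once per type to prepend the matching prologue.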
@@ -231,6 +622,18 @@ int bpf__probe(struct bpf_object *obj)
231 pr_debug("bpf_probe: failed to apply perf probe events"); 622 pr_debug("bpf_probe: failed to apply perf probe events");
232 goto out; 623 goto out;
233 } 624 }
625
626 /*
627 * After probing, let's consider the prologue, which
628 * adds an argument fetcher to BPF programs.
629 *
630 * hook_load_preprocessor() hooks a pre-processor
631 * to the bpf_program and lets it generate the prologue
632 * dynamically during loading.
633 */
634 err = hook_load_preprocessor(prog);
635 if (err)
636 goto out;
234 } 637 }
235out: 638out:
236 return err < 0 ? err : 0; 639 return err < 0 ? err : 0;
@@ -314,7 +717,14 @@ int bpf__foreach_tev(struct bpf_object *obj,
314 for (i = 0; i < pev->ntevs; i++) { 717 for (i = 0; i < pev->ntevs; i++) {
315 tev = &pev->tevs[i]; 718 tev = &pev->tevs[i];
316 719
317 fd = bpf_program__fd(prog); 720 if (priv->need_prologue) {
721 int type = priv->type_mapping[i];
722
723 fd = bpf_program__nth_fd(prog, type);
724 } else {
725 fd = bpf_program__fd(prog);
726 }
727
318 if (fd < 0) { 728 if (fd < 0) {
319 pr_debug("bpf: failed to get file descriptor\n"); 729 pr_debug("bpf: failed to get file descriptor\n");
320 return fd; 730 return fd;
@@ -340,6 +750,10 @@ static const char *bpf_loader_strerror_table[NR_ERRNO] = {
340 [ERRCODE_OFFSET(EVENTNAME)] = "No event name found in config string", 750 [ERRCODE_OFFSET(EVENTNAME)] = "No event name found in config string",
341 [ERRCODE_OFFSET(INTERNAL)] = "BPF loader internal error", 751 [ERRCODE_OFFSET(INTERNAL)] = "BPF loader internal error",
342 [ERRCODE_OFFSET(COMPILE)] = "Error when compiling BPF scriptlet", 752 [ERRCODE_OFFSET(COMPILE)] = "Error when compiling BPF scriptlet",
753 [ERRCODE_OFFSET(CONFIG_TERM)] = "Invalid config term in config string",
754 [ERRCODE_OFFSET(PROLOGUE)] = "Failed to generate prologue",
755 [ERRCODE_OFFSET(PROLOGUE2BIG)] = "Prologue too big for program",
756 [ERRCODE_OFFSET(PROLOGUEOOB)] = "Offset out of bounds for prologue",
343}; 757};
344 758
345static int 759static int
@@ -420,7 +834,11 @@ int bpf__strerror_probe(struct bpf_object *obj __maybe_unused,
420 int err, char *buf, size_t size) 834 int err, char *buf, size_t size)
421{ 835{
422 bpf__strerror_head(err, buf, size); 836 bpf__strerror_head(err, buf, size);
423 bpf__strerror_entry(EEXIST, "Probe point exist. Try use 'perf probe -d \"*\"'"); 837 case BPF_LOADER_ERRNO__CONFIG_TERM: {
838		scnprintf(buf, size, "%s (add -v to see details)", emsg);
839 break;
840 }
841	bpf__strerror_entry(EEXIST, "Probe point exists. Try 'perf probe -d \"*\"' and set 'force=yes'");
424 bpf__strerror_entry(EACCES, "You need to be root"); 842 bpf__strerror_entry(EACCES, "You need to be root");
425 bpf__strerror_entry(EPERM, "You need to be root, and /proc/sys/kernel/kptr_restrict should be 0"); 843 bpf__strerror_entry(EPERM, "You need to be root, and /proc/sys/kernel/kptr_restrict should be 0");
426 bpf__strerror_entry(ENOENT, "You need to check probing points in BPF file"); 844 bpf__strerror_entry(ENOENT, "You need to check probing points in BPF file");
diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
index 9caf3ae4acf3..a58740b0f31e 100644
--- a/tools/perf/util/bpf-loader.h
+++ b/tools/perf/util/bpf-loader.h
@@ -20,6 +20,10 @@ enum bpf_loader_errno {
20 BPF_LOADER_ERRNO__EVENTNAME, /* Event name is missing */ 20 BPF_LOADER_ERRNO__EVENTNAME, /* Event name is missing */
21 BPF_LOADER_ERRNO__INTERNAL, /* BPF loader internal error */ 21 BPF_LOADER_ERRNO__INTERNAL, /* BPF loader internal error */
22 BPF_LOADER_ERRNO__COMPILE, /* Error when compiling BPF scriptlet */ 22 BPF_LOADER_ERRNO__COMPILE, /* Error when compiling BPF scriptlet */
23	BPF_LOADER_ERRNO__CONFIG_TERM,	/* Invalid config term in config string */
24 BPF_LOADER_ERRNO__PROLOGUE, /* Failed to generate prologue */
25 BPF_LOADER_ERRNO__PROLOGUE2BIG, /* Prologue too big for program */
26	BPF_LOADER_ERRNO__PROLOGUEOOB,	/* Offset out of bounds for prologue */
23 __BPF_LOADER_ERRNO__END, 27 __BPF_LOADER_ERRNO__END,
24}; 28};
25 29
diff --git a/tools/perf/util/bpf-prologue.c b/tools/perf/util/bpf-prologue.c
new file mode 100644
index 000000000000..6cdbee119ceb
--- /dev/null
+++ b/tools/perf/util/bpf-prologue.c
@@ -0,0 +1,455 @@
1/*
2 * bpf-prologue.c
3 *
4 * Copyright (C) 2015 He Kuang <hekuang@huawei.com>
5 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
6 * Copyright (C) 2015 Huawei Inc.
7 */
8
9#include <bpf/libbpf.h>
10#include "perf.h"
11#include "debug.h"
12#include "bpf-loader.h"
13#include "bpf-prologue.h"
14#include "probe-finder.h"
15#include <dwarf-regs.h>
16#include <linux/filter.h>
17
18#define BPF_REG_SIZE 8
19
20#define JMP_TO_ERROR_CODE -1
21#define JMP_TO_SUCCESS_CODE -2
22#define JMP_TO_USER_CODE -3
23
24struct bpf_insn_pos {
25 struct bpf_insn *begin;
26 struct bpf_insn *end;
27 struct bpf_insn *pos;
28};
29
30static inline int
31pos_get_cnt(struct bpf_insn_pos *pos)
32{
33 return pos->pos - pos->begin;
34}
35
36static int
37append_insn(struct bpf_insn new_insn, struct bpf_insn_pos *pos)
38{
39 if (!pos->pos)
40 return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
41
42 if (pos->pos + 1 >= pos->end) {
43 pr_err("bpf prologue: prologue too long\n");
44 pos->pos = NULL;
45 return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
46 }
47
48 *(pos->pos)++ = new_insn;
49 return 0;
50}
51
52static int
53check_pos(struct bpf_insn_pos *pos)
54{
55 if (!pos->pos || pos->pos >= pos->end)
56 return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
57 return 0;
58}
59
60/* Give it a shorter name */
61#define ins(i, p) append_insn((i), (p))
62
63/*
64 * Given a register name (in 'reg'), generate an instruction to
65 * load that register into the eBPF register 'target_reg':
66 * 'ldd target_reg, offset(ctx_reg)', where
67 * ctx_reg is pre-initialized to a pointer to 'struct pt_regs'.
68 */
69static int
70gen_ldx_reg_from_ctx(struct bpf_insn_pos *pos, int ctx_reg,
71 const char *reg, int target_reg)
72{
73 int offset = regs_query_register_offset(reg);
74
75 if (offset < 0) {
76 pr_err("bpf: prologue: failed to get register %s\n",
77 reg);
78 return offset;
79 }
80 ins(BPF_LDX_MEM(BPF_DW, target_reg, ctx_reg, offset), pos);
81
82 return check_pos(pos);
83}
84
85/*
86 * Generate a BPF_FUNC_probe_read function call.
87 *
88 * src_base_addr_reg is a register holding base address,
89 * dst_addr_reg is a register holding dest address (on stack),
90 * result is:
91 *
92 * *[dst_addr_reg] = *([src_base_addr_reg] + offset)
93 *
94 * Arguments of BPF_FUNC_probe_read:
95 * ARG1: ptr to stack (dest)
96 * ARG2: size (8)
97 * ARG3: unsafe ptr (src)
98 */
99static int
100gen_read_mem(struct bpf_insn_pos *pos,
101 int src_base_addr_reg,
102 int dst_addr_reg,
103 long offset)
104{
105 /* mov arg3, src_base_addr_reg */
106 if (src_base_addr_reg != BPF_REG_ARG3)
107 ins(BPF_MOV64_REG(BPF_REG_ARG3, src_base_addr_reg), pos);
108 /* add arg3, #offset */
109 if (offset)
110 ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG3, offset), pos);
111
112 /* mov arg2, #reg_size */
113 ins(BPF_ALU64_IMM(BPF_MOV, BPF_REG_ARG2, BPF_REG_SIZE), pos);
114
115 /* mov arg1, dst_addr_reg */
116 if (dst_addr_reg != BPF_REG_ARG1)
117 ins(BPF_MOV64_REG(BPF_REG_ARG1, dst_addr_reg), pos);
118
119 /* Call probe_read */
120 ins(BPF_EMIT_CALL(BPF_FUNC_probe_read), pos);
121 /*
122 * Error processing: if read fail, goto error code,
123 * will be relocated. Target should be the start of
124 * error processing code.
125 */
126 ins(BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, JMP_TO_ERROR_CODE),
127 pos);
128
129 return check_pos(pos);
130}
131
132/*
133 * Each arg should be bare register. Fetch and save them into argument
134 * registers (r3 - r5).
135 *
136 * BPF_REG_1 should have been initialized with pointer to
137 * 'struct pt_regs'.
138 */
139static int
140gen_prologue_fastpath(struct bpf_insn_pos *pos,
141 struct probe_trace_arg *args, int nargs)
142{
143 int i, err = 0;
144
145 for (i = 0; i < nargs; i++) {
146 err = gen_ldx_reg_from_ctx(pos, BPF_REG_1, args[i].value,
147 BPF_PROLOGUE_START_ARG_REG + i);
148 if (err)
149 goto errout;
150 }
151
152 return check_pos(pos);
153errout:
154 return err;
155}
156
157/*
158 * Slow path:
159 * At least one argument has the form of 'offset($rx)'.
160 *
161 * The following code first stores them onto the stack, then loads
162 * all of them into r3 - r5.
163 * Before final loading, the final result should be:
164 *
165 * low address
166 * BPF_REG_FP - 24 ARG3
167 * BPF_REG_FP - 16 ARG2
168 * BPF_REG_FP - 8 ARG1
169 * BPF_REG_FP
170 * high address
171 *
172 * For each argument (described as: offn(...off2(off1(reg)))),
173 * generate the following code:
174 *
175 * r7 <- fp
176 * r7 <- r7 - stack_offset // Ideally r7 would be set from fp once,
177 * // before fetching any args. However,
178 * // eBPF won't regard r7 as a stack
179 * // pointer unless it is derived
180 * // directly from fp, not from another
181 * // derived pointer. This is why r7 is
182 * // reset to fp for each variable.
183 * r3 <- value of 'reg'-> generated using gen_ldx_reg_from_ctx()
184 * (r7) <- r3 // skip following instructions for bare reg
185 * r3 <- r3 + off1 . // skip if off1 == 0
186 * r2 <- 8 \
187 * r1 <- r7 |-> generated by gen_read_mem()
188 * call probe_read /
189 * jnei r0, 0, err ./
190 * r3 <- (r7)
191 * r3 <- r3 + off2 . // skip if off2 == 0
192 * r2 <- 8 \ // r2 may be clobbered by probe_read, so set it again
193 * r1 <- r7 |-> generated by gen_read_mem()
194 * call probe_read /
195 * jnei r0, 0, err ./
196 * ...
197 */
198static int
199gen_prologue_slowpath(struct bpf_insn_pos *pos,
200 struct probe_trace_arg *args, int nargs)
201{
202 int err, i;
203
204 for (i = 0; i < nargs; i++) {
205 struct probe_trace_arg *arg = &args[i];
206 const char *reg = arg->value;
207 struct probe_trace_arg_ref *ref = NULL;
208 int stack_offset = (i + 1) * -8;
209
210 pr_debug("prologue: fetch arg %d, base reg is %s\n",
211 i, reg);
212
213 /* value of base register is stored into ARG3 */
214 err = gen_ldx_reg_from_ctx(pos, BPF_REG_CTX, reg,
215 BPF_REG_ARG3);
216 if (err) {
217 pr_err("prologue: failed to get offset of register %s\n",
218 reg);
219 goto errout;
220 }
221
222 /* Make r7 the stack pointer. */
223 ins(BPF_MOV64_REG(BPF_REG_7, BPF_REG_FP), pos);
224 /* r7 += -8 */
225 ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, stack_offset), pos);
226 /*
227		 * Store r3 (base register) onto the stack to
228		 * ensure fp[offset] is set.
229		 * fp is the only valid base register when storing
230		 * into the stack; we are not allowed to use r7 as
231		 * the base register here.
232 */
233 ins(BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_ARG3,
234 stack_offset), pos);
235
236 ref = arg->ref;
237 while (ref) {
238 pr_debug("prologue: arg %d: offset %ld\n",
239 i, ref->offset);
240 err = gen_read_mem(pos, BPF_REG_3, BPF_REG_7,
241 ref->offset);
242 if (err) {
243 pr_err("prologue: failed to generate probe_read function call\n");
244 goto errout;
245 }
246
247 ref = ref->next;
248 /*
249 * Load previous result into ARG3. Use
250			 * BPF_REG_FP instead of r7 because the verifier
251			 * allows FP-based addressing only.
252 */
253 if (ref)
254 ins(BPF_LDX_MEM(BPF_DW, BPF_REG_ARG3,
255 BPF_REG_FP, stack_offset), pos);
256 }
257 }
258
259 /* Final pass: read to registers */
260 for (i = 0; i < nargs; i++)
261 ins(BPF_LDX_MEM(BPF_DW, BPF_PROLOGUE_START_ARG_REG + i,
262 BPF_REG_FP, -BPF_REG_SIZE * (i + 1)), pos);
263
264 ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_SUCCESS_CODE), pos);
265
266 return check_pos(pos);
267errout:
268 return err;
269}
270
271static int
272prologue_relocate(struct bpf_insn_pos *pos, struct bpf_insn *error_code,
273 struct bpf_insn *success_code, struct bpf_insn *user_code)
274{
275 struct bpf_insn *insn;
276
277 if (check_pos(pos))
278 return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
279
280 for (insn = pos->begin; insn < pos->pos; insn++) {
281 struct bpf_insn *target;
282 u8 class = BPF_CLASS(insn->code);
283 u8 opcode;
284
285 if (class != BPF_JMP)
286 continue;
287 opcode = BPF_OP(insn->code);
288 if (opcode == BPF_CALL)
289 continue;
290
291 switch (insn->off) {
292 case JMP_TO_ERROR_CODE:
293 target = error_code;
294 break;
295 case JMP_TO_SUCCESS_CODE:
296 target = success_code;
297 break;
298 case JMP_TO_USER_CODE:
299 target = user_code;
300 break;
301 default:
302 pr_err("bpf prologue: internal error: relocation failed\n");
303 return -BPF_LOADER_ERRNO__PROLOGUE;
304 }
305
306 insn->off = target - (insn + 1);
307 }
308 return 0;
309}
310
311int bpf__gen_prologue(struct probe_trace_arg *args, int nargs,
312 struct bpf_insn *new_prog, size_t *new_cnt,
313 size_t cnt_space)
314{
315 struct bpf_insn *success_code = NULL;
316 struct bpf_insn *error_code = NULL;
317 struct bpf_insn *user_code = NULL;
318 struct bpf_insn_pos pos;
319 bool fastpath = true;
320 int err = 0, i;
321
322 if (!new_prog || !new_cnt)
323 return -EINVAL;
324
325 if (cnt_space > BPF_MAXINSNS)
326 cnt_space = BPF_MAXINSNS;
327
328 pos.begin = new_prog;
329 pos.end = new_prog + cnt_space;
330 pos.pos = new_prog;
331
332 if (!nargs) {
333 ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0),
334 &pos);
335
336 if (check_pos(&pos))
337 goto errout;
338
339 *new_cnt = pos_get_cnt(&pos);
340 return 0;
341 }
342
343 if (nargs > BPF_PROLOGUE_MAX_ARGS) {
344 pr_warning("bpf: prologue: %d arguments are dropped\n",
345 nargs - BPF_PROLOGUE_MAX_ARGS);
346 nargs = BPF_PROLOGUE_MAX_ARGS;
347 }
348
349 /* First pass: validation */
350 for (i = 0; i < nargs; i++) {
351 struct probe_trace_arg_ref *ref = args[i].ref;
352
353 if (args[i].value[0] == '@') {
354 /* TODO: fetch global variable */
355			pr_err("bpf: prologue: global %s%+ld not supported\n",
356 args[i].value, ref ? ref->offset : 0);
357 return -ENOTSUP;
358 }
359
360 while (ref) {
361			/* fastpath is true if all args have ref == NULL */
362 fastpath = false;
363
364			/*
365			 * The instruction encodes the immediate value as
366			 * an s32, while ref->offset is a long. On systems
367			 * where a long can't fit in an s32, refuse to
368			 * process if ref->offset is too large (or too small).
369			 */
370#ifdef __LP64__
371#define OFFSET_MAX ((1LL << 31) - 1)
372#define OFFSET_MIN ((1LL << 31) * -1)
373 if (ref->offset > OFFSET_MAX ||
374 ref->offset < OFFSET_MIN) {
375 pr_err("bpf: prologue: offset out of bound: %ld\n",
376 ref->offset);
377 return -BPF_LOADER_ERRNO__PROLOGUEOOB;
378 }
379#endif
380 ref = ref->next;
381 }
382 }
383 pr_debug("prologue: pass validation\n");
384
385 if (fastpath) {
386 /* If all variables are registers... */
387 pr_debug("prologue: fast path\n");
388 err = gen_prologue_fastpath(&pos, args, nargs);
389 if (err)
390 goto errout;
391 } else {
392 pr_debug("prologue: slow path\n");
393
394 /* Initialization: move ctx to a callee saved register. */
395 ins(BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1), &pos);
396
397 err = gen_prologue_slowpath(&pos, args, nargs);
398 if (err)
399 goto errout;
400 /*
401 * start of ERROR_CODE (only slow pass needs error code)
402 * mov r2 <- 1 // r2 is error number
403		 * mov r3 <- 0 // r3, r4... should be touched or the
404		 * // verifier would complain
405 * mov r4 <- 0
406 * ...
407 * goto usercode
408 */
409 error_code = pos.pos;
410 ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 1),
411 &pos);
412
413 for (i = 0; i < nargs; i++)
414 ins(BPF_ALU64_IMM(BPF_MOV,
415 BPF_PROLOGUE_START_ARG_REG + i,
416 0),
417 &pos);
418 ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_USER_CODE),
419 &pos);
420 }
421
422 /*
423 * start of SUCCESS_CODE:
424 * mov r2 <- 0
425 * goto usercode // skip
426 */
427 success_code = pos.pos;
428 ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0), &pos);
429
430 /*
431 * start of USER_CODE:
432 * Restore ctx to r1
433 */
434 user_code = pos.pos;
435 if (!fastpath) {
436 /*
437		 * Only the slow path needs to restore ctx. In the fast
438		 * path, registers are loaded directly from r1.
439 */
440 ins(BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX), &pos);
441 err = prologue_relocate(&pos, error_code, success_code,
442 user_code);
443 if (err)
444 goto errout;
445 }
446
447 err = check_pos(&pos);
448 if (err)
449 goto errout;
450
451 *new_cnt = pos_get_cnt(&pos);
452 return 0;
453errout:
454 return err;
455}
diff --git a/tools/perf/util/bpf-prologue.h b/tools/perf/util/bpf-prologue.h
new file mode 100644
index 000000000000..d94cbea12899
--- /dev/null
+++ b/tools/perf/util/bpf-prologue.h
@@ -0,0 +1,34 @@
1/*
2 * Copyright (C) 2015, He Kuang <hekuang@huawei.com>
3 * Copyright (C) 2015, Huawei Inc.
4 */
5#ifndef __BPF_PROLOGUE_H
6#define __BPF_PROLOGUE_H
7
8#include <linux/compiler.h>
9#include <linux/filter.h>
10#include "probe-event.h"
11
12#define BPF_PROLOGUE_MAX_ARGS 3
13#define BPF_PROLOGUE_START_ARG_REG BPF_REG_3
14#define BPF_PROLOGUE_FETCH_RESULT_REG BPF_REG_2
15
16#ifdef HAVE_BPF_PROLOGUE
17int bpf__gen_prologue(struct probe_trace_arg *args, int nargs,
18 struct bpf_insn *new_prog, size_t *new_cnt,
19 size_t cnt_space);
20#else
21static inline int
22bpf__gen_prologue(struct probe_trace_arg *args __maybe_unused,
23 int nargs __maybe_unused,
24 struct bpf_insn *new_prog __maybe_unused,
25 size_t *new_cnt,
26 size_t cnt_space __maybe_unused)
27{
28 if (!new_cnt)
29 return -EINVAL;
30 *new_cnt = 0;
31 return -ENOTSUP;
32}
33#endif
34#endif /* __BPF_PROLOGUE_H */
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 735ad48e1858..fc3b1e0d09ee 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -44,6 +44,10 @@ static int parse_callchain_mode(const char *value)
44 callchain_param.mode = CHAIN_GRAPH_REL; 44 callchain_param.mode = CHAIN_GRAPH_REL;
45 return 0; 45 return 0;
46 } 46 }
47 if (!strncmp(value, "folded", strlen(value))) {
48 callchain_param.mode = CHAIN_FOLDED;
49 return 0;
50 }
47 return -1; 51 return -1;
48} 52}
49 53
@@ -79,6 +83,23 @@ static int parse_callchain_sort_key(const char *value)
79 return -1; 83 return -1;
80} 84}
81 85
86static int parse_callchain_value(const char *value)
87{
88 if (!strncmp(value, "percent", strlen(value))) {
89 callchain_param.value = CCVAL_PERCENT;
90 return 0;
91 }
92 if (!strncmp(value, "period", strlen(value))) {
93 callchain_param.value = CCVAL_PERIOD;
94 return 0;
95 }
96 if (!strncmp(value, "count", strlen(value))) {
97 callchain_param.value = CCVAL_COUNT;
98 return 0;
99 }
100 return -1;
101}
102
82static int 103static int
83__parse_callchain_report_opt(const char *arg, bool allow_record_opt) 104__parse_callchain_report_opt(const char *arg, bool allow_record_opt)
84{ 105{
@@ -102,7 +123,8 @@ __parse_callchain_report_opt(const char *arg, bool allow_record_opt)
102 123
103 if (!parse_callchain_mode(tok) || 124 if (!parse_callchain_mode(tok) ||
104 !parse_callchain_order(tok) || 125 !parse_callchain_order(tok) ||
105 !parse_callchain_sort_key(tok)) { 126 !parse_callchain_sort_key(tok) ||
127 !parse_callchain_value(tok)) {
106 /* parsing ok - move on to the next */ 128 /* parsing ok - move on to the next */
107 try_stack_size = false; 129 try_stack_size = false;
108 goto next; 130 goto next;
@@ -218,6 +240,7 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
218 240
219 switch (mode) { 241 switch (mode) {
220 case CHAIN_FLAT: 242 case CHAIN_FLAT:
243 case CHAIN_FOLDED:
221 if (rnode->hit < chain->hit) 244 if (rnode->hit < chain->hit)
222 p = &(*p)->rb_left; 245 p = &(*p)->rb_left;
223 else 246 else
@@ -338,6 +361,7 @@ int callchain_register_param(struct callchain_param *param)
338 param->sort = sort_chain_graph_rel; 361 param->sort = sort_chain_graph_rel;
339 break; 362 break;
340 case CHAIN_FLAT: 363 case CHAIN_FLAT:
364 case CHAIN_FOLDED:
341 param->sort = sort_chain_flat; 365 param->sort = sort_chain_flat;
342 break; 366 break;
343 case CHAIN_NONE: 367 case CHAIN_NONE:
@@ -363,6 +387,7 @@ create_child(struct callchain_node *parent, bool inherit_children)
363 } 387 }
364 new->parent = parent; 388 new->parent = parent;
365 INIT_LIST_HEAD(&new->val); 389 INIT_LIST_HEAD(&new->val);
390 INIT_LIST_HEAD(&new->parent_val);
366 391
367 if (inherit_children) { 392 if (inherit_children) {
368 struct rb_node *n; 393 struct rb_node *n;
@@ -431,6 +456,8 @@ add_child(struct callchain_node *parent,
431 456
432 new->children_hit = 0; 457 new->children_hit = 0;
433 new->hit = period; 458 new->hit = period;
459 new->children_count = 0;
460 new->count = 1;
434 return new; 461 return new;
435} 462}
436 463
@@ -478,6 +505,9 @@ split_add_child(struct callchain_node *parent,
478 parent->children_hit = callchain_cumul_hits(new); 505 parent->children_hit = callchain_cumul_hits(new);
479 new->val_nr = parent->val_nr - idx_local; 506 new->val_nr = parent->val_nr - idx_local;
480 parent->val_nr = idx_local; 507 parent->val_nr = idx_local;
508 new->count = parent->count;
509 new->children_count = parent->children_count;
510 parent->children_count = callchain_cumul_counts(new);
481 511
482 /* create a new child for the new branch if any */ 512 /* create a new child for the new branch if any */
483 if (idx_total < cursor->nr) { 513 if (idx_total < cursor->nr) {
@@ -488,6 +518,8 @@ split_add_child(struct callchain_node *parent,
488 518
489 parent->hit = 0; 519 parent->hit = 0;
490 parent->children_hit += period; 520 parent->children_hit += period;
521 parent->count = 0;
522 parent->children_count += 1;
491 523
492 node = callchain_cursor_current(cursor); 524 node = callchain_cursor_current(cursor);
493 new = add_child(parent, cursor, period); 525 new = add_child(parent, cursor, period);
@@ -510,6 +542,7 @@ split_add_child(struct callchain_node *parent,
510 rb_insert_color(&new->rb_node_in, &parent->rb_root_in); 542 rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
511 } else { 543 } else {
512 parent->hit = period; 544 parent->hit = period;
545 parent->count = 1;
513 } 546 }
514} 547}
515 548
@@ -556,6 +589,7 @@ append_chain_children(struct callchain_node *root,
556 589
557inc_children_hit: 590inc_children_hit:
558 root->children_hit += period; 591 root->children_hit += period;
592 root->children_count++;
559} 593}
560 594
561static int 595static int
@@ -608,6 +642,7 @@ append_chain(struct callchain_node *root,
608 /* we match 100% of the path, increment the hit */ 642 /* we match 100% of the path, increment the hit */
609 if (matches == root->val_nr && cursor->pos == cursor->nr) { 643 if (matches == root->val_nr && cursor->pos == cursor->nr) {
610 root->hit += period; 644 root->hit += period;
645 root->count++;
611 return 0; 646 return 0;
612 } 647 }
613 648
@@ -799,12 +834,72 @@ char *callchain_list__sym_name(struct callchain_list *cl,
799 return bf; 834 return bf;
800} 835}
801 836
837char *callchain_node__scnprintf_value(struct callchain_node *node,
838 char *bf, size_t bfsize, u64 total)
839{
840 double percent = 0.0;
841 u64 period = callchain_cumul_hits(node);
842 unsigned count = callchain_cumul_counts(node);
843
844 if (callchain_param.mode == CHAIN_FOLDED) {
845 period = node->hit;
846 count = node->count;
847 }
848
849 switch (callchain_param.value) {
850 case CCVAL_PERIOD:
851 scnprintf(bf, bfsize, "%"PRIu64, period);
852 break;
853 case CCVAL_COUNT:
854 scnprintf(bf, bfsize, "%u", count);
855 break;
856 case CCVAL_PERCENT:
857 default:
858 if (total)
859 percent = period * 100.0 / total;
860 scnprintf(bf, bfsize, "%.2f%%", percent);
861 break;
862 }
863 return bf;
864}
865
866int callchain_node__fprintf_value(struct callchain_node *node,
867 FILE *fp, u64 total)
868{
869 double percent = 0.0;
870 u64 period = callchain_cumul_hits(node);
871 unsigned count = callchain_cumul_counts(node);
872
873 if (callchain_param.mode == CHAIN_FOLDED) {
874 period = node->hit;
875 count = node->count;
876 }
877
878 switch (callchain_param.value) {
879 case CCVAL_PERIOD:
880 return fprintf(fp, "%"PRIu64, period);
881 case CCVAL_COUNT:
882 return fprintf(fp, "%u", count);
883 case CCVAL_PERCENT:
884 default:
885 if (total)
886 percent = period * 100.0 / total;
887 return percent_color_fprintf(fp, "%.2f%%", percent);
888 }
889 return 0;
890}
891
802static void free_callchain_node(struct callchain_node *node) 892static void free_callchain_node(struct callchain_node *node)
803{ 893{
804 struct callchain_list *list, *tmp; 894 struct callchain_list *list, *tmp;
805 struct callchain_node *child; 895 struct callchain_node *child;
806 struct rb_node *n; 896 struct rb_node *n;
807 897
898 list_for_each_entry_safe(list, tmp, &node->parent_val, list) {
899 list_del(&list->list);
900 free(list);
901 }
902
808 list_for_each_entry_safe(list, tmp, &node->val, list) { 903 list_for_each_entry_safe(list, tmp, &node->val, list) {
809 list_del(&list->list); 904 list_del(&list->list);
810 free(list); 905 free(list);
@@ -828,3 +923,41 @@ void free_callchain(struct callchain_root *root)
828 923
829 free_callchain_node(&root->node); 924 free_callchain_node(&root->node);
830} 925}
926
927int callchain_node__make_parent_list(struct callchain_node *node)
928{
929 struct callchain_node *parent = node->parent;
930 struct callchain_list *chain, *new;
931 LIST_HEAD(head);
932
933 while (parent) {
934 list_for_each_entry_reverse(chain, &parent->val, list) {
935 new = malloc(sizeof(*new));
936 if (new == NULL)
937 goto out;
938 *new = *chain;
939 new->has_children = false;
940 list_add_tail(&new->list, &head);
941 }
942 parent = parent->parent;
943 }
944
945 list_for_each_entry_safe_reverse(chain, new, &head, list)
946 list_move_tail(&chain->list, &node->parent_val);
947
948 if (!list_empty(&node->parent_val)) {
949 chain = list_first_entry(&node->parent_val, struct callchain_list, list);
950 chain->has_children = rb_prev(&node->rb_node) || rb_next(&node->rb_node);
951
952 chain = list_first_entry(&node->val, struct callchain_list, list);
953 chain->has_children = false;
954 }
955 return 0;
956
957out:
958 list_for_each_entry_safe(chain, new, &head, list) {
959 list_del(&chain->list);
960 free(chain);
961 }
962 return -ENOMEM;
963}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index fce8161e54db..6e9b5f2099e1 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -24,12 +24,13 @@
24#define CALLCHAIN_RECORD_HELP CALLCHAIN_HELP RECORD_MODE_HELP RECORD_SIZE_HELP 24#define CALLCHAIN_RECORD_HELP CALLCHAIN_HELP RECORD_MODE_HELP RECORD_SIZE_HELP
25 25
26#define CALLCHAIN_REPORT_HELP \ 26#define CALLCHAIN_REPORT_HELP \
27 HELP_PAD "print_type:\tcall graph printing style (graph|flat|fractal|none)\n" \ 27 HELP_PAD "print_type:\tcall graph printing style (graph|flat|fractal|folded|none)\n" \
28 HELP_PAD "threshold:\tminimum call graph inclusion threshold (<percent>)\n" \ 28 HELP_PAD "threshold:\tminimum call graph inclusion threshold (<percent>)\n" \
29 HELP_PAD "print_limit:\tmaximum number of call graph entry (<number>)\n" \ 29 HELP_PAD "print_limit:\tmaximum number of call graph entry (<number>)\n" \
30 HELP_PAD "order:\t\tcall graph order (caller|callee)\n" \ 30 HELP_PAD "order:\t\tcall graph order (caller|callee)\n" \
31 HELP_PAD "sort_key:\tcall graph sort key (function|address)\n" \ 31 HELP_PAD "sort_key:\tcall graph sort key (function|address)\n" \
32 HELP_PAD "branch:\t\tinclude last branch info to call graph (branch)\n" 32 HELP_PAD "branch:\t\tinclude last branch info to call graph (branch)\n" \
33 HELP_PAD "value:\t\tcall graph value (percent|period|count)\n"
33 34
34enum perf_call_graph_mode { 35enum perf_call_graph_mode {
35 CALLCHAIN_NONE, 36 CALLCHAIN_NONE,
@@ -43,7 +44,8 @@ enum chain_mode {
43 CHAIN_NONE, 44 CHAIN_NONE,
44 CHAIN_FLAT, 45 CHAIN_FLAT,
45 CHAIN_GRAPH_ABS, 46 CHAIN_GRAPH_ABS,
46 CHAIN_GRAPH_REL 47 CHAIN_GRAPH_REL,
48 CHAIN_FOLDED,
47}; 49};
48 50
49enum chain_order { 51enum chain_order {
@@ -54,11 +56,14 @@ enum chain_order {
54struct callchain_node { 56struct callchain_node {
55 struct callchain_node *parent; 57 struct callchain_node *parent;
56 struct list_head val; 58 struct list_head val;
59 struct list_head parent_val;
57 struct rb_node rb_node_in; /* to insert nodes in an rbtree */ 60 struct rb_node rb_node_in; /* to insert nodes in an rbtree */
58 struct rb_node rb_node; /* to sort nodes in an output tree */ 61 struct rb_node rb_node; /* to sort nodes in an output tree */
59 struct rb_root rb_root_in; /* input tree of children */ 62 struct rb_root rb_root_in; /* input tree of children */
60 struct rb_root rb_root; /* sorted output tree of children */ 63 struct rb_root rb_root; /* sorted output tree of children */
61 unsigned int val_nr; 64 unsigned int val_nr;
65 unsigned int count;
66 unsigned int children_count;
62 u64 hit; 67 u64 hit;
63 u64 children_hit; 68 u64 children_hit;
64}; 69};
@@ -78,6 +83,12 @@ enum chain_key {
78 CCKEY_ADDRESS 83 CCKEY_ADDRESS
79}; 84};
80 85
86enum chain_value {
87 CCVAL_PERCENT,
88 CCVAL_PERIOD,
89 CCVAL_COUNT,
90};
91
81struct callchain_param { 92struct callchain_param {
82 bool enabled; 93 bool enabled;
83 enum perf_call_graph_mode record_mode; 94 enum perf_call_graph_mode record_mode;
@@ -90,6 +101,7 @@ struct callchain_param {
90 bool order_set; 101 bool order_set;
91 enum chain_key key; 102 enum chain_key key;
92 bool branch_callstack; 103 bool branch_callstack;
104 enum chain_value value;
93}; 105};
94 106
95extern struct callchain_param callchain_param; 107extern struct callchain_param callchain_param;
@@ -144,6 +156,11 @@ static inline u64 callchain_cumul_hits(struct callchain_node *node)
144 return node->hit + node->children_hit; 156 return node->hit + node->children_hit;
145} 157}
146 158
159static inline unsigned callchain_cumul_counts(struct callchain_node *node)
160{
161 return node->count + node->children_count;
162}
163
147int callchain_register_param(struct callchain_param *param); 164int callchain_register_param(struct callchain_param *param);
148int callchain_append(struct callchain_root *root, 165int callchain_append(struct callchain_root *root,
149 struct callchain_cursor *cursor, 166 struct callchain_cursor *cursor,
@@ -229,7 +246,12 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused,
229 246
230char *callchain_list__sym_name(struct callchain_list *cl, 247char *callchain_list__sym_name(struct callchain_list *cl,
231 char *bf, size_t bfsize, bool show_dso); 248 char *bf, size_t bfsize, bool show_dso);
249char *callchain_node__scnprintf_value(struct callchain_node *node,
250 char *bf, size_t bfsize, u64 total);
251int callchain_node__fprintf_value(struct callchain_node *node,
252 FILE *fp, u64 total);
232 253
233void free_callchain(struct callchain_root *root); 254void free_callchain(struct callchain_root *root);
255int callchain_node__make_parent_list(struct callchain_node *node);
234 256
235#endif /* __PERF_CALLCHAIN_H */ 257#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 425df5c86c9c..e8e9a9dbf5e3 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -1243,6 +1243,8 @@ struct dso *__dsos__addnew(struct dsos *dsos, const char *name)
1243 if (dso != NULL) { 1243 if (dso != NULL) {
1244 __dsos__add(dsos, dso); 1244 __dsos__add(dsos, dso);
1245 dso__set_basename(dso); 1245 dso__set_basename(dso);
1246		/* Put dso here because __dsos__add already got it */
1247 dso__put(dso);
1246 } 1248 }
1247 return dso; 1249 return dso;
1248} 1250}
diff --git a/tools/perf/util/exec_cmd.c b/tools/perf/util/exec_cmd.c
index 7adf4ad15d8f..1099e92f5ee1 100644
--- a/tools/perf/util/exec_cmd.c
+++ b/tools/perf/util/exec_cmd.c
@@ -9,17 +9,17 @@
9static const char *argv_exec_path; 9static const char *argv_exec_path;
10static const char *argv0_path; 10static const char *argv0_path;
11 11
12const char *system_path(const char *path) 12char *system_path(const char *path)
13{ 13{
14 static const char *prefix = PREFIX; 14 static const char *prefix = PREFIX;
15 struct strbuf d = STRBUF_INIT; 15 struct strbuf d = STRBUF_INIT;
16 16
17 if (is_absolute_path(path)) 17 if (is_absolute_path(path))
18 return path; 18 return strdup(path);
19 19
20 strbuf_addf(&d, "%s/%s", prefix, path); 20 strbuf_addf(&d, "%s/%s", prefix, path);
21 path = strbuf_detach(&d, NULL); 21 path = strbuf_detach(&d, NULL);
22 return path; 22 return (char *)path;
23} 23}
24 24
25const char *perf_extract_argv0_path(const char *argv0) 25const char *perf_extract_argv0_path(const char *argv0)
@@ -52,17 +52,16 @@ void perf_set_argv_exec_path(const char *exec_path)
52 52
53 53
54/* Returns the highest-priority, location to look for perf programs. */ 54/* Returns the highest-priority, location to look for perf programs. */
55const char *perf_exec_path(void) 55char *perf_exec_path(void)
56{ 56{
57 const char *env; 57 char *env;
58 58
59 if (argv_exec_path) 59 if (argv_exec_path)
60 return argv_exec_path; 60 return strdup(argv_exec_path);
61 61
62 env = getenv(EXEC_PATH_ENVIRONMENT); 62 env = getenv(EXEC_PATH_ENVIRONMENT);
63 if (env && *env) { 63 if (env && *env)
64 return env; 64 return strdup(env);
65 }
66 65
67 return system_path(PERF_EXEC_PATH); 66 return system_path(PERF_EXEC_PATH);
68} 67}
@@ -83,9 +82,11 @@ void setup_path(void)
83{ 82{
84 const char *old_path = getenv("PATH"); 83 const char *old_path = getenv("PATH");
85 struct strbuf new_path = STRBUF_INIT; 84 struct strbuf new_path = STRBUF_INIT;
85 char *tmp = perf_exec_path();
86 86
87 add_path(&new_path, perf_exec_path()); 87 add_path(&new_path, tmp);
88 add_path(&new_path, argv0_path); 88 add_path(&new_path, argv0_path);
89 free(tmp);
89 90
90 if (old_path) 91 if (old_path)
91 strbuf_addstr(&new_path, old_path); 92 strbuf_addstr(&new_path, old_path);
diff --git a/tools/perf/util/exec_cmd.h b/tools/perf/util/exec_cmd.h
index bc4b915963f5..48b4175f1e11 100644
--- a/tools/perf/util/exec_cmd.h
+++ b/tools/perf/util/exec_cmd.h
@@ -3,10 +3,11 @@
3 3
4extern void perf_set_argv_exec_path(const char *exec_path); 4extern void perf_set_argv_exec_path(const char *exec_path);
5extern const char *perf_extract_argv0_path(const char *path); 5extern const char *perf_extract_argv0_path(const char *path);
6extern const char *perf_exec_path(void);
7extern void setup_path(void); 6extern void setup_path(void);
8extern int execv_perf_cmd(const char **argv); /* NULL terminated */ 7extern int execv_perf_cmd(const char **argv); /* NULL terminated */
9extern int execl_perf_cmd(const char *cmd, ...); 8extern int execl_perf_cmd(const char *cmd, ...);
10extern const char *system_path(const char *path); 9/* perf_exec_path and system_path return malloc'd string, caller must free it */
10extern char *perf_exec_path(void);
11extern char *system_path(const char *path);
11 12
12#endif /* __PERF_EXEC_CMD_H */ 13#endif /* __PERF_EXEC_CMD_H */
diff --git a/tools/perf/util/help.c b/tools/perf/util/help.c
index 86c37c472263..fa1fc4acb8a4 100644
--- a/tools/perf/util/help.c
+++ b/tools/perf/util/help.c
@@ -159,7 +159,7 @@ void load_command_list(const char *prefix,
159 struct cmdnames *other_cmds) 159 struct cmdnames *other_cmds)
160{ 160{
161 const char *env_path = getenv("PATH"); 161 const char *env_path = getenv("PATH");
162 const char *exec_path = perf_exec_path(); 162 char *exec_path = perf_exec_path();
163 163
164 if (exec_path) { 164 if (exec_path) {
165 list_commands_in_dir(main_cmds, exec_path, prefix); 165 list_commands_in_dir(main_cmds, exec_path, prefix);
@@ -187,6 +187,7 @@ void load_command_list(const char *prefix,
187 sizeof(*other_cmds->names), cmdname_compare); 187 sizeof(*other_cmds->names), cmdname_compare);
188 uniq(other_cmds); 188 uniq(other_cmds);
189 } 189 }
190 free(exec_path);
190 exclude_cmds(other_cmds, main_cmds); 191 exclude_cmds(other_cmds, main_cmds);
191} 192}
192 193
@@ -203,13 +204,14 @@ void list_commands(const char *title, struct cmdnames *main_cmds,
203 longest = other_cmds->names[i]->len; 204 longest = other_cmds->names[i]->len;
204 205
205 if (main_cmds->cnt) { 206 if (main_cmds->cnt) {
206 const char *exec_path = perf_exec_path(); 207 char *exec_path = perf_exec_path();
207 printf("available %s in '%s'\n", title, exec_path); 208 printf("available %s in '%s'\n", title, exec_path);
208 printf("----------------"); 209 printf("----------------");
209 mput_char('-', strlen(title) + strlen(exec_path)); 210 mput_char('-', strlen(title) + strlen(exec_path));
210 putchar('\n'); 211 putchar('\n');
211 pretty_print_string_list(main_cmds, longest); 212 pretty_print_string_list(main_cmds, longest);
212 putchar('\n'); 213 putchar('\n');
214 free(exec_path);
213 } 215 }
214 216
215 if (other_cmds->cnt) { 217 if (other_cmds->cnt) {
diff --git a/tools/perf/util/include/linux/string.h b/tools/perf/util/include/linux/string.h
deleted file mode 100644
index 6f19c548ecc0..000000000000
--- a/tools/perf/util/include/linux/string.h
+++ /dev/null
@@ -1,3 +0,0 @@
1#include <string.h>
2
3void *memdup(const void *src, size_t len);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 8b303ff20289..7f5071a4d9aa 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -122,6 +122,7 @@ void machine__delete_threads(struct machine *machine)
122 122
123void machine__exit(struct machine *machine) 123void machine__exit(struct machine *machine)
124{ 124{
125 machine__destroy_kernel_maps(machine);
125 map_groups__exit(&machine->kmaps); 126 map_groups__exit(&machine->kmaps);
126 dsos__exit(&machine->dsos); 127 dsos__exit(&machine->dsos);
127 machine__exit_vdso(machine); 128 machine__exit_vdso(machine);
@@ -564,7 +565,7 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start,
564 const char *filename) 565 const char *filename)
565{ 566{
566 struct map *map = NULL; 567 struct map *map = NULL;
567 struct dso *dso; 568 struct dso *dso = NULL;
568 struct kmod_path m; 569 struct kmod_path m;
569 570
570 if (kmod_path__parse_name(&m, filename)) 571 if (kmod_path__parse_name(&m, filename))
@@ -585,7 +586,11 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start,
585 586
586 map_groups__insert(&machine->kmaps, map); 587 map_groups__insert(&machine->kmaps, map);
587 588
589	/* Put the map here because map_groups__insert already got it */
590 map__put(map);
588out: 591out:
592 /* put the dso here, corresponding to machine__findnew_module_dso */
593 dso__put(dso);
589 free(m.name); 594 free(m.name);
590 return map; 595 return map;
591} 596}
@@ -788,6 +793,7 @@ void machine__destroy_kernel_maps(struct machine *machine)
788 kmap->ref_reloc_sym = NULL; 793 kmap->ref_reloc_sym = NULL;
789 } 794 }
790 795
796 map__put(machine->vmlinux_maps[type]);
791 machine->vmlinux_maps[type] = NULL; 797 machine->vmlinux_maps[type] = NULL;
792 } 798 }
793} 799}
@@ -1084,11 +1090,14 @@ int machine__create_kernel_maps(struct machine *machine)
1084 struct dso *kernel = machine__get_kernel(machine); 1090 struct dso *kernel = machine__get_kernel(machine);
1085 const char *name; 1091 const char *name;
1086 u64 addr = machine__get_running_kernel_start(machine, &name); 1092 u64 addr = machine__get_running_kernel_start(machine, &name);
1087 if (!addr) 1093 int ret;
1094
1095 if (!addr || kernel == NULL)
1088 return -1; 1096 return -1;
1089 1097
1090 if (kernel == NULL || 1098 ret = __machine__create_kernel_maps(machine, kernel);
1091 __machine__create_kernel_maps(machine, kernel) < 0) 1099 dso__put(kernel);
1100 if (ret < 0)
1092 return -1; 1101 return -1;
1093 1102
1094 if (symbol_conf.use_modules && machine__create_modules(machine) < 0) { 1103 if (symbol_conf.use_modules && machine__create_modules(machine) < 0) {
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 03875f9154e7..93996ec4bbe3 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2326,8 +2326,11 @@ static int get_new_event_name(char *buf, size_t len, const char *base,
2326 goto out; 2326 goto out;
2327 2327
2328 if (!allow_suffix) { 2328 if (!allow_suffix) {
2329 pr_warning("Error: event \"%s\" already exists. " 2329 pr_warning("Error: event \"%s\" already exists.\n"
2330 "(Use -f to force duplicates.)\n", buf); 2330 " Hint: Remove existing event by 'perf probe -d'\n"
2331 " or force duplicates by 'perf probe -f'\n"
2332 " or set 'force=yes' in BPF source.\n",
2333 buf);
2331 ret = -EEXIST; 2334 ret = -EEXIST;
2332 goto out; 2335 goto out;
2333 } 2336 }
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 05012bb178d7..1cab05a3831e 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -683,21 +683,24 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
683 ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1); 683 ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1);
684 if (ret <= 0 || nops == 0) { 684 if (ret <= 0 || nops == 0) {
685 pf->fb_ops = NULL; 685 pf->fb_ops = NULL;
686 ret = 0;
686#if _ELFUTILS_PREREQ(0, 142) 687#if _ELFUTILS_PREREQ(0, 142)
687 } else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa && 688 } else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa &&
688 pf->cfi != NULL) { 689 pf->cfi != NULL) {
689 Dwarf_Frame *frame; 690 Dwarf_Frame *frame = NULL;
690 if (dwarf_cfi_addrframe(pf->cfi, pf->addr, &frame) != 0 || 691 if (dwarf_cfi_addrframe(pf->cfi, pf->addr, &frame) != 0 ||
691 dwarf_frame_cfa(frame, &pf->fb_ops, &nops) != 0) { 692 dwarf_frame_cfa(frame, &pf->fb_ops, &nops) != 0) {
692 pr_warning("Failed to get call frame on 0x%jx\n", 693 pr_warning("Failed to get call frame on 0x%jx\n",
693 (uintmax_t)pf->addr); 694 (uintmax_t)pf->addr);
694 return -ENOENT; 695 ret = -ENOENT;
695 } 696 }
697 free(frame);
696#endif 698#endif
697 } 699 }
698 700
699 /* Call finder's callback handler */ 701 /* Call finder's callback handler */
700 ret = pf->callback(sc_die, pf); 702 if (ret >= 0)
703 ret = pf->callback(sc_die, pf);
701 704
702 /* *pf->fb_ops will be cached in libdw. Don't free it. */ 705 /* *pf->fb_ops will be cached in libdw. Don't free it. */
703 pf->fb_ops = NULL; 706 pf->fb_ops = NULL;
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index fc8781de62db..7f7e072be746 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -342,22 +342,6 @@ char *rtrim(char *s)
342 return s; 342 return s;
343} 343}
344 344
345/**
346 * memdup - duplicate region of memory
347 * @src: memory region to duplicate
348 * @len: memory region length
349 */
350void *memdup(const void *src, size_t len)
351{
352 void *p;
353
354 p = malloc(len);
355 if (p)
356 memcpy(p, src, len);
357
358 return p;
359}
360
361char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints) 345char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints)
362{ 346{
363 /* 347 /*
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 475d88d0a1c9..53f19968bfa2 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -1042,6 +1042,8 @@ int dso__load_sym(struct dso *dso, struct map *map,
1042 } 1042 }
1043 curr_dso->symtab_type = dso->symtab_type; 1043 curr_dso->symtab_type = dso->symtab_type;
1044 map_groups__insert(kmaps, curr_map); 1044 map_groups__insert(kmaps, curr_map);
1045 /* kmaps already got it */
1046 map__put(curr_map);
1045 dsos__add(&map->groups->machine->dsos, curr_dso); 1047 dsos__add(&map->groups->machine->dsos, curr_dso);
1046 dso__set_loaded(curr_dso, map->type); 1048 dso__set_loaded(curr_dso, map->type);
1047 } else 1049 } else
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 47b1e36c7ea0..75759aebc7b8 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -21,7 +21,8 @@ struct callchain_param callchain_param = {
21 .mode = CHAIN_GRAPH_ABS, 21 .mode = CHAIN_GRAPH_ABS,
22 .min_percent = 0.5, 22 .min_percent = 0.5,
23 .order = ORDER_CALLEE, 23 .order = ORDER_CALLEE,
24 .key = CCKEY_FUNCTION 24 .key = CCKEY_FUNCTION,
25 .value = CCVAL_PERCENT,
25}; 26};
26 27
27/* 28/*