diff options
| author | Ingo Molnar <mingo@kernel.org> | 2015-11-23 03:13:48 -0500 |
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2015-11-23 03:45:53 -0500 |
| commit | 8c2accc8ca0be9cd8119ca439038243dfc8fcd0d (patch) | |
| tree | ec57994aef7e5c4a63d7f36f798c3e8a2139066c /tools/lib | |
| parent | 90eec103b96e30401c0b846045bf8a1c7159b6da (diff) | |
| parent | 2c6caff2b26fde8f3f87183f8c97f2cebfdbcb98 (diff) | |
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
User visible changes:
- Allow BPF scriptlets to specify arguments to be fetched using
DWARF info, using a prologue generated at compile/build time (He Kuang, Wang Nan)
- Allow attaching BPF scriptlets to module symbols (Wang Nan)
- Allow attaching BPF scriptlets to userspace code using uprobe (Wang Nan)
- BPF programs can now specify 'perf probe' tunables via their section names,
separating key=val values using semicolons (Wang Nan)
Testing some of these new BPF features:
Use case: get callchains when receiving SSL packets, filter them in the
kernel, at an arbitrary place.
# cat ssl.bpf.c
#define SEC(NAME) __attribute__((section(NAME), used))
struct pt_regs;
SEC("func=__inet_lookup_established hnum")
int func(struct pt_regs *ctx, int err, unsigned short port)
{
return err == 0 && port == 443;
}
char _license[] SEC("license") = "GPL";
int _version SEC("version") = LINUX_VERSION_CODE;
#
# perf record -a -g -e ssl.bpf.c
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.787 MB perf.data (3 samples) ]
# perf script | head -30
swapper 0 [000] 58783.268118: perf_bpf_probe:func: (ffffffff816a0f60) hnum=0x1bb
8a0f61 __inet_lookup_established (/lib/modules/4.3.0+/build/vmlinux)
896def ip_rcv_finish (/lib/modules/4.3.0+/build/vmlinux)
8976c2 ip_rcv (/lib/modules/4.3.0+/build/vmlinux)
855eba __netif_receive_skb_core (/lib/modules/4.3.0+/build/vmlinux)
8565d8 __netif_receive_skb (/lib/modules/4.3.0+/build/vmlinux)
8572a8 process_backlog (/lib/modules/4.3.0+/build/vmlinux)
856b11 net_rx_action (/lib/modules/4.3.0+/build/vmlinux)
2a284b __do_softirq (/lib/modules/4.3.0+/build/vmlinux)
2a2ba3 irq_exit (/lib/modules/4.3.0+/build/vmlinux)
96b7a4 do_IRQ (/lib/modules/4.3.0+/build/vmlinux)
969807 ret_from_intr (/lib/modules/4.3.0+/build/vmlinux)
2dede5 cpu_startup_entry (/lib/modules/4.3.0+/build/vmlinux)
95d5bc rest_init (/lib/modules/4.3.0+/build/vmlinux)
1163ffa start_kernel ([kernel.vmlinux].init.text)
11634d7 x86_64_start_reservations ([kernel.vmlinux].init.text)
1163623 x86_64_start_kernel ([kernel.vmlinux].init.text)
qemu-system-x86 9178 [003] 58785.792417: perf_bpf_probe:func: (ffffffff816a0f60) hnum=0x1bb
8a0f61 __inet_lookup_established (/lib/modules/4.3.0+/build/vmlinux)
896def ip_rcv_finish (/lib/modules/4.3.0+/build/vmlinux)
8976c2 ip_rcv (/lib/modules/4.3.0+/build/vmlinux)
855eba __netif_receive_skb_core (/lib/modules/4.3.0+/build/vmlinux)
8565d8 __netif_receive_skb (/lib/modules/4.3.0+/build/vmlinux)
856660 netif_receive_skb_internal (/lib/modules/4.3.0+/build/vmlinux)
8566ec netif_receive_skb_sk (/lib/modules/4.3.0+/build/vmlinux)
430a br_handle_frame_finish ([bridge])
48bc br_handle_frame ([bridge])
855f44 __netif_receive_skb_core (/lib/modules/4.3.0+/build/vmlinux)
8565d8 __netif_receive_skb (/lib/modules/4.3.0+/build/vmlinux)
#
Use the various 'perf probe' options to list functions, see what variables can
be collected at any given point, experiment first collecting without a filter,
then filter, use it together with 'perf trace', 'perf top', with or without
callchains; if it explodes, please tell us!
- Introduce a new callchain mode: "folded", that will list per line
representations of all callchains for a given histogram entry, facilitating
'perf report' output processing by other tools, such as Brendan Gregg's
flamegraph tools (Namhyung Kim)
E.g:
# perf report | grep -v ^# | head
18.37% 0.00% swapper [kernel.kallsyms] [k] cpu_startup_entry
|
---cpu_startup_entry
|
|--12.07%--start_secondary
|
--6.30%--rest_init
start_kernel
x86_64_start_reservations
x86_64_start_kernel
#
Becomes, in "folded" mode:
# perf report -g folded | grep -v ^# | head -5
18.37% 0.00% swapper [kernel.kallsyms] [k] cpu_startup_entry
12.07% cpu_startup_entry;start_secondary
6.30% cpu_startup_entry;rest_init;start_kernel;x86_64_start_reservations;x86_64_start_kernel
16.90% 0.00% swapper [kernel.kallsyms] [k] call_cpuidle
11.23% call_cpuidle;cpu_startup_entry;start_secondary
5.67% call_cpuidle;cpu_startup_entry;rest_init;start_kernel;x86_64_start_reservations;x86_64_start_kernel
16.90% 0.00% swapper [kernel.kallsyms] [k] cpuidle_enter
11.23% cpuidle_enter;call_cpuidle;cpu_startup_entry;start_secondary
5.67% cpuidle_enter;call_cpuidle;cpu_startup_entry;rest_init;start_kernel;x86_64_start_reservations;x86_64_start_kernel
15.12% 0.00% swapper [kernel.kallsyms] [k] cpuidle_enter_state
#
The user can also select one of "count", "period" or "percent" as the first column.
Infrastructure changes:
- Fix multiple leaks found with Valgrind and a refcount
debugger (Masami Hiramatsu)
- Add further 'perf test' entries for BPF and LLVM (Wang Nan)
- Improve 'perf test' to support subtests, so that the series of tests
performed in the LLVM and BPF main tests appear in the default 'perf test'
output (Wang Nan)
- Move memdup() from tools/perf to tools/lib/string.c (Arnaldo Carvalho de Melo)
- Adopt strtobool() from the kernel into tools/lib/ (Wang Nan)
- Fix selftests_install tools/ Makefile rule (Kevin Hilman)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/lib')
| -rw-r--r-- | tools/lib/bpf/libbpf.c | 146 | ||||
| -rw-r--r-- | tools/lib/bpf/libbpf.h | 64 | ||||
| -rw-r--r-- | tools/lib/string.c | 62 |
3 files changed, 263 insertions, 9 deletions
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e176bad19bcb..e3f4c3379f14 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c | |||
| @@ -152,7 +152,11 @@ struct bpf_program { | |||
| 152 | } *reloc_desc; | 152 | } *reloc_desc; |
| 153 | int nr_reloc; | 153 | int nr_reloc; |
| 154 | 154 | ||
| 155 | int fd; | 155 | struct { |
| 156 | int nr; | ||
| 157 | int *fds; | ||
| 158 | } instances; | ||
| 159 | bpf_program_prep_t preprocessor; | ||
| 156 | 160 | ||
| 157 | struct bpf_object *obj; | 161 | struct bpf_object *obj; |
| 158 | void *priv; | 162 | void *priv; |
| @@ -206,10 +210,25 @@ struct bpf_object { | |||
| 206 | 210 | ||
| 207 | static void bpf_program__unload(struct bpf_program *prog) | 211 | static void bpf_program__unload(struct bpf_program *prog) |
| 208 | { | 212 | { |
| 213 | int i; | ||
| 214 | |||
| 209 | if (!prog) | 215 | if (!prog) |
| 210 | return; | 216 | return; |
| 211 | 217 | ||
| 212 | zclose(prog->fd); | 218 | /* |
| 219 | * If the object is opened but the program was never loaded, | ||
| 220 | * it is possible that prog->instances.nr == -1. | ||
| 221 | */ | ||
| 222 | if (prog->instances.nr > 0) { | ||
| 223 | for (i = 0; i < prog->instances.nr; i++) | ||
| 224 | zclose(prog->instances.fds[i]); | ||
| 225 | } else if (prog->instances.nr != -1) { | ||
| 226 | pr_warning("Internal error: instances.nr is %d\n", | ||
| 227 | prog->instances.nr); | ||
| 228 | } | ||
| 229 | |||
| 230 | prog->instances.nr = -1; | ||
| 231 | zfree(&prog->instances.fds); | ||
| 213 | } | 232 | } |
| 214 | 233 | ||
| 215 | static void bpf_program__exit(struct bpf_program *prog) | 234 | static void bpf_program__exit(struct bpf_program *prog) |
| @@ -260,7 +279,8 @@ bpf_program__init(void *data, size_t size, char *name, int idx, | |||
| 260 | memcpy(prog->insns, data, | 279 | memcpy(prog->insns, data, |
| 261 | prog->insns_cnt * sizeof(struct bpf_insn)); | 280 | prog->insns_cnt * sizeof(struct bpf_insn)); |
| 262 | prog->idx = idx; | 281 | prog->idx = idx; |
| 263 | prog->fd = -1; | 282 | prog->instances.fds = NULL; |
| 283 | prog->instances.nr = -1; | ||
| 264 | 284 | ||
| 265 | return 0; | 285 | return 0; |
| 266 | errout: | 286 | errout: |
| @@ -860,13 +880,73 @@ static int | |||
| 860 | bpf_program__load(struct bpf_program *prog, | 880 | bpf_program__load(struct bpf_program *prog, |
| 861 | char *license, u32 kern_version) | 881 | char *license, u32 kern_version) |
| 862 | { | 882 | { |
| 863 | int err, fd; | 883 | int err = 0, fd, i; |
| 864 | 884 | ||
| 865 | err = load_program(prog->insns, prog->insns_cnt, | 885 | if (prog->instances.nr < 0 || !prog->instances.fds) { |
| 866 | license, kern_version, &fd); | 886 | if (prog->preprocessor) { |
| 867 | if (!err) | 887 | pr_warning("Internal error: can't load program '%s'\n", |
| 868 | prog->fd = fd; | 888 | prog->section_name); |
| 889 | return -LIBBPF_ERRNO__INTERNAL; | ||
| 890 | } | ||
| 869 | 891 | ||
| 892 | prog->instances.fds = malloc(sizeof(int)); | ||
| 893 | if (!prog->instances.fds) { | ||
| 894 | pr_warning("Not enough memory for BPF fds\n"); | ||
| 895 | return -ENOMEM; | ||
| 896 | } | ||
| 897 | prog->instances.nr = 1; | ||
| 898 | prog->instances.fds[0] = -1; | ||
| 899 | } | ||
| 900 | |||
| 901 | if (!prog->preprocessor) { | ||
| 902 | if (prog->instances.nr != 1) { | ||
| 903 | pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n", | ||
| 904 | prog->section_name, prog->instances.nr); | ||
| 905 | } | ||
| 906 | err = load_program(prog->insns, prog->insns_cnt, | ||
| 907 | license, kern_version, &fd); | ||
| 908 | if (!err) | ||
| 909 | prog->instances.fds[0] = fd; | ||
| 910 | goto out; | ||
| 911 | } | ||
| 912 | |||
| 913 | for (i = 0; i < prog->instances.nr; i++) { | ||
| 914 | struct bpf_prog_prep_result result; | ||
| 915 | bpf_program_prep_t preprocessor = prog->preprocessor; | ||
| 916 | |||
| 917 | bzero(&result, sizeof(result)); | ||
| 918 | err = preprocessor(prog, i, prog->insns, | ||
| 919 | prog->insns_cnt, &result); | ||
| 920 | if (err) { | ||
| 921 | pr_warning("Preprocessing the %dth instance of program '%s' failed\n", | ||
| 922 | i, prog->section_name); | ||
| 923 | goto out; | ||
| 924 | } | ||
| 925 | |||
| 926 | if (!result.new_insn_ptr || !result.new_insn_cnt) { | ||
| 927 | pr_debug("Skip loading the %dth instance of program '%s'\n", | ||
| 928 | i, prog->section_name); | ||
| 929 | prog->instances.fds[i] = -1; | ||
| 930 | if (result.pfd) | ||
| 931 | *result.pfd = -1; | ||
| 932 | continue; | ||
| 933 | } | ||
| 934 | |||
| 935 | err = load_program(result.new_insn_ptr, | ||
| 936 | result.new_insn_cnt, | ||
| 937 | license, kern_version, &fd); | ||
| 938 | |||
| 939 | if (err) { | ||
| 940 | pr_warning("Loading the %dth instance of program '%s' failed\n", | ||
| 941 | i, prog->section_name); | ||
| 942 | goto out; | ||
| 943 | } | ||
| 944 | |||
| 945 | if (result.pfd) | ||
| 946 | *result.pfd = fd; | ||
| 947 | prog->instances.fds[i] = fd; | ||
| 948 | } | ||
| 949 | out: | ||
| 870 | if (err) | 950 | if (err) |
| 871 | pr_warning("failed to load program '%s'\n", | 951 | pr_warning("failed to load program '%s'\n", |
| 872 | prog->section_name); | 952 | prog->section_name); |
| @@ -1121,5 +1201,53 @@ const char *bpf_program__title(struct bpf_program *prog, bool needs_copy) | |||
| 1121 | 1201 | ||
| 1122 | int bpf_program__fd(struct bpf_program *prog) | 1202 | int bpf_program__fd(struct bpf_program *prog) |
| 1123 | { | 1203 | { |
| 1124 | return prog->fd; | 1204 | return bpf_program__nth_fd(prog, 0); |
| 1205 | } | ||
| 1206 | |||
| 1207 | int bpf_program__set_prep(struct bpf_program *prog, int nr_instances, | ||
| 1208 | bpf_program_prep_t prep) | ||
| 1209 | { | ||
| 1210 | int *instances_fds; | ||
| 1211 | |||
| 1212 | if (nr_instances <= 0 || !prep) | ||
| 1213 | return -EINVAL; | ||
| 1214 | |||
| 1215 | if (prog->instances.nr > 0 || prog->instances.fds) { | ||
| 1216 | pr_warning("Can't set pre-processor after loading\n"); | ||
| 1217 | return -EINVAL; | ||
| 1218 | } | ||
| 1219 | |||
| 1220 | instances_fds = malloc(sizeof(int) * nr_instances); | ||
| 1221 | if (!instances_fds) { | ||
| 1222 | pr_warning("alloc memory failed for fds\n"); | ||
| 1223 | return -ENOMEM; | ||
| 1224 | } | ||
| 1225 | |||
| 1226 | /* fill all fd with -1 */ | ||
| 1227 | memset(instances_fds, -1, sizeof(int) * nr_instances); | ||
| 1228 | |||
| 1229 | prog->instances.nr = nr_instances; | ||
| 1230 | prog->instances.fds = instances_fds; | ||
| 1231 | prog->preprocessor = prep; | ||
| 1232 | return 0; | ||
| 1233 | } | ||
| 1234 | |||
| 1235 | int bpf_program__nth_fd(struct bpf_program *prog, int n) | ||
| 1236 | { | ||
| 1237 | int fd; | ||
| 1238 | |||
| 1239 | if (n >= prog->instances.nr || n < 0) { | ||
| 1240 | pr_warning("Can't get the %dth fd from program %s: only %d instances\n", | ||
| 1241 | n, prog->section_name, prog->instances.nr); | ||
| 1242 | return -EINVAL; | ||
| 1243 | } | ||
| 1244 | |||
| 1245 | fd = prog->instances.fds[n]; | ||
| 1246 | if (fd < 0) { | ||
| 1247 | pr_warning("%dth instance of program '%s' is invalid\n", | ||
| 1248 | n, prog->section_name); | ||
| 1249 | return -ENOENT; | ||
| 1250 | } | ||
| 1251 | |||
| 1252 | return fd; | ||
| 1125 | } | 1253 | } |
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index c9a9aef2806c..949df4b346cf 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h | |||
| @@ -88,6 +88,70 @@ const char *bpf_program__title(struct bpf_program *prog, bool needs_copy); | |||
| 88 | 88 | ||
| 89 | int bpf_program__fd(struct bpf_program *prog); | 89 | int bpf_program__fd(struct bpf_program *prog); |
| 90 | 90 | ||
| 91 | struct bpf_insn; | ||
| 92 | |||
| 93 | /* | ||
| 94 | * Libbpf allows callers to adjust BPF programs before being loaded | ||
| 95 | * into kernel. One program in an object file can be transformed into | ||
| 96 | * multiple variants to be attached to different code. | ||
| 97 | * | ||
| 98 | * bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd | ||
| 99 | * are APIs for this purpose. | ||
| 100 | * | ||
| 101 | * - bpf_program_prep_t: | ||
| 102 | * It defines 'preprocessor', which is a caller defined function | ||
| 103 | * passed to libbpf through bpf_program__set_prep(), and will be | ||
| 104 | * called before program is loaded. The processor should adjust | ||
| 105 | * the program one time for each instances according to the number | ||
| 106 | * passed to it. | ||
| 107 | * | ||
| 108 | * - bpf_program__set_prep: | ||
| 109 | * Attaches a preprocessor to a BPF program. The number of instances | ||
| 110 | * that should be created is also passed through this function. | ||
| 111 | * | ||
| 112 | * - bpf_program__nth_fd: | ||
| 113 | * After the program is loaded, get resulting fds from bpf program for | ||
| 114 | * each instances. | ||
| 115 | * | ||
| 116 | * If bpf_program__set_prep() is not used, the program would be loaded | ||
| 117 | * without adjustment during bpf_object__load(). The program has only | ||
| 118 | * one instance. In this case bpf_program__fd(prog) is equal to | ||
| 119 | * bpf_program__nth_fd(prog, 0). | ||
| 120 | */ | ||
| 121 | |||
| 122 | struct bpf_prog_prep_result { | ||
| 123 | /* | ||
| 124 | * If not NULL, load new instruction array. | ||
| 125 | * If set to NULL, don't load this instance. | ||
| 126 | */ | ||
| 127 | struct bpf_insn *new_insn_ptr; | ||
| 128 | int new_insn_cnt; | ||
| 129 | |||
| 130 | /* If not NULL, result fd is set to it */ | ||
| 131 | int *pfd; | ||
| 132 | }; | ||
| 133 | |||
| 134 | /* | ||
| 135 | * Parameters of bpf_program_prep_t: | ||
| 136 | * - prog: The bpf_program being loaded. | ||
| 137 | * - n: Index of instance being generated. | ||
| 138 | * - insns: BPF instructions array. | ||
| 139 | * - insns_cnt:Number of instructions in insns. | ||
| 140 | * - res: Output parameter, result of transformation. | ||
| 141 | * | ||
| 142 | * Return value: | ||
| 143 | * - Zero: pre-processing success. | ||
| 144 | * - Non-zero: pre-processing error, stop loading. | ||
| 145 | */ | ||
| 146 | typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n, | ||
| 147 | struct bpf_insn *insns, int insns_cnt, | ||
| 148 | struct bpf_prog_prep_result *res); | ||
| 149 | |||
| 150 | int bpf_program__set_prep(struct bpf_program *prog, int nr_instance, | ||
| 151 | bpf_program_prep_t prep); | ||
| 152 | |||
| 153 | int bpf_program__nth_fd(struct bpf_program *prog, int n); | ||
| 154 | |||
| 91 | /* | 155 | /* |
| 92 | * We don't need __attribute__((packed)) now since it is | 156 | * We don't need __attribute__((packed)) now since it is |
| 93 | * unnecessary for 'bpf_map_def' because they are all aligned. | 157 | * unnecessary for 'bpf_map_def' because they are all aligned. |
diff --git a/tools/lib/string.c b/tools/lib/string.c new file mode 100644 index 000000000000..065e54f42d8f --- /dev/null +++ b/tools/lib/string.c | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | /* | ||
| 2 | * linux/tools/lib/string.c | ||
| 3 | * | ||
| 4 | * Copied from linux/lib/string.c, where it is: | ||
| 5 | * | ||
| 6 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
| 7 | * | ||
| 8 | * More specifically, the first copied function was strtobool, which | ||
| 9 | * was introduced by: | ||
| 10 | * | ||
| 11 | * d0f1fed29e6e ("Add a strtobool function matching semantics of existing in kernel equivalents") | ||
| 12 | * Author: Jonathan Cameron <jic23@cam.ac.uk> | ||
| 13 | */ | ||
| 14 | |||
| 15 | #include <stdlib.h> | ||
| 16 | #include <string.h> | ||
| 17 | #include <errno.h> | ||
| 18 | #include <linux/string.h> | ||
| 19 | |||
| 20 | /** | ||
| 21 | * memdup - duplicate region of memory | ||
| 22 | * | ||
| 23 | * @src: memory region to duplicate | ||
| 24 | * @len: memory region length | ||
| 25 | */ | ||
| 26 | void *memdup(const void *src, size_t len) | ||
| 27 | { | ||
| 28 | void *p = malloc(len); | ||
| 29 | |||
| 30 | if (p) | ||
| 31 | memcpy(p, src, len); | ||
| 32 | |||
| 33 | return p; | ||
| 34 | } | ||
| 35 | |||
| 36 | /** | ||
| 37 | * strtobool - convert common user inputs into boolean values | ||
| 38 | * @s: input string | ||
| 39 | * @res: result | ||
| 40 | * | ||
| 41 | * This routine returns 0 iff the first character is one of 'Yy1Nn0'. | ||
| 42 | * Otherwise it will return -EINVAL. Value pointed to by res is | ||
| 43 | * updated upon finding a match. | ||
| 44 | */ | ||
| 45 | int strtobool(const char *s, bool *res) | ||
| 46 | { | ||
| 47 | switch (s[0]) { | ||
| 48 | case 'y': | ||
| 49 | case 'Y': | ||
| 50 | case '1': | ||
| 51 | *res = true; | ||
| 52 | break; | ||
| 53 | case 'n': | ||
| 54 | case 'N': | ||
| 55 | case '0': | ||
| 56 | *res = false; | ||
| 57 | break; | ||
| 58 | default: | ||
| 59 | return -EINVAL; | ||
| 60 | } | ||
| 61 | return 0; | ||
| 62 | } | ||
