diff options
author | Ingo Molnar <mingo@kernel.org> | 2013-01-31 04:20:14 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2013-01-31 04:20:14 -0500 |
commit | 152fefa921535665f95840c08062844ab2f5593e (patch) | |
tree | b6ff202ebeca4341a1332258a04403f8ce95e75a | |
parent | a2d28d0c198b65fac28ea6212f5f8edc77b29c27 (diff) | |
parent | 5809fde040de2afa477a6c593ce2e8fd2c11d9d3 (diff) |
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
. Fix some leaks in exit paths.
. Use memdup where applicable
. Remove some die() calls, allowing callers to handle exit paths
gracefully.
. Correct typo in tools Makefile, fix from Borislav Petkov.
. Add 'perf bench numa mem' NUMA performance measurement suite, from Ingo Molnar.
. Handle dynamic array's element size properly, fix from Jiri Olsa.
. Fix memory leaks on evsel->counts, from Namhyung Kim.
. Make numa benchmark optional, allowing the build in machines where required
numa libraries are not present, fix from Peter Hurley.
. Add interval printing in 'perf stat', from Stephane Eranian.
. Fix compile warnings in tests/attr.c, from Sukadev Bhattiprolu.
. Fix double free, pclose instead of fclose, leaks and double fclose errors
found with the cppcheck tool, from Thomas Jarosch.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r-- | tools/Makefile | 2 | ||||
-rw-r--r-- | tools/lib/traceevent/event-parse.c | 39 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-stat.txt | 4 | ||||
-rw-r--r-- | tools/perf/Makefile | 13 | ||||
-rw-r--r-- | tools/perf/arch/common.c | 1 | ||||
-rw-r--r-- | tools/perf/bench/bench.h | 1 | ||||
-rw-r--r-- | tools/perf/bench/numa.c | 1731 | ||||
-rw-r--r-- | tools/perf/builtin-bench.c | 17 | ||||
-rw-r--r-- | tools/perf/builtin-kmem.c | 6 | ||||
-rw-r--r-- | tools/perf/builtin-stat.c | 158 | ||||
-rw-r--r-- | tools/perf/config/feature-tests.mak | 11 | ||||
-rw-r--r-- | tools/perf/tests/attr.c | 5 | ||||
-rw-r--r-- | tools/perf/tests/open-syscall-all-cpus.c | 1 | ||||
-rw-r--r-- | tools/perf/tests/perf-record.c | 12 | ||||
-rw-r--r-- | tools/perf/tests/vmlinux-kallsyms.c | 4 | ||||
-rw-r--r-- | tools/perf/ui/browser.c | 2 | ||||
-rw-r--r-- | tools/perf/util/event.c | 4 | ||||
-rw-r--r-- | tools/perf/util/evsel.c | 31 | ||||
-rw-r--r-- | tools/perf/util/evsel.h | 2 | ||||
-rw-r--r-- | tools/perf/util/header.c | 25 | ||||
-rw-r--r-- | tools/perf/util/map.c | 118 | ||||
-rw-r--r-- | tools/perf/util/map.h | 24 | ||||
-rw-r--r-- | tools/perf/util/sort.c | 7 | ||||
-rw-r--r-- | tools/perf/util/strlist.c | 54 | ||||
-rw-r--r-- | tools/perf/util/strlist.h | 42 |
25 files changed, 2154 insertions, 160 deletions
diff --git a/tools/Makefile b/tools/Makefile index 1f9a529fe544..798fa0ef048e 100644 --- a/tools/Makefile +++ b/tools/Makefile | |||
@@ -15,7 +15,7 @@ help: | |||
15 | @echo ' x86_energy_perf_policy - Intel energy policy tool' | 15 | @echo ' x86_energy_perf_policy - Intel energy policy tool' |
16 | @echo '' | 16 | @echo '' |
17 | @echo 'You can do:' | 17 | @echo 'You can do:' |
18 | @echo ' $$ make -C tools/<tool>_install' | 18 | @echo ' $$ make -C tools/ <tool>_install' |
19 | @echo '' | 19 | @echo '' |
20 | @echo ' from the kernel command line to build and install one of' | 20 | @echo ' from the kernel command line to build and install one of' |
21 | @echo ' the tools above' | 21 | @echo ' the tools above' |
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index bb8b3db0e583..82b0606dcb8a 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c | |||
@@ -1223,6 +1223,34 @@ static int field_is_long(struct format_field *field) | |||
1223 | return 0; | 1223 | return 0; |
1224 | } | 1224 | } |
1225 | 1225 | ||
1226 | static unsigned int type_size(const char *name) | ||
1227 | { | ||
1228 | /* This covers all FIELD_IS_STRING types. */ | ||
1229 | static struct { | ||
1230 | const char *type; | ||
1231 | unsigned int size; | ||
1232 | } table[] = { | ||
1233 | { "u8", 1 }, | ||
1234 | { "u16", 2 }, | ||
1235 | { "u32", 4 }, | ||
1236 | { "u64", 8 }, | ||
1237 | { "s8", 1 }, | ||
1238 | { "s16", 2 }, | ||
1239 | { "s32", 4 }, | ||
1240 | { "s64", 8 }, | ||
1241 | { "char", 1 }, | ||
1242 | { }, | ||
1243 | }; | ||
1244 | int i; | ||
1245 | |||
1246 | for (i = 0; table[i].type; i++) { | ||
1247 | if (!strcmp(table[i].type, name)) | ||
1248 | return table[i].size; | ||
1249 | } | ||
1250 | |||
1251 | return 0; | ||
1252 | } | ||
1253 | |||
1226 | static int event_read_fields(struct event_format *event, struct format_field **fields) | 1254 | static int event_read_fields(struct event_format *event, struct format_field **fields) |
1227 | { | 1255 | { |
1228 | struct format_field *field = NULL; | 1256 | struct format_field *field = NULL; |
@@ -1232,6 +1260,8 @@ static int event_read_fields(struct event_format *event, struct format_field **f | |||
1232 | int count = 0; | 1260 | int count = 0; |
1233 | 1261 | ||
1234 | do { | 1262 | do { |
1263 | unsigned int size_dynamic = 0; | ||
1264 | |||
1235 | type = read_token(&token); | 1265 | type = read_token(&token); |
1236 | if (type == EVENT_NEWLINE) { | 1266 | if (type == EVENT_NEWLINE) { |
1237 | free_token(token); | 1267 | free_token(token); |
@@ -1390,6 +1420,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f | |||
1390 | field->type = new_type; | 1420 | field->type = new_type; |
1391 | strcat(field->type, " "); | 1421 | strcat(field->type, " "); |
1392 | strcat(field->type, field->name); | 1422 | strcat(field->type, field->name); |
1423 | size_dynamic = type_size(field->name); | ||
1393 | free_token(field->name); | 1424 | free_token(field->name); |
1394 | strcat(field->type, brackets); | 1425 | strcat(field->type, brackets); |
1395 | field->name = token; | 1426 | field->name = token; |
@@ -1478,10 +1509,14 @@ static int event_read_fields(struct event_format *event, struct format_field **f | |||
1478 | if (field->flags & FIELD_IS_ARRAY) { | 1509 | if (field->flags & FIELD_IS_ARRAY) { |
1479 | if (field->arraylen) | 1510 | if (field->arraylen) |
1480 | field->elementsize = field->size / field->arraylen; | 1511 | field->elementsize = field->size / field->arraylen; |
1512 | else if (field->flags & FIELD_IS_DYNAMIC) | ||
1513 | field->elementsize = size_dynamic; | ||
1481 | else if (field->flags & FIELD_IS_STRING) | 1514 | else if (field->flags & FIELD_IS_STRING) |
1482 | field->elementsize = 1; | 1515 | field->elementsize = 1; |
1483 | else | 1516 | else if (field->flags & FIELD_IS_LONG) |
1484 | field->elementsize = event->pevent->long_size; | 1517 | field->elementsize = event->pevent ? |
1518 | event->pevent->long_size : | ||
1519 | sizeof(long); | ||
1485 | } else | 1520 | } else |
1486 | field->elementsize = field->size; | 1521 | field->elementsize = field->size; |
1487 | 1522 | ||
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index cf0c3107e06e..5289da3344e9 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt | |||
@@ -114,6 +114,10 @@ with it. --append may be used here. Examples: | |||
114 | 114 | ||
115 | perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- make -s -j64 O=defconfig-build/ bzImage | 115 | perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- make -s -j64 O=defconfig-build/ bzImage |
116 | 116 | ||
117 | -I msecs:: | ||
118 | --interval-print msecs:: | ||
119 | print count deltas every N milliseconds (minimum: 100ms) | ||
120 | example: perf stat -I 1000 -e cycles -a sleep 5 | ||
117 | 121 | ||
118 | EXAMPLES | 122 | EXAMPLES |
119 | -------- | 123 | -------- |
diff --git a/tools/perf/Makefile b/tools/perf/Makefile index a84021abb3fe..4b1044cbd84c 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile | |||
@@ -47,6 +47,8 @@ include config/utilities.mak | |||
47 | # backtrace post unwind. | 47 | # backtrace post unwind. |
48 | # | 48 | # |
49 | # Define NO_BACKTRACE if you do not want stack backtrace debug feature | 49 | # Define NO_BACKTRACE if you do not want stack backtrace debug feature |
50 | # | ||
51 | # Define NO_LIBNUMA if you do not want numa perf benchmark | ||
50 | 52 | ||
51 | $(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE | 53 | $(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE |
52 | @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) | 54 | @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) |
@@ -838,6 +840,17 @@ ifndef NO_BACKTRACE | |||
838 | endif | 840 | endif |
839 | endif | 841 | endif |
840 | 842 | ||
843 | ifndef NO_LIBNUMA | ||
844 | FLAGS_LIBNUMA = $(ALL_CFLAGS) $(ALL_LDFLAGS) -lnuma | ||
845 | ifneq ($(call try-cc,$(SOURCE_LIBNUMA),$(FLAGS_LIBNUMA),libnuma),y) | ||
846 | msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numa-libs-devel or libnuma-dev); | ||
847 | else | ||
848 | BASIC_CFLAGS += -DLIBNUMA_SUPPORT | ||
849 | BUILTIN_OBJS += $(OUTPUT)bench/numa.o | ||
850 | EXTLIBS += -lnuma | ||
851 | endif | ||
852 | endif | ||
853 | |||
841 | ifdef ASCIIDOC8 | 854 | ifdef ASCIIDOC8 |
842 | export ASCIIDOC8 | 855 | export ASCIIDOC8 |
843 | endif | 856 | endif |
diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c index 3e975cb6232e..aacef07ebf31 100644 --- a/tools/perf/arch/common.c +++ b/tools/perf/arch/common.c | |||
@@ -155,6 +155,7 @@ static int perf_session_env__lookup_binutils_path(struct perf_session_env *env, | |||
155 | if (lookup_path(buf)) | 155 | if (lookup_path(buf)) |
156 | goto out; | 156 | goto out; |
157 | free(buf); | 157 | free(buf); |
158 | buf = NULL; | ||
158 | } | 159 | } |
159 | 160 | ||
160 | if (!strcmp(arch, "arm")) | 161 | if (!strcmp(arch, "arm")) |
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 8f89998eeaf4..a5223e6a7b43 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h | |||
@@ -1,6 +1,7 @@ | |||
1 | #ifndef BENCH_H | 1 | #ifndef BENCH_H |
2 | #define BENCH_H | 2 | #define BENCH_H |
3 | 3 | ||
4 | extern int bench_numa(int argc, const char **argv, const char *prefix); | ||
4 | extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); | 5 | extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); |
5 | extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); | 6 | extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); |
6 | extern int bench_mem_memcpy(int argc, const char **argv, | 7 | extern int bench_mem_memcpy(int argc, const char **argv, |
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c new file mode 100644 index 000000000000..30d1c3225b46 --- /dev/null +++ b/tools/perf/bench/numa.c | |||
@@ -0,0 +1,1731 @@ | |||
1 | /* | ||
2 | * numa.c | ||
3 | * | ||
4 | * numa: Simulate NUMA-sensitive workload and measure their NUMA performance | ||
5 | */ | ||
6 | |||
7 | #include "../perf.h" | ||
8 | #include "../builtin.h" | ||
9 | #include "../util/util.h" | ||
10 | #include "../util/parse-options.h" | ||
11 | |||
12 | #include "bench.h" | ||
13 | |||
14 | #include <errno.h> | ||
15 | #include <sched.h> | ||
16 | #include <stdio.h> | ||
17 | #include <assert.h> | ||
18 | #include <malloc.h> | ||
19 | #include <signal.h> | ||
20 | #include <stdlib.h> | ||
21 | #include <string.h> | ||
22 | #include <unistd.h> | ||
23 | #include <pthread.h> | ||
24 | #include <sys/mman.h> | ||
25 | #include <sys/time.h> | ||
26 | #include <sys/wait.h> | ||
27 | #include <sys/prctl.h> | ||
28 | #include <sys/types.h> | ||
29 | |||
30 | #include <numa.h> | ||
31 | #include <numaif.h> | ||
32 | |||
33 | /* | ||
34 | * Regular printout to the terminal, supressed if -q is specified: | ||
35 | */ | ||
36 | #define tprintf(x...) do { if (g && g->p.show_details >= 0) printf(x); } while (0) | ||
37 | |||
38 | /* | ||
39 | * Debug printf: | ||
40 | */ | ||
41 | #define dprintf(x...) do { if (g && g->p.show_details >= 1) printf(x); } while (0) | ||
42 | |||
43 | struct thread_data { | ||
44 | int curr_cpu; | ||
45 | cpu_set_t bind_cpumask; | ||
46 | int bind_node; | ||
47 | u8 *process_data; | ||
48 | int process_nr; | ||
49 | int thread_nr; | ||
50 | int task_nr; | ||
51 | unsigned int loops_done; | ||
52 | u64 val; | ||
53 | u64 runtime_ns; | ||
54 | pthread_mutex_t *process_lock; | ||
55 | }; | ||
56 | |||
57 | /* Parameters set by options: */ | ||
58 | |||
59 | struct params { | ||
60 | /* Startup synchronization: */ | ||
61 | bool serialize_startup; | ||
62 | |||
63 | /* Task hierarchy: */ | ||
64 | int nr_proc; | ||
65 | int nr_threads; | ||
66 | |||
67 | /* Working set sizes: */ | ||
68 | const char *mb_global_str; | ||
69 | const char *mb_proc_str; | ||
70 | const char *mb_proc_locked_str; | ||
71 | const char *mb_thread_str; | ||
72 | |||
73 | double mb_global; | ||
74 | double mb_proc; | ||
75 | double mb_proc_locked; | ||
76 | double mb_thread; | ||
77 | |||
78 | /* Access patterns to the working set: */ | ||
79 | bool data_reads; | ||
80 | bool data_writes; | ||
81 | bool data_backwards; | ||
82 | bool data_zero_memset; | ||
83 | bool data_rand_walk; | ||
84 | u32 nr_loops; | ||
85 | u32 nr_secs; | ||
86 | u32 sleep_usecs; | ||
87 | |||
88 | /* Working set initialization: */ | ||
89 | bool init_zero; | ||
90 | bool init_random; | ||
91 | bool init_cpu0; | ||
92 | |||
93 | /* Misc options: */ | ||
94 | int show_details; | ||
95 | int run_all; | ||
96 | int thp; | ||
97 | |||
98 | long bytes_global; | ||
99 | long bytes_process; | ||
100 | long bytes_process_locked; | ||
101 | long bytes_thread; | ||
102 | |||
103 | int nr_tasks; | ||
104 | bool show_quiet; | ||
105 | |||
106 | bool show_convergence; | ||
107 | bool measure_convergence; | ||
108 | |||
109 | int perturb_secs; | ||
110 | int nr_cpus; | ||
111 | int nr_nodes; | ||
112 | |||
113 | /* Affinity options -C and -N: */ | ||
114 | char *cpu_list_str; | ||
115 | char *node_list_str; | ||
116 | }; | ||
117 | |||
118 | |||
119 | /* Global, read-writable area, accessible to all processes and threads: */ | ||
120 | |||
121 | struct global_info { | ||
122 | u8 *data; | ||
123 | |||
124 | pthread_mutex_t startup_mutex; | ||
125 | int nr_tasks_started; | ||
126 | |||
127 | pthread_mutex_t startup_done_mutex; | ||
128 | |||
129 | pthread_mutex_t start_work_mutex; | ||
130 | int nr_tasks_working; | ||
131 | |||
132 | pthread_mutex_t stop_work_mutex; | ||
133 | u64 bytes_done; | ||
134 | |||
135 | struct thread_data *threads; | ||
136 | |||
137 | /* Convergence latency measurement: */ | ||
138 | bool all_converged; | ||
139 | bool stop_work; | ||
140 | |||
141 | int print_once; | ||
142 | |||
143 | struct params p; | ||
144 | }; | ||
145 | |||
146 | static struct global_info *g = NULL; | ||
147 | |||
148 | static int parse_cpus_opt(const struct option *opt, const char *arg, int unset); | ||
149 | static int parse_nodes_opt(const struct option *opt, const char *arg, int unset); | ||
150 | |||
151 | struct params p0; | ||
152 | |||
153 | static const struct option options[] = { | ||
154 | OPT_INTEGER('p', "nr_proc" , &p0.nr_proc, "number of processes"), | ||
155 | OPT_INTEGER('t', "nr_threads" , &p0.nr_threads, "number of threads per process"), | ||
156 | |||
157 | OPT_STRING('G', "mb_global" , &p0.mb_global_str, "MB", "global memory (MBs)"), | ||
158 | OPT_STRING('P', "mb_proc" , &p0.mb_proc_str, "MB", "process memory (MBs)"), | ||
159 | OPT_STRING('L', "mb_proc_locked", &p0.mb_proc_locked_str,"MB", "process serialized/locked memory access (MBs), <= process_memory"), | ||
160 | OPT_STRING('T', "mb_thread" , &p0.mb_thread_str, "MB", "thread memory (MBs)"), | ||
161 | |||
162 | OPT_UINTEGER('l', "nr_loops" , &p0.nr_loops, "max number of loops to run"), | ||
163 | OPT_UINTEGER('s', "nr_secs" , &p0.nr_secs, "max number of seconds to run"), | ||
164 | OPT_UINTEGER('u', "usleep" , &p0.sleep_usecs, "usecs to sleep per loop iteration"), | ||
165 | |||
166 | OPT_BOOLEAN('R', "data_reads" , &p0.data_reads, "access the data via writes (can be mixed with -W)"), | ||
167 | OPT_BOOLEAN('W', "data_writes" , &p0.data_writes, "access the data via writes (can be mixed with -R)"), | ||
168 | OPT_BOOLEAN('B', "data_backwards", &p0.data_backwards, "access the data backwards as well"), | ||
169 | OPT_BOOLEAN('Z', "data_zero_memset", &p0.data_zero_memset,"access the data via glibc bzero only"), | ||
170 | OPT_BOOLEAN('r', "data_rand_walk", &p0.data_rand_walk, "access the data with random (32bit LFSR) walk"), | ||
171 | |||
172 | |||
173 | OPT_BOOLEAN('z', "init_zero" , &p0.init_zero, "bzero the initial allocations"), | ||
174 | OPT_BOOLEAN('I', "init_random" , &p0.init_random, "randomize the contents of the initial allocations"), | ||
175 | OPT_BOOLEAN('0', "init_cpu0" , &p0.init_cpu0, "do the initial allocations on CPU#0"), | ||
176 | OPT_INTEGER('x', "perturb_secs", &p0.perturb_secs, "perturb thread 0/0 every X secs, to test convergence stability"), | ||
177 | |||
178 | OPT_INCR ('d', "show_details" , &p0.show_details, "Show details"), | ||
179 | OPT_INCR ('a', "all" , &p0.run_all, "Run all tests in the suite"), | ||
180 | OPT_INTEGER('H', "thp" , &p0.thp, "MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"), | ||
181 | OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details"), | ||
182 | OPT_BOOLEAN('m', "measure_convergence", &p0.measure_convergence, "measure convergence latency"), | ||
183 | OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "bzero the initial allocations"), | ||
184 | OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"), | ||
185 | |||
186 | /* Special option string parsing callbacks: */ | ||
187 | OPT_CALLBACK('C', "cpus", NULL, "cpu[,cpu2,...cpuN]", | ||
188 | "bind the first N tasks to these specific cpus (the rest is unbound)", | ||
189 | parse_cpus_opt), | ||
190 | OPT_CALLBACK('M', "memnodes", NULL, "node[,node2,...nodeN]", | ||
191 | "bind the first N tasks to these specific memory nodes (the rest is unbound)", | ||
192 | parse_nodes_opt), | ||
193 | OPT_END() | ||
194 | }; | ||
195 | |||
196 | static const char * const bench_numa_usage[] = { | ||
197 | "perf bench numa <options>", | ||
198 | NULL | ||
199 | }; | ||
200 | |||
201 | static const char * const numa_usage[] = { | ||
202 | "perf bench numa mem [<options>]", | ||
203 | NULL | ||
204 | }; | ||
205 | |||
206 | static cpu_set_t bind_to_cpu(int target_cpu) | ||
207 | { | ||
208 | cpu_set_t orig_mask, mask; | ||
209 | int ret; | ||
210 | |||
211 | ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask); | ||
212 | BUG_ON(ret); | ||
213 | |||
214 | CPU_ZERO(&mask); | ||
215 | |||
216 | if (target_cpu == -1) { | ||
217 | int cpu; | ||
218 | |||
219 | for (cpu = 0; cpu < g->p.nr_cpus; cpu++) | ||
220 | CPU_SET(cpu, &mask); | ||
221 | } else { | ||
222 | BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus); | ||
223 | CPU_SET(target_cpu, &mask); | ||
224 | } | ||
225 | |||
226 | ret = sched_setaffinity(0, sizeof(mask), &mask); | ||
227 | BUG_ON(ret); | ||
228 | |||
229 | return orig_mask; | ||
230 | } | ||
231 | |||
232 | static cpu_set_t bind_to_node(int target_node) | ||
233 | { | ||
234 | int cpus_per_node = g->p.nr_cpus/g->p.nr_nodes; | ||
235 | cpu_set_t orig_mask, mask; | ||
236 | int cpu; | ||
237 | int ret; | ||
238 | |||
239 | BUG_ON(cpus_per_node*g->p.nr_nodes != g->p.nr_cpus); | ||
240 | BUG_ON(!cpus_per_node); | ||
241 | |||
242 | ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask); | ||
243 | BUG_ON(ret); | ||
244 | |||
245 | CPU_ZERO(&mask); | ||
246 | |||
247 | if (target_node == -1) { | ||
248 | for (cpu = 0; cpu < g->p.nr_cpus; cpu++) | ||
249 | CPU_SET(cpu, &mask); | ||
250 | } else { | ||
251 | int cpu_start = (target_node + 0) * cpus_per_node; | ||
252 | int cpu_stop = (target_node + 1) * cpus_per_node; | ||
253 | |||
254 | BUG_ON(cpu_stop > g->p.nr_cpus); | ||
255 | |||
256 | for (cpu = cpu_start; cpu < cpu_stop; cpu++) | ||
257 | CPU_SET(cpu, &mask); | ||
258 | } | ||
259 | |||
260 | ret = sched_setaffinity(0, sizeof(mask), &mask); | ||
261 | BUG_ON(ret); | ||
262 | |||
263 | return orig_mask; | ||
264 | } | ||
265 | |||
266 | static void bind_to_cpumask(cpu_set_t mask) | ||
267 | { | ||
268 | int ret; | ||
269 | |||
270 | ret = sched_setaffinity(0, sizeof(mask), &mask); | ||
271 | BUG_ON(ret); | ||
272 | } | ||
273 | |||
274 | static void mempol_restore(void) | ||
275 | { | ||
276 | int ret; | ||
277 | |||
278 | ret = set_mempolicy(MPOL_DEFAULT, NULL, g->p.nr_nodes-1); | ||
279 | |||
280 | BUG_ON(ret); | ||
281 | } | ||
282 | |||
283 | static void bind_to_memnode(int node) | ||
284 | { | ||
285 | unsigned long nodemask; | ||
286 | int ret; | ||
287 | |||
288 | if (node == -1) | ||
289 | return; | ||
290 | |||
291 | BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)); | ||
292 | nodemask = 1L << node; | ||
293 | |||
294 | ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8); | ||
295 | dprintf("binding to node %d, mask: %016lx => %d\n", node, nodemask, ret); | ||
296 | |||
297 | BUG_ON(ret); | ||
298 | } | ||
299 | |||
300 | #define HPSIZE (2*1024*1024) | ||
301 | |||
302 | #define set_taskname(fmt...) \ | ||
303 | do { \ | ||
304 | char name[20]; \ | ||
305 | \ | ||
306 | snprintf(name, 20, fmt); \ | ||
307 | prctl(PR_SET_NAME, name); \ | ||
308 | } while (0) | ||
309 | |||
310 | static u8 *alloc_data(ssize_t bytes0, int map_flags, | ||
311 | int init_zero, int init_cpu0, int thp, int init_random) | ||
312 | { | ||
313 | cpu_set_t orig_mask; | ||
314 | ssize_t bytes; | ||
315 | u8 *buf; | ||
316 | int ret; | ||
317 | |||
318 | if (!bytes0) | ||
319 | return NULL; | ||
320 | |||
321 | /* Allocate and initialize all memory on CPU#0: */ | ||
322 | if (init_cpu0) { | ||
323 | orig_mask = bind_to_node(0); | ||
324 | bind_to_memnode(0); | ||
325 | } | ||
326 | |||
327 | bytes = bytes0 + HPSIZE; | ||
328 | |||
329 | buf = (void *)mmap(0, bytes, PROT_READ|PROT_WRITE, MAP_ANON|map_flags, -1, 0); | ||
330 | BUG_ON(buf == (void *)-1); | ||
331 | |||
332 | if (map_flags == MAP_PRIVATE) { | ||
333 | if (thp > 0) { | ||
334 | ret = madvise(buf, bytes, MADV_HUGEPAGE); | ||
335 | if (ret && !g->print_once) { | ||
336 | g->print_once = 1; | ||
337 | printf("WARNING: Could not enable THP - do: 'echo madvise > /sys/kernel/mm/transparent_hugepage/enabled'\n"); | ||
338 | } | ||
339 | } | ||
340 | if (thp < 0) { | ||
341 | ret = madvise(buf, bytes, MADV_NOHUGEPAGE); | ||
342 | if (ret && !g->print_once) { | ||
343 | g->print_once = 1; | ||
344 | printf("WARNING: Could not disable THP: run a CONFIG_TRANSPARENT_HUGEPAGE kernel?\n"); | ||
345 | } | ||
346 | } | ||
347 | } | ||
348 | |||
349 | if (init_zero) { | ||
350 | bzero(buf, bytes); | ||
351 | } else { | ||
352 | /* Initialize random contents, different in each word: */ | ||
353 | if (init_random) { | ||
354 | u64 *wbuf = (void *)buf; | ||
355 | long off = rand(); | ||
356 | long i; | ||
357 | |||
358 | for (i = 0; i < bytes/8; i++) | ||
359 | wbuf[i] = i + off; | ||
360 | } | ||
361 | } | ||
362 | |||
363 | /* Align to 2MB boundary: */ | ||
364 | buf = (void *)(((unsigned long)buf + HPSIZE-1) & ~(HPSIZE-1)); | ||
365 | |||
366 | /* Restore affinity: */ | ||
367 | if (init_cpu0) { | ||
368 | bind_to_cpumask(orig_mask); | ||
369 | mempol_restore(); | ||
370 | } | ||
371 | |||
372 | return buf; | ||
373 | } | ||
374 | |||
375 | static void free_data(void *data, ssize_t bytes) | ||
376 | { | ||
377 | int ret; | ||
378 | |||
379 | if (!data) | ||
380 | return; | ||
381 | |||
382 | ret = munmap(data, bytes); | ||
383 | BUG_ON(ret); | ||
384 | } | ||
385 | |||
386 | /* | ||
387 | * Create a shared memory buffer that can be shared between processes, zeroed: | ||
388 | */ | ||
389 | static void * zalloc_shared_data(ssize_t bytes) | ||
390 | { | ||
391 | return alloc_data(bytes, MAP_SHARED, 1, g->p.init_cpu0, g->p.thp, g->p.init_random); | ||
392 | } | ||
393 | |||
394 | /* | ||
395 | * Create a shared memory buffer that can be shared between processes: | ||
396 | */ | ||
397 | static void * setup_shared_data(ssize_t bytes) | ||
398 | { | ||
399 | return alloc_data(bytes, MAP_SHARED, 0, g->p.init_cpu0, g->p.thp, g->p.init_random); | ||
400 | } | ||
401 | |||
402 | /* | ||
403 | * Allocate process-local memory - this will either be shared between | ||
404 | * threads of this process, or only be accessed by this thread: | ||
405 | */ | ||
406 | static void * setup_private_data(ssize_t bytes) | ||
407 | { | ||
408 | return alloc_data(bytes, MAP_PRIVATE, 0, g->p.init_cpu0, g->p.thp, g->p.init_random); | ||
409 | } | ||
410 | |||
411 | /* | ||
412 | * Return a process-shared (global) mutex: | ||
413 | */ | ||
414 | static void init_global_mutex(pthread_mutex_t *mutex) | ||
415 | { | ||
416 | pthread_mutexattr_t attr; | ||
417 | |||
418 | pthread_mutexattr_init(&attr); | ||
419 | pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); | ||
420 | pthread_mutex_init(mutex, &attr); | ||
421 | } | ||
422 | |||
423 | static int parse_cpu_list(const char *arg) | ||
424 | { | ||
425 | p0.cpu_list_str = strdup(arg); | ||
426 | |||
427 | dprintf("got CPU list: {%s}\n", p0.cpu_list_str); | ||
428 | |||
429 | return 0; | ||
430 | } | ||
431 | |||
432 | static void parse_setup_cpu_list(void) | ||
433 | { | ||
434 | struct thread_data *td; | ||
435 | char *str0, *str; | ||
436 | int t; | ||
437 | |||
438 | if (!g->p.cpu_list_str) | ||
439 | return; | ||
440 | |||
441 | dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks); | ||
442 | |||
443 | str0 = str = strdup(g->p.cpu_list_str); | ||
444 | t = 0; | ||
445 | |||
446 | BUG_ON(!str); | ||
447 | |||
448 | tprintf("# binding tasks to CPUs:\n"); | ||
449 | tprintf("# "); | ||
450 | |||
451 | while (true) { | ||
452 | int bind_cpu, bind_cpu_0, bind_cpu_1; | ||
453 | char *tok, *tok_end, *tok_step, *tok_len, *tok_mul; | ||
454 | int bind_len; | ||
455 | int step; | ||
456 | int mul; | ||
457 | |||
458 | tok = strsep(&str, ","); | ||
459 | if (!tok) | ||
460 | break; | ||
461 | |||
462 | tok_end = strstr(tok, "-"); | ||
463 | |||
464 | dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end); | ||
465 | if (!tok_end) { | ||
466 | /* Single CPU specified: */ | ||
467 | bind_cpu_0 = bind_cpu_1 = atol(tok); | ||
468 | } else { | ||
469 | /* CPU range specified (for example: "5-11"): */ | ||
470 | bind_cpu_0 = atol(tok); | ||
471 | bind_cpu_1 = atol(tok_end + 1); | ||
472 | } | ||
473 | |||
474 | step = 1; | ||
475 | tok_step = strstr(tok, "#"); | ||
476 | if (tok_step) { | ||
477 | step = atol(tok_step + 1); | ||
478 | BUG_ON(step <= 0 || step >= g->p.nr_cpus); | ||
479 | } | ||
480 | |||
481 | /* | ||
482 | * Mask length. | ||
483 | * Eg: "--cpus 8_4-16#4" means: '--cpus 8_4,12_4,16_4', | ||
484 | * where the _4 means the next 4 CPUs are allowed. | ||
485 | */ | ||
486 | bind_len = 1; | ||
487 | tok_len = strstr(tok, "_"); | ||
488 | if (tok_len) { | ||
489 | bind_len = atol(tok_len + 1); | ||
490 | BUG_ON(bind_len <= 0 || bind_len > g->p.nr_cpus); | ||
491 | } | ||
492 | |||
493 | /* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */ | ||
494 | mul = 1; | ||
495 | tok_mul = strstr(tok, "x"); | ||
496 | if (tok_mul) { | ||
497 | mul = atol(tok_mul + 1); | ||
498 | BUG_ON(mul <= 0); | ||
499 | } | ||
500 | |||
501 | dprintf("CPUs: %d_%d-%d#%dx%d\n", bind_cpu_0, bind_len, bind_cpu_1, step, mul); | ||
502 | |||
503 | BUG_ON(bind_cpu_0 < 0 || bind_cpu_0 >= g->p.nr_cpus); | ||
504 | BUG_ON(bind_cpu_1 < 0 || bind_cpu_1 >= g->p.nr_cpus); | ||
505 | BUG_ON(bind_cpu_0 > bind_cpu_1); | ||
506 | |||
507 | for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) { | ||
508 | int i; | ||
509 | |||
510 | for (i = 0; i < mul; i++) { | ||
511 | int cpu; | ||
512 | |||
513 | if (t >= g->p.nr_tasks) { | ||
514 | printf("\n# NOTE: ignoring bind CPUs starting at CPU#%d\n #", bind_cpu); | ||
515 | goto out; | ||
516 | } | ||
517 | td = g->threads + t; | ||
518 | |||
519 | if (t) | ||
520 | tprintf(","); | ||
521 | if (bind_len > 1) { | ||
522 | tprintf("%2d/%d", bind_cpu, bind_len); | ||
523 | } else { | ||
524 | tprintf("%2d", bind_cpu); | ||
525 | } | ||
526 | |||
527 | CPU_ZERO(&td->bind_cpumask); | ||
528 | for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) { | ||
529 | BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus); | ||
530 | CPU_SET(cpu, &td->bind_cpumask); | ||
531 | } | ||
532 | t++; | ||
533 | } | ||
534 | } | ||
535 | } | ||
536 | out: | ||
537 | |||
538 | tprintf("\n"); | ||
539 | |||
540 | if (t < g->p.nr_tasks) | ||
541 | printf("# NOTE: %d tasks bound, %d tasks unbound\n", t, g->p.nr_tasks - t); | ||
542 | |||
543 | free(str0); | ||
544 | } | ||
545 | |||
546 | static int parse_cpus_opt(const struct option *opt __maybe_unused, | ||
547 | const char *arg, int unset __maybe_unused) | ||
548 | { | ||
549 | if (!arg) | ||
550 | return -1; | ||
551 | |||
552 | return parse_cpu_list(arg); | ||
553 | } | ||
554 | |||
555 | static int parse_node_list(const char *arg) | ||
556 | { | ||
557 | p0.node_list_str = strdup(arg); | ||
558 | |||
559 | dprintf("got NODE list: {%s}\n", p0.node_list_str); | ||
560 | |||
561 | return 0; | ||
562 | } | ||
563 | |||
564 | static void parse_setup_node_list(void) | ||
565 | { | ||
566 | struct thread_data *td; | ||
567 | char *str0, *str; | ||
568 | int t; | ||
569 | |||
570 | if (!g->p.node_list_str) | ||
571 | return; | ||
572 | |||
573 | dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks); | ||
574 | |||
575 | str0 = str = strdup(g->p.node_list_str); | ||
576 | t = 0; | ||
577 | |||
578 | BUG_ON(!str); | ||
579 | |||
580 | tprintf("# binding tasks to NODEs:\n"); | ||
581 | tprintf("# "); | ||
582 | |||
583 | while (true) { | ||
584 | int bind_node, bind_node_0, bind_node_1; | ||
585 | char *tok, *tok_end, *tok_step, *tok_mul; | ||
586 | int step; | ||
587 | int mul; | ||
588 | |||
589 | tok = strsep(&str, ","); | ||
590 | if (!tok) | ||
591 | break; | ||
592 | |||
593 | tok_end = strstr(tok, "-"); | ||
594 | |||
595 | dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end); | ||
596 | if (!tok_end) { | ||
597 | /* Single NODE specified: */ | ||
598 | bind_node_0 = bind_node_1 = atol(tok); | ||
599 | } else { | ||
600 | /* NODE range specified (for example: "5-11"): */ | ||
601 | bind_node_0 = atol(tok); | ||
602 | bind_node_1 = atol(tok_end + 1); | ||
603 | } | ||
604 | |||
605 | step = 1; | ||
606 | tok_step = strstr(tok, "#"); | ||
607 | if (tok_step) { | ||
608 | step = atol(tok_step + 1); | ||
609 | BUG_ON(step <= 0 || step >= g->p.nr_nodes); | ||
610 | } | ||
611 | |||
612 | /* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */ | ||
613 | mul = 1; | ||
614 | tok_mul = strstr(tok, "x"); | ||
615 | if (tok_mul) { | ||
616 | mul = atol(tok_mul + 1); | ||
617 | BUG_ON(mul <= 0); | ||
618 | } | ||
619 | |||
620 | dprintf("NODEs: %d-%d #%d\n", bind_node_0, bind_node_1, step); | ||
621 | |||
622 | BUG_ON(bind_node_0 < 0 || bind_node_0 >= g->p.nr_nodes); | ||
623 | BUG_ON(bind_node_1 < 0 || bind_node_1 >= g->p.nr_nodes); | ||
624 | BUG_ON(bind_node_0 > bind_node_1); | ||
625 | |||
626 | for (bind_node = bind_node_0; bind_node <= bind_node_1; bind_node += step) { | ||
627 | int i; | ||
628 | |||
629 | for (i = 0; i < mul; i++) { | ||
630 | if (t >= g->p.nr_tasks) { | ||
631 | printf("\n# NOTE: ignoring bind NODEs starting at NODE#%d\n", bind_node); | ||
632 | goto out; | ||
633 | } | ||
634 | td = g->threads + t; | ||
635 | |||
636 | if (!t) | ||
637 | tprintf(" %2d", bind_node); | ||
638 | else | ||
639 | tprintf(",%2d", bind_node); | ||
640 | |||
641 | td->bind_node = bind_node; | ||
642 | t++; | ||
643 | } | ||
644 | } | ||
645 | } | ||
646 | out: | ||
647 | |||
648 | tprintf("\n"); | ||
649 | |||
650 | if (t < g->p.nr_tasks) | ||
651 | printf("# NOTE: %d tasks mem-bound, %d tasks unbound\n", t, g->p.nr_tasks - t); | ||
652 | |||
653 | free(str0); | ||
654 | } | ||
655 | |||
656 | static int parse_nodes_opt(const struct option *opt __maybe_unused, | ||
657 | const char *arg, int unset __maybe_unused) | ||
658 | { | ||
659 | if (!arg) | ||
660 | return -1; | ||
661 | |||
662 | return parse_node_list(arg); | ||
663 | |||
664 | return 0; | ||
665 | } | ||
666 | |||
/* Bit mask for bit 'x' - argument parenthesized so expressions like BIT(a ? 1 : 2) expand correctly: */
#define BIT(x) (1ul << (x))
668 | |||
/*
 * One step of a Galois-style 32-bit LFSR with taps at bits
 * 1, 5, 6 and 31 (mask 0x80000062): shift right, and XOR the
 * tap mask in whenever the shifted-out bit was set.
 */
static inline uint32_t lfsr_32(uint32_t lfsr)
{
	const uint32_t taps = 0x80000062u;	/* BIT(1) | BIT(5) | BIT(6) | BIT(31) */
	uint32_t feedback = (0x0u - (lfsr & 0x1u)) & taps;

	return (lfsr >> 1) ^ feedback;
}
674 | |||
675 | /* | ||
676 | * Make sure there's real data dependency to RAM (when read | ||
677 | * accesses are enabled), so the compiler, the CPU and the | ||
678 | * kernel (KSM, zero page, etc.) cannot optimize away RAM | ||
679 | * accesses: | ||
680 | */ | ||
681 | static inline u64 access_data(u64 *data __attribute__((unused)), u64 val) | ||
682 | { | ||
683 | if (g->p.data_reads) | ||
684 | val += *data; | ||
685 | if (g->p.data_writes) | ||
686 | *data = val + 1; | ||
687 | return val; | ||
688 | } | ||
689 | |||
/*
 * The worker process does two types of work, a forwards going
 * loop and a backwards going loop.
 *
 * We do this so that on multiprocessor systems we do not create
 * a 'train' of processing, with highly synchronized processes,
 * skewing the whole benchmark.
 *
 * Returns the running 'val' checksum, updated via access_data(),
 * which keeps a real data dependency on the memory accesses.
 */
static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val)
{
	long words = bytes/sizeof(u64);
	u64 *data = (void *)__data;
	long chunk_0, chunk_1;
	u64 *d0, *d, *d1;
	long off;
	long i;

	/* Buffer pointer and size must be both set or both empty: */
	BUG_ON(!data && words);
	BUG_ON(data && !words);

	if (!data)
		return val;

	/* Very simple memset() work variant: */
	if (g->p.data_zero_memset && !g->p.data_rand_walk) {
		bzero(data, bytes);
		return val;
	}

	/* Spread out by PID/TID nr and by loop nr: */
	chunk_0 = words/nr_max;
	chunk_1 = words/g->p.nr_loops;
	off = nr*chunk_0 + loop*chunk_1;

	/* Wrap the starting offset back into the buffer: */
	while (off >= words)
		off -= words;

	if (g->p.data_rand_walk) {
		/* Pseudo-random walk over 1024-word chunks, seeded per task and loop: */
		u32 lfsr = nr + loop + val;
		int j;

		for (i = 0; i < words/1024; i++) {
			long start, end;

			lfsr = lfsr_32(lfsr);

			start = lfsr % words;
			end = min(start + 1024, words-1);

			if (g->p.data_zero_memset) {
				bzero(data + start, (end-start) * sizeof(u64));
			} else {
				for (j = start; j < end; j++)
					val = access_data(data + j, val);
			}
		}
	} else if (!g->p.data_backwards || (nr + loop) & 1) {

		d0 = data + off;
		d = data + off + 1;
		d1 = data + words;

		/* Process data forwards: */
		for (;;) {
			if (unlikely(d >= d1))
				d = data;
			if (unlikely(d == d0))
				break;

			val = access_data(d, val);

			d++;
		}
	} else {
		/* Process data backwards: */

		d0 = data + off;
		d = data + off - 1;
		d1 = data + words;

		/* Walk backwards, wrapping at the start, until we hit d0 again: */
		for (;;) {
			if (unlikely(d < data))
				d = data + words-1;
			if (unlikely(d == d0))
				break;

			val = access_data(d, val);

			d--;
		}
	}

	return val;
}
785 | |||
786 | static void update_curr_cpu(int task_nr, unsigned long bytes_worked) | ||
787 | { | ||
788 | unsigned int cpu; | ||
789 | |||
790 | cpu = sched_getcpu(); | ||
791 | |||
792 | g->threads[task_nr].curr_cpu = cpu; | ||
793 | prctl(0, bytes_worked); | ||
794 | } | ||
795 | |||
#define MAX_NR_NODES 64 /* sizes the node_present[] array in count_process_nodes() */
797 | |||
798 | /* | ||
799 | * Count the number of nodes a process's threads | ||
800 | * are spread out on. | ||
801 | * | ||
802 | * A count of 1 means that the process is compressed | ||
803 | * to a single node. A count of g->p.nr_nodes means it's | ||
804 | * spread out on the whole system. | ||
805 | */ | ||
806 | static int count_process_nodes(int process_nr) | ||
807 | { | ||
808 | char node_present[MAX_NR_NODES] = { 0, }; | ||
809 | int nodes; | ||
810 | int n, t; | ||
811 | |||
812 | for (t = 0; t < g->p.nr_threads; t++) { | ||
813 | struct thread_data *td; | ||
814 | int task_nr; | ||
815 | int node; | ||
816 | |||
817 | task_nr = process_nr*g->p.nr_threads + t; | ||
818 | td = g->threads + task_nr; | ||
819 | |||
820 | node = numa_node_of_cpu(td->curr_cpu); | ||
821 | node_present[node] = 1; | ||
822 | } | ||
823 | |||
824 | nodes = 0; | ||
825 | |||
826 | for (n = 0; n < MAX_NR_NODES; n++) | ||
827 | nodes += node_present[n]; | ||
828 | |||
829 | return nodes; | ||
830 | } | ||
831 | |||
832 | /* | ||
833 | * Count the number of distinct process-threads a node contains. | ||
834 | * | ||
835 | * A count of 1 means that the node contains only a single | ||
836 | * process. If all nodes on the system contain at most one | ||
837 | * process then we are well-converged. | ||
838 | */ | ||
839 | static int count_node_processes(int node) | ||
840 | { | ||
841 | int processes = 0; | ||
842 | int t, p; | ||
843 | |||
844 | for (p = 0; p < g->p.nr_proc; p++) { | ||
845 | for (t = 0; t < g->p.nr_threads; t++) { | ||
846 | struct thread_data *td; | ||
847 | int task_nr; | ||
848 | int n; | ||
849 | |||
850 | task_nr = p*g->p.nr_threads + t; | ||
851 | td = g->threads + task_nr; | ||
852 | |||
853 | n = numa_node_of_cpu(td->curr_cpu); | ||
854 | if (n == node) { | ||
855 | processes++; | ||
856 | break; | ||
857 | } | ||
858 | } | ||
859 | } | ||
860 | |||
861 | return processes; | ||
862 | } | ||
863 | |||
864 | static void calc_convergence_compression(int *strong) | ||
865 | { | ||
866 | unsigned int nodes_min, nodes_max; | ||
867 | int p; | ||
868 | |||
869 | nodes_min = -1; | ||
870 | nodes_max = 0; | ||
871 | |||
872 | for (p = 0; p < g->p.nr_proc; p++) { | ||
873 | unsigned int nodes = count_process_nodes(p); | ||
874 | |||
875 | nodes_min = min(nodes, nodes_min); | ||
876 | nodes_max = max(nodes, nodes_max); | ||
877 | } | ||
878 | |||
879 | /* Strong convergence: all threads compress on a single node: */ | ||
880 | if (nodes_min == 1 && nodes_max == 1) { | ||
881 | *strong = 1; | ||
882 | } else { | ||
883 | *strong = 0; | ||
884 | tprintf(" {%d-%d}", nodes_min, nodes_max); | ||
885 | } | ||
886 | } | ||
887 | |||
/*
 * Sample the current placement of all tasks, print per-node
 * occupancy stats and, once every process has compressed onto
 * a single node (with g->p.measure_convergence set), record the
 * convergence time and ask the workers to stop.
 */
static void calc_convergence(double runtime_ns_max, double *convergence)
{
	unsigned int loops_done_min, loops_done_max;
	int process_groups;
	int nodes[MAX_NR_NODES];
	int distance;
	int nr_min;
	int nr_max;
	int strong;
	int sum;
	int nr;
	int node;
	int cpu;
	int t;

	if (!g->p.show_convergence && !g->p.measure_convergence)
		return;

	for (node = 0; node < g->p.nr_nodes; node++)
		nodes[node] = 0;

	/* loops_done_min starts at UINT_MAX (unsigned -1) so min() works: */
	loops_done_min = -1;
	loops_done_max = 0;

	/* Count how many tasks currently run on each node: */
	for (t = 0; t < g->p.nr_tasks; t++) {
		struct thread_data *td = g->threads + t;
		unsigned int loops_done;

		cpu = td->curr_cpu;

		/* Not all threads have written it yet: */
		if (cpu < 0)
			continue;

		node = numa_node_of_cpu(cpu);

		nodes[node]++;

		loops_done = td->loops_done;
		loops_done_min = min(loops_done, loops_done_min);
		loops_done_max = max(loops_done, loops_done_max);
	}

	/* Find the least and most populated node: */
	nr_max = 0;
	nr_min = g->p.nr_tasks;
	sum = 0;

	for (node = 0; node < g->p.nr_nodes; node++) {
		nr = nodes[node];
		nr_min = min(nr, nr_min);
		nr_max = max(nr, nr_max);
		sum += nr;
	}
	BUG_ON(nr_min > nr_max);

	BUG_ON(sum > g->p.nr_tasks);

	/* NOTE(review): early-out intentionally disabled via 'if (0)': */
	if (0 && (sum < g->p.nr_tasks))
		return;

	/*
	 * Count the number of distinct process groups present
	 * on nodes - when we are converged this will decrease
	 * to g->p.nr_proc:
	 */
	process_groups = 0;

	for (node = 0; node < g->p.nr_nodes; node++) {
		int processes = count_node_processes(node);

		nr = nodes[node];
		tprintf(" %2d/%-2d", nr, processes);

		process_groups += processes;
	}

	distance = nr_max - nr_min;

	tprintf(" [%2d/%-2d]", distance, process_groups);

	tprintf(" l:%3d-%-3d (%3d)",
		loops_done_min, loops_done_max, loops_done_max-loops_done_min);

	/* Progress skew between the slowest and fastest task: */
	if (loops_done_min && loops_done_max) {
		double skew = 1.0 - (double)loops_done_min/loops_done_max;

		tprintf(" [%4.1f%%]", skew * 100.0);
	}

	calc_convergence_compression(&strong);

	if (strong && process_groups == g->p.nr_proc) {
		/* Only record the first time we reach convergence: */
		if (!*convergence) {
			*convergence = runtime_ns_max;
			tprintf(" (%6.1fs converged)\n", *convergence/1e9);
			if (g->p.measure_convergence) {
				g->all_converged = true;
				g->stop_work = true;
			}
		}
	} else {
		if (*convergence) {
			tprintf(" (%6.1fs de-converged)", runtime_ns_max/1e9);
			*convergence = 0;
		}
		tprintf("\n");
	}
}
996 | |||
997 | static void show_summary(double runtime_ns_max, int l, double *convergence) | ||
998 | { | ||
999 | tprintf("\r # %5.1f%% [%.1f mins]", | ||
1000 | (double)(l+1)/g->p.nr_loops*100.0, runtime_ns_max/1e9 / 60.0); | ||
1001 | |||
1002 | calc_convergence(runtime_ns_max, convergence); | ||
1003 | |||
1004 | if (g->p.show_details >= 0) | ||
1005 | fflush(stdout); | ||
1006 | } | ||
1007 | |||
/*
 * One benchmark worker thread: binds to its assigned CPU mask and
 * memory node, allocates its thread-private buffer, then runs the
 * main do_work() loop until nr_loops complete, the runtime limit
 * expires, or g->stop_work is set. Per-thread runtime and bytes
 * processed are published for the parent to aggregate.
 */
static void *worker_thread(void *__tdata)
{
	struct thread_data *td = __tdata;
	struct timeval start0, start, stop, diff;
	int process_nr = td->process_nr;
	int thread_nr = td->thread_nr;
	unsigned long last_perturbance;
	int task_nr = td->task_nr;
	int details = g->p.show_details;
	int first_task, last_task;
	double convergence = 0;
	u64 val = td->val;
	double runtime_ns_max;
	u8 *global_data;
	u8 *process_data;
	u8 *thread_data;
	u64 bytes_done;
	long work_done;
	u32 l;

	bind_to_cpumask(td->bind_cpumask);
	bind_to_memnode(td->bind_node);

	set_taskname("thread %d/%d", process_nr, thread_nr);

	global_data = g->data;
	process_data = td->process_data;
	thread_data = setup_private_data(g->p.bytes_thread);

	bytes_done = 0;

	/* The very last thread prints the periodic summary: */
	last_task = 0;
	if (process_nr == g->p.nr_proc-1 && thread_nr == g->p.nr_threads-1)
		last_task = 1;

	/* The very first thread injects the periodic perturbance: */
	first_task = 0;
	if (process_nr == 0 && thread_nr == 0)
		first_task = 1;

	if (details >= 2) {
		printf("# thread %2d / %2d global mem: %p, process mem: %p, thread mem: %p\n",
			process_nr, thread_nr, global_data, process_data, thread_data);
	}

	if (g->p.serialize_startup) {
		pthread_mutex_lock(&g->startup_mutex);
		g->nr_tasks_started++;
		pthread_mutex_unlock(&g->startup_mutex);

		/* Here we will wait for the main process to start us all at once: */
		pthread_mutex_lock(&g->start_work_mutex);
		g->nr_tasks_working++;

		/* Last one wake the main process: */
		if (g->nr_tasks_working == g->p.nr_tasks)
			pthread_mutex_unlock(&g->startup_done_mutex);

		pthread_mutex_unlock(&g->start_work_mutex);
	}

	gettimeofday(&start0, NULL);

	start = stop = start0;
	last_perturbance = start.tv_sec;

	for (l = 0; l < g->p.nr_loops; l++) {
		start = stop;

		if (g->stop_work)
			break;

		/* Touch global, process-shared and thread-private memory: */
		val += do_work(global_data, g->p.bytes_global, process_nr, g->p.nr_proc, l, val);
		val += do_work(process_data, g->p.bytes_process, thread_nr, g->p.nr_threads, l, val);
		val += do_work(thread_data, g->p.bytes_thread, 0, 1, l, val);

		if (g->p.sleep_usecs) {
			pthread_mutex_lock(td->process_lock);
			usleep(g->p.sleep_usecs);
			pthread_mutex_unlock(td->process_lock);
		}
		/*
		 * Amount of work to be done under a process-global lock:
		 */
		if (g->p.bytes_process_locked) {
			pthread_mutex_lock(td->process_lock);
			val += do_work(process_data, g->p.bytes_process_locked, thread_nr, g->p.nr_threads, l, val);
			pthread_mutex_unlock(td->process_lock);
		}

		work_done = g->p.bytes_global + g->p.bytes_process +
			g->p.bytes_process_locked + g->p.bytes_thread;

		update_curr_cpu(task_nr, work_done);
		bytes_done += work_done;

		/* Skip the bookkeeping below when nothing needs it: */
		if (details < 0 && !g->p.perturb_secs && !g->p.measure_convergence && !g->p.nr_secs)
			continue;

		td->loops_done = l;

		gettimeofday(&stop, NULL);

		/* Check whether our max runtime timed out: */
		if (g->p.nr_secs) {
			timersub(&stop, &start0, &diff);
			if (diff.tv_sec >= g->p.nr_secs) {
				g->stop_work = true;
				break;
			}
		}

		/* Update the summary at most once per second: */
		if (start.tv_sec == stop.tv_sec)
			continue;

		/*
		 * Perturb the first task's equilibrium every g->p.perturb_secs seconds,
		 * by migrating to CPU#0:
		 */
		if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) {
			cpu_set_t orig_mask;
			int target_cpu;
			int this_cpu;

			last_perturbance = stop.tv_sec;

			/*
			 * Depending on where we are running, move into
			 * the other half of the system, to create some
			 * real disturbance:
			 */
			this_cpu = g->threads[task_nr].curr_cpu;
			if (this_cpu < g->p.nr_cpus/2)
				target_cpu = g->p.nr_cpus-1;
			else
				target_cpu = 0;

			orig_mask = bind_to_cpu(target_cpu);

			/* Here we are running on the target CPU already */
			if (details >= 1)
				printf(" (injecting perturbalance, moved to CPU#%d)\n", target_cpu);

			bind_to_cpumask(orig_mask);
		}

		if (details >= 3) {
			timersub(&stop, &start, &diff);
			runtime_ns_max = diff.tv_sec * 1000000000;
			runtime_ns_max += diff.tv_usec * 1000;

			if (details >= 0) {
				printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016lx]\n",
					process_nr, thread_nr, runtime_ns_max / bytes_done, val);
			}
			fflush(stdout);
		}
		if (!last_task)
			continue;

		timersub(&stop, &start0, &diff);
		runtime_ns_max = diff.tv_sec * 1000000000ULL;
		runtime_ns_max += diff.tv_usec * 1000ULL;

		show_summary(runtime_ns_max, l, &convergence);
	}

	/* Publish the final per-thread runtime: */
	gettimeofday(&stop, NULL);
	timersub(&stop, &start0, &diff);
	td->runtime_ns = diff.tv_sec * 1000000000ULL;
	td->runtime_ns += diff.tv_usec * 1000ULL;

	free_data(thread_data, g->p.bytes_thread);

	/* Fold our bytes into the global total, under the shared lock: */
	pthread_mutex_lock(&g->stop_work_mutex);
	g->bytes_done += bytes_done;
	pthread_mutex_unlock(&g->stop_work_mutex);

	return NULL;
}
1188 | |||
1189 | /* | ||
1190 | * A worker process starts a couple of threads: | ||
1191 | */ | ||
1192 | static void worker_process(int process_nr) | ||
1193 | { | ||
1194 | pthread_mutex_t process_lock; | ||
1195 | struct thread_data *td; | ||
1196 | pthread_t *pthreads; | ||
1197 | u8 *process_data; | ||
1198 | int task_nr; | ||
1199 | int ret; | ||
1200 | int t; | ||
1201 | |||
1202 | pthread_mutex_init(&process_lock, NULL); | ||
1203 | set_taskname("process %d", process_nr); | ||
1204 | |||
1205 | /* | ||
1206 | * Pick up the memory policy and the CPU binding of our first thread, | ||
1207 | * so that we initialize memory accordingly: | ||
1208 | */ | ||
1209 | task_nr = process_nr*g->p.nr_threads; | ||
1210 | td = g->threads + task_nr; | ||
1211 | |||
1212 | bind_to_memnode(td->bind_node); | ||
1213 | bind_to_cpumask(td->bind_cpumask); | ||
1214 | |||
1215 | pthreads = zalloc(g->p.nr_threads * sizeof(pthread_t)); | ||
1216 | process_data = setup_private_data(g->p.bytes_process); | ||
1217 | |||
1218 | if (g->p.show_details >= 3) { | ||
1219 | printf(" # process %2d global mem: %p, process mem: %p\n", | ||
1220 | process_nr, g->data, process_data); | ||
1221 | } | ||
1222 | |||
1223 | for (t = 0; t < g->p.nr_threads; t++) { | ||
1224 | task_nr = process_nr*g->p.nr_threads + t; | ||
1225 | td = g->threads + task_nr; | ||
1226 | |||
1227 | td->process_data = process_data; | ||
1228 | td->process_nr = process_nr; | ||
1229 | td->thread_nr = t; | ||
1230 | td->task_nr = task_nr; | ||
1231 | td->val = rand(); | ||
1232 | td->curr_cpu = -1; | ||
1233 | td->process_lock = &process_lock; | ||
1234 | |||
1235 | ret = pthread_create(pthreads + t, NULL, worker_thread, td); | ||
1236 | BUG_ON(ret); | ||
1237 | } | ||
1238 | |||
1239 | for (t = 0; t < g->p.nr_threads; t++) { | ||
1240 | ret = pthread_join(pthreads[t], NULL); | ||
1241 | BUG_ON(ret); | ||
1242 | } | ||
1243 | |||
1244 | free_data(process_data, g->p.bytes_process); | ||
1245 | free(pthreads); | ||
1246 | } | ||
1247 | |||
1248 | static void print_summary(void) | ||
1249 | { | ||
1250 | if (g->p.show_details < 0) | ||
1251 | return; | ||
1252 | |||
1253 | printf("\n ###\n"); | ||
1254 | printf(" # %d %s will execute (on %d nodes, %d CPUs):\n", | ||
1255 | g->p.nr_tasks, g->p.nr_tasks == 1 ? "task" : "tasks", g->p.nr_nodes, g->p.nr_cpus); | ||
1256 | printf(" # %5dx %5ldMB global shared mem operations\n", | ||
1257 | g->p.nr_loops, g->p.bytes_global/1024/1024); | ||
1258 | printf(" # %5dx %5ldMB process shared mem operations\n", | ||
1259 | g->p.nr_loops, g->p.bytes_process/1024/1024); | ||
1260 | printf(" # %5dx %5ldMB thread local mem operations\n", | ||
1261 | g->p.nr_loops, g->p.bytes_thread/1024/1024); | ||
1262 | |||
1263 | printf(" ###\n"); | ||
1264 | |||
1265 | printf("\n ###\n"); fflush(stdout); | ||
1266 | } | ||
1267 | |||
1268 | static void init_thread_data(void) | ||
1269 | { | ||
1270 | ssize_t size = sizeof(*g->threads)*g->p.nr_tasks; | ||
1271 | int t; | ||
1272 | |||
1273 | g->threads = zalloc_shared_data(size); | ||
1274 | |||
1275 | for (t = 0; t < g->p.nr_tasks; t++) { | ||
1276 | struct thread_data *td = g->threads + t; | ||
1277 | int cpu; | ||
1278 | |||
1279 | /* Allow all nodes by default: */ | ||
1280 | td->bind_node = -1; | ||
1281 | |||
1282 | /* Allow all CPUs by default: */ | ||
1283 | CPU_ZERO(&td->bind_cpumask); | ||
1284 | for (cpu = 0; cpu < g->p.nr_cpus; cpu++) | ||
1285 | CPU_SET(cpu, &td->bind_cpumask); | ||
1286 | } | ||
1287 | } | ||
1288 | |||
1289 | static void deinit_thread_data(void) | ||
1290 | { | ||
1291 | ssize_t size = sizeof(*g->threads)*g->p.nr_tasks; | ||
1292 | |||
1293 | free_data(g->threads, size); | ||
1294 | } | ||
1295 | |||
/*
 * Set up the shared global state: allocate 'g', copy the parsed
 * options over, size and allocate all shared memory areas, and
 * apply the CPU/NODE binding lists.
 *
 * Returns 0 on success, -1 when no memory size was specified.
 */
static int init(void)
{
	/* 'g' itself lives in shared memory, visible to all forked workers: */
	g = (void *)alloc_data(sizeof(*g), MAP_SHARED, 1, 0, 0 /* THP */, 0);

	/* Copy over options: */
	g->p = p0;

	g->p.nr_cpus = numa_num_configured_cpus();

	g->p.nr_nodes = numa_max_node() + 1;

	/* char array in count_process_nodes(): */
	BUG_ON(g->p.nr_nodes > MAX_NR_NODES || g->p.nr_nodes < 0);

	/* Quiet mode without explicit details suppresses all detail output: */
	if (g->p.show_quiet && !g->p.show_details)
		g->p.show_details = -1;

	/* Some memory should be specified: */
	if (!g->p.mb_global_str && !g->p.mb_proc_str && !g->p.mb_thread_str)
		return -1;

	if (g->p.mb_global_str) {
		g->p.mb_global = atof(g->p.mb_global_str);
		BUG_ON(g->p.mb_global < 0);
	}

	if (g->p.mb_proc_str) {
		g->p.mb_proc = atof(g->p.mb_proc_str);
		BUG_ON(g->p.mb_proc < 0);
	}

	if (g->p.mb_proc_locked_str) {
		g->p.mb_proc_locked = atof(g->p.mb_proc_locked_str);
		BUG_ON(g->p.mb_proc_locked < 0);
		/* The locked portion must fit inside the process area: */
		BUG_ON(g->p.mb_proc_locked > g->p.mb_proc);
	}

	if (g->p.mb_thread_str) {
		g->p.mb_thread = atof(g->p.mb_thread_str);
		BUG_ON(g->p.mb_thread < 0);
	}

	BUG_ON(g->p.nr_threads <= 0);
	BUG_ON(g->p.nr_proc <= 0);

	g->p.nr_tasks = g->p.nr_proc*g->p.nr_threads;

	/* Convert the MB sizes to bytes: */
	g->p.bytes_global = g->p.mb_global *1024L*1024L;
	g->p.bytes_process = g->p.mb_proc *1024L*1024L;
	g->p.bytes_process_locked = g->p.mb_proc_locked *1024L*1024L;
	g->p.bytes_thread = g->p.mb_thread *1024L*1024L;

	g->data = setup_shared_data(g->p.bytes_global);

	/* Startup serialization: */
	init_global_mutex(&g->start_work_mutex);
	init_global_mutex(&g->startup_mutex);
	init_global_mutex(&g->startup_done_mutex);
	init_global_mutex(&g->stop_work_mutex);

	init_thread_data();

	tprintf("#\n");
	parse_setup_cpu_list();
	parse_setup_node_list();
	tprintf("#\n");

	print_summary();

	return 0;
}
1367 | |||
/*
 * Tear down the shared state set up by init(), in reverse order -
 * 'g' itself must be freed last since the other teardown steps
 * still dereference it.
 */
static void deinit(void)
{
	free_data(g->data, g->p.bytes_global);
	g->data = NULL;

	deinit_thread_data();

	free_data(g, sizeof(*g));
	g = NULL;
}
1378 | |||
1379 | /* | ||
1380 | * Print a short or long result, depending on the verbosity setting: | ||
1381 | */ | ||
1382 | static void print_res(const char *name, double val, | ||
1383 | const char *txt_unit, const char *txt_short, const char *txt_long) | ||
1384 | { | ||
1385 | if (!name) | ||
1386 | name = "main,"; | ||
1387 | |||
1388 | if (g->p.show_quiet) | ||
1389 | printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short); | ||
1390 | else | ||
1391 | printf(" %14.3f %s\n", val, txt_long); | ||
1392 | } | ||
1393 | |||
/*
 * Run one full benchmark pass: fork the worker processes, optionally
 * serialize their startup, wait for them all to exit, then aggregate
 * the per-thread runtimes and print the results.
 *
 * Returns 0 on success, -1 when init() fails.
 */
static int __bench_numa(const char *name)
{
	struct timeval start, stop, diff;
	u64 runtime_ns_min, runtime_ns_sum;
	pid_t *pids, pid, wpid;
	double delta_runtime;
	double runtime_avg;
	double runtime_sec_max;
	double runtime_sec_min;
	int wait_stat;
	double bytes;
	int i, t;

	if (init())
		return -1;

	pids = zalloc(g->p.nr_proc * sizeof(*pids));
	pid = -1;

	/* All threads try to acquire it, this way we can wait for them to start up: */
	pthread_mutex_lock(&g->start_work_mutex);

	if (g->p.serialize_startup) {
		tprintf(" #\n");
		tprintf(" # Startup synchronization: ..."); fflush(stdout);
	}

	gettimeofday(&start, NULL);

	/* Fork one worker process per -p: */
	for (i = 0; i < g->p.nr_proc; i++) {
		pid = fork();
		dprintf(" # process %2d: PID %d\n", i, pid);

		BUG_ON(pid < 0);
		if (!pid) {
			/* Child process: */
			worker_process(i);

			exit(0);
		}
		pids[i] = pid;

	}
	/* Wait for all the threads to start up: */
	while (g->nr_tasks_started != g->p.nr_tasks)
		usleep(1000);

	BUG_ON(g->nr_tasks_started != g->p.nr_tasks);

	if (g->p.serialize_startup) {
		double startup_sec;

		pthread_mutex_lock(&g->startup_done_mutex);

		/* This will start all threads: */
		pthread_mutex_unlock(&g->start_work_mutex);

		/* This mutex is locked - the last started thread will wake us: */
		pthread_mutex_lock(&g->startup_done_mutex);

		gettimeofday(&stop, NULL);

		timersub(&stop, &start, &diff);

		/* Accumulate in nsecs, then scale down to seconds: */
		startup_sec = diff.tv_sec * 1000000000.0;
		startup_sec += diff.tv_usec * 1000.0;
		startup_sec /= 1e9;

		tprintf(" threads initialized in %.6f seconds.\n", startup_sec);
		tprintf(" #\n");

		/* Measure the benchmark proper from this point on: */
		start = stop;
		pthread_mutex_unlock(&g->startup_done_mutex);
	} else {
		gettimeofday(&start, NULL);
	}

	/* Parent process: */


	for (i = 0; i < g->p.nr_proc; i++) {
		wpid = waitpid(pids[i], &wait_stat, 0);
		BUG_ON(wpid < 0);
		BUG_ON(!WIFEXITED(wait_stat));

	}

	/* Aggregate the per-thread runtimes published in shared memory: */
	runtime_ns_sum = 0;
	runtime_ns_min = -1LL;

	for (t = 0; t < g->p.nr_tasks; t++) {
		u64 thread_runtime_ns = g->threads[t].runtime_ns;

		runtime_ns_sum += thread_runtime_ns;
		runtime_ns_min = min(thread_runtime_ns, runtime_ns_min);
	}

	gettimeofday(&stop, NULL);
	timersub(&stop, &start, &diff);

	BUG_ON(bench_format != BENCH_FORMAT_DEFAULT);

	tprintf("\n ###\n");
	tprintf("\n");

	/* Wall-clock runtime, in nsecs first, then scaled to seconds: */
	runtime_sec_max = diff.tv_sec * 1000000000.0;
	runtime_sec_max += diff.tv_usec * 1000.0;
	runtime_sec_max /= 1e9;

	runtime_sec_min = runtime_ns_min/1e9;

	bytes = g->bytes_done;
	runtime_avg = (double)runtime_ns_sum / g->p.nr_tasks / 1e9;

	if (g->p.measure_convergence) {
		print_res(name, runtime_sec_max,
			"secs,", "NUMA-convergence-latency", "secs latency to NUMA-converge");
	}

	print_res(name, runtime_sec_max,
		"secs,", "runtime-max/thread", "secs slowest (max) thread-runtime");

	print_res(name, runtime_sec_min,
		"secs,", "runtime-min/thread", "secs fastest (min) thread-runtime");

	print_res(name, runtime_avg,
		"secs,", "runtime-avg/thread", "secs average thread-runtime");

	delta_runtime = (runtime_sec_max - runtime_sec_min)/2.0;
	print_res(name, delta_runtime / runtime_sec_max * 100.0,
		"%,", "spread-runtime/thread", "% difference between max/avg runtime");

	print_res(name, bytes / g->p.nr_tasks / 1e9,
		"GB,", "data/thread", "GB data processed, per thread");

	print_res(name, bytes / 1e9,
		"GB,", "data-total", "GB data processed, total");

	print_res(name, runtime_sec_max * 1e9 / (bytes / g->p.nr_tasks),
		"nsecs,", "runtime/byte/thread","nsecs/byte/thread runtime");

	print_res(name, bytes / g->p.nr_tasks / 1e9 / runtime_sec_max,
		"GB/sec,", "thread-speed", "GB/sec/thread speed");

	print_res(name, bytes / runtime_sec_max / 1e9,
		"GB/sec,", "total-speed", "GB/sec total speed");

	free(pids);

	deinit();

	return 0;
}
1547 | |||
#define MAX_ARGS 50 /* upper bound on argv entries, checked in command_size() */
1549 | |||
1550 | static int command_size(const char **argv) | ||
1551 | { | ||
1552 | int size = 0; | ||
1553 | |||
1554 | while (*argv) { | ||
1555 | size++; | ||
1556 | argv++; | ||
1557 | } | ||
1558 | |||
1559 | BUG_ON(size >= MAX_ARGS); | ||
1560 | |||
1561 | return size; | ||
1562 | } | ||
1563 | |||
1564 | static void init_params(struct params *p, const char *name, int argc, const char **argv) | ||
1565 | { | ||
1566 | int i; | ||
1567 | |||
1568 | printf("\n # Running %s \"perf bench numa", name); | ||
1569 | |||
1570 | for (i = 0; i < argc; i++) | ||
1571 | printf(" %s", argv[i]); | ||
1572 | |||
1573 | printf("\"\n"); | ||
1574 | |||
1575 | memset(p, 0, sizeof(*p)); | ||
1576 | |||
1577 | /* Initialize nonzero defaults: */ | ||
1578 | |||
1579 | p->serialize_startup = 1; | ||
1580 | p->data_reads = true; | ||
1581 | p->data_writes = true; | ||
1582 | p->data_backwards = true; | ||
1583 | p->data_rand_walk = true; | ||
1584 | p->nr_loops = -1; | ||
1585 | p->init_random = true; | ||
1586 | } | ||
1587 | |||
1588 | static int run_bench_numa(const char *name, const char **argv) | ||
1589 | { | ||
1590 | int argc = command_size(argv); | ||
1591 | |||
1592 | init_params(&p0, name, argc, argv); | ||
1593 | argc = parse_options(argc, argv, options, bench_numa_usage, 0); | ||
1594 | if (argc) | ||
1595 | goto err; | ||
1596 | |||
1597 | if (__bench_numa(name)) | ||
1598 | goto err; | ||
1599 | |||
1600 | return 0; | ||
1601 | |||
1602 | err: | ||
1603 | usage_with_options(numa_usage, options); | ||
1604 | return -1; | ||
1605 | } | ||
1606 | |||
/*
 * Canned option sets for the built-in test-suite entries below.
 * The *_NOTHP variants append "--thp -1" to override the earlier
 * "--thp 1" and disable transparent hugepages.
 */
#define OPT_BW_RAM "-s", "20", "-zZq", "--thp", " 1", "--no-data_rand_walk"
#define OPT_BW_RAM_NOTHP OPT_BW_RAM, "--thp", "-1"

#define OPT_CONV "-s", "100", "-zZ0qcm", "--thp", " 1"
#define OPT_CONV_NOTHP OPT_CONV, "--thp", "-1"

#define OPT_BW "-s", "20", "-zZ0q", "--thp", " 1"
#define OPT_BW_NOTHP OPT_BW, "--thp", "-1"
1615 | |||
1616 | /* | ||
1617 | * The built-in test-suite executed by "perf bench numa -a". | ||
1618 | * | ||
1619 | * (A minimum of 4 nodes and 16 GB of RAM is recommended.) | ||
1620 | */ | ||
1621 | static const char *tests[][MAX_ARGS] = { | ||
1622 | /* Basic single-stream NUMA bandwidth measurements: */ | ||
1623 | { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024", | ||
1624 | "-C" , "0", "-M", "0", OPT_BW_RAM }, | ||
1625 | { "RAM-bw-local-NOTHP,", | ||
1626 | "mem", "-p", "1", "-t", "1", "-P", "1024", | ||
1627 | "-C" , "0", "-M", "0", OPT_BW_RAM_NOTHP }, | ||
1628 | { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024", | ||
1629 | "-C" , "0", "-M", "1", OPT_BW_RAM }, | ||
1630 | |||
1631 | /* 2-stream NUMA bandwidth measurements: */ | ||
1632 | { "RAM-bw-local-2x,", "mem", "-p", "2", "-t", "1", "-P", "1024", | ||
1633 | "-C", "0,2", "-M", "0x2", OPT_BW_RAM }, | ||
1634 | { "RAM-bw-remote-2x,", "mem", "-p", "2", "-t", "1", "-P", "1024", | ||
1635 | "-C", "0,2", "-M", "1x2", OPT_BW_RAM }, | ||
1636 | |||
1637 | /* Cross-stream NUMA bandwidth measurement: */ | ||
1638 | { "RAM-bw-cross,", "mem", "-p", "2", "-t", "1", "-P", "1024", | ||
1639 | "-C", "0,8", "-M", "1,0", OPT_BW_RAM }, | ||
1640 | |||
1641 | /* Convergence latency measurements: */ | ||
1642 | { " 1x3-convergence,", "mem", "-p", "1", "-t", "3", "-P", "512", OPT_CONV }, | ||
1643 | { " 1x4-convergence,", "mem", "-p", "1", "-t", "4", "-P", "512", OPT_CONV }, | ||
1644 | { " 1x6-convergence,", "mem", "-p", "1", "-t", "6", "-P", "1020", OPT_CONV }, | ||
1645 | { " 2x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV }, | ||
1646 | { " 3x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV }, | ||
1647 | { " 4x4-convergence,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV }, | ||
1648 | { " 4x4-convergence-NOTHP,", | ||
1649 | "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV_NOTHP }, | ||
1650 | { " 4x6-convergence,", "mem", "-p", "4", "-t", "6", "-P", "1020", OPT_CONV }, | ||
1651 | { " 4x8-convergence,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_CONV }, | ||
1652 | { " 8x4-convergence,", "mem", "-p", "8", "-t", "4", "-P", "512", OPT_CONV }, | ||
1653 | { " 8x4-convergence-NOTHP,", | ||
1654 | "mem", "-p", "8", "-t", "4", "-P", "512", OPT_CONV_NOTHP }, | ||
1655 | { " 3x1-convergence,", "mem", "-p", "3", "-t", "1", "-P", "512", OPT_CONV }, | ||
1656 | { " 4x1-convergence,", "mem", "-p", "4", "-t", "1", "-P", "512", OPT_CONV }, | ||
1657 | { " 8x1-convergence,", "mem", "-p", "8", "-t", "1", "-P", "512", OPT_CONV }, | ||
1658 | { "16x1-convergence,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_CONV }, | ||
1659 | { "32x1-convergence,", "mem", "-p", "32", "-t", "1", "-P", "128", OPT_CONV }, | ||
1660 | |||
1661 | /* Various NUMA process/thread layout bandwidth measurements: */ | ||
1662 | { " 2x1-bw-process,", "mem", "-p", "2", "-t", "1", "-P", "1024", OPT_BW }, | ||
1663 | { " 3x1-bw-process,", "mem", "-p", "3", "-t", "1", "-P", "1024", OPT_BW }, | ||
1664 | { " 4x1-bw-process,", "mem", "-p", "4", "-t", "1", "-P", "1024", OPT_BW }, | ||
1665 | { " 8x1-bw-process,", "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW }, | ||
1666 | { " 8x1-bw-process-NOTHP,", | ||
1667 | "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW_NOTHP }, | ||
1668 | { "16x1-bw-process,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_BW }, | ||
1669 | |||
1670 | { " 4x1-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW }, | ||
1671 | { " 8x1-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW }, | ||
1672 | { "16x1-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW }, | ||
1673 | { "32x1-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW }, | ||
1674 | |||
1675 | { " 2x3-bw-thread,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW }, | ||
1676 | { " 4x4-bw-thread,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW }, | ||
1677 | { " 4x6-bw-thread,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW }, | ||
1678 | { " 4x8-bw-thread,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW }, | ||
1679 | { " 4x8-bw-thread-NOTHP,", | ||
1680 | "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW_NOTHP }, | ||
1681 | { " 3x3-bw-thread,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW }, | ||
1682 | { " 5x5-bw-thread,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW }, | ||
1683 | |||
1684 | { "2x16-bw-thread,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW }, | ||
1685 | { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW }, | ||
1686 | |||
1687 | { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW }, | ||
1688 | { "numa02-bw-NOTHP,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW_NOTHP }, | ||
1689 | { "numa01-bw-thread,", "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW }, | ||
1690 | { "numa01-bw-thread-NOTHP,", | ||
1691 | "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW_NOTHP }, | ||
1692 | }; | ||
1693 | |||
1694 | static int bench_all(void) | ||
1695 | { | ||
1696 | int nr = ARRAY_SIZE(tests); | ||
1697 | int ret; | ||
1698 | int i; | ||
1699 | |||
1700 | ret = system("echo ' #'; echo ' # Running test on: '$(uname -a); echo ' #'"); | ||
1701 | BUG_ON(ret < 0); | ||
1702 | |||
1703 | for (i = 0; i < nr; i++) { | ||
1704 | if (run_bench_numa(tests[i][0], tests[i] + 1)) | ||
1705 | return -1; | ||
1706 | } | ||
1707 | |||
1708 | printf("\n"); | ||
1709 | |||
1710 | return 0; | ||
1711 | } | ||
1712 | |||
1713 | int bench_numa(int argc, const char **argv, const char *prefix __maybe_unused) | ||
1714 | { | ||
1715 | init_params(&p0, "main,", argc, argv); | ||
1716 | argc = parse_options(argc, argv, options, bench_numa_usage, 0); | ||
1717 | if (argc) | ||
1718 | goto err; | ||
1719 | |||
1720 | if (p0.run_all) | ||
1721 | return bench_all(); | ||
1722 | |||
1723 | if (__bench_numa(NULL)) | ||
1724 | goto err; | ||
1725 | |||
1726 | return 0; | ||
1727 | |||
1728 | err: | ||
1729 | usage_with_options(numa_usage, options); | ||
1730 | return -1; | ||
1731 | } | ||
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index afd1255a632f..77298bf892b8 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c | |||
@@ -35,6 +35,18 @@ struct bench_suite { | |||
35 | /* sentinel: easy for help */ | 35 | /* sentinel: easy for help */ |
36 | #define suite_all { "all", "Test all benchmark suites", NULL } | 36 | #define suite_all { "all", "Test all benchmark suites", NULL } |
37 | 37 | ||
38 | #ifdef LIBNUMA_SUPPORT | ||
39 | static struct bench_suite numa_suites[] = { | ||
40 | { "mem", | ||
41 | "Benchmark for NUMA workloads", | ||
42 | bench_numa }, | ||
43 | suite_all, | ||
44 | { NULL, | ||
45 | NULL, | ||
46 | NULL } | ||
47 | }; | ||
48 | #endif | ||
49 | |||
38 | static struct bench_suite sched_suites[] = { | 50 | static struct bench_suite sched_suites[] = { |
39 | { "messaging", | 51 | { "messaging", |
40 | "Benchmark for scheduler and IPC mechanisms", | 52 | "Benchmark for scheduler and IPC mechanisms", |
@@ -68,6 +80,11 @@ struct bench_subsys { | |||
68 | }; | 80 | }; |
69 | 81 | ||
70 | static struct bench_subsys subsystems[] = { | 82 | static struct bench_subsys subsystems[] = { |
83 | #ifdef LIBNUMA_SUPPORT | ||
84 | { "numa", | ||
85 | "NUMA scheduling and MM behavior", | ||
86 | numa_suites }, | ||
87 | #endif | ||
71 | { "sched", | 88 | { "sched", |
72 | "scheduler and IPC mechanism", | 89 | "scheduler and IPC mechanism", |
73 | sched_suites }, | 90 | sched_suites }, |
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index c746108c5d48..46878daca5cc 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include "util/debug.h" | 17 | #include "util/debug.h" |
18 | 18 | ||
19 | #include <linux/rbtree.h> | 19 | #include <linux/rbtree.h> |
20 | #include <linux/string.h> | ||
20 | 21 | ||
21 | struct alloc_stat; | 22 | struct alloc_stat; |
22 | typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *); | 23 | typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *); |
@@ -618,12 +619,11 @@ static int sort_dimension__add(const char *tok, struct list_head *list) | |||
618 | 619 | ||
619 | for (i = 0; i < NUM_AVAIL_SORTS; i++) { | 620 | for (i = 0; i < NUM_AVAIL_SORTS; i++) { |
620 | if (!strcmp(avail_sorts[i]->name, tok)) { | 621 | if (!strcmp(avail_sorts[i]->name, tok)) { |
621 | sort = malloc(sizeof(*sort)); | 622 | sort = memdup(avail_sorts[i], sizeof(*avail_sorts[i])); |
622 | if (!sort) { | 623 | if (!sort) { |
623 | pr_err("%s: malloc failed\n", __func__); | 624 | pr_err("%s: memdup failed\n", __func__); |
624 | return -1; | 625 | return -1; |
625 | } | 626 | } |
626 | memcpy(sort, avail_sorts[i], sizeof(*sort)); | ||
627 | list_add_tail(&sort->list, list); | 627 | list_add_tail(&sort->list, list); |
628 | return 0; | 628 | return 0; |
629 | } | 629 | } |
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 1c2ac148a7d5..0368a1036ad6 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -65,6 +65,10 @@ | |||
65 | #define CNTR_NOT_SUPPORTED "<not supported>" | 65 | #define CNTR_NOT_SUPPORTED "<not supported>" |
66 | #define CNTR_NOT_COUNTED "<not counted>" | 66 | #define CNTR_NOT_COUNTED "<not counted>" |
67 | 67 | ||
68 | static void print_stat(int argc, const char **argv); | ||
69 | static void print_counter_aggr(struct perf_evsel *counter, char *prefix); | ||
70 | static void print_counter(struct perf_evsel *counter, char *prefix); | ||
71 | |||
68 | static struct perf_evlist *evsel_list; | 72 | static struct perf_evlist *evsel_list; |
69 | 73 | ||
70 | static struct perf_target target = { | 74 | static struct perf_target target = { |
@@ -87,6 +91,8 @@ static FILE *output = NULL; | |||
87 | static const char *pre_cmd = NULL; | 91 | static const char *pre_cmd = NULL; |
88 | static const char *post_cmd = NULL; | 92 | static const char *post_cmd = NULL; |
89 | static bool sync_run = false; | 93 | static bool sync_run = false; |
94 | static unsigned int interval = 0; | ||
95 | static struct timespec ref_time; | ||
90 | 96 | ||
91 | static volatile int done = 0; | 97 | static volatile int done = 0; |
92 | 98 | ||
@@ -94,6 +100,28 @@ struct perf_stat { | |||
94 | struct stats res_stats[3]; | 100 | struct stats res_stats[3]; |
95 | }; | 101 | }; |
96 | 102 | ||
103 | static inline void diff_timespec(struct timespec *r, struct timespec *a, | ||
104 | struct timespec *b) | ||
105 | { | ||
106 | r->tv_sec = a->tv_sec - b->tv_sec; | ||
107 | if (a->tv_nsec < b->tv_nsec) { | ||
108 | r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec; | ||
109 | r->tv_sec--; | ||
110 | } else { | ||
111 | r->tv_nsec = a->tv_nsec - b->tv_nsec ; | ||
112 | } | ||
113 | } | ||
114 | |||
115 | static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel) | ||
116 | { | ||
117 | return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus; | ||
118 | } | ||
119 | |||
120 | static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel) | ||
121 | { | ||
122 | return perf_evsel__cpus(evsel)->nr; | ||
123 | } | ||
124 | |||
97 | static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) | 125 | static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) |
98 | { | 126 | { |
99 | evsel->priv = zalloc(sizeof(struct perf_stat)); | 127 | evsel->priv = zalloc(sizeof(struct perf_stat)); |
@@ -106,14 +134,27 @@ static void perf_evsel__free_stat_priv(struct perf_evsel *evsel) | |||
106 | evsel->priv = NULL; | 134 | evsel->priv = NULL; |
107 | } | 135 | } |
108 | 136 | ||
109 | static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel) | 137 | static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel) |
110 | { | 138 | { |
111 | return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus; | 139 | void *addr; |
140 | size_t sz; | ||
141 | |||
142 | sz = sizeof(*evsel->counts) + | ||
143 | (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values)); | ||
144 | |||
145 | addr = zalloc(sz); | ||
146 | if (!addr) | ||
147 | return -ENOMEM; | ||
148 | |||
149 | evsel->prev_raw_counts = addr; | ||
150 | |||
151 | return 0; | ||
112 | } | 152 | } |
113 | 153 | ||
114 | static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel) | 154 | static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) |
115 | { | 155 | { |
116 | return perf_evsel__cpus(evsel)->nr; | 156 | free(evsel->prev_raw_counts); |
157 | evsel->prev_raw_counts = NULL; | ||
117 | } | 158 | } |
118 | 159 | ||
119 | static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; | 160 | static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; |
@@ -245,16 +286,69 @@ static int read_counter(struct perf_evsel *counter) | |||
245 | return 0; | 286 | return 0; |
246 | } | 287 | } |
247 | 288 | ||
289 | static void print_interval(void) | ||
290 | { | ||
291 | static int num_print_interval; | ||
292 | struct perf_evsel *counter; | ||
293 | struct perf_stat *ps; | ||
294 | struct timespec ts, rs; | ||
295 | char prefix[64]; | ||
296 | |||
297 | if (no_aggr) { | ||
298 | list_for_each_entry(counter, &evsel_list->entries, node) { | ||
299 | ps = counter->priv; | ||
300 | memset(ps->res_stats, 0, sizeof(ps->res_stats)); | ||
301 | read_counter(counter); | ||
302 | } | ||
303 | } else { | ||
304 | list_for_each_entry(counter, &evsel_list->entries, node) { | ||
305 | ps = counter->priv; | ||
306 | memset(ps->res_stats, 0, sizeof(ps->res_stats)); | ||
307 | read_counter_aggr(counter); | ||
308 | } | ||
309 | } | ||
310 | clock_gettime(CLOCK_MONOTONIC, &ts); | ||
311 | diff_timespec(&rs, &ts, &ref_time); | ||
312 | sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep); | ||
313 | |||
314 | if (num_print_interval == 0 && !csv_output) { | ||
315 | if (no_aggr) | ||
316 | fprintf(output, "# time CPU counts events\n"); | ||
317 | else | ||
318 | fprintf(output, "# time counts events\n"); | ||
319 | } | ||
320 | |||
321 | if (++num_print_interval == 25) | ||
322 | num_print_interval = 0; | ||
323 | |||
324 | if (no_aggr) { | ||
325 | list_for_each_entry(counter, &evsel_list->entries, node) | ||
326 | print_counter(counter, prefix); | ||
327 | } else { | ||
328 | list_for_each_entry(counter, &evsel_list->entries, node) | ||
329 | print_counter_aggr(counter, prefix); | ||
330 | } | ||
331 | } | ||
332 | |||
248 | static int __run_perf_stat(int argc __maybe_unused, const char **argv) | 333 | static int __run_perf_stat(int argc __maybe_unused, const char **argv) |
249 | { | 334 | { |
250 | char msg[512]; | 335 | char msg[512]; |
251 | unsigned long long t0, t1; | 336 | unsigned long long t0, t1; |
252 | struct perf_evsel *counter; | 337 | struct perf_evsel *counter; |
338 | struct timespec ts; | ||
253 | int status = 0; | 339 | int status = 0; |
254 | int child_ready_pipe[2], go_pipe[2]; | 340 | int child_ready_pipe[2], go_pipe[2]; |
255 | const bool forks = (argc > 0); | 341 | const bool forks = (argc > 0); |
256 | char buf; | 342 | char buf; |
257 | 343 | ||
344 | if (interval) { | ||
345 | ts.tv_sec = interval / 1000; | ||
346 | ts.tv_nsec = (interval % 1000) * 1000000; | ||
347 | } else { | ||
348 | ts.tv_sec = 1; | ||
349 | ts.tv_nsec = 0; | ||
350 | } | ||
351 | |||
258 | if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { | 352 | if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { |
259 | perror("failed to create pipes"); | 353 | perror("failed to create pipes"); |
260 | return -1; | 354 | return -1; |
@@ -347,14 +441,25 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv) | |||
347 | * Enable counters and exec the command: | 441 | * Enable counters and exec the command: |
348 | */ | 442 | */ |
349 | t0 = rdclock(); | 443 | t0 = rdclock(); |
444 | clock_gettime(CLOCK_MONOTONIC, &ref_time); | ||
350 | 445 | ||
351 | if (forks) { | 446 | if (forks) { |
352 | close(go_pipe[1]); | 447 | close(go_pipe[1]); |
448 | if (interval) { | ||
449 | while (!waitpid(child_pid, &status, WNOHANG)) { | ||
450 | nanosleep(&ts, NULL); | ||
451 | print_interval(); | ||
452 | } | ||
453 | } | ||
353 | wait(&status); | 454 | wait(&status); |
354 | if (WIFSIGNALED(status)) | 455 | if (WIFSIGNALED(status)) |
355 | psignal(WTERMSIG(status), argv[0]); | 456 | psignal(WTERMSIG(status), argv[0]); |
356 | } else { | 457 | } else { |
357 | while(!done) sleep(1); | 458 | while (!done) { |
459 | nanosleep(&ts, NULL); | ||
460 | if (interval) | ||
461 | print_interval(); | ||
462 | } | ||
358 | } | 463 | } |
359 | 464 | ||
360 | t1 = rdclock(); | 465 | t1 = rdclock(); |
@@ -440,7 +545,7 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) | |||
440 | if (evsel->cgrp) | 545 | if (evsel->cgrp) |
441 | fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); | 546 | fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); |
442 | 547 | ||
443 | if (csv_output) | 548 | if (csv_output || interval) |
444 | return; | 549 | return; |
445 | 550 | ||
446 | if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) | 551 | if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) |
@@ -654,12 +759,11 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) | |||
654 | if (evsel->cgrp) | 759 | if (evsel->cgrp) |
655 | fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); | 760 | fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); |
656 | 761 | ||
657 | if (csv_output) | 762 | if (csv_output || interval) |
658 | return; | 763 | return; |
659 | 764 | ||
660 | if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { | 765 | if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { |
661 | total = avg_stats(&runtime_cycles_stats[cpu]); | 766 | total = avg_stats(&runtime_cycles_stats[cpu]); |
662 | |||
663 | if (total) | 767 | if (total) |
664 | ratio = avg / total; | 768 | ratio = avg / total; |
665 | 769 | ||
@@ -753,12 +857,15 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) | |||
753 | * Print out the results of a single counter: | 857 | * Print out the results of a single counter: |
754 | * aggregated counts in system-wide mode | 858 | * aggregated counts in system-wide mode |
755 | */ | 859 | */ |
756 | static void print_counter_aggr(struct perf_evsel *counter) | 860 | static void print_counter_aggr(struct perf_evsel *counter, char *prefix) |
757 | { | 861 | { |
758 | struct perf_stat *ps = counter->priv; | 862 | struct perf_stat *ps = counter->priv; |
759 | double avg = avg_stats(&ps->res_stats[0]); | 863 | double avg = avg_stats(&ps->res_stats[0]); |
760 | int scaled = counter->counts->scaled; | 864 | int scaled = counter->counts->scaled; |
761 | 865 | ||
866 | if (prefix) | ||
867 | fprintf(output, "%s", prefix); | ||
868 | |||
762 | if (scaled == -1) { | 869 | if (scaled == -1) { |
763 | fprintf(output, "%*s%s%*s", | 870 | fprintf(output, "%*s%s%*s", |
764 | csv_output ? 0 : 18, | 871 | csv_output ? 0 : 18, |
@@ -801,7 +908,7 @@ static void print_counter_aggr(struct perf_evsel *counter) | |||
801 | * Print out the results of a single counter: | 908 | * Print out the results of a single counter: |
802 | * does not use aggregated count in system-wide | 909 | * does not use aggregated count in system-wide |
803 | */ | 910 | */ |
804 | static void print_counter(struct perf_evsel *counter) | 911 | static void print_counter(struct perf_evsel *counter, char *prefix) |
805 | { | 912 | { |
806 | u64 ena, run, val; | 913 | u64 ena, run, val; |
807 | int cpu; | 914 | int cpu; |
@@ -810,6 +917,10 @@ static void print_counter(struct perf_evsel *counter) | |||
810 | val = counter->counts->cpu[cpu].val; | 917 | val = counter->counts->cpu[cpu].val; |
811 | ena = counter->counts->cpu[cpu].ena; | 918 | ena = counter->counts->cpu[cpu].ena; |
812 | run = counter->counts->cpu[cpu].run; | 919 | run = counter->counts->cpu[cpu].run; |
920 | |||
921 | if (prefix) | ||
922 | fprintf(output, "%s", prefix); | ||
923 | |||
813 | if (run == 0 || ena == 0) { | 924 | if (run == 0 || ena == 0) { |
814 | fprintf(output, "CPU%*d%s%*s%s%*s", | 925 | fprintf(output, "CPU%*d%s%*s%s%*s", |
815 | csv_output ? 0 : -4, | 926 | csv_output ? 0 : -4, |
@@ -871,10 +982,10 @@ static void print_stat(int argc, const char **argv) | |||
871 | 982 | ||
872 | if (no_aggr) { | 983 | if (no_aggr) { |
873 | list_for_each_entry(counter, &evsel_list->entries, node) | 984 | list_for_each_entry(counter, &evsel_list->entries, node) |
874 | print_counter(counter); | 985 | print_counter(counter, NULL); |
875 | } else { | 986 | } else { |
876 | list_for_each_entry(counter, &evsel_list->entries, node) | 987 | list_for_each_entry(counter, &evsel_list->entries, node) |
877 | print_counter_aggr(counter); | 988 | print_counter_aggr(counter, NULL); |
878 | } | 989 | } |
879 | 990 | ||
880 | if (!csv_output) { | 991 | if (!csv_output) { |
@@ -895,7 +1006,7 @@ static volatile int signr = -1; | |||
895 | 1006 | ||
896 | static void skip_signal(int signo) | 1007 | static void skip_signal(int signo) |
897 | { | 1008 | { |
898 | if(child_pid == -1) | 1009 | if ((child_pid == -1) || interval) |
899 | done = 1; | 1010 | done = 1; |
900 | 1011 | ||
901 | signr = signo; | 1012 | signr = signo; |
@@ -1115,6 +1226,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1115 | "command to run prior to the measured command"), | 1226 | "command to run prior to the measured command"), |
1116 | OPT_STRING(0, "post", &post_cmd, "command", | 1227 | OPT_STRING(0, "post", &post_cmd, "command", |
1117 | "command to run after to the measured command"), | 1228 | "command to run after to the measured command"), |
1229 | OPT_UINTEGER('I', "interval-print", &interval, | ||
1230 | "print counts at regular interval in ms (>= 100)"), | ||
1118 | OPT_END() | 1231 | OPT_END() |
1119 | }; | 1232 | }; |
1120 | const char * const stat_usage[] = { | 1233 | const char * const stat_usage[] = { |
@@ -1215,12 +1328,23 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1215 | usage_with_options(stat_usage, options); | 1328 | usage_with_options(stat_usage, options); |
1216 | return -1; | 1329 | return -1; |
1217 | } | 1330 | } |
1331 | if (interval && interval < 100) { | ||
1332 | pr_err("print interval must be >= 100ms\n"); | ||
1333 | usage_with_options(stat_usage, options); | ||
1334 | return -1; | ||
1335 | } | ||
1218 | 1336 | ||
1219 | list_for_each_entry(pos, &evsel_list->entries, node) { | 1337 | list_for_each_entry(pos, &evsel_list->entries, node) { |
1220 | if (perf_evsel__alloc_stat_priv(pos) < 0 || | 1338 | if (perf_evsel__alloc_stat_priv(pos) < 0 || |
1221 | perf_evsel__alloc_counts(pos, perf_evsel__nr_cpus(pos)) < 0) | 1339 | perf_evsel__alloc_counts(pos, perf_evsel__nr_cpus(pos)) < 0) |
1222 | goto out_free_fd; | 1340 | goto out_free_fd; |
1223 | } | 1341 | } |
1342 | if (interval) { | ||
1343 | list_for_each_entry(pos, &evsel_list->entries, node) { | ||
1344 | if (perf_evsel__alloc_prev_raw_counts(pos) < 0) | ||
1345 | goto out_free_fd; | ||
1346 | } | ||
1347 | } | ||
1224 | 1348 | ||
1225 | /* | 1349 | /* |
1226 | * We dont want to block the signals - that would cause | 1350 | * We dont want to block the signals - that would cause |
@@ -1230,6 +1354,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1230 | */ | 1354 | */ |
1231 | atexit(sig_atexit); | 1355 | atexit(sig_atexit); |
1232 | signal(SIGINT, skip_signal); | 1356 | signal(SIGINT, skip_signal); |
1357 | signal(SIGCHLD, skip_signal); | ||
1233 | signal(SIGALRM, skip_signal); | 1358 | signal(SIGALRM, skip_signal); |
1234 | signal(SIGABRT, skip_signal); | 1359 | signal(SIGABRT, skip_signal); |
1235 | 1360 | ||
@@ -1242,11 +1367,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1242 | status = run_perf_stat(argc, argv); | 1367 | status = run_perf_stat(argc, argv); |
1243 | } | 1368 | } |
1244 | 1369 | ||
1245 | if (status != -1) | 1370 | if (status != -1 && !interval) |
1246 | print_stat(argc, argv); | 1371 | print_stat(argc, argv); |
1247 | out_free_fd: | 1372 | out_free_fd: |
1248 | list_for_each_entry(pos, &evsel_list->entries, node) | 1373 | list_for_each_entry(pos, &evsel_list->entries, node) { |
1249 | perf_evsel__free_stat_priv(pos); | 1374 | perf_evsel__free_stat_priv(pos); |
1375 | perf_evsel__free_counts(pos); | ||
1376 | perf_evsel__free_prev_raw_counts(pos); | ||
1377 | } | ||
1250 | perf_evlist__delete_maps(evsel_list); | 1378 | perf_evlist__delete_maps(evsel_list); |
1251 | out: | 1379 | out: |
1252 | perf_evlist__delete(evsel_list); | 1380 | perf_evlist__delete(evsel_list); |
diff --git a/tools/perf/config/feature-tests.mak b/tools/perf/config/feature-tests.mak index f5ac77485a4f..b4eabb44e381 100644 --- a/tools/perf/config/feature-tests.mak +++ b/tools/perf/config/feature-tests.mak | |||
@@ -225,3 +225,14 @@ int main(void) | |||
225 | return on_exit(NULL, NULL); | 225 | return on_exit(NULL, NULL); |
226 | } | 226 | } |
227 | endef | 227 | endef |
228 | |||
229 | define SOURCE_LIBNUMA | ||
230 | #include <numa.h> | ||
231 | #include <numaif.h> | ||
232 | |||
233 | int main(void) | ||
234 | { | ||
235 | numa_available(); | ||
236 | return 0; | ||
237 | } | ||
238 | endef \ No newline at end of file | ||
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index f61dd3fb546b..bdcceb886f77 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c | |||
@@ -19,6 +19,11 @@ | |||
19 | * permissions. All the event text files are stored there. | 19 | * permissions. All the event text files are stored there. |
20 | */ | 20 | */ |
21 | 21 | ||
22 | /* | ||
23 | * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select | ||
24 | * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu. | ||
25 | */ | ||
26 | #define __SANE_USERSPACE_TYPES__ | ||
22 | #include <stdlib.h> | 27 | #include <stdlib.h> |
23 | #include <stdio.h> | 28 | #include <stdio.h> |
24 | #include <inttypes.h> | 29 | #include <inttypes.h> |
diff --git a/tools/perf/tests/open-syscall-all-cpus.c b/tools/perf/tests/open-syscall-all-cpus.c index 9b920a0cce79..b0657a9ccda6 100644 --- a/tools/perf/tests/open-syscall-all-cpus.c +++ b/tools/perf/tests/open-syscall-all-cpus.c | |||
@@ -98,6 +98,7 @@ int test__open_syscall_event_on_all_cpus(void) | |||
98 | } | 98 | } |
99 | } | 99 | } |
100 | 100 | ||
101 | perf_evsel__free_counts(evsel); | ||
101 | out_close_fd: | 102 | out_close_fd: |
102 | perf_evsel__close_fd(evsel, 1, threads->nr); | 103 | perf_evsel__close_fd(evsel, 1, threads->nr); |
103 | out_evsel_delete: | 104 | out_evsel_delete: |
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 6ea66cf6791b..1e8e5128d0da 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c | |||
@@ -96,7 +96,7 @@ int test__PERF_RECORD(void) | |||
96 | err = perf_evlist__prepare_workload(evlist, &opts, argv); | 96 | err = perf_evlist__prepare_workload(evlist, &opts, argv); |
97 | if (err < 0) { | 97 | if (err < 0) { |
98 | pr_debug("Couldn't run the workload!\n"); | 98 | pr_debug("Couldn't run the workload!\n"); |
99 | goto out_delete_evlist; | 99 | goto out_delete_maps; |
100 | } | 100 | } |
101 | 101 | ||
102 | /* | 102 | /* |
@@ -111,7 +111,7 @@ int test__PERF_RECORD(void) | |||
111 | err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask); | 111 | err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask); |
112 | if (err < 0) { | 112 | if (err < 0) { |
113 | pr_debug("sched__get_first_possible_cpu: %s\n", strerror(errno)); | 113 | pr_debug("sched__get_first_possible_cpu: %s\n", strerror(errno)); |
114 | goto out_delete_evlist; | 114 | goto out_delete_maps; |
115 | } | 115 | } |
116 | 116 | ||
117 | cpu = err; | 117 | cpu = err; |
@@ -121,7 +121,7 @@ int test__PERF_RECORD(void) | |||
121 | */ | 121 | */ |
122 | if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, &cpu_mask) < 0) { | 122 | if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, &cpu_mask) < 0) { |
123 | pr_debug("sched_setaffinity: %s\n", strerror(errno)); | 123 | pr_debug("sched_setaffinity: %s\n", strerror(errno)); |
124 | goto out_delete_evlist; | 124 | goto out_delete_maps; |
125 | } | 125 | } |
126 | 126 | ||
127 | /* | 127 | /* |
@@ -131,7 +131,7 @@ int test__PERF_RECORD(void) | |||
131 | err = perf_evlist__open(evlist); | 131 | err = perf_evlist__open(evlist); |
132 | if (err < 0) { | 132 | if (err < 0) { |
133 | pr_debug("perf_evlist__open: %s\n", strerror(errno)); | 133 | pr_debug("perf_evlist__open: %s\n", strerror(errno)); |
134 | goto out_delete_evlist; | 134 | goto out_delete_maps; |
135 | } | 135 | } |
136 | 136 | ||
137 | /* | 137 | /* |
@@ -142,7 +142,7 @@ int test__PERF_RECORD(void) | |||
142 | err = perf_evlist__mmap(evlist, opts.mmap_pages, false); | 142 | err = perf_evlist__mmap(evlist, opts.mmap_pages, false); |
143 | if (err < 0) { | 143 | if (err < 0) { |
144 | pr_debug("perf_evlist__mmap: %s\n", strerror(errno)); | 144 | pr_debug("perf_evlist__mmap: %s\n", strerror(errno)); |
145 | goto out_delete_evlist; | 145 | goto out_delete_maps; |
146 | } | 146 | } |
147 | 147 | ||
148 | /* | 148 | /* |
@@ -305,6 +305,8 @@ found_exit: | |||
305 | } | 305 | } |
306 | out_err: | 306 | out_err: |
307 | perf_evlist__munmap(evlist); | 307 | perf_evlist__munmap(evlist); |
308 | out_delete_maps: | ||
309 | perf_evlist__delete_maps(evlist); | ||
308 | out_delete_evlist: | 310 | out_delete_evlist: |
309 | perf_evlist__delete(evlist); | 311 | perf_evlist__delete(evlist); |
310 | out: | 312 | out: |
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index a1a8442829b4..7b4c4d26d1ba 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c | |||
@@ -44,7 +44,7 @@ int test__vmlinux_matches_kallsyms(void) | |||
44 | */ | 44 | */ |
45 | if (machine__create_kernel_maps(&kallsyms) < 0) { | 45 | if (machine__create_kernel_maps(&kallsyms) < 0) { |
46 | pr_debug("machine__create_kernel_maps "); | 46 | pr_debug("machine__create_kernel_maps "); |
47 | return -1; | 47 | goto out; |
48 | } | 48 | } |
49 | 49 | ||
50 | /* | 50 | /* |
@@ -227,5 +227,7 @@ detour: | |||
227 | map__fprintf(pos, stderr); | 227 | map__fprintf(pos, stderr); |
228 | } | 228 | } |
229 | out: | 229 | out: |
230 | machine__exit(&kallsyms); | ||
231 | machine__exit(&vmlinux); | ||
230 | return err; | 232 | return err; |
231 | } | 233 | } |
diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index 588bcb2d008b..809ea4632a34 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c | |||
@@ -273,6 +273,8 @@ void ui_browser__hide(struct ui_browser *browser __maybe_unused) | |||
273 | { | 273 | { |
274 | pthread_mutex_lock(&ui__lock); | 274 | pthread_mutex_lock(&ui__lock); |
275 | ui_helpline__pop(); | 275 | ui_helpline__pop(); |
276 | free(browser->helpline); | ||
277 | browser->helpline = NULL; | ||
276 | pthread_mutex_unlock(&ui__lock); | 278 | pthread_mutex_unlock(&ui__lock); |
277 | } | 279 | } |
278 | 280 | ||
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 3cf2c3e0605f..5cd13d768cec 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c | |||
@@ -476,8 +476,10 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, | |||
476 | } | 476 | } |
477 | } | 477 | } |
478 | 478 | ||
479 | if (kallsyms__parse(filename, &args, find_symbol_cb) <= 0) | 479 | if (kallsyms__parse(filename, &args, find_symbol_cb) <= 0) { |
480 | free(event); | ||
480 | return -ENOENT; | 481 | return -ENOENT; |
482 | } | ||
481 | 483 | ||
482 | map = machine->vmlinux_maps[MAP__FUNCTION]; | 484 | map = machine->vmlinux_maps[MAP__FUNCTION]; |
483 | size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), | 485 | size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), |
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index e45332d08a58..baa26ddbcc7b 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c | |||
@@ -640,6 +640,11 @@ void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads) | |||
640 | } | 640 | } |
641 | } | 641 | } |
642 | 642 | ||
643 | void perf_evsel__free_counts(struct perf_evsel *evsel) | ||
644 | { | ||
645 | free(evsel->counts); | ||
646 | } | ||
647 | |||
643 | void perf_evsel__exit(struct perf_evsel *evsel) | 648 | void perf_evsel__exit(struct perf_evsel *evsel) |
644 | { | 649 | { |
645 | assert(list_empty(&evsel->node)); | 650 | assert(list_empty(&evsel->node)); |
@@ -659,6 +664,28 @@ void perf_evsel__delete(struct perf_evsel *evsel) | |||
659 | free(evsel); | 664 | free(evsel); |
660 | } | 665 | } |
661 | 666 | ||
667 | static inline void compute_deltas(struct perf_evsel *evsel, | ||
668 | int cpu, | ||
669 | struct perf_counts_values *count) | ||
670 | { | ||
671 | struct perf_counts_values tmp; | ||
672 | |||
673 | if (!evsel->prev_raw_counts) | ||
674 | return; | ||
675 | |||
676 | if (cpu == -1) { | ||
677 | tmp = evsel->prev_raw_counts->aggr; | ||
678 | evsel->prev_raw_counts->aggr = *count; | ||
679 | } else { | ||
680 | tmp = evsel->prev_raw_counts->cpu[cpu]; | ||
681 | evsel->prev_raw_counts->cpu[cpu] = *count; | ||
682 | } | ||
683 | |||
684 | count->val = count->val - tmp.val; | ||
685 | count->ena = count->ena - tmp.ena; | ||
686 | count->run = count->run - tmp.run; | ||
687 | } | ||
688 | |||
662 | int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, | 689 | int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, |
663 | int cpu, int thread, bool scale) | 690 | int cpu, int thread, bool scale) |
664 | { | 691 | { |
@@ -674,6 +701,8 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, | |||
674 | if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0) | 701 | if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0) |
675 | return -errno; | 702 | return -errno; |
676 | 703 | ||
704 | compute_deltas(evsel, cpu, &count); | ||
705 | |||
677 | if (scale) { | 706 | if (scale) { |
678 | if (count.run == 0) | 707 | if (count.run == 0) |
679 | count.val = 0; | 708 | count.val = 0; |
@@ -712,6 +741,8 @@ int __perf_evsel__read(struct perf_evsel *evsel, | |||
712 | } | 741 | } |
713 | } | 742 | } |
714 | 743 | ||
744 | compute_deltas(evsel, -1, aggr); | ||
745 | |||
715 | evsel->counts->scaled = 0; | 746 | evsel->counts->scaled = 0; |
716 | if (scale) { | 747 | if (scale) { |
717 | if (aggr->run == 0) { | 748 | if (aggr->run == 0) { |
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index c68d1b82e843..cbf42322a27e 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h | |||
@@ -53,6 +53,7 @@ struct perf_evsel { | |||
53 | struct xyarray *sample_id; | 53 | struct xyarray *sample_id; |
54 | u64 *id; | 54 | u64 *id; |
55 | struct perf_counts *counts; | 55 | struct perf_counts *counts; |
56 | struct perf_counts *prev_raw_counts; | ||
56 | int idx; | 57 | int idx; |
57 | u32 ids; | 58 | u32 ids; |
58 | struct hists hists; | 59 | struct hists hists; |
@@ -116,6 +117,7 @@ int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads); | |||
116 | int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus); | 117 | int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus); |
117 | void perf_evsel__free_fd(struct perf_evsel *evsel); | 118 | void perf_evsel__free_fd(struct perf_evsel *evsel); |
118 | void perf_evsel__free_id(struct perf_evsel *evsel); | 119 | void perf_evsel__free_id(struct perf_evsel *evsel); |
120 | void perf_evsel__free_counts(struct perf_evsel *evsel); | ||
119 | void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads); | 121 | void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads); |
120 | 122 | ||
121 | void __perf_evsel__set_sample_bit(struct perf_evsel *evsel, | 123 | void __perf_evsel__set_sample_bit(struct perf_evsel *evsel, |
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index fccd69dbbbb9..f6081cb3fca3 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c | |||
@@ -313,7 +313,8 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, | |||
313 | if (is_kallsyms) { | 313 | if (is_kallsyms) { |
314 | if (symbol_conf.kptr_restrict) { | 314 | if (symbol_conf.kptr_restrict) { |
315 | pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n"); | 315 | pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n"); |
316 | return 0; | 316 | err = 0; |
317 | goto out_free; | ||
317 | } | 318 | } |
318 | realname = (char *) name; | 319 | realname = (char *) name; |
319 | } else | 320 | } else |
@@ -954,6 +955,7 @@ static int write_topo_node(int fd, int node) | |||
954 | } | 955 | } |
955 | 956 | ||
956 | fclose(fp); | 957 | fclose(fp); |
958 | fp = NULL; | ||
957 | 959 | ||
958 | ret = do_write(fd, &mem_total, sizeof(u64)); | 960 | ret = do_write(fd, &mem_total, sizeof(u64)); |
959 | if (ret) | 961 | if (ret) |
@@ -980,7 +982,8 @@ static int write_topo_node(int fd, int node) | |||
980 | ret = do_write_string(fd, buf); | 982 | ret = do_write_string(fd, buf); |
981 | done: | 983 | done: |
982 | free(buf); | 984 | free(buf); |
983 | fclose(fp); | 985 | if (fp) |
986 | fclose(fp); | ||
984 | return ret; | 987 | return ret; |
985 | } | 988 | } |
986 | 989 | ||
@@ -2921,16 +2924,22 @@ int perf_event__process_tracing_data(union perf_event *event, | |||
2921 | session->repipe); | 2924 | session->repipe); |
2922 | padding = PERF_ALIGN(size_read, sizeof(u64)) - size_read; | 2925 | padding = PERF_ALIGN(size_read, sizeof(u64)) - size_read; |
2923 | 2926 | ||
2924 | if (readn(session->fd, buf, padding) < 0) | 2927 | if (readn(session->fd, buf, padding) < 0) { |
2925 | die("reading input file"); | 2928 | pr_err("%s: reading input file", __func__); |
2929 | return -1; | ||
2930 | } | ||
2926 | if (session->repipe) { | 2931 | if (session->repipe) { |
2927 | int retw = write(STDOUT_FILENO, buf, padding); | 2932 | int retw = write(STDOUT_FILENO, buf, padding); |
2928 | if (retw <= 0 || retw != padding) | 2933 | if (retw <= 0 || retw != padding) { |
2929 | die("repiping tracing data padding"); | 2934 | pr_err("%s: repiping tracing data padding", __func__); |
2935 | return -1; | ||
2936 | } | ||
2930 | } | 2937 | } |
2931 | 2938 | ||
2932 | if (size_read + padding != size) | 2939 | if (size_read + padding != size) { |
2933 | die("tracing data size mismatch"); | 2940 | pr_err("%s: tracing data size mismatch", __func__); |
2941 | return -1; | ||
2942 | } | ||
2934 | 2943 | ||
2935 | perf_evlist__prepare_tracepoint_events(session->evlist, | 2944 | perf_evlist__prepare_tracepoint_events(session->evlist, |
2936 | session->pevent); | 2945 | session->pevent); |
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index ff94425779a2..6fcb9de62340 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include "strlist.h" | 11 | #include "strlist.h" |
12 | #include "vdso.h" | 12 | #include "vdso.h" |
13 | #include "build-id.h" | 13 | #include "build-id.h" |
14 | #include <linux/string.h> | ||
14 | 15 | ||
15 | const char *map_type__name[MAP__NR_TYPES] = { | 16 | const char *map_type__name[MAP__NR_TYPES] = { |
16 | [MAP__FUNCTION] = "Functions", | 17 | [MAP__FUNCTION] = "Functions", |
@@ -29,29 +30,29 @@ static inline int is_no_dso_memory(const char *filename) | |||
29 | !strcmp(filename, "[heap]"); | 30 | !strcmp(filename, "[heap]"); |
30 | } | 31 | } |
31 | 32 | ||
32 | void map__init(struct map *self, enum map_type type, | 33 | void map__init(struct map *map, enum map_type type, |
33 | u64 start, u64 end, u64 pgoff, struct dso *dso) | 34 | u64 start, u64 end, u64 pgoff, struct dso *dso) |
34 | { | 35 | { |
35 | self->type = type; | 36 | map->type = type; |
36 | self->start = start; | 37 | map->start = start; |
37 | self->end = end; | 38 | map->end = end; |
38 | self->pgoff = pgoff; | 39 | map->pgoff = pgoff; |
39 | self->dso = dso; | 40 | map->dso = dso; |
40 | self->map_ip = map__map_ip; | 41 | map->map_ip = map__map_ip; |
41 | self->unmap_ip = map__unmap_ip; | 42 | map->unmap_ip = map__unmap_ip; |
42 | RB_CLEAR_NODE(&self->rb_node); | 43 | RB_CLEAR_NODE(&map->rb_node); |
43 | self->groups = NULL; | 44 | map->groups = NULL; |
44 | self->referenced = false; | 45 | map->referenced = false; |
45 | self->erange_warned = false; | 46 | map->erange_warned = false; |
46 | } | 47 | } |
47 | 48 | ||
48 | struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, | 49 | struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, |
49 | u64 pgoff, u32 pid, char *filename, | 50 | u64 pgoff, u32 pid, char *filename, |
50 | enum map_type type) | 51 | enum map_type type) |
51 | { | 52 | { |
52 | struct map *self = malloc(sizeof(*self)); | 53 | struct map *map = malloc(sizeof(*map)); |
53 | 54 | ||
54 | if (self != NULL) { | 55 | if (map != NULL) { |
55 | char newfilename[PATH_MAX]; | 56 | char newfilename[PATH_MAX]; |
56 | struct dso *dso; | 57 | struct dso *dso; |
57 | int anon, no_dso, vdso; | 58 | int anon, no_dso, vdso; |
@@ -74,10 +75,10 @@ struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, | |||
74 | if (dso == NULL) | 75 | if (dso == NULL) |
75 | goto out_delete; | 76 | goto out_delete; |
76 | 77 | ||
77 | map__init(self, type, start, start + len, pgoff, dso); | 78 | map__init(map, type, start, start + len, pgoff, dso); |
78 | 79 | ||
79 | if (anon || no_dso) { | 80 | if (anon || no_dso) { |
80 | self->map_ip = self->unmap_ip = identity__map_ip; | 81 | map->map_ip = map->unmap_ip = identity__map_ip; |
81 | 82 | ||
82 | /* | 83 | /* |
83 | * Set memory without DSO as loaded. All map__find_* | 84 | * Set memory without DSO as loaded. All map__find_* |
@@ -85,12 +86,12 @@ struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, | |||
85 | * unnecessary map__load warning. | 86 | * unnecessary map__load warning. |
86 | */ | 87 | */ |
87 | if (no_dso) | 88 | if (no_dso) |
88 | dso__set_loaded(dso, self->type); | 89 | dso__set_loaded(dso, map->type); |
89 | } | 90 | } |
90 | } | 91 | } |
91 | return self; | 92 | return map; |
92 | out_delete: | 93 | out_delete: |
93 | free(self); | 94 | free(map); |
94 | return NULL; | 95 | return NULL; |
95 | } | 96 | } |
96 | 97 | ||
@@ -113,48 +114,48 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type) | |||
113 | return map; | 114 | return map; |
114 | } | 115 | } |
115 | 116 | ||
116 | void map__delete(struct map *self) | 117 | void map__delete(struct map *map) |
117 | { | 118 | { |
118 | free(self); | 119 | free(map); |
119 | } | 120 | } |
120 | 121 | ||
121 | void map__fixup_start(struct map *self) | 122 | void map__fixup_start(struct map *map) |
122 | { | 123 | { |
123 | struct rb_root *symbols = &self->dso->symbols[self->type]; | 124 | struct rb_root *symbols = &map->dso->symbols[map->type]; |
124 | struct rb_node *nd = rb_first(symbols); | 125 | struct rb_node *nd = rb_first(symbols); |
125 | if (nd != NULL) { | 126 | if (nd != NULL) { |
126 | struct symbol *sym = rb_entry(nd, struct symbol, rb_node); | 127 | struct symbol *sym = rb_entry(nd, struct symbol, rb_node); |
127 | self->start = sym->start; | 128 | map->start = sym->start; |
128 | } | 129 | } |
129 | } | 130 | } |
130 | 131 | ||
131 | void map__fixup_end(struct map *self) | 132 | void map__fixup_end(struct map *map) |
132 | { | 133 | { |
133 | struct rb_root *symbols = &self->dso->symbols[self->type]; | 134 | struct rb_root *symbols = &map->dso->symbols[map->type]; |
134 | struct rb_node *nd = rb_last(symbols); | 135 | struct rb_node *nd = rb_last(symbols); |
135 | if (nd != NULL) { | 136 | if (nd != NULL) { |
136 | struct symbol *sym = rb_entry(nd, struct symbol, rb_node); | 137 | struct symbol *sym = rb_entry(nd, struct symbol, rb_node); |
137 | self->end = sym->end; | 138 | map->end = sym->end; |
138 | } | 139 | } |
139 | } | 140 | } |
140 | 141 | ||
141 | #define DSO__DELETED "(deleted)" | 142 | #define DSO__DELETED "(deleted)" |
142 | 143 | ||
143 | int map__load(struct map *self, symbol_filter_t filter) | 144 | int map__load(struct map *map, symbol_filter_t filter) |
144 | { | 145 | { |
145 | const char *name = self->dso->long_name; | 146 | const char *name = map->dso->long_name; |
146 | int nr; | 147 | int nr; |
147 | 148 | ||
148 | if (dso__loaded(self->dso, self->type)) | 149 | if (dso__loaded(map->dso, map->type)) |
149 | return 0; | 150 | return 0; |
150 | 151 | ||
151 | nr = dso__load(self->dso, self, filter); | 152 | nr = dso__load(map->dso, map, filter); |
152 | if (nr < 0) { | 153 | if (nr < 0) { |
153 | if (self->dso->has_build_id) { | 154 | if (map->dso->has_build_id) { |
154 | char sbuild_id[BUILD_ID_SIZE * 2 + 1]; | 155 | char sbuild_id[BUILD_ID_SIZE * 2 + 1]; |
155 | 156 | ||
156 | build_id__sprintf(self->dso->build_id, | 157 | build_id__sprintf(map->dso->build_id, |
157 | sizeof(self->dso->build_id), | 158 | sizeof(map->dso->build_id), |
158 | sbuild_id); | 159 | sbuild_id); |
159 | pr_warning("%s with build id %s not found", | 160 | pr_warning("%s with build id %s not found", |
160 | name, sbuild_id); | 161 | name, sbuild_id); |
@@ -184,43 +185,36 @@ int map__load(struct map *self, symbol_filter_t filter) | |||
184 | * Only applies to the kernel, as its symtabs aren't relative like the | 185 | * Only applies to the kernel, as its symtabs aren't relative like the |
185 | * module ones. | 186 | * module ones. |
186 | */ | 187 | */ |
187 | if (self->dso->kernel) | 188 | if (map->dso->kernel) |
188 | map__reloc_vmlinux(self); | 189 | map__reloc_vmlinux(map); |
189 | 190 | ||
190 | return 0; | 191 | return 0; |
191 | } | 192 | } |
192 | 193 | ||
193 | struct symbol *map__find_symbol(struct map *self, u64 addr, | 194 | struct symbol *map__find_symbol(struct map *map, u64 addr, |
194 | symbol_filter_t filter) | 195 | symbol_filter_t filter) |
195 | { | 196 | { |
196 | if (map__load(self, filter) < 0) | 197 | if (map__load(map, filter) < 0) |
197 | return NULL; | 198 | return NULL; |
198 | 199 | ||
199 | return dso__find_symbol(self->dso, self->type, addr); | 200 | return dso__find_symbol(map->dso, map->type, addr); |
200 | } | 201 | } |
201 | 202 | ||
202 | struct symbol *map__find_symbol_by_name(struct map *self, const char *name, | 203 | struct symbol *map__find_symbol_by_name(struct map *map, const char *name, |
203 | symbol_filter_t filter) | 204 | symbol_filter_t filter) |
204 | { | 205 | { |
205 | if (map__load(self, filter) < 0) | 206 | if (map__load(map, filter) < 0) |
206 | return NULL; | 207 | return NULL; |
207 | 208 | ||
208 | if (!dso__sorted_by_name(self->dso, self->type)) | 209 | if (!dso__sorted_by_name(map->dso, map->type)) |
209 | dso__sort_by_name(self->dso, self->type); | 210 | dso__sort_by_name(map->dso, map->type); |
210 | 211 | ||
211 | return dso__find_symbol_by_name(self->dso, self->type, name); | 212 | return dso__find_symbol_by_name(map->dso, map->type, name); |
212 | } | 213 | } |
213 | 214 | ||
214 | struct map *map__clone(struct map *self) | 215 | struct map *map__clone(struct map *map) |
215 | { | 216 | { |
216 | struct map *map = malloc(sizeof(*self)); | 217 | return memdup(map, sizeof(*map)); |
217 | |||
218 | if (!map) | ||
219 | return NULL; | ||
220 | |||
221 | memcpy(map, self, sizeof(*self)); | ||
222 | |||
223 | return map; | ||
224 | } | 218 | } |
225 | 219 | ||
226 | int map__overlap(struct map *l, struct map *r) | 220 | int map__overlap(struct map *l, struct map *r) |
@@ -237,10 +231,10 @@ int map__overlap(struct map *l, struct map *r) | |||
237 | return 0; | 231 | return 0; |
238 | } | 232 | } |
239 | 233 | ||
240 | size_t map__fprintf(struct map *self, FILE *fp) | 234 | size_t map__fprintf(struct map *map, FILE *fp) |
241 | { | 235 | { |
242 | return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s\n", | 236 | return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s\n", |
243 | self->start, self->end, self->pgoff, self->dso->name); | 237 | map->start, map->end, map->pgoff, map->dso->name); |
244 | } | 238 | } |
245 | 239 | ||
246 | size_t map__fprintf_dsoname(struct map *map, FILE *fp) | 240 | size_t map__fprintf_dsoname(struct map *map, FILE *fp) |
@@ -528,9 +522,9 @@ static u64 map__reloc_unmap_ip(struct map *map, u64 ip) | |||
528 | return ip - (s64)map->pgoff; | 522 | return ip - (s64)map->pgoff; |
529 | } | 523 | } |
530 | 524 | ||
531 | void map__reloc_vmlinux(struct map *self) | 525 | void map__reloc_vmlinux(struct map *map) |
532 | { | 526 | { |
533 | struct kmap *kmap = map__kmap(self); | 527 | struct kmap *kmap = map__kmap(map); |
534 | s64 reloc; | 528 | s64 reloc; |
535 | 529 | ||
536 | if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->unrelocated_addr) | 530 | if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->unrelocated_addr) |
@@ -542,9 +536,9 @@ void map__reloc_vmlinux(struct map *self) | |||
542 | if (!reloc) | 536 | if (!reloc) |
543 | return; | 537 | return; |
544 | 538 | ||
545 | self->map_ip = map__reloc_map_ip; | 539 | map->map_ip = map__reloc_map_ip; |
546 | self->unmap_ip = map__reloc_unmap_ip; | 540 | map->unmap_ip = map__reloc_unmap_ip; |
547 | self->pgoff = reloc; | 541 | map->pgoff = reloc; |
548 | } | 542 | } |
549 | 543 | ||
550 | void maps__insert(struct rb_root *maps, struct map *map) | 544 | void maps__insert(struct rb_root *maps, struct map *map) |
@@ -567,9 +561,9 @@ void maps__insert(struct rb_root *maps, struct map *map) | |||
567 | rb_insert_color(&map->rb_node, maps); | 561 | rb_insert_color(&map->rb_node, maps); |
568 | } | 562 | } |
569 | 563 | ||
570 | void maps__remove(struct rb_root *self, struct map *map) | 564 | void maps__remove(struct rb_root *maps, struct map *map) |
571 | { | 565 | { |
572 | rb_erase(&map->rb_node, self); | 566 | rb_erase(&map->rb_node, maps); |
573 | } | 567 | } |
574 | 568 | ||
575 | struct map *maps__find(struct rb_root *maps, u64 ip) | 569 | struct map *maps__find(struct rb_root *maps, u64 ip) |
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index bcb39e2a6965..a887f2c9dfbb 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h | |||
@@ -57,9 +57,9 @@ struct map_groups { | |||
57 | struct machine *machine; | 57 | struct machine *machine; |
58 | }; | 58 | }; |
59 | 59 | ||
60 | static inline struct kmap *map__kmap(struct map *self) | 60 | static inline struct kmap *map__kmap(struct map *map) |
61 | { | 61 | { |
62 | return (struct kmap *)(self + 1); | 62 | return (struct kmap *)(map + 1); |
63 | } | 63 | } |
64 | 64 | ||
65 | static inline u64 map__map_ip(struct map *map, u64 ip) | 65 | static inline u64 map__map_ip(struct map *map, u64 ip) |
@@ -85,27 +85,27 @@ struct symbol; | |||
85 | 85 | ||
86 | typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); | 86 | typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); |
87 | 87 | ||
88 | void map__init(struct map *self, enum map_type type, | 88 | void map__init(struct map *map, enum map_type type, |
89 | u64 start, u64 end, u64 pgoff, struct dso *dso); | 89 | u64 start, u64 end, u64 pgoff, struct dso *dso); |
90 | struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, | 90 | struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, |
91 | u64 pgoff, u32 pid, char *filename, | 91 | u64 pgoff, u32 pid, char *filename, |
92 | enum map_type type); | 92 | enum map_type type); |
93 | struct map *map__new2(u64 start, struct dso *dso, enum map_type type); | 93 | struct map *map__new2(u64 start, struct dso *dso, enum map_type type); |
94 | void map__delete(struct map *self); | 94 | void map__delete(struct map *map); |
95 | struct map *map__clone(struct map *self); | 95 | struct map *map__clone(struct map *map); |
96 | int map__overlap(struct map *l, struct map *r); | 96 | int map__overlap(struct map *l, struct map *r); |
97 | size_t map__fprintf(struct map *self, FILE *fp); | 97 | size_t map__fprintf(struct map *map, FILE *fp); |
98 | size_t map__fprintf_dsoname(struct map *map, FILE *fp); | 98 | size_t map__fprintf_dsoname(struct map *map, FILE *fp); |
99 | 99 | ||
100 | int map__load(struct map *self, symbol_filter_t filter); | 100 | int map__load(struct map *map, symbol_filter_t filter); |
101 | struct symbol *map__find_symbol(struct map *self, | 101 | struct symbol *map__find_symbol(struct map *map, |
102 | u64 addr, symbol_filter_t filter); | 102 | u64 addr, symbol_filter_t filter); |
103 | struct symbol *map__find_symbol_by_name(struct map *self, const char *name, | 103 | struct symbol *map__find_symbol_by_name(struct map *map, const char *name, |
104 | symbol_filter_t filter); | 104 | symbol_filter_t filter); |
105 | void map__fixup_start(struct map *self); | 105 | void map__fixup_start(struct map *map); |
106 | void map__fixup_end(struct map *self); | 106 | void map__fixup_end(struct map *map); |
107 | 107 | ||
108 | void map__reloc_vmlinux(struct map *self); | 108 | void map__reloc_vmlinux(struct map *map); |
109 | 109 | ||
110 | size_t __map_groups__fprintf_maps(struct map_groups *mg, | 110 | size_t __map_groups__fprintf_maps(struct map_groups *mg, |
111 | enum map_type type, int verbose, FILE *fp); | 111 | enum map_type type, int verbose, FILE *fp); |
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 7ad62393aa88..83336610faa9 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c | |||
@@ -249,7 +249,7 @@ static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf, | |||
249 | size_t size, | 249 | size_t size, |
250 | unsigned int width __maybe_unused) | 250 | unsigned int width __maybe_unused) |
251 | { | 251 | { |
252 | FILE *fp; | 252 | FILE *fp = NULL; |
253 | char cmd[PATH_MAX + 2], *path = self->srcline, *nl; | 253 | char cmd[PATH_MAX + 2], *path = self->srcline, *nl; |
254 | size_t line_len; | 254 | size_t line_len; |
255 | 255 | ||
@@ -270,7 +270,6 @@ static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf, | |||
270 | 270 | ||
271 | if (getline(&path, &line_len, fp) < 0 || !line_len) | 271 | if (getline(&path, &line_len, fp) < 0 || !line_len) |
272 | goto out_ip; | 272 | goto out_ip; |
273 | fclose(fp); | ||
274 | self->srcline = strdup(path); | 273 | self->srcline = strdup(path); |
275 | if (self->srcline == NULL) | 274 | if (self->srcline == NULL) |
276 | goto out_ip; | 275 | goto out_ip; |
@@ -280,8 +279,12 @@ static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf, | |||
280 | *nl = '\0'; | 279 | *nl = '\0'; |
281 | path = self->srcline; | 280 | path = self->srcline; |
282 | out_path: | 281 | out_path: |
282 | if (fp) | ||
283 | pclose(fp); | ||
283 | return repsep_snprintf(bf, size, "%s", path); | 284 | return repsep_snprintf(bf, size, "%s", path); |
284 | out_ip: | 285 | out_ip: |
286 | if (fp) | ||
287 | pclose(fp); | ||
285 | return repsep_snprintf(bf, size, "%-#*llx", BITS_PER_LONG / 4, self->ip); | 288 | return repsep_snprintf(bf, size, "%-#*llx", BITS_PER_LONG / 4, self->ip); |
286 | } | 289 | } |
287 | 290 | ||
diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c index 155d8b7078a7..55433aa42c8f 100644 --- a/tools/perf/util/strlist.c +++ b/tools/perf/util/strlist.c | |||
@@ -35,11 +35,11 @@ out_delete: | |||
35 | return NULL; | 35 | return NULL; |
36 | } | 36 | } |
37 | 37 | ||
38 | static void str_node__delete(struct str_node *self, bool dupstr) | 38 | static void str_node__delete(struct str_node *snode, bool dupstr) |
39 | { | 39 | { |
40 | if (dupstr) | 40 | if (dupstr) |
41 | free((void *)self->s); | 41 | free((void *)snode->s); |
42 | free(self); | 42 | free(snode); |
43 | } | 43 | } |
44 | 44 | ||
45 | static | 45 | static |
@@ -59,12 +59,12 @@ static int strlist__node_cmp(struct rb_node *rb_node, const void *entry) | |||
59 | return strcmp(snode->s, str); | 59 | return strcmp(snode->s, str); |
60 | } | 60 | } |
61 | 61 | ||
62 | int strlist__add(struct strlist *self, const char *new_entry) | 62 | int strlist__add(struct strlist *slist, const char *new_entry) |
63 | { | 63 | { |
64 | return rblist__add_node(&self->rblist, new_entry); | 64 | return rblist__add_node(&slist->rblist, new_entry); |
65 | } | 65 | } |
66 | 66 | ||
67 | int strlist__load(struct strlist *self, const char *filename) | 67 | int strlist__load(struct strlist *slist, const char *filename) |
68 | { | 68 | { |
69 | char entry[1024]; | 69 | char entry[1024]; |
70 | int err; | 70 | int err; |
@@ -80,7 +80,7 @@ int strlist__load(struct strlist *self, const char *filename) | |||
80 | continue; | 80 | continue; |
81 | entry[len - 1] = '\0'; | 81 | entry[len - 1] = '\0'; |
82 | 82 | ||
83 | err = strlist__add(self, entry); | 83 | err = strlist__add(slist, entry); |
84 | if (err != 0) | 84 | if (err != 0) |
85 | goto out; | 85 | goto out; |
86 | } | 86 | } |
@@ -107,56 +107,56 @@ struct str_node *strlist__find(struct strlist *slist, const char *entry) | |||
107 | return snode; | 107 | return snode; |
108 | } | 108 | } |
109 | 109 | ||
110 | static int strlist__parse_list_entry(struct strlist *self, const char *s) | 110 | static int strlist__parse_list_entry(struct strlist *slist, const char *s) |
111 | { | 111 | { |
112 | if (strncmp(s, "file://", 7) == 0) | 112 | if (strncmp(s, "file://", 7) == 0) |
113 | return strlist__load(self, s + 7); | 113 | return strlist__load(slist, s + 7); |
114 | 114 | ||
115 | return strlist__add(self, s); | 115 | return strlist__add(slist, s); |
116 | } | 116 | } |
117 | 117 | ||
118 | int strlist__parse_list(struct strlist *self, const char *s) | 118 | int strlist__parse_list(struct strlist *slist, const char *s) |
119 | { | 119 | { |
120 | char *sep; | 120 | char *sep; |
121 | int err; | 121 | int err; |
122 | 122 | ||
123 | while ((sep = strchr(s, ',')) != NULL) { | 123 | while ((sep = strchr(s, ',')) != NULL) { |
124 | *sep = '\0'; | 124 | *sep = '\0'; |
125 | err = strlist__parse_list_entry(self, s); | 125 | err = strlist__parse_list_entry(slist, s); |
126 | *sep = ','; | 126 | *sep = ','; |
127 | if (err != 0) | 127 | if (err != 0) |
128 | return err; | 128 | return err; |
129 | s = sep + 1; | 129 | s = sep + 1; |
130 | } | 130 | } |
131 | 131 | ||
132 | return *s ? strlist__parse_list_entry(self, s) : 0; | 132 | return *s ? strlist__parse_list_entry(slist, s) : 0; |
133 | } | 133 | } |
134 | 134 | ||
135 | struct strlist *strlist__new(bool dupstr, const char *slist) | 135 | struct strlist *strlist__new(bool dupstr, const char *list) |
136 | { | 136 | { |
137 | struct strlist *self = malloc(sizeof(*self)); | 137 | struct strlist *slist = malloc(sizeof(*slist)); |
138 | 138 | ||
139 | if (self != NULL) { | 139 | if (slist != NULL) { |
140 | rblist__init(&self->rblist); | 140 | rblist__init(&slist->rblist); |
141 | self->rblist.node_cmp = strlist__node_cmp; | 141 | slist->rblist.node_cmp = strlist__node_cmp; |
142 | self->rblist.node_new = strlist__node_new; | 142 | slist->rblist.node_new = strlist__node_new; |
143 | self->rblist.node_delete = strlist__node_delete; | 143 | slist->rblist.node_delete = strlist__node_delete; |
144 | 144 | ||
145 | self->dupstr = dupstr; | 145 | slist->dupstr = dupstr; |
146 | if (slist && strlist__parse_list(self, slist) != 0) | 146 | if (slist && strlist__parse_list(slist, list) != 0) |
147 | goto out_error; | 147 | goto out_error; |
148 | } | 148 | } |
149 | 149 | ||
150 | return self; | 150 | return slist; |
151 | out_error: | 151 | out_error: |
152 | free(self); | 152 | free(slist); |
153 | return NULL; | 153 | return NULL; |
154 | } | 154 | } |
155 | 155 | ||
156 | void strlist__delete(struct strlist *self) | 156 | void strlist__delete(struct strlist *slist) |
157 | { | 157 | { |
158 | if (self != NULL) | 158 | if (slist != NULL) |
159 | rblist__delete(&self->rblist); | 159 | rblist__delete(&slist->rblist); |
160 | } | 160 | } |
161 | 161 | ||
162 | struct str_node *strlist__entry(const struct strlist *slist, unsigned int idx) | 162 | struct str_node *strlist__entry(const struct strlist *slist, unsigned int idx) |
diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h index dd9f922ec67c..5c7f87069d9c 100644 --- a/tools/perf/util/strlist.h +++ b/tools/perf/util/strlist.h | |||
@@ -17,34 +17,34 @@ struct strlist { | |||
17 | }; | 17 | }; |
18 | 18 | ||
19 | struct strlist *strlist__new(bool dupstr, const char *slist); | 19 | struct strlist *strlist__new(bool dupstr, const char *slist); |
20 | void strlist__delete(struct strlist *self); | 20 | void strlist__delete(struct strlist *slist); |
21 | 21 | ||
22 | void strlist__remove(struct strlist *self, struct str_node *sn); | 22 | void strlist__remove(struct strlist *slist, struct str_node *sn); |
23 | int strlist__load(struct strlist *self, const char *filename); | 23 | int strlist__load(struct strlist *slist, const char *filename); |
24 | int strlist__add(struct strlist *self, const char *str); | 24 | int strlist__add(struct strlist *slist, const char *str); |
25 | 25 | ||
26 | struct str_node *strlist__entry(const struct strlist *self, unsigned int idx); | 26 | struct str_node *strlist__entry(const struct strlist *slist, unsigned int idx); |
27 | struct str_node *strlist__find(struct strlist *self, const char *entry); | 27 | struct str_node *strlist__find(struct strlist *slist, const char *entry); |
28 | 28 | ||
29 | static inline bool strlist__has_entry(struct strlist *self, const char *entry) | 29 | static inline bool strlist__has_entry(struct strlist *slist, const char *entry) |
30 | { | 30 | { |
31 | return strlist__find(self, entry) != NULL; | 31 | return strlist__find(slist, entry) != NULL; |
32 | } | 32 | } |
33 | 33 | ||
34 | static inline bool strlist__empty(const struct strlist *self) | 34 | static inline bool strlist__empty(const struct strlist *slist) |
35 | { | 35 | { |
36 | return rblist__empty(&self->rblist); | 36 | return rblist__empty(&slist->rblist); |
37 | } | 37 | } |
38 | 38 | ||
39 | static inline unsigned int strlist__nr_entries(const struct strlist *self) | 39 | static inline unsigned int strlist__nr_entries(const struct strlist *slist) |
40 | { | 40 | { |
41 | return rblist__nr_entries(&self->rblist); | 41 | return rblist__nr_entries(&slist->rblist); |
42 | } | 42 | } |
43 | 43 | ||
44 | /* For strlist iteration */ | 44 | /* For strlist iteration */ |
45 | static inline struct str_node *strlist__first(struct strlist *self) | 45 | static inline struct str_node *strlist__first(struct strlist *slist) |
46 | { | 46 | { |
47 | struct rb_node *rn = rb_first(&self->rblist.entries); | 47 | struct rb_node *rn = rb_first(&slist->rblist.entries); |
48 | return rn ? rb_entry(rn, struct str_node, rb_node) : NULL; | 48 | return rn ? rb_entry(rn, struct str_node, rb_node) : NULL; |
49 | } | 49 | } |
50 | static inline struct str_node *strlist__next(struct str_node *sn) | 50 | static inline struct str_node *strlist__next(struct str_node *sn) |
@@ -59,21 +59,21 @@ static inline struct str_node *strlist__next(struct str_node *sn) | |||
59 | /** | 59 | /** |
60 | * strlist_for_each - iterate over a strlist | 60 | * strlist_for_each - iterate over a strlist |
61 | * @pos: the &struct str_node to use as a loop cursor. | 61 | * @pos: the &struct str_node to use as a loop cursor. |
62 | * @self: the &struct strlist for loop. | 62 | * @slist: the &struct strlist for loop. |
63 | */ | 63 | */ |
64 | #define strlist__for_each(pos, self) \ | 64 | #define strlist__for_each(pos, slist) \ |
65 | for (pos = strlist__first(self); pos; pos = strlist__next(pos)) | 65 | for (pos = strlist__first(slist); pos; pos = strlist__next(pos)) |
66 | 66 | ||
67 | /** | 67 | /** |
68 | * strlist_for_each_safe - iterate over a strlist safe against removal of | 68 | * strlist_for_each_safe - iterate over a strlist safe against removal of |
69 | * str_node | 69 | * str_node |
70 | * @pos: the &struct str_node to use as a loop cursor. | 70 | * @pos: the &struct str_node to use as a loop cursor. |
71 | * @n: another &struct str_node to use as temporary storage. | 71 | * @n: another &struct str_node to use as temporary storage. |
72 | * @self: the &struct strlist for loop. | 72 | * @slist: the &struct strlist for loop. |
73 | */ | 73 | */ |
74 | #define strlist__for_each_safe(pos, n, self) \ | 74 | #define strlist__for_each_safe(pos, n, slist) \ |
75 | for (pos = strlist__first(self), n = strlist__next(pos); pos;\ | 75 | for (pos = strlist__first(slist), n = strlist__next(pos); pos;\ |
76 | pos = n, n = strlist__next(n)) | 76 | pos = n, n = strlist__next(n)) |
77 | 77 | ||
78 | int strlist__parse_list(struct strlist *self, const char *s); | 78 | int strlist__parse_list(struct strlist *slist, const char *s); |
79 | #endif /* __PERF_STRLIST_H */ | 79 | #endif /* __PERF_STRLIST_H */ |