diff options
-rw-r--r-- | tools/perf/builtin-stat.c | 64 |
1 files changed, 50 insertions, 14 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index c5a290727a92..201ef2367dcb 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -99,7 +99,7 @@ static u64 runtime_cycles_noise; | |||
99 | #define ERR_PERF_OPEN \ | 99 | #define ERR_PERF_OPEN \ |
100 | "Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n" | 100 | "Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n" |
101 | 101 | ||
102 | static void create_perf_stat_counter(int counter) | 102 | static void create_perf_stat_counter(int counter, int pid) |
103 | { | 103 | { |
104 | struct perf_counter_attr *attr = attrs + counter; | 104 | struct perf_counter_attr *attr = attrs + counter; |
105 | 105 | ||
@@ -119,7 +119,7 @@ static void create_perf_stat_counter(int counter) | |||
119 | attr->inherit = inherit; | 119 | attr->inherit = inherit; |
120 | attr->disabled = 1; | 120 | attr->disabled = 1; |
121 | 121 | ||
122 | fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0); | 122 | fd[0][counter] = sys_perf_counter_open(attr, pid, -1, -1, 0); |
123 | if (fd[0][counter] < 0 && verbose) | 123 | if (fd[0][counter] < 0 && verbose) |
124 | fprintf(stderr, ERR_PERF_OPEN, counter, | 124 | fprintf(stderr, ERR_PERF_OPEN, counter, |
125 | fd[0][counter], strerror(errno)); | 125 | fd[0][counter], strerror(errno)); |
@@ -205,12 +205,58 @@ static int run_perf_stat(int argc, const char **argv) | |||
205 | int status = 0; | 205 | int status = 0; |
206 | int counter; | 206 | int counter; |
207 | int pid; | 207 | int pid; |
208 | int child_ready_pipe[2], go_pipe[2]; | ||
209 | char buf; | ||
208 | 210 | ||
209 | if (!system_wide) | 211 | if (!system_wide) |
210 | nr_cpus = 1; | 212 | nr_cpus = 1; |
211 | 213 | ||
214 | if (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0) { | ||
215 | perror("failed to create pipes"); | ||
216 | exit(1); | ||
217 | } | ||
218 | |||
219 | if ((pid = fork()) < 0) | ||
220 | perror("failed to fork"); | ||
221 | |||
222 | if (!pid) { | ||
223 | close(child_ready_pipe[0]); | ||
224 | close(go_pipe[1]); | ||
225 | fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); | ||
226 | |||
227 | /* | ||
228 | * Do a dummy execvp to get the PLT entry resolved, | ||
229 | * so we avoid the resolver overhead on the real | ||
230 | * execvp call. | ||
231 | */ | ||
232 | execvp("", (char **)argv); | ||
233 | |||
234 | /* | ||
235 | * Tell the parent we're ready to go | ||
236 | */ | ||
237 | close(child_ready_pipe[1]); | ||
238 | |||
239 | /* | ||
240 | * Wait until the parent tells us to go. | ||
241 | */ | ||
242 | read(go_pipe[0], &buf, 1); | ||
243 | |||
244 | execvp(argv[0], (char **)argv); | ||
245 | |||
246 | perror(argv[0]); | ||
247 | exit(-1); | ||
248 | } | ||
249 | |||
250 | /* | ||
251 | * Wait for the child to be ready to exec. | ||
252 | */ | ||
253 | close(child_ready_pipe[1]); | ||
254 | close(go_pipe[0]); | ||
255 | read(child_ready_pipe[0], &buf, 1); | ||
256 | close(child_ready_pipe[0]); | ||
257 | |||
212 | for (counter = 0; counter < nr_counters; counter++) | 258 | for (counter = 0; counter < nr_counters; counter++) |
213 | create_perf_stat_counter(counter); | 259 | create_perf_stat_counter(counter, pid); |
214 | 260 | ||
215 | /* | 261 | /* |
216 | * Enable counters and exec the command: | 262 | * Enable counters and exec the command: |
@@ -218,19 +264,9 @@ static int run_perf_stat(int argc, const char **argv) | |||
218 | t0 = rdclock(); | 264 | t0 = rdclock(); |
219 | prctl(PR_TASK_PERF_COUNTERS_ENABLE); | 265 | prctl(PR_TASK_PERF_COUNTERS_ENABLE); |
220 | 266 | ||
221 | if ((pid = fork()) < 0) | 267 | close(go_pipe[1]); |
222 | perror("failed to fork"); | ||
223 | |||
224 | if (!pid) { | ||
225 | if (execvp(argv[0], (char **)argv)) { | ||
226 | perror(argv[0]); | ||
227 | exit(-1); | ||
228 | } | ||
229 | } | ||
230 | |||
231 | wait(&status); | 268 | wait(&status); |
232 | 269 | ||
233 | prctl(PR_TASK_PERF_COUNTERS_DISABLE); | ||
234 | t1 = rdclock(); | 270 | t1 = rdclock(); |
235 | 271 | ||
236 | walltime_nsecs[run_idx] = t1 - t0; | 272 | walltime_nsecs[run_idx] = t1 - t0; |