Diffstat (limited to 'tools/perf/builtin-record.c')
-rw-r--r--	tools/perf/builtin-record.c	603
1 file changed, 258 insertions, 345 deletions
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 6ab58cc99d5..0abfb18b911 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -22,6 +22,7 @@
 #include "util/evsel.h"
 #include "util/debug.h"
 #include "util/session.h"
+#include "util/tool.h"
 #include "util/symbol.h"
 #include "util/cpumap.h"
 #include "util/thread_map.h"
@@ -35,55 +36,36 @@ enum write_mode_t {
 	WRITE_APPEND
 };
 
-static u64 user_interval = ULLONG_MAX;
-static u64 default_interval = 0;
-
-static unsigned int page_size;
-static unsigned int mmap_pages = UINT_MAX;
-static unsigned int user_freq = UINT_MAX;
-static int freq = 1000;
-static int output;
-static int pipe_output = 0;
-static const char *output_name = NULL;
-static bool group = false;
-static int realtime_prio = 0;
-static bool nodelay = false;
-static bool raw_samples = false;
-static bool sample_id_all_avail = true;
-static bool system_wide = false;
-static pid_t target_pid = -1;
-static pid_t target_tid = -1;
-static pid_t child_pid = -1;
-static bool no_inherit = false;
-static enum write_mode_t write_mode = WRITE_FORCE;
-static bool call_graph = false;
-static bool inherit_stat = false;
-static bool no_samples = false;
-static bool sample_address = false;
-static bool sample_time = false;
-static bool no_buildid = false;
-static bool no_buildid_cache = false;
-static struct perf_evlist *evsel_list;
-
-static long samples = 0;
-static u64 bytes_written = 0;
-
-static int file_new = 1;
-static off_t post_processing_offset;
-
-static struct perf_session *session;
-static const char *cpu_list;
-static const char *progname;
-
-static void advance_output(size_t size)
+struct perf_record {
+	struct perf_tool tool;
+	struct perf_record_opts opts;
+	u64 bytes_written;
+	const char *output_name;
+	struct perf_evlist *evlist;
+	struct perf_session *session;
+	const char *progname;
+	int output;
+	unsigned int page_size;
+	int realtime_prio;
+	enum write_mode_t write_mode;
+	bool no_buildid;
+	bool no_buildid_cache;
+	bool force;
+	bool file_new;
+	bool append_file;
+	long samples;
+	off_t post_processing_offset;
+};
+
+static void advance_output(struct perf_record *rec, size_t size)
 {
-	bytes_written += size;
+	rec->bytes_written += size;
 }
 
-static void write_output(void *buf, size_t size)
+static void write_output(struct perf_record *rec, void *buf, size_t size)
 {
 	while (size) {
-		int ret = write(output, buf, size);
+		int ret = write(rec->output, buf, size);
 
 		if (ret < 0)
 			die("failed to write");
@@ -91,30 +73,33 @@ static void write_output(void *buf, size_t size)
 		size -= ret;
 		buf += ret;
 
-		bytes_written += ret;
+		rec->bytes_written += ret;
 	}
 }
 
-static int process_synthesized_event(union perf_event *event,
+static int process_synthesized_event(struct perf_tool *tool,
+				     union perf_event *event,
 				     struct perf_sample *sample __used,
-				     struct perf_session *self __used)
+				     struct machine *machine __used)
 {
-	write_output(event, event->header.size);
+	struct perf_record *rec = container_of(tool, struct perf_record, tool);
+	write_output(rec, event, event->header.size);
 	return 0;
 }
 
-static void mmap_read(struct perf_mmap *md)
+static void perf_record__mmap_read(struct perf_record *rec,
+				   struct perf_mmap *md)
 {
 	unsigned int head = perf_mmap__read_head(md);
 	unsigned int old = md->prev;
-	unsigned char *data = md->base + page_size;
+	unsigned char *data = md->base + rec->page_size;
 	unsigned long size;
 	void *buf;
 
 	if (old == head)
 		return;
 
-	samples++;
+	rec->samples++;
 
 	size = head - old;
 
@@ -123,14 +108,14 @@ static void mmap_read(struct perf_mmap *md)
 		size = md->mask + 1 - (old & md->mask);
 		old += size;
 
-		write_output(buf, size);
+		write_output(rec, buf, size);
 	}
 
 	buf = &data[old & md->mask];
 	size = head - old;
 	old += size;
 
-	write_output(buf, size);
+	write_output(rec, buf, size);
 
 	md->prev = old;
 	perf_mmap__write_tail(md, old);
@@ -149,17 +134,18 @@ static void sig_handler(int sig)
 	signr = sig;
 }
 
-static void sig_atexit(void)
+static void perf_record__sig_exit(int exit_status __used, void *arg)
 {
+	struct perf_record *rec = arg;
 	int status;
 
-	if (child_pid > 0) {
+	if (rec->evlist->workload.pid > 0) {
 		if (!child_finished)
-			kill(child_pid, SIGTERM);
+			kill(rec->evlist->workload.pid, SIGTERM);
 
 		wait(&status);
 		if (WIFSIGNALED(status))
-			psignal(WTERMSIG(status), progname);
+			psignal(WTERMSIG(status), rec->progname);
 	}
 
 	if (signr == -1 || signr == SIGUSR1)
@@ -169,78 +155,6 @@ static void sig_atexit(void)
 	kill(getpid(), signr);
 }
 
-static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
-{
-	struct perf_event_attr *attr = &evsel->attr;
-	int track = !evsel->idx; /* only the first counter needs these */
-
-	attr->disabled = 1;
-	attr->inherit = !no_inherit;
-	attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
-			    PERF_FORMAT_TOTAL_TIME_RUNNING |
-			    PERF_FORMAT_ID;
-
-	attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
-
-	if (evlist->nr_entries > 1)
-		attr->sample_type |= PERF_SAMPLE_ID;
-
-	/*
-	 * We default some events to a 1 default interval. But keep
-	 * it a weak assumption overridable by the user.
-	 */
-	if (!attr->sample_period || (user_freq != UINT_MAX &&
-				     user_interval != ULLONG_MAX)) {
-		if (freq) {
-			attr->sample_type |= PERF_SAMPLE_PERIOD;
-			attr->freq = 1;
-			attr->sample_freq = freq;
-		} else {
-			attr->sample_period = default_interval;
-		}
-	}
-
-	if (no_samples)
-		attr->sample_freq = 0;
-
-	if (inherit_stat)
-		attr->inherit_stat = 1;
-
-	if (sample_address) {
-		attr->sample_type |= PERF_SAMPLE_ADDR;
-		attr->mmap_data = track;
-	}
-
-	if (call_graph)
-		attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
-
-	if (system_wide)
-		attr->sample_type |= PERF_SAMPLE_CPU;
-
-	if (sample_id_all_avail &&
-	    (sample_time || system_wide || !no_inherit || cpu_list))
-		attr->sample_type |= PERF_SAMPLE_TIME;
-
-	if (raw_samples) {
-		attr->sample_type |= PERF_SAMPLE_TIME;
-		attr->sample_type |= PERF_SAMPLE_RAW;
-		attr->sample_type |= PERF_SAMPLE_CPU;
-	}
-
-	if (nodelay) {
-		attr->watermark = 0;
-		attr->wakeup_events = 1;
-	}
-
-	attr->mmap = track;
-	attr->comm = track;
-
-	if (target_pid == -1 && target_tid == -1 && !system_wide) {
-		attr->disabled = 1;
-		attr->enable_on_exec = 1;
-	}
-}
-
 static bool perf_evlist__equal(struct perf_evlist *evlist,
 			       struct perf_evlist *other)
 {
@@ -260,15 +174,17 @@ static bool perf_evlist__equal(struct perf_evlist *evlist,
 	return true;
 }
 
-static void open_counters(struct perf_evlist *evlist)
+static void perf_record__open(struct perf_record *rec)
 {
 	struct perf_evsel *pos, *first;
-
-	if (evlist->cpus->map[0] < 0)
-		no_inherit = true;
+	struct perf_evlist *evlist = rec->evlist;
+	struct perf_session *session = rec->session;
+	struct perf_record_opts *opts = &rec->opts;
 
 	first = list_entry(evlist->entries.next, struct perf_evsel, node);
 
+	perf_evlist__config_attrs(evlist, opts);
+
 	list_for_each_entry(pos, &evlist->entries, node) {
 		struct perf_event_attr *attr = &pos->attr;
 		struct xyarray *group_fd = NULL;
@@ -286,29 +202,27 @@ static void open_counters(struct perf_evlist *evlist)
 		 */
 		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
 
-		if (group && pos != first)
+		if (opts->group && pos != first)
 			group_fd = first->fd;
-
-		config_attr(pos, evlist);
 retry_sample_id:
-		attr->sample_id_all = sample_id_all_avail ? 1 : 0;
+		attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0;
 try_again:
-		if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group,
-				     group_fd) < 0) {
+		if (perf_evsel__open(pos, evlist->cpus, evlist->threads,
+				     opts->group, group_fd) < 0) {
 			int err = errno;
 
 			if (err == EPERM || err == EACCES) {
 				ui__error_paranoid();
 				exit(EXIT_FAILURE);
-			} else if (err == ENODEV && cpu_list) {
+			} else if (err == ENODEV && opts->cpu_list) {
 				die("No such device - did you specify"
 				    " an out-of-range profile CPU?\n");
-			} else if (err == EINVAL && sample_id_all_avail) {
+			} else if (err == EINVAL && opts->sample_id_all_avail) {
 				/*
 				 * Old kernel, no attr->sample_id_type_all field
 				 */
-				sample_id_all_avail = false;
-				if (!sample_time && !raw_samples && !time_needed)
+				opts->sample_id_all_avail = false;
+				if (!opts->sample_time && !opts->raw_samples && !time_needed)
 					attr->sample_type &= ~PERF_SAMPLE_TIME;
 
 				goto retry_sample_id;
@@ -358,10 +272,20 @@ try_again:
 		exit(-1);
 	}
 
-	if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
+	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
+		if (errno == EPERM)
+			die("Permission error mapping pages.\n"
+			    "Consider increasing "
+			    "/proc/sys/kernel/perf_event_mlock_kb,\n"
+			    "or try again with a smaller value of -m/--mmap_pages.\n"
+			    "(current value: %d)\n", opts->mmap_pages);
+		else if (!is_power_of_2(opts->mmap_pages))
+			die("--mmap_pages/-m value must be a power of two.");
+
 		die("failed to mmap with %d (%s)\n", errno, strerror(errno));
+	}
 
-	if (file_new)
+	if (rec->file_new)
 		session->evlist = evlist;
 	else {
 		if (!perf_evlist__equal(session->evlist, evlist)) {
@@ -373,29 +297,32 @@ try_again:
 	perf_session__update_sample_type(session);
 }
 
-static int process_buildids(void)
+static int process_buildids(struct perf_record *rec)
 {
-	u64 size = lseek(output, 0, SEEK_CUR);
+	u64 size = lseek(rec->output, 0, SEEK_CUR);
 
 	if (size == 0)
 		return 0;
 
-	session->fd = output;
-	return __perf_session__process_events(session, post_processing_offset,
-					      size - post_processing_offset,
+	rec->session->fd = rec->output;
+	return __perf_session__process_events(rec->session, rec->post_processing_offset,
+					      size - rec->post_processing_offset,
 					      size, &build_id__mark_dso_hit_ops);
 }
 
-static void atexit_header(void)
+static void perf_record__exit(int status __used, void *arg)
 {
-	if (!pipe_output) {
-		session->header.data_size += bytes_written;
-
-		if (!no_buildid)
-			process_buildids();
-		perf_session__write_header(session, evsel_list, output, true);
-		perf_session__delete(session);
-		perf_evlist__delete(evsel_list);
+	struct perf_record *rec = arg;
+
+	if (!rec->opts.pipe_output) {
+		rec->session->header.data_size += rec->bytes_written;
+
+		if (!rec->no_buildid)
+			process_buildids(rec);
+		perf_session__write_header(rec->session, rec->evlist,
+					   rec->output, true);
+		perf_session__delete(rec->session);
+		perf_evlist__delete(rec->evlist);
 		symbol__exit();
 	}
 }
@@ -403,7 +330,7 @@ static void atexit_header(void)
 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
 {
 	int err;
-	struct perf_session *psession = data;
+	struct perf_tool *tool = data;
 
 	if (machine__is_host(machine))
 		return;
@@ -416,8 +343,8 @@ static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
 	 *method is used to avoid symbol missing when the first addr is
 	 *in module instead of in guest kernel.
 	 */
-	err = perf_event__synthesize_modules(process_synthesized_event,
-					     psession, machine);
+	err = perf_event__synthesize_modules(tool, process_synthesized_event,
+					     machine);
 	if (err < 0)
 		pr_err("Couldn't record guest kernel [%d]'s reference"
 		       " relocation symbol.\n", machine->pid);
@@ -426,12 +353,11 @@ static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
 	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
 	 * have no _text sometimes.
 	 */
-	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
-						 psession, machine, "_text");
+	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
+						 machine, "_text");
 	if (err < 0)
-		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
-							 psession, machine,
-							 "_stext");
+		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
+							 machine, "_stext");
 	if (err < 0)
 		pr_err("Couldn't record guest kernel [%d]'s reference"
 		       " relocation symbol.\n", machine->pid);
@@ -442,73 +368,71 @@ static struct perf_event_header finished_round_event = {
 	.type = PERF_RECORD_FINISHED_ROUND,
 };
 
-static void mmap_read_all(void)
+static void perf_record__mmap_read_all(struct perf_record *rec)
 {
 	int i;
 
-	for (i = 0; i < evsel_list->nr_mmaps; i++) {
-		if (evsel_list->mmap[i].base)
-			mmap_read(&evsel_list->mmap[i]);
+	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
+		if (rec->evlist->mmap[i].base)
+			perf_record__mmap_read(rec, &rec->evlist->mmap[i]);
 	}
 
-	if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
-		write_output(&finished_round_event, sizeof(finished_round_event));
+	if (perf_header__has_feat(&rec->session->header, HEADER_TRACE_INFO))
+		write_output(rec, &finished_round_event, sizeof(finished_round_event));
 }
 
-static int __cmd_record(int argc, const char **argv)
+static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
 {
 	struct stat st;
 	int flags;
-	int err;
+	int err, output;
 	unsigned long waking = 0;
-	int child_ready_pipe[2], go_pipe[2];
 	const bool forks = argc > 0;
-	char buf;
 	struct machine *machine;
+	struct perf_tool *tool = &rec->tool;
+	struct perf_record_opts *opts = &rec->opts;
+	struct perf_evlist *evsel_list = rec->evlist;
+	const char *output_name = rec->output_name;
+	struct perf_session *session;
 
-	progname = argv[0];
+	rec->progname = argv[0];
 
-	page_size = sysconf(_SC_PAGE_SIZE);
+	rec->page_size = sysconf(_SC_PAGE_SIZE);
 
-	atexit(sig_atexit);
+	on_exit(perf_record__sig_exit, rec);
 	signal(SIGCHLD, sig_handler);
 	signal(SIGINT, sig_handler);
 	signal(SIGUSR1, sig_handler);
 
-	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
-		perror("failed to create pipes");
-		exit(-1);
-	}
-
 	if (!output_name) {
 		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
-			pipe_output = 1;
+			opts->pipe_output = true;
 		else
-			output_name = "perf.data";
+			rec->output_name = output_name = "perf.data";
 	}
 	if (output_name) {
 		if (!strcmp(output_name, "-"))
-			pipe_output = 1;
+			opts->pipe_output = true;
 		else if (!stat(output_name, &st) && st.st_size) {
-			if (write_mode == WRITE_FORCE) {
+			if (rec->write_mode == WRITE_FORCE) {
 				char oldname[PATH_MAX];
 				snprintf(oldname, sizeof(oldname), "%s.old",
 					 output_name);
 				unlink(oldname);
 				rename(output_name, oldname);
 			}
-		} else if (write_mode == WRITE_APPEND) {
-			write_mode = WRITE_FORCE;
+		} else if (rec->write_mode == WRITE_APPEND) {
+			rec->write_mode = WRITE_FORCE;
 		}
 	}
 
 	flags = O_CREAT|O_RDWR;
-	if (write_mode == WRITE_APPEND)
-		file_new = 0;
+	if (rec->write_mode == WRITE_APPEND)
+		rec->file_new = 0;
 	else
 		flags |= O_TRUNC;
 
-	if (pipe_output)
+	if (opts->pipe_output)
 		output = STDOUT_FILENO;
 	else
 		output = open(output_name, flags, S_IRUSR | S_IWUSR);
@@ -517,17 +441,21 @@ static int __cmd_record(int argc, const char **argv)
 		exit(-1);
 	}
 
+	rec->output = output;
+
 	session = perf_session__new(output_name, O_WRONLY,
-				    write_mode == WRITE_FORCE, false, NULL);
+				    rec->write_mode == WRITE_FORCE, false, NULL);
 	if (session == NULL) {
 		pr_err("Not enough memory for reading perf file header\n");
 		return -1;
 	}
 
-	if (!no_buildid)
+	rec->session = session;
+
+	if (!rec->no_buildid)
 		perf_header__set_feat(&session->header, HEADER_BUILD_ID);
 
-	if (!file_new) {
+	if (!rec->file_new) {
 		err = perf_session__read_header(session, output);
 		if (err < 0)
 			goto out_delete_session;
@@ -549,94 +477,57 @@ static int __cmd_record(int argc, const char **argv)
 	perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY);
 	perf_header__set_feat(&session->header, HEADER_CPUID);
 
-	/* 512 kiB: default amount of unprivileged mlocked memory */
-	if (mmap_pages == UINT_MAX)
-		mmap_pages = (512 * 1024) / page_size;
-
 	if (forks) {
-		child_pid = fork();
-		if (child_pid < 0) {
-			perror("failed to fork");
-			exit(-1);
-		}
-
-		if (!child_pid) {
-			if (pipe_output)
-				dup2(2, 1);
-			close(child_ready_pipe[0]);
-			close(go_pipe[1]);
-			fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
-
-			/*
-			 * Do a dummy execvp to get the PLT entry resolved,
-			 * so we avoid the resolver overhead on the real
-			 * execvp call.
-			 */
-			execvp("", (char **)argv);
-
-			/*
-			 * Tell the parent we're ready to go
-			 */
-			close(child_ready_pipe[1]);
-
-			/*
-			 * Wait until the parent tells us to go.
-			 */
-			if (read(go_pipe[0], &buf, 1) == -1)
-				perror("unable to read pipe");
-
-			execvp(argv[0], (char **)argv);
-
-			perror(argv[0]);
-			kill(getppid(), SIGUSR1);
-			exit(-1);
-		}
-
-		if (!system_wide && target_tid == -1 && target_pid == -1)
-			evsel_list->threads->map[0] = child_pid;
-
-		close(child_ready_pipe[1]);
-		close(go_pipe[0]);
-		/*
-		 * wait for child to settle
-		 */
-		if (read(child_ready_pipe[0], &buf, 1) == -1) {
-			perror("unable to read pipe");
-			exit(-1);
+		err = perf_evlist__prepare_workload(evsel_list, opts, argv);
+		if (err < 0) {
+			pr_err("Couldn't run the workload!\n");
+			goto out_delete_session;
 		}
-		close(child_ready_pipe[0]);
 	}
 
-	open_counters(evsel_list);
+	perf_record__open(rec);
 
 	/*
-	 * perf_session__delete(session) will be called at atexit_header()
+	 * perf_session__delete(session) will be called at perf_record__exit()
 	 */
-	atexit(atexit_header);
+	on_exit(perf_record__exit, rec);
 
-	if (pipe_output) {
+	if (opts->pipe_output) {
 		err = perf_header__write_pipe(output);
 		if (err < 0)
 			return err;
-	} else if (file_new) {
+	} else if (rec->file_new) {
 		err = perf_session__write_header(session, evsel_list,
 						 output, false);
 		if (err < 0)
 			return err;
 	}
 
-	post_processing_offset = lseek(output, 0, SEEK_CUR);
+	if (!!rec->no_buildid
+	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
+		pr_err("Couldn't generating buildids. "
+		       "Use --no-buildid to profile anyway.\n");
+		return -1;
+	}
 
-	if (pipe_output) {
-		err = perf_session__synthesize_attrs(session,
-						     process_synthesized_event);
+	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
+
+	machine = perf_session__find_host_machine(session);
+	if (!machine) {
+		pr_err("Couldn't find native kernel information.\n");
+		return -1;
+	}
+
+	if (opts->pipe_output) {
+		err = perf_event__synthesize_attrs(tool, session,
+						   process_synthesized_event);
 		if (err < 0) {
 			pr_err("Couldn't synthesize attrs.\n");
 			return err;
 		}
 
-		err = perf_event__synthesize_event_types(process_synthesized_event,
-							 session);
+		err = perf_event__synthesize_event_types(tool, process_synthesized_event,
+							 machine);
 		if (err < 0) {
 			pr_err("Couldn't synthesize event_types.\n");
 			return err;
@@ -651,56 +542,49 @@ static int __cmd_record(int argc, const char **argv)
 			 * return this more properly and also
 			 * propagate errors that now are calling die()
 			 */
-			err = perf_event__synthesize_tracing_data(output, evsel_list,
-								  process_synthesized_event,
-								  session);
+			err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
+								  process_synthesized_event);
 			if (err <= 0) {
 				pr_err("Couldn't record tracing data.\n");
 				return err;
 			}
-			advance_output(err);
+			advance_output(rec, err);
 		}
 	}
 
-	machine = perf_session__find_host_machine(session);
-	if (!machine) {
-		pr_err("Couldn't find native kernel information.\n");
-		return -1;
-	}
-
-	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
-						 session, machine, "_text");
+	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
+						 machine, "_text");
 	if (err < 0)
-		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
-							 session, machine, "_stext");
+		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
+							 machine, "_stext");
 	if (err < 0)
 		pr_err("Couldn't record kernel reference relocation symbol\n"
 		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
 		       "Check /proc/kallsyms permission or run as root.\n");
 
-	err = perf_event__synthesize_modules(process_synthesized_event,
-					     session, machine);
+	err = perf_event__synthesize_modules(tool, process_synthesized_event,
+					     machine);
 	if (err < 0)
 		pr_err("Couldn't record kernel module information.\n"
 		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
 		       "Check /proc/modules permission or run as root.\n");
 
 	if (perf_guest)
-		perf_session__process_machines(session,
+		perf_session__process_machines(session, tool,
 					       perf_event__synthesize_guest_os);
 
-	if (!system_wide)
-		perf_event__synthesize_thread_map(evsel_list->threads,
+	if (!opts->system_wide)
+		perf_event__synthesize_thread_map(tool, evsel_list->threads,
 						  process_synthesized_event,
-						  session);
+						  machine);
 	else
-		perf_event__synthesize_threads(process_synthesized_event,
-					       session);
+		perf_event__synthesize_threads(tool, process_synthesized_event,
+					       machine);
 
-	if (realtime_prio) {
+	if (rec->realtime_prio) {
 		struct sched_param param;
 
-		param.sched_priority = realtime_prio;
+		param.sched_priority = rec->realtime_prio;
 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
 			pr_err("Could not set realtime priority.\n");
 			exit(-1);
@@ -713,14 +597,14 @@ static int __cmd_record(int argc, const char **argv)
 	 * Let the child rip
 	 */
 	if (forks)
-		close(go_pipe[1]);
+		perf_evlist__start_workload(evsel_list);
 
 	for (;;) {
-		int hits = samples;
+		int hits = rec->samples;
 
-		mmap_read_all();
+		perf_record__mmap_read_all(rec);
 
-		if (hits == samples) {
+		if (hits == rec->samples) {
 			if (done)
 				break;
 			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
@@ -741,9 +625,9 @@ static int __cmd_record(int argc, const char **argv)
 	 */
 	fprintf(stderr,
 		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
-		(double)bytes_written / 1024.0 / 1024.0,
+		(double)rec->bytes_written / 1024.0 / 1024.0,
 		output_name,
-		bytes_written / 24);
+		rec->bytes_written / 24);
 
 	return 0;
 
@@ -758,58 +642,89 @@ static const char * const record_usage[] = {
 	NULL
 };
 
-static bool force, append_file;
+/*
+ * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
+ * because we need to have access to it in perf_record__exit, that is called
+ * after cmd_record() exits, but since record_options need to be accessible to
+ * builtin-script, leave it here.
+ *
+ * At least we don't ouch it in all the other functions here directly.
+ *
+ * Just say no to tons of global variables, sigh.
+ */
+static struct perf_record record = {
+	.opts = {
+		.target_pid = -1,
+		.target_tid = -1,
+		.mmap_pages = UINT_MAX,
+		.user_freq = UINT_MAX,
+		.user_interval = ULLONG_MAX,
+		.freq = 1000,
+		.sample_id_all_avail = true,
+	},
+	.write_mode = WRITE_FORCE,
+	.file_new = true,
+};
 
+/*
+ * XXX Will stay a global variable till we fix builtin-script.c to stop messing
+ * with it and switch to use the library functions in perf_evlist that came
+ * from builtin-record.c, i.e. use perf_record_opts,
+ * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
+ * using pipes, etc.
+ */
 const struct option record_options[] = {
-	OPT_CALLBACK('e', "event", &evsel_list, "event",
+	OPT_CALLBACK('e', "event", &record.evlist, "event",
 		     "event selector. use 'perf list' to list available events",
 		     parse_events_option),
-	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
+	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
 		     "event filter", parse_filter),
-	OPT_INTEGER('p', "pid", &target_pid,
+	OPT_INTEGER('p', "pid", &record.opts.target_pid,
 		    "record events on existing process id"),
-	OPT_INTEGER('t', "tid", &target_tid,
+	OPT_INTEGER('t', "tid", &record.opts.target_tid,
 		    "record events on existing thread id"),
-	OPT_INTEGER('r', "realtime", &realtime_prio,
+	OPT_INTEGER('r', "realtime", &record.realtime_prio,
 		    "collect data with this RT SCHED_FIFO priority"),
-	OPT_BOOLEAN('D', "no-delay", &nodelay,
+	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
 		    "collect data without buffering"),
-	OPT_BOOLEAN('R', "raw-samples", &raw_samples,
+	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
 		    "collect raw sample records from all opened counters"),
-	OPT_BOOLEAN('a', "all-cpus", &system_wide,
+	OPT_BOOLEAN('a', "all-cpus", &record.opts.system_wide,
 			    "system-wide collection from all CPUs"),
-	OPT_BOOLEAN('A', "append", &append_file,
+	OPT_BOOLEAN('A', "append", &record.append_file,
 			    "append to the output file to do incremental profiling"),
-	OPT_STRING('C', "cpu", &cpu_list, "cpu",
+	OPT_STRING('C', "cpu", &record.opts.cpu_list, "cpu",
 		    "list of cpus to monitor"),
-	OPT_BOOLEAN('f', "force", &force,
+	OPT_BOOLEAN('f', "force", &record.force,
 			"overwrite existing data file (deprecated)"),
-	OPT_U64('c', "count", &user_interval, "event period to sample"),
-	OPT_STRING('o', "output", &output_name, "file",
+	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
+	OPT_STRING('o', "output", &record.output_name, "file",
 		    "output file name"),
-	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
+	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
 		    "child tasks do not inherit counters"),
-	OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
-	OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
-	OPT_BOOLEAN(0, "group", &group,
+	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
+	OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
+		     "number of mmap data pages"),
+	OPT_BOOLEAN(0, "group", &record.opts.group,
 		    "put the counters into a counter group"),
-	OPT_BOOLEAN('g', "call-graph", &call_graph,
+	OPT_BOOLEAN('g', "call-graph", &record.opts.call_graph,
 		    "do call-graph (stack chain/backtrace) recording"),
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show counter open errors, etc)"),
 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
-	OPT_BOOLEAN('s', "stat", &inherit_stat,
+	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
 		    "per thread counts"),
-	OPT_BOOLEAN('d', "data", &sample_address,
+	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
 		    "Sample addresses"),
-	OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
-	OPT_BOOLEAN('n', "no-samples", &no_samples,
+	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
+	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
+	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
 		    "don't sample"),
-	OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
+	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
 		    "do not update the buildid cache"),
-	OPT_BOOLEAN('B', "no-buildid", &no_buildid,
+	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
 		    "do not collect buildids in perf.data"),
-	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
+	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
 		     "monitor event in cgroup name only",
 		     parse_cgroups),
 	OPT_END()
@@ -819,6 +734,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
 {
 	int err = -ENOMEM;
 	struct perf_evsel *pos;
+	struct perf_evlist *evsel_list;
+	struct perf_record *rec = &record;
 
 	perf_header__set_cmdline(argc, argv);
 
@@ -826,23 +743,25 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
 	if (evsel_list == NULL)
 		return -ENOMEM;
 
+	rec->evlist = evsel_list;
+
 	argc = parse_options(argc, argv, record_options, record_usage,
 			    PARSE_OPT_STOP_AT_NON_OPTION);
-	if (!argc && target_pid == -1 && target_tid == -1 &&
-		!system_wide && !cpu_list)
+	if (!argc && rec->opts.target_pid == -1 && rec->opts.target_tid == -1 &&
+		!rec->opts.system_wide && !rec->opts.cpu_list)
 		usage_with_options(record_usage, record_options);
 
-	if (force && append_file) {
+	if (rec->force && rec->append_file) {
 		fprintf(stderr, "Can't overwrite and append at the same time."
 				" You need to choose between -f and -A");
 		usage_with_options(record_usage, record_options);
-	} else if (append_file) {
-		write_mode = WRITE_APPEND;
+	} else if (rec->append_file) {
+		rec->write_mode = WRITE_APPEND;
 	} else {
-		write_mode = WRITE_FORCE;
+		rec->write_mode = WRITE_FORCE;
 	}
 
-	if (nr_cgroups && !system_wide) {
+	if (nr_cgroups && !rec->opts.system_wide) {
 		fprintf(stderr, "cgroup monitoring only available in"
 			" system-wide mode\n");
 		usage_with_options(record_usage, record_options);
@@ -860,7 +779,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
860"If some relocation was applied (e.g. kexec) symbols may be misresolved\n" 779"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
861"even with a suitable vmlinux or kallsyms file.\n\n"); 780"even with a suitable vmlinux or kallsyms file.\n\n");
862 781
863 if (no_buildid_cache || no_buildid) 782 if (rec->no_buildid_cache || rec->no_buildid)
864 disable_buildid_cache(); 783 disable_buildid_cache();
865 784
866 if (evsel_list->nr_entries == 0 && 785 if (evsel_list->nr_entries == 0 &&
@@ -869,43 +788,37 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
 		goto out_symbol_exit;
 	}
 
-	if (target_pid != -1)
-		target_tid = target_pid;
+	if (rec->opts.target_pid != -1)
+		rec->opts.target_tid = rec->opts.target_pid;
 
-	if (perf_evlist__create_maps(evsel_list, target_pid,
-				     target_tid, cpu_list) < 0)
+	if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid,
+				     rec->opts.target_tid, rec->opts.cpu_list) < 0)
 		usage_with_options(record_usage, record_options);
 
 	list_for_each_entry(pos, &evsel_list->entries, node) {
-		if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
-					 evsel_list->threads->nr) < 0)
-			goto out_free_fd;
 		if (perf_header__push_event(pos->attr.config, event_name(pos)))
 			goto out_free_fd;
 	}
 
-	if (perf_evlist__alloc_pollfd(evsel_list) < 0)
-		goto out_free_fd;
-
-	if (user_interval != ULLONG_MAX)
-		default_interval = user_interval;
-	if (user_freq != UINT_MAX)
-		freq = user_freq;
+	if (rec->opts.user_interval != ULLONG_MAX)
+		rec->opts.default_interval = rec->opts.user_interval;
+	if (rec->opts.user_freq != UINT_MAX)
+		rec->opts.freq = rec->opts.user_freq;
 
 	/*
 	 * User specified count overrides default frequency.
 	 */
-	if (default_interval)
-		freq = 0;
-	else if (freq) {
-		default_interval = freq;
+	if (rec->opts.default_interval)
+		rec->opts.freq = 0;
+	else if (rec->opts.freq) {
+		rec->opts.default_interval = rec->opts.freq;
 	} else {
 		fprintf(stderr, "frequency and count are zero, aborting\n");
 		err = -EINVAL;
 		goto out_free_fd;
 	}
 
-	err = __cmd_record(argc, argv);
+	err = __cmd_record(&record, argc, argv);
 out_free_fd:
 	perf_evlist__delete_maps(evsel_list);
 out_symbol_exit: