aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/builtin-record.c
diff options
context:
space:
mode:
author Ingo Molnar <mingo@elte.hu> 2009-06-06 14:33:43 -0400
committer Ingo Molnar <mingo@elte.hu> 2009-06-06 14:33:43 -0400
commit 864709302a80f26fa9da3be5b47304f0b8bae192 (patch)
tree 8c2bab78f141fe43a38914bd3e3aae0a88f958e5 /tools/perf/builtin-record.c
parent 75b5032212641f6d38ac041416945e70da833b68 (diff)
perf_counter tools: Move from Documentation/perf_counter/ to tools/perf/
Several people have suggested that 'perf' has become a full-fledged tool that should be moved out of Documentation/. Move it to the (new) tools/ directory.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools/perf/builtin-record.c')
-rw-r--r-- tools/perf/builtin-record.c 544
1 files changed, 544 insertions, 0 deletions
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
new file mode 100644
index 000000000000..aeab9c4b15e4
--- /dev/null
+++ b/tools/perf/builtin-record.c
@@ -0,0 +1,544 @@
1/*
2 * builtin-record.c
3 *
4 * Builtin record command: Record the profile of a workload
5 * (or a CPU, or a PID) into the perf.data output file - for
6 * later analysis via perf report.
7 */
8#include "builtin.h"
9
10#include "perf.h"
11
12#include "util/util.h"
13#include "util/parse-options.h"
14#include "util/parse-events.h"
15#include "util/string.h"
16
17#include <unistd.h>
18#include <sched.h>
19
/*
 * Round x up to the next multiple of a, where a must be a power of two.
 *
 * __ALIGN_MASK() does the arithmetic on a precomputed mask (a - 1);
 * ALIGN() builds that mask in the type of x so the complement is not
 * truncated or sign-extended when x is wider than a.
 *
 * __typeof__ is used rather than typeof: the bare keyword is a GNU
 * extension that is unavailable in strict ISO modes (-std=c99/-std=c11),
 * while the double-underscore spelling works in every GCC/Clang mode.
 *
 * Both macros evaluate their arguments more than once; do not pass
 * expressions with side effects.
 */
#define __ALIGN_MASK(x, mask)	(((x) + (mask)) & ~(mask))
#define ALIGN(x, a)		__ALIGN_MASK(x, (__typeof__(x))(a) - 1)
22
23static int fd[MAX_NR_CPUS][MAX_COUNTERS];
24
25static long default_interval = 100000;
26
27static int nr_cpus = 0;
28static unsigned int page_size;
29static unsigned int mmap_pages = 128;
30static int freq = 0;
31static int output;
32static const char *output_name = "perf.data";
33static int group = 0;
34static unsigned int realtime_prio = 0;
35static int system_wide = 0;
36static pid_t target_pid = -1;
37static int inherit = 1;
38static int force = 0;
39static int append_file = 0;
40
41static long samples;
42static struct timeval last_read;
43static struct timeval this_read;
44
45static __u64 bytes_written;
46
47static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
48
49static int nr_poll;
50static int nr_cpu;
51
52struct mmap_event {
53 struct perf_event_header header;
54 __u32 pid;
55 __u32 tid;
56 __u64 start;
57 __u64 len;
58 __u64 pgoff;
59 char filename[PATH_MAX];
60};
61
62struct comm_event {
63 struct perf_event_header header;
64 __u32 pid;
65 __u32 tid;
66 char comm[16];
67};
68
69
70struct mmap_data {
71 int counter;
72 void *base;
73 unsigned int mask;
74 unsigned int prev;
75};
76
77static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
78
79static unsigned int mmap_read_head(struct mmap_data *md)
80{
81 struct perf_counter_mmap_page *pc = md->base;
82 int head;
83
84 head = pc->data_head;
85 rmb();
86
87 return head;
88}
89
90static void mmap_read(struct mmap_data *md)
91{
92 unsigned int head = mmap_read_head(md);
93 unsigned int old = md->prev;
94 unsigned char *data = md->base + page_size;
95 unsigned long size;
96 void *buf;
97 int diff;
98
99 gettimeofday(&this_read, NULL);
100
101 /*
102 * If we're further behind than half the buffer, there's a chance
103 * the writer will bite our tail and mess up the samples under us.
104 *
105 * If we somehow ended up ahead of the head, we got messed up.
106 *
107 * In either case, truncate and restart at head.
108 */
109 diff = head - old;
110 if (diff > md->mask / 2 || diff < 0) {
111 struct timeval iv;
112 unsigned long msecs;
113
114 timersub(&this_read, &last_read, &iv);
115 msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
116
117 fprintf(stderr, "WARNING: failed to keep up with mmap data."
118 " Last read %lu msecs ago.\n", msecs);
119
120 /*
121 * head points to a known good entry, start there.
122 */
123 old = head;
124 }
125
126 last_read = this_read;
127
128 if (old != head)
129 samples++;
130
131 size = head - old;
132
133 if ((old & md->mask) + size != (head & md->mask)) {
134 buf = &data[old & md->mask];
135 size = md->mask + 1 - (old & md->mask);
136 old += size;
137
138 while (size) {
139 int ret = write(output, buf, size);
140
141 if (ret < 0)
142 die("failed to write");
143
144 size -= ret;
145 buf += ret;
146
147 bytes_written += ret;
148 }
149 }
150
151 buf = &data[old & md->mask];
152 size = head - old;
153 old += size;
154
155 while (size) {
156 int ret = write(output, buf, size);
157
158 if (ret < 0)
159 die("failed to write");
160
161 size -= ret;
162 buf += ret;
163
164 bytes_written += ret;
165 }
166
167 md->prev = old;
168}
169
/*
 * Set to 1 from signal context to ask the record loop to stop.
 * volatile sig_atomic_t is the object type the C standard allows a
 * signal handler to store to.
 */
static volatile sig_atomic_t done = 0;

/*
 * Signal handler: request termination of the record loop.
 * The signal number is irrelevant, every signal is treated alike.
 */
static void sig_handler(int sig)
{
	(void)sig;
	done = 1;
}
176
177static void pid_synthesize_comm_event(pid_t pid, int full)
178{
179 struct comm_event comm_ev;
180 char filename[PATH_MAX];
181 char bf[BUFSIZ];
182 int fd, ret;
183 size_t size;
184 char *field, *sep;
185 DIR *tasks;
186 struct dirent dirent, *next;
187
188 snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
189
190 fd = open(filename, O_RDONLY);
191 if (fd < 0) {
192 fprintf(stderr, "couldn't open %s\n", filename);
193 exit(EXIT_FAILURE);
194 }
195 if (read(fd, bf, sizeof(bf)) < 0) {
196 fprintf(stderr, "couldn't read %s\n", filename);
197 exit(EXIT_FAILURE);
198 }
199 close(fd);
200
201 /* 9027 (cat) R 6747 9027 6747 34816 9027 ... */
202 memset(&comm_ev, 0, sizeof(comm_ev));
203 field = strchr(bf, '(');
204 if (field == NULL)
205 goto out_failure;
206 sep = strchr(++field, ')');
207 if (sep == NULL)
208 goto out_failure;
209 size = sep - field;
210 memcpy(comm_ev.comm, field, size++);
211
212 comm_ev.pid = pid;
213 comm_ev.header.type = PERF_EVENT_COMM;
214 size = ALIGN(size, sizeof(uint64_t));
215 comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
216
217 if (!full) {
218 comm_ev.tid = pid;
219
220 ret = write(output, &comm_ev, comm_ev.header.size);
221 if (ret < 0) {
222 perror("failed to write");
223 exit(-1);
224 }
225 return;
226 }
227
228 snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
229
230 tasks = opendir(filename);
231 while (!readdir_r(tasks, &dirent, &next) && next) {
232 char *end;
233 pid = strtol(dirent.d_name, &end, 10);
234 if (*end)
235 continue;
236
237 comm_ev.tid = pid;
238
239 ret = write(output, &comm_ev, comm_ev.header.size);
240 if (ret < 0) {
241 perror("failed to write");
242 exit(-1);
243 }
244 }
245 closedir(tasks);
246 return;
247
248out_failure:
249 fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
250 filename);
251 exit(EXIT_FAILURE);
252}
253
254static void pid_synthesize_mmap_samples(pid_t pid)
255{
256 char filename[PATH_MAX];
257 FILE *fp;
258
259 snprintf(filename, sizeof(filename), "/proc/%d/maps", pid);
260
261 fp = fopen(filename, "r");
262 if (fp == NULL) {
263 fprintf(stderr, "couldn't open %s\n", filename);
264 exit(EXIT_FAILURE);
265 }
266 while (1) {
267 char bf[BUFSIZ], *pbf = bf;
268 struct mmap_event mmap_ev = {
269 .header.type = PERF_EVENT_MMAP,
270 };
271 int n;
272 size_t size;
273 if (fgets(bf, sizeof(bf), fp) == NULL)
274 break;
275
276 /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
277 n = hex2u64(pbf, &mmap_ev.start);
278 if (n < 0)
279 continue;
280 pbf += n + 1;
281 n = hex2u64(pbf, &mmap_ev.len);
282 if (n < 0)
283 continue;
284 pbf += n + 3;
285 if (*pbf == 'x') { /* vm_exec */
286 char *execname = strrchr(bf, ' ');
287
288 if (execname == NULL || execname[1] != '/')
289 continue;
290
291 execname += 1;
292 size = strlen(execname);
293 execname[size - 1] = '\0'; /* Remove \n */
294 memcpy(mmap_ev.filename, execname, size);
295 size = ALIGN(size, sizeof(uint64_t));
296 mmap_ev.len -= mmap_ev.start;
297 mmap_ev.header.size = (sizeof(mmap_ev) -
298 (sizeof(mmap_ev.filename) - size));
299 mmap_ev.pid = pid;
300 mmap_ev.tid = pid;
301
302 if (write(output, &mmap_ev, mmap_ev.header.size) < 0) {
303 perror("failed to write");
304 exit(-1);
305 }
306 }
307 }
308
309 fclose(fp);
310}
311
/*
 * Write comm and mmap records for every task currently listed in /proc,
 * i.e. for every directory entry whose name is purely numeric.
 *
 * Terminates the program when /proc cannot be opened.
 */
static void synthesize_samples(void)
{
	DIR *proc;
	struct dirent *dent;

	proc = opendir("/proc");
	if (proc == NULL) {
		fprintf(stderr, "couldn't open /proc\n");
		exit(EXIT_FAILURE);
	}

	while ((dent = readdir(proc)) != NULL) {
		char *end;
		pid_t pid;

		pid = strtol(dent->d_name, &end, 10);
		if (*end) /* only interested in proper numerical dirents */
			continue;

		pid_synthesize_comm_event(pid, 1);
		pid_synthesize_mmap_samples(pid);
	}

	closedir(proc);
}
333
334static int group_fd;
335
336static void create_counter(int counter, int cpu, pid_t pid)
337{
338 struct perf_counter_attr *attr = attrs + counter;
339 int track = 1;
340
341 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_PERIOD;
342 if (freq) {
343 attr->freq = 1;
344 attr->sample_freq = freq;
345 }
346 attr->mmap = track;
347 attr->comm = track;
348 attr->inherit = (cpu < 0) && inherit;
349
350 track = 0; /* only the first counter needs these */
351
352 fd[nr_cpu][counter] = sys_perf_counter_open(attr, pid, cpu, group_fd, 0);
353
354 if (fd[nr_cpu][counter] < 0) {
355 int err = errno;
356
357 error("syscall returned with %d (%s)\n",
358 fd[nr_cpu][counter], strerror(err));
359 if (err == EPERM)
360 printf("Are you root?\n");
361 exit(-1);
362 }
363 assert(fd[nr_cpu][counter] >= 0);
364 fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
365
366 /*
367 * First counter acts as the group leader:
368 */
369 if (group && group_fd == -1)
370 group_fd = fd[nr_cpu][counter];
371
372 event_array[nr_poll].fd = fd[nr_cpu][counter];
373 event_array[nr_poll].events = POLLIN;
374 nr_poll++;
375
376 mmap_array[nr_cpu][counter].counter = counter;
377 mmap_array[nr_cpu][counter].prev = 0;
378 mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
379 mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
380 PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0);
381 if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
382 error("failed to mmap with %d (%s)\n", errno, strerror(errno));
383 exit(-1);
384 }
385}
386
387static void open_counters(int cpu, pid_t pid)
388{
389 int counter;
390
391 if (pid > 0) {
392 pid_synthesize_comm_event(pid, 0);
393 pid_synthesize_mmap_samples(pid);
394 }
395
396 group_fd = -1;
397 for (counter = 0; counter < nr_counters; counter++)
398 create_counter(counter, cpu, pid);
399
400 nr_cpu++;
401}
402
403static int __cmd_record(int argc, const char **argv)
404{
405 int i, counter;
406 struct stat st;
407 pid_t pid;
408 int flags;
409 int ret;
410
411 page_size = sysconf(_SC_PAGE_SIZE);
412 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
413 assert(nr_cpus <= MAX_NR_CPUS);
414 assert(nr_cpus >= 0);
415
416 if (!stat(output_name, &st) && !force && !append_file) {
417 fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n",
418 output_name);
419 exit(-1);
420 }
421
422 flags = O_CREAT|O_RDWR;
423 if (append_file)
424 flags |= O_APPEND;
425 else
426 flags |= O_TRUNC;
427
428 output = open(output_name, flags, S_IRUSR|S_IWUSR);
429 if (output < 0) {
430 perror("failed to create output file");
431 exit(-1);
432 }
433
434 if (!system_wide) {
435 open_counters(-1, target_pid != -1 ? target_pid : getpid());
436 } else for (i = 0; i < nr_cpus; i++)
437 open_counters(i, target_pid);
438
439 signal(SIGCHLD, sig_handler);
440 signal(SIGINT, sig_handler);
441
442 if (target_pid == -1 && argc) {
443 pid = fork();
444 if (pid < 0)
445 perror("failed to fork");
446
447 if (!pid) {
448 if (execvp(argv[0], (char **)argv)) {
449 perror(argv[0]);
450 exit(-1);
451 }
452 }
453 }
454
455 if (realtime_prio) {
456 struct sched_param param;
457
458 param.sched_priority = realtime_prio;
459 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
460 printf("Could not set realtime priority.\n");
461 exit(-1);
462 }
463 }
464
465 if (system_wide)
466 synthesize_samples();
467
468 while (!done) {
469 int hits = samples;
470
471 for (i = 0; i < nr_cpu; i++) {
472 for (counter = 0; counter < nr_counters; counter++)
473 mmap_read(&mmap_array[i][counter]);
474 }
475
476 if (hits == samples)
477 ret = poll(event_array, nr_poll, 100);
478 }
479
480 /*
481 * Approximate RIP event size: 24 bytes.
482 */
483 fprintf(stderr,
484 "[ perf record: Captured and wrote %.3f MB %s (~%lld samples) ]\n",
485 (double)bytes_written / 1024.0 / 1024.0,
486 output_name,
487 bytes_written / 24);
488
489 return 0;
490}
491
/* NULL-terminated synopsis lines handed to the option parser. */
static const char *const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL,
};
497
498static const struct option options[] = {
499 OPT_CALLBACK('e', "event", NULL, "event",
500 "event selector. use 'perf list' to list available events",
501 parse_events),
502 OPT_INTEGER('p', "pid", &target_pid,
503 "record events on existing pid"),
504 OPT_INTEGER('r', "realtime", &realtime_prio,
505 "collect data with this RT SCHED_FIFO priority"),
506 OPT_BOOLEAN('a', "all-cpus", &system_wide,
507 "system-wide collection from all CPUs"),
508 OPT_BOOLEAN('A', "append", &append_file,
509 "append to the output file to do incremental profiling"),
510 OPT_BOOLEAN('f', "force", &force,
511 "overwrite existing data file"),
512 OPT_LONG('c', "count", &default_interval,
513 "event period to sample"),
514 OPT_STRING('o', "output", &output_name, "file",
515 "output file name"),
516 OPT_BOOLEAN('i', "inherit", &inherit,
517 "child tasks inherit counters"),
518 OPT_INTEGER('F', "freq", &freq,
519 "profile at this frequency"),
520 OPT_INTEGER('m', "mmap-pages", &mmap_pages,
521 "number of mmap data pages"),
522 OPT_END()
523};
524
525int cmd_record(int argc, const char **argv, const char *prefix)
526{
527 int counter;
528
529 argc = parse_options(argc, argv, options, record_usage, 0);
530 if (!argc && target_pid == -1 && !system_wide)
531 usage_with_options(record_usage, options);
532
533 if (!nr_counters)
534 nr_counters = 1;
535
536 for (counter = 0; counter < nr_counters; counter++) {
537 if (attrs[counter].sample_period)
538 continue;
539
540 attrs[counter].sample_period = default_interval;
541 }
542
543 return __cmd_record(argc, argv);
544}