diff options
author | Don Zickus <dzickus@redhat.com> | 2014-03-14 10:43:44 -0400 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2014-03-14 17:08:41 -0400 |
commit | 363b785f3805a2632eb09a8b430842461c21a640 (patch) | |
tree | 1d6dcd4a3e7c0309c1c19a16ea53c45cd1de5407 /tools | |
parent | 09a71b97cce70551356b13b668aa1d7d6da84457 (diff) |
perf tools: Speed up thread map generation
When trying to capture perf data on a system running SPECjbb2013, perf
hung for about 15 minutes. This is because it took that long to gather
about 10,000 thread maps and process them.
I don't think a user wants to wait that long.
Instead, recognize that thread maps are roughly equivalent to pid maps
and just quickly copy those.
To do this, I synthesize 'fork' events, which eventually call
thread__fork() and copy the maps over.
The overhead drops from about 15 minutes to a few seconds.
--
V2: based on Jiri's comments, moved malloc up a level
and made sure the memory was freed
Signed-off-by: Don Zickus <dzickus@redhat.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joe Mario <jmario@redhat.com>
Link: http://lkml.kernel.org/r/1394808224-113774-1-git-send-email-dzickus@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/perf/util/event.c | 59 |
1 files changed, 51 insertions, 8 deletions
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 55eebe936513..3e580be0f6fb 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c | |||
@@ -129,6 +129,28 @@ out: | |||
129 | return tgid; | 129 | return tgid; |
130 | } | 130 | } |
131 | 131 | ||
132 | static int perf_event__synthesize_fork(struct perf_tool *tool, | ||
133 | union perf_event *event, pid_t pid, | ||
134 | pid_t tgid, perf_event__handler_t process, | ||
135 | struct machine *machine) | ||
136 | { | ||
137 | memset(&event->fork, 0, sizeof(event->fork) + machine->id_hdr_size); | ||
138 | |||
139 | /* this is really a clone event but we use fork to synthesize it */ | ||
140 | event->fork.ppid = tgid; | ||
141 | event->fork.ptid = tgid; | ||
142 | event->fork.pid = tgid; | ||
143 | event->fork.tid = pid; | ||
144 | event->fork.header.type = PERF_RECORD_FORK; | ||
145 | |||
146 | event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size); | ||
147 | |||
148 | if (process(tool, event, &synth_sample, machine) != 0) | ||
149 | return -1; | ||
150 | |||
151 | return 0; | ||
152 | } | ||
153 | |||
132 | int perf_event__synthesize_mmap_events(struct perf_tool *tool, | 154 | int perf_event__synthesize_mmap_events(struct perf_tool *tool, |
133 | union perf_event *event, | 155 | union perf_event *event, |
134 | pid_t pid, pid_t tgid, | 156 | pid_t pid, pid_t tgid, |
@@ -278,6 +300,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool, | |||
278 | 300 | ||
279 | static int __event__synthesize_thread(union perf_event *comm_event, | 301 | static int __event__synthesize_thread(union perf_event *comm_event, |
280 | union perf_event *mmap_event, | 302 | union perf_event *mmap_event, |
303 | union perf_event *fork_event, | ||
281 | pid_t pid, int full, | 304 | pid_t pid, int full, |
282 | perf_event__handler_t process, | 305 | perf_event__handler_t process, |
283 | struct perf_tool *tool, | 306 | struct perf_tool *tool, |
@@ -326,9 +349,15 @@ static int __event__synthesize_thread(union perf_event *comm_event, | |||
326 | if (tgid == -1) | 349 | if (tgid == -1) |
327 | return -1; | 350 | return -1; |
328 | 351 | ||
329 | /* process the thread's maps too */ | 352 | if (_pid == pid) { |
330 | rc = perf_event__synthesize_mmap_events(tool, mmap_event, _pid, tgid, | 353 | /* process the parent's maps too */ |
331 | process, machine, mmap_data); | 354 | rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, |
355 | process, machine, mmap_data); | ||
356 | } else { | ||
357 | /* only fork the tid's map, to save time */ | ||
358 | rc = perf_event__synthesize_fork(tool, fork_event, _pid, tgid, | ||
359 | process, machine); | ||
360 | } | ||
332 | 361 | ||
333 | if (rc) | 362 | if (rc) |
334 | return rc; | 363 | return rc; |
@@ -344,7 +373,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, | |||
344 | struct machine *machine, | 373 | struct machine *machine, |
345 | bool mmap_data) | 374 | bool mmap_data) |
346 | { | 375 | { |
347 | union perf_event *comm_event, *mmap_event; | 376 | union perf_event *comm_event, *mmap_event, *fork_event; |
348 | int err = -1, thread, j; | 377 | int err = -1, thread, j; |
349 | 378 | ||
350 | comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); | 379 | comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); |
@@ -355,9 +384,14 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, | |||
355 | if (mmap_event == NULL) | 384 | if (mmap_event == NULL) |
356 | goto out_free_comm; | 385 | goto out_free_comm; |
357 | 386 | ||
387 | fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size); | ||
388 | if (fork_event == NULL) | ||
389 | goto out_free_mmap; | ||
390 | |||
358 | err = 0; | 391 | err = 0; |
359 | for (thread = 0; thread < threads->nr; ++thread) { | 392 | for (thread = 0; thread < threads->nr; ++thread) { |
360 | if (__event__synthesize_thread(comm_event, mmap_event, | 393 | if (__event__synthesize_thread(comm_event, mmap_event, |
394 | fork_event, | ||
361 | threads->map[thread], 0, | 395 | threads->map[thread], 0, |
362 | process, tool, machine, | 396 | process, tool, machine, |
363 | mmap_data)) { | 397 | mmap_data)) { |
@@ -383,6 +417,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, | |||
383 | /* if not, generate events for it */ | 417 | /* if not, generate events for it */ |
384 | if (need_leader && | 418 | if (need_leader && |
385 | __event__synthesize_thread(comm_event, mmap_event, | 419 | __event__synthesize_thread(comm_event, mmap_event, |
420 | fork_event, | ||
386 | comm_event->comm.pid, 0, | 421 | comm_event->comm.pid, 0, |
387 | process, tool, machine, | 422 | process, tool, machine, |
388 | mmap_data)) { | 423 | mmap_data)) { |
@@ -391,6 +426,8 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, | |||
391 | } | 426 | } |
392 | } | 427 | } |
393 | } | 428 | } |
429 | free(fork_event); | ||
430 | out_free_mmap: | ||
394 | free(mmap_event); | 431 | free(mmap_event); |
395 | out_free_comm: | 432 | out_free_comm: |
396 | free(comm_event); | 433 | free(comm_event); |
@@ -405,7 +442,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool, | |||
405 | DIR *proc; | 442 | DIR *proc; |
406 | char proc_path[PATH_MAX]; | 443 | char proc_path[PATH_MAX]; |
407 | struct dirent dirent, *next; | 444 | struct dirent dirent, *next; |
408 | union perf_event *comm_event, *mmap_event; | 445 | union perf_event *comm_event, *mmap_event, *fork_event; |
409 | int err = -1; | 446 | int err = -1; |
410 | 447 | ||
411 | comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); | 448 | comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); |
@@ -416,6 +453,10 @@ int perf_event__synthesize_threads(struct perf_tool *tool, | |||
416 | if (mmap_event == NULL) | 453 | if (mmap_event == NULL) |
417 | goto out_free_comm; | 454 | goto out_free_comm; |
418 | 455 | ||
456 | fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size); | ||
457 | if (fork_event == NULL) | ||
458 | goto out_free_mmap; | ||
459 | |||
419 | if (machine__is_default_guest(machine)) | 460 | if (machine__is_default_guest(machine)) |
420 | return 0; | 461 | return 0; |
421 | 462 | ||
@@ -423,7 +464,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool, | |||
423 | proc = opendir(proc_path); | 464 | proc = opendir(proc_path); |
424 | 465 | ||
425 | if (proc == NULL) | 466 | if (proc == NULL) |
426 | goto out_free_mmap; | 467 | goto out_free_fork; |
427 | 468 | ||
428 | while (!readdir_r(proc, &dirent, &next) && next) { | 469 | while (!readdir_r(proc, &dirent, &next) && next) { |
429 | char *end; | 470 | char *end; |
@@ -435,12 +476,14 @@ int perf_event__synthesize_threads(struct perf_tool *tool, | |||
435 | * We may race with exiting thread, so don't stop just because | 476 | * We may race with exiting thread, so don't stop just because |
436 | * one thread couldn't be synthesized. | 477 | * one thread couldn't be synthesized. |
437 | */ | 478 | */ |
438 | __event__synthesize_thread(comm_event, mmap_event, pid, 1, | 479 | __event__synthesize_thread(comm_event, mmap_event, fork_event, pid, |
439 | process, tool, machine, mmap_data); | 480 | 1, process, tool, machine, mmap_data); |
440 | } | 481 | } |
441 | 482 | ||
442 | err = 0; | 483 | err = 0; |
443 | closedir(proc); | 484 | closedir(proc); |
485 | out_free_fork: | ||
486 | free(fork_event); | ||
444 | out_free_mmap: | 487 | out_free_mmap: |
445 | free(mmap_event); | 488 | free(mmap_event); |
446 | out_free_comm: | 489 | out_free_comm: |