Diffstat (limited to 'tools')

 -rw-r--r--  tools/perf/Documentation/perf-lock.txt |  29
 -rw-r--r--  tools/perf/builtin-lock.c              | 148
 -rw-r--r--  tools/perf/builtin-trace.c             |   4
 -rw-r--r--  tools/perf/command-list.txt            |   1
 -rw-r--r--  tools/perf/perf-archive.sh             |   3
 -rw-r--r--  tools/perf/perf.c                      |   2
 -rw-r--r--  tools/perf/perf.h                      |   4
 -rw-r--r--  tools/perf/util/hist.c                 |   2
 -rw-r--r--  tools/perf/util/probe-event.c          |   2

9 files changed, 184 insertions, 11 deletions
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
new file mode 100644
index 000000000000..b317102138c8
--- /dev/null
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -0,0 +1,29 @@
+perf-lock(1)
+============
+
+NAME
+----
+perf-lock - Analyze lock events
+
+SYNOPSIS
+--------
+[verse]
+'perf lock' {record|report|trace}
+
+DESCRIPTION
+-----------
+You can analyze various lock behaviours
+and statistics with this 'perf lock' command.
+
+'perf lock record <command>' records lock events
+between start and end <command>. And this command
+produces the file "perf.data" which contains tracing
+results of lock events.
+
+'perf lock trace' shows raw lock events.
+
+'perf lock report' reports statistical data.
+
+SEE ALSO
+--------
+linkperf:perf[1]
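
For orientation, the workflow the new man page describes boils down to three commands (a sketch; "./myprog" stands in for whatever command you want to profile):

	perf lock record ./myprog   # trace lock events while ./myprog runs; writes perf.data
	perf lock trace             # dump the raw lock events recorded in perf.data
	perf lock report            # print per-lock statistics from perf.data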
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index fb9ab2ad3f92..e12c844df1e2 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -460,6 +460,150 @@ process_raw_event(void *data, int cpu,
 	process_lock_release_event(data, event, cpu, timestamp, thread);
 }
 
+struct raw_event_queue {
+	u64			timestamp;
+	int			cpu;
+	void			*data;
+	struct thread		*thread;
+	struct list_head	list;
+};
+
+static LIST_HEAD(raw_event_head);
+
+#define FLUSH_PERIOD	(5 * NSEC_PER_SEC)
+
+static u64 flush_limit = ULLONG_MAX;
+static u64 last_flush = 0;
+struct raw_event_queue *last_inserted;
+
+static void flush_raw_event_queue(u64 limit)
+{
+	struct raw_event_queue *tmp, *iter;
+
+	list_for_each_entry_safe(iter, tmp, &raw_event_head, list) {
+		if (iter->timestamp > limit)
+			return;
+
+		if (iter == last_inserted)
+			last_inserted = NULL;
+
+		process_raw_event(iter->data, iter->cpu, iter->timestamp,
+				  iter->thread);
+
+		last_flush = iter->timestamp;
+		list_del(&iter->list);
+		free(iter->data);
+		free(iter);
+	}
+}
+
+static void __queue_raw_event_end(struct raw_event_queue *new)
+{
+	struct raw_event_queue *iter;
+
+	list_for_each_entry_reverse(iter, &raw_event_head, list) {
+		if (iter->timestamp < new->timestamp) {
+			list_add(&new->list, &iter->list);
+			return;
+		}
+	}
+
+	list_add(&new->list, &raw_event_head);
+}
+
+static void __queue_raw_event_before(struct raw_event_queue *new,
+				     struct raw_event_queue *iter)
+{
+	list_for_each_entry_continue_reverse(iter, &raw_event_head, list) {
+		if (iter->timestamp < new->timestamp) {
+			list_add(&new->list, &iter->list);
+			return;
+		}
+	}
+
+	list_add(&new->list, &raw_event_head);
+}
+
+static void __queue_raw_event_after(struct raw_event_queue *new,
+				    struct raw_event_queue *iter)
+{
+	list_for_each_entry_continue(iter, &raw_event_head, list) {
+		if (iter->timestamp > new->timestamp) {
+			list_add_tail(&new->list, &iter->list);
+			return;
+		}
+	}
+	list_add_tail(&new->list, &raw_event_head);
+}
+
+/* The queue is ordered by time */
+static void __queue_raw_event(struct raw_event_queue *new)
+{
+	if (!last_inserted) {
+		__queue_raw_event_end(new);
+		return;
+	}
+
+	/*
+	 * Most of the time the current event has a timestamp
+	 * very close to the last event inserted, unless we just switched
+	 * to another event buffer. Having a sorting based on a list and
+	 * on the last inserted event that is close to the current one is
+	 * probably more efficient than an rbtree based sorting.
+	 */
+	if (last_inserted->timestamp >= new->timestamp)
+		__queue_raw_event_before(new, last_inserted);
+	else
+		__queue_raw_event_after(new, last_inserted);
+}
+
+static void queue_raw_event(void *data, int raw_size, int cpu,
+			    u64 timestamp, struct thread *thread)
+{
+	struct raw_event_queue *new;
+
+	if (flush_limit == ULLONG_MAX)
+		flush_limit = timestamp + FLUSH_PERIOD;
+
+	if (timestamp < last_flush) {
+		printf("Warning: Timestamp below last timeslice flush\n");
+		return;
+	}
+
+	new = malloc(sizeof(*new));
+	if (!new)
+		die("Not enough memory\n");
+
+	new->timestamp = timestamp;
+	new->cpu = cpu;
+	new->thread = thread;
+
+	new->data = malloc(raw_size);
+	if (!new->data)
+		die("Not enough memory\n");
+
+	memcpy(new->data, data, raw_size);
+
+	__queue_raw_event(new);
+	last_inserted = new;
+
+	/*
+	 * We want to have a slice of events covering 2 * FLUSH_PERIOD
+	 * If FLUSH_PERIOD is big enough, it ensures every events that occured
+	 * in the first half of the timeslice have all been buffered and there
+	 * are none remaining (we need that because of the weakly ordered
+	 * event recording we have). Then once we reach the 2 * FLUSH_PERIOD
+	 * timeslice, we flush the first half to be gentle with the memory
+	 * (the second half can still get new events in the middle, so wait
+	 * another period to flush it)
+	 */
+	if (new->timestamp > flush_limit &&
+		new->timestamp - flush_limit > FLUSH_PERIOD) {
+		flush_limit += FLUSH_PERIOD;
+		flush_raw_event_queue(flush_limit);
+	}
+}
+
 static int process_sample_event(event_t *event, struct perf_session *session)
 {
 	struct thread *thread;
@@ -480,7 +624,7 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 	if (profile_cpu != -1 && profile_cpu != (int) data.cpu)
 		return 0;
 
-	process_raw_event(data.raw_data, data.cpu, data.time, thread);
+	queue_raw_event(data.raw_data, data.raw_size, data.cpu, data.time, thread);
 
 	return 0;
 }
@@ -576,6 +720,7 @@ static void __cmd_report(void)
 	setup_pager();
 	select_key();
 	read_events();
+	flush_raw_event_queue(ULLONG_MAX);
 	sort_result();
 	print_result();
 }
@@ -608,7 +753,6 @@ static const char *record_args[] = {
 	"record",
 	"-a",
 	"-R",
-	"-M",
 	"-f",
 	"-m", "1024",
 	"-c", "1",
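
A note on why the bulk of this patch is queueing code: perf records samples into per-CPU buffers, so events reach the tool only weakly ordered by timestamp. The patch therefore parks each raw event in a time-sorted list, using the last inserted node as a search hint (consecutive samples are usually close in time, which is why the comment above argues a hinted list beats an rbtree here), and flush_raw_event_queue() periodically processes and frees everything older than a moving limit, so only about two FLUSH_PERIOD (5 s) slices stay buffered. Below is a minimal standalone sketch of the insertion-with-hint idea only: names and types are illustrative, plain pointers replace the kernel-style list_head, and the periodic flush is omitted.

#include <stdio.h>
#include <stdlib.h>

struct ev {
	unsigned long long ts;
	struct ev *prev, *next;
};

static struct ev head = { 0, &head, &head };	/* circular sentinel */
static struct ev *hint;				/* last node inserted */

static void insert_after(struct ev *pos, struct ev *node)
{
	node->prev = pos;
	node->next = pos->next;
	pos->next->prev = node;
	pos->next = node;
}

/* Keep the list sorted by ts, starting the scan from the hint. */
static void insert_sorted(struct ev *node)
{
	struct ev *iter = hint ? hint : head.prev;

	/* hint holds newer events: walk backward to an older node */
	while (iter != &head && iter->ts >= node->ts)
		iter = iter->prev;
	/* hint held older events: walk forward past older nodes */
	while (iter->next != &head && iter->next->ts < node->ts)
		iter = iter->next;

	insert_after(iter, node);
	hint = node;
}

int main(void)
{
	/* weakly ordered arrivals, as from per-CPU buffers */
	unsigned long long samples[] = { 10, 12, 11, 30, 13, 31 };
	struct ev *e;
	size_t i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		e = malloc(sizeof(*e));
		if (!e)
			return 1;
		e->ts = samples[i];
		insert_sorted(e);
	}

	for (e = head.next; e != &head; e = e->next)
		printf("%llu ", e->ts);	/* prints: 10 11 12 13 30 31 */
	printf("\n");
	return 0;
}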
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 5db687fc13de..407041d20de0 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -573,7 +573,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
 
 	if (symbol__init() < 0)
 		return -1;
-	setup_pager();
+	if (!script_name)
+		setup_pager();
 
 	session = perf_session__new(input_name, O_RDONLY, 0);
 	if (session == NULL)
@@ -608,7 +609,6 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
 		return -1;
 	}
 
-	perf_header__read(&session->header, input);
 	err = scripting_ops->generate_script("perf-trace");
 	goto out;
 }
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index 9afcff2e3ae5..db6ee94d4a8e 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -18,3 +18,4 @@ perf-top mainporcelain common
 perf-trace mainporcelain common
 perf-probe mainporcelain common
 perf-kmem mainporcelain common
+perf-lock mainporcelain common
diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh
index 45fbe2f07b15..910468e6e01c 100644
--- a/tools/perf/perf-archive.sh
+++ b/tools/perf/perf-archive.sh
@@ -9,8 +9,9 @@ fi
 
 DEBUGDIR=~/.debug/
 BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX)
+NOBUILDID=0000000000000000000000000000000000000000
 
-perf buildid-list -i $PERF_DATA --with-hits > $BUILDIDS
+perf buildid-list -i $PERF_DATA --with-hits | grep -v "^$NOBUILDID " > $BUILDIDS
 if [ ! -s $BUILDIDS ] ; then
 	echo "perf archive: no build-ids found"
 	rm -f $BUILDIDS
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 57cb107c1f13..cd32c200cdb3 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -445,7 +445,7 @@ int main(int argc, const char **argv)
 
 	/*
 	 * We use PATH to find perf commands, but we prepend some higher
-	 * precidence paths: the "--exec-path" option, the PERF_EXEC_PATH
+	 * precedence paths: the "--exec-path" option, the PERF_EXEC_PATH
 	 * environment, and the $(perfexecdir) from the Makefile at build
 	 * time.
 	 */
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 75f941bfba9e..6fb379bc1d1f 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -65,9 +65,7 @@
  * Use the __kuser_memory_barrier helper in the CPU helper page. See
  * arch/arm/kernel/entry-armv.S in the kernel source for details.
  */
-#define rmb()		asm volatile("mov r0, #0xffff0fff; mov lr, pc;" \
-				     "sub pc, r0, #95" ::: "r0", "lr", "cc", \
-				     "memory")
+#define rmb()		((void(*)(void))0xffff0fa0)()
 #define cpu_relax()	asm volatile("":::"memory")
 #endif
 
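
The arithmetic behind this hunk: the removed asm loaded r0 = 0xffff0fff and executed "sub pc, r0, #95", i.e. it branched to 0xffff0fff - 95 = 0xffff0fa0. The new macro calls that same fixed address directly, which is where the kernel maps the ARM __kuser_memory_barrier helper into every user process. A standalone restatement (illustrative name; only meaningful on ARM Linux with kuser helpers enabled):

/* Call the kuser memory-barrier helper the kernel maps at 0xffff0fa0. */
static inline void kuser_memory_barrier(void)
{
	((void (*)(void))0xffff0fa0)();
}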
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index e8daf5ca6fd2..44408c2621cf 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -321,7 +321,7 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
 		new_depth_mask &= ~(1 << (depth - 1));
 
 	/*
-	 * But we keep the older depth mask for the line seperator
+	 * But we keep the older depth mask for the line separator
 	 * to keep the level link until we reach the last child
 	 */
 	ret += ipchain__fprintf_graph_line(fp, depth, depth_mask,
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index c971e81e9cbf..53181dbfe4a8 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -508,8 +508,8 @@ void show_perf_probe_events(void)
 	struct str_node *ent;
 
 	setup_pager();
-
 	memset(&pp, 0, sizeof(pp));
+
 	fd = open_kprobe_events(O_RDONLY, 0);
 	rawlist = get_trace_kprobe_event_rawlist(fd);
 	close(fd);
