diff options
author | Adrian Hunter <adrian.hunter@intel.com> | 2019-01-09 04:18:33 -0500 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2019-02-06 08:00:40 -0500 |
commit | f08046cb3082b313e7b08dc35838cf8bd902c36b (patch) | |
tree | 01ab2af620c66c6f595e14b235299679cabf24e0 | |
parent | 90c2cda7056e3a7555d874a27aae12fd46ca802e (diff) |
perf thread-stack: Represent jmps to the start of a different symbol
The compiler might optimize a call/ret combination by making it a jmp.
However, the thread-stack does not presently cater for that, so such
control flow is not visible in the call graph. Make it visible by
recording on the stack a branch to the start of a different symbol.
Note that this means that when a ret pops the stack, all jmps must be
popped off first.
Example:
$ cat jmp-to-fn.c
__attribute__((noinline)) int bar(void)
{
	return -1;
}
__attribute__((noinline)) int foo(void)
{
	return bar() + 1;
}
int main()
{
	return foo();
}
$ gcc -ggdb3 -Wall -Wextra -O2 -o jmp-to-fn jmp-to-fn.c
$ objdump -d jmp-to-fn
<SNIP>
0000000000001040 <main>:
1040: 31 c0 xor %eax,%eax
1042: e9 09 01 00 00 jmpq 1150 <foo>
<SNIP>
0000000000001140 <bar>:
1140: b8 ff ff ff ff mov $0xffffffff,%eax
1145: c3 retq
<SNIP>
0000000000001150 <foo>:
1150: 31 c0 xor %eax,%eax
1152: e8 e9 ff ff ff callq 1140 <bar>
1157: 83 c0 01 add $0x1,%eax
115a: c3 retq
<SNIP>
$ perf record -o jmp-to-fn.perf.data -e intel_pt/cyc/u ./jmp-to-fn
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0,017 MB jmp-to-fn.perf.data ]
$ perf script -i jmp-to-fn.perf.data --itrace=be -s ~/libexec/perf-core/scripts/python/export-to-sqlite.py jmp-to-fn.db branches calls
2019-01-08 13:24:58.783069 Creating database...
2019-01-08 13:24:58.794650 Writing records...
2019-01-08 13:24:59.008050 Adding indexes
2019-01-08 13:24:59.015802 Done
$ ~/libexec/perf-core/scripts/python/exported-sql-viewer.py jmp-to-fn.db
Before:
main
  -> bar
After:
main
  -> foo
    -> bar
Committer testing:
Install the python2-pyside package, then select these menu options
on the GUI:
"Reports"
"Context sensitive callgraphs"
Then keep expanding the symbols to get the full picture. When doing this
on fedora:29 with gcc version 8.2.1 20181215 (Red Hat 8.2.1-6) (GCC), the result is:
jmp-to-fn
  PID:TID
    _start (ld-2.28.so)
      __libc_start_main
        main
          foo
            bar
This verifies that the change does indeed fix the problem.
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/20190109091835.5570-5-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r-- | tools/perf/scripts/python/export-to-postgresql.py | 2 | ||||
-rw-r--r-- | tools/perf/scripts/python/export-to-sqlite.py | 2 | ||||
-rw-r--r-- | tools/perf/util/thread-stack.c | 30 | ||||
-rw-r--r-- | tools/perf/util/thread-stack.h | 3 |
4 files changed, 33 insertions, 4 deletions
diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 0564dd7377f2..30130213da7e 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py | |||
@@ -478,7 +478,7 @@ if perf_db_export_calls: | |||
478 | 'branch_count,' | 478 | 'branch_count,' |
479 | 'call_id,' | 479 | 'call_id,' |
480 | 'return_id,' | 480 | 'return_id,' |
481 | 'CASE WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' ELSE \'\' END AS flags,' | 481 | 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,' |
482 | 'parent_call_path_id' | 482 | 'parent_call_path_id' |
483 | ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') | 483 | ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') |
484 | 484 | ||
diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py index 245caf2643ed..ed237f2ed03f 100644 --- a/tools/perf/scripts/python/export-to-sqlite.py +++ b/tools/perf/scripts/python/export-to-sqlite.py | |||
@@ -320,7 +320,7 @@ if perf_db_export_calls: | |||
320 | 'branch_count,' | 320 | 'branch_count,' |
321 | 'call_id,' | 321 | 'call_id,' |
322 | 'return_id,' | 322 | 'return_id,' |
323 | 'CASE WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' ELSE \'\' END AS flags,' | 323 | 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,' |
324 | 'parent_call_path_id' | 324 | 'parent_call_path_id' |
325 | ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') | 325 | ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') |
326 | 326 | ||
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index 7f8eff018c16..f52c0f90915d 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c | |||
@@ -38,6 +38,7 @@ | |||
38 | * @cp: call path | 38 | * @cp: call path |
39 | * @no_call: a 'call' was not seen | 39 | * @no_call: a 'call' was not seen |
40 | * @trace_end: a 'call' but trace ended | 40 | * @trace_end: a 'call' but trace ended |
41 | * @non_call: a branch but not a 'call' to the start of a different symbol | ||
41 | */ | 42 | */ |
42 | struct thread_stack_entry { | 43 | struct thread_stack_entry { |
43 | u64 ret_addr; | 44 | u64 ret_addr; |
@@ -47,6 +48,7 @@ struct thread_stack_entry { | |||
47 | struct call_path *cp; | 48 | struct call_path *cp; |
48 | bool no_call; | 49 | bool no_call; |
49 | bool trace_end; | 50 | bool trace_end; |
51 | bool non_call; | ||
50 | }; | 52 | }; |
51 | 53 | ||
52 | /** | 54 | /** |
@@ -268,6 +270,8 @@ static int thread_stack__call_return(struct thread *thread, | |||
268 | cr.flags |= CALL_RETURN_NO_CALL; | 270 | cr.flags |= CALL_RETURN_NO_CALL; |
269 | if (no_return) | 271 | if (no_return) |
270 | cr.flags |= CALL_RETURN_NO_RETURN; | 272 | cr.flags |= CALL_RETURN_NO_RETURN; |
273 | if (tse->non_call) | ||
274 | cr.flags |= CALL_RETURN_NON_CALL; | ||
271 | 275 | ||
272 | return crp->process(&cr, crp->data); | 276 | return crp->process(&cr, crp->data); |
273 | } | 277 | } |
@@ -510,6 +514,7 @@ static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr, | |||
510 | tse->cp = cp; | 514 | tse->cp = cp; |
511 | tse->no_call = no_call; | 515 | tse->no_call = no_call; |
512 | tse->trace_end = trace_end; | 516 | tse->trace_end = trace_end; |
517 | tse->non_call = false; | ||
513 | 518 | ||
514 | return 0; | 519 | return 0; |
515 | } | 520 | } |
@@ -531,14 +536,16 @@ static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts, | |||
531 | timestamp, ref, false); | 536 | timestamp, ref, false); |
532 | } | 537 | } |
533 | 538 | ||
534 | if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) { | 539 | if (ts->stack[ts->cnt - 1].ret_addr == ret_addr && |
540 | !ts->stack[ts->cnt - 1].non_call) { | ||
535 | return thread_stack__call_return(thread, ts, --ts->cnt, | 541 | return thread_stack__call_return(thread, ts, --ts->cnt, |
536 | timestamp, ref, false); | 542 | timestamp, ref, false); |
537 | } else { | 543 | } else { |
538 | size_t i = ts->cnt - 1; | 544 | size_t i = ts->cnt - 1; |
539 | 545 | ||
540 | while (i--) { | 546 | while (i--) { |
541 | if (ts->stack[i].ret_addr != ret_addr) | 547 | if (ts->stack[i].ret_addr != ret_addr || |
548 | ts->stack[i].non_call) | ||
542 | continue; | 549 | continue; |
543 | i += 1; | 550 | i += 1; |
544 | while (ts->cnt > i) { | 551 | while (ts->cnt > i) { |
@@ -757,6 +764,25 @@ int thread_stack__process(struct thread *thread, struct comm *comm, | |||
757 | err = thread_stack__trace_begin(thread, ts, sample->time, ref); | 764 | err = thread_stack__trace_begin(thread, ts, sample->time, ref); |
758 | } else if (sample->flags & PERF_IP_FLAG_TRACE_END) { | 765 | } else if (sample->flags & PERF_IP_FLAG_TRACE_END) { |
759 | err = thread_stack__trace_end(ts, sample, ref); | 766 | err = thread_stack__trace_end(ts, sample, ref); |
767 | } else if (sample->flags & PERF_IP_FLAG_BRANCH && | ||
768 | from_al->sym != to_al->sym && to_al->sym && | ||
769 | to_al->addr == to_al->sym->start) { | ||
770 | struct call_path_root *cpr = ts->crp->cpr; | ||
771 | struct call_path *cp; | ||
772 | |||
773 | /* | ||
774 | * The compiler might optimize a call/ret combination by making | ||
775 | * it a jmp. Make that visible by recording on the stack a | ||
776 | * branch to the start of a different symbol. Note, that means | ||
777 | * when a ret pops the stack, all jmps must be popped off first. | ||
778 | */ | ||
779 | cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, | ||
780 | to_al->sym, sample->addr, | ||
781 | ts->kernel_start); | ||
782 | err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false, | ||
783 | false); | ||
784 | if (!err) | ||
785 | ts->stack[ts->cnt - 1].non_call = true; | ||
760 | } | 786 | } |
761 | 787 | ||
762 | return err; | 788 | return err; |
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h index 1f626f4a1c40..b7c04e19ad41 100644 --- a/tools/perf/util/thread-stack.h +++ b/tools/perf/util/thread-stack.h | |||
@@ -35,10 +35,13 @@ struct call_path; | |||
35 | * | 35 | * |
36 | * CALL_RETURN_NO_CALL: 'return' but no matching 'call' | 36 | * CALL_RETURN_NO_CALL: 'return' but no matching 'call' |
37 | * CALL_RETURN_NO_RETURN: 'call' but no matching 'return' | 37 | * CALL_RETURN_NO_RETURN: 'call' but no matching 'return' |
38 | * CALL_RETURN_NON_CALL: a branch but not a 'call' to the start of a different | ||
39 | * symbol | ||
38 | */ | 40 | */ |
39 | enum { | 41 | enum { |
40 | CALL_RETURN_NO_CALL = 1 << 0, | 42 | CALL_RETURN_NO_CALL = 1 << 0, |
41 | CALL_RETURN_NO_RETURN = 1 << 1, | 43 | CALL_RETURN_NO_RETURN = 1 << 1, |
44 | CALL_RETURN_NON_CALL = 1 << 2, | ||
42 | }; | 45 | }; |
43 | 46 | ||
44 | /** | 47 | /** |