diff options
author | David Tolnay <dtolnay@gmail.com> | 2016-07-09 03:20:00 -0400 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2016-07-12 15:12:38 -0400 |
commit | cae15db74999edb96dd9f5bbd4d55849391dd92b (patch) | |
tree | c41cac808b5e8f19107b463eef3dc314754f5e47 /tools | |
parent | 1c1a3a4729aae712c55e001e151ef008d030d4a7 (diff) |
perf symbols: Add Rust demangling
Rust demangling is another step after bfd demangling. Add a diagnosis to
identify mangled Rust symbols based on the hash that the Rust mangler appends
as the last path component, as well as other characteristics. Add a demangler
to reconstruct the original symbol.
Committer notes:
How I tested it:
Enabled COPR on Fedora 24 and then installed the 'rust-binary' package,
with it:
$ cat src/main.rs
fn main() {
println!("Hello, world!");
}
$ cat Cargo.toml
[package]
name = "hello_world"
version = "0.0.1"
authors = [ "Arnaldo Carvalho de Melo <acme@kernel.org>" ]
$ perf record cargo bench
Compiling hello_world v0.0.1 (file:///home/acme/projects/hello_world)
Running target/release/hello_world-d4b9dab4b2a47d75
running 0 tests
test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.096 MB perf.data (1457 samples) ]
$
Before this patch:
$ perf report --stdio --dsos librbml-e8edd0fd.so
# dso: librbml-e8edd0fd.so
#
# Total Lost Samples: 0
#
# Samples: 1K of event 'cycles:u'
# Event count (approx.): 979599126
#
# Overhead Command Symbol
# ........ ....... .............................................................................................................
#
1.78% rustc [.] rbml::reader::maybe_get_doc::hb9d387df6024b15b
1.50% rustc [.] _$LT$reader..DocsIterator$LT$$u27$a$GT$$u20$as$u20$std..iter..Iterator$GT$::next::hd9af9e60d79a35c8
1.20% rustc [.] rbml::reader::doc_at::hc88107fba445af31
0.46% rustc [.] _$LT$reader..TaggedDocsIterator$LT$$u27$a$GT$$u20$as$u20$std..iter..Iterator$GT$::next::h0cb40e696e4bb489
0.35% rustc [.] rbml::reader::Decoder::_next_int::h66eef7825a398bc3
0.29% rustc [.] rbml::reader::Decoder::_next_sub::h8e5266005580b836
0.15% rustc [.] rbml::reader::get_doc::h094521c645459139
0.14% rustc [.] _$LT$reader..Decoder$LT$$u27$doc$GT$$u20$as$u20$serialize..Decoder$GT$::read_u32::h0acea2fff9669327
0.07% rustc [.] rbml::reader::Decoder::next_doc::h6714d469c9dfaf91
0.07% rustc [.] _ZN4rbml6reader10doc_as_u6417h930b740aa94f1d3aE@plt
0.06% rustc [.] _fini
$
After:
$ perf report --stdio --dsos librbml-e8edd0fd.so
# dso: librbml-e8edd0fd.so
#
# Total Lost Samples: 0
#
# Samples: 1K of event 'cycles:u'
# Event count (approx.): 979599126
#
# Overhead Command Symbol
# ........ ....... .................................................................
#
1.78% rustc [.] rbml::reader::maybe_get_doc
1.50% rustc [.] <reader::DocsIterator<'a> as std::iter::Iterator>::next
1.20% rustc [.] rbml::reader::doc_at
0.46% rustc [.] <reader::TaggedDocsIterator<'a> as std::iter::Iterator>::next
0.35% rustc [.] rbml::reader::Decoder::_next_int
0.29% rustc [.] rbml::reader::Decoder::_next_sub
0.15% rustc [.] rbml::reader::get_doc
0.14% rustc [.] <reader::Decoder<'doc> as serialize::Decoder>::read_u32
0.07% rustc [.] rbml::reader::Decoder::next_doc
0.07% rustc [.] _ZN4rbml6reader10doc_as_u6417h930b740aa94f1d3aE@plt
0.06% rustc [.] _fini
$
Signed-off-by: David Tolnay <dtolnay@gmail.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/5780B7FA.3030602@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/perf/util/Build | 1 | ||||
-rw-r--r-- | tools/perf/util/demangle-rust.c | 269 | ||||
-rw-r--r-- | tools/perf/util/demangle-rust.h | 7 | ||||
-rw-r--r-- | tools/perf/util/symbol-elf.c | 8 |
4 files changed, 285 insertions, 0 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build index eda68f582884..2fa7d8b69873 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build | |||
@@ -113,6 +113,7 @@ libperf-y += scripting-engines/ | |||
113 | libperf-$(CONFIG_ZLIB) += zlib.o | 113 | libperf-$(CONFIG_ZLIB) += zlib.o |
114 | libperf-$(CONFIG_LZMA) += lzma.o | 114 | libperf-$(CONFIG_LZMA) += lzma.o |
115 | libperf-y += demangle-java.o | 115 | libperf-y += demangle-java.o |
116 | libperf-y += demangle-rust.o | ||
116 | 117 | ||
117 | ifdef CONFIG_JITDUMP | 118 | ifdef CONFIG_JITDUMP |
118 | libperf-$(CONFIG_LIBELF) += jitdump.o | 119 | libperf-$(CONFIG_LIBELF) += jitdump.o |
diff --git a/tools/perf/util/demangle-rust.c b/tools/perf/util/demangle-rust.c new file mode 100644 index 000000000000..f9dafa888c06 --- /dev/null +++ b/tools/perf/util/demangle-rust.c | |||
@@ -0,0 +1,269 @@ | |||
1 | #include <string.h> | ||
2 | #include "util.h" | ||
3 | #include "debug.h" | ||
4 | |||
5 | #include "demangle-rust.h" | ||
6 | |||
7 | /* | ||
8 | * Mangled Rust symbols look like this: | ||
9 | * | ||
10 | * _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a | ||
11 | * | ||
12 | * The original symbol is: | ||
13 | * | ||
14 | * <std::sys::fd::FileDesc as core::ops::Drop>::drop | ||
15 | * | ||
16 | * The last component of the path is a 64-bit hash in lowercase hex, prefixed | ||
17 | * with "h". Rust does not have a global namespace between crates, an illusion | ||
18 | * which Rust maintains by using the hash to distinguish things that would | ||
19 | * otherwise have the same symbol. | ||
20 | * | ||
21 | * Any path component not starting with a XID_Start character is prefixed with | ||
22 | * "_". | ||
23 | * | ||
24 | * The following escape sequences are used: | ||
25 | * | ||
26 | * "," => $C$ | ||
27 | * "@" => $SP$ | ||
28 | * "*" => $BP$ | ||
29 | * "&" => $RF$ | ||
30 | * "<" => $LT$ | ||
31 | * ">" => $GT$ | ||
32 | * "(" => $LP$ | ||
33 | * ")" => $RP$ | ||
34 | * " " => $u20$ | ||
35 | * "'" => $u27$ | ||
36 | * "[" => $u5b$ | ||
37 | * "]" => $u5d$ | ||
38 | * "~" => $u7e$ | ||
39 | * | ||
40 | * A double ".." means "::" and a single "." means "-". | ||
41 | * | ||
42 | * The only characters allowed in the mangled symbol are a-zA-Z0-9 and _.:$ | ||
43 | */ | ||
44 | |||
/*
 * Every mangled Rust symbol ends in hash_prefix followed by hash_len hex
 * digits. hash_prefix is an array so that its length can be derived from the
 * literal itself instead of being maintained by hand next to it.
 */
static const char hash_prefix[] = "::h";
static const size_t hash_prefix_len = sizeof(hash_prefix) - 1;
static const size_t hash_len = 16;
48 | |||
49 | static bool is_prefixed_hash(const char *start); | ||
50 | static bool looks_like_rust(const char *sym, size_t len); | ||
51 | static bool unescape(const char **in, char **out, const char *seq, char value); | ||
52 | |||
53 | /* | ||
54 | * INPUT: | ||
55 | * sym: symbol that has been through BFD-demangling | ||
56 | * | ||
57 | * This function looks for the following indicators: | ||
58 | * | ||
59 | * 1. The hash must consist of "h" followed by 16 lowercase hex digits. | ||
60 | * | ||
61 | * 2. As a sanity check, the hash must use between 5 and 15 of the 16 possible | ||
62 | * hex digits. This is true of 99.9998% of hashes so once in your life you | ||
63 | * may see a false negative. The point is to notice path components that | ||
64 | * could be Rust hashes but are probably not, like "haaaaaaaaaaaaaaaa". In | ||
65 | * this case a false positive (non-Rust symbol has an important path | ||
66 | * component removed because it looks like a Rust hash) is worse than a | ||
67 | * false negative (the rare Rust symbol is not demangled) so this sets the | ||
68 | * balance in favor of false negatives. | ||
69 | * | ||
70 | * 3. There must be no characters other than a-zA-Z0-9 and _.:$ | ||
71 | * | ||
72 | * 4. There must be no unrecognized $-sign sequences. | ||
73 | * | ||
74 | * 5. There must be no sequence of three or more dots in a row ("..."). | ||
75 | */ | ||
76 | bool | ||
77 | rust_is_mangled(const char *sym) | ||
78 | { | ||
79 | size_t len, len_without_hash; | ||
80 | |||
81 | if (!sym) | ||
82 | return false; | ||
83 | |||
84 | len = strlen(sym); | ||
85 | if (len <= hash_prefix_len + hash_len) | ||
86 | /* Not long enough to contain "::h" + hash + something else */ | ||
87 | return false; | ||
88 | |||
89 | len_without_hash = len - (hash_prefix_len + hash_len); | ||
90 | if (!is_prefixed_hash(sym + len_without_hash)) | ||
91 | return false; | ||
92 | |||
93 | return looks_like_rust(sym, len_without_hash); | ||
94 | } | ||
95 | |||
96 | /* | ||
97 | * A hash is the prefix "::h" followed by 16 lowercase hex digits. The hex | ||
98 | * digits must comprise between 5 and 15 (inclusive) distinct digits. | ||
99 | */ | ||
100 | static bool is_prefixed_hash(const char *str) | ||
101 | { | ||
102 | const char *end; | ||
103 | bool seen[16]; | ||
104 | size_t i; | ||
105 | int count; | ||
106 | |||
107 | if (strncmp(str, hash_prefix, hash_prefix_len)) | ||
108 | return false; | ||
109 | str += hash_prefix_len; | ||
110 | |||
111 | memset(seen, false, sizeof(seen)); | ||
112 | for (end = str + hash_len; str < end; str++) | ||
113 | if (*str >= '0' && *str <= '9') | ||
114 | seen[*str - '0'] = true; | ||
115 | else if (*str >= 'a' && *str <= 'f') | ||
116 | seen[*str - 'a' + 10] = true; | ||
117 | else | ||
118 | return false; | ||
119 | |||
120 | /* Count how many distinct digits seen */ | ||
121 | count = 0; | ||
122 | for (i = 0; i < 16; i++) | ||
123 | if (seen[i]) | ||
124 | count++; | ||
125 | |||
126 | return count >= 5 && count <= 15; | ||
127 | } | ||
128 | |||
/*
 * Scan the first len bytes of str and report whether they could be the path
 * part of a mangled Rust symbol: only a-zA-Z0-9 and _.: characters, only
 * recognized $-escapes, and no run of three or more dots.
 */
static bool looks_like_rust(const char *str, size_t len)
{
	static const char *const escapes[] = {
		"$C$",
		"$SP$", "$BP$", "$RF$", "$LT$", "$GT$", "$LP$", "$RP$",
		"$u20$", "$u27$", "$u5b$", "$u5d$", "$u7e$",
	};
	const size_t nr_escapes = sizeof(escapes) / sizeof(escapes[0]);
	const char *const stop = str + len;
	const char *cur = str;

	while (cur < stop) {
		const char c = *cur;

		if (c == '$') {
			size_t matched = 0;
			size_t i;

			/* No escape is a prefix of another, so order is irrelevant */
			for (i = 0; i < nr_escapes; i++) {
				const size_t esc_len = strlen(escapes[i]);

				if (!strncmp(cur, escapes[i], esc_len)) {
					matched = esc_len;
					break;
				}
			}

			if (matched == 0)
				return false;

			cur += matched;
			continue;
		}

		if (c == '.') {
			/* Do not allow three or more consecutive dots */
			if (!strncmp(cur, "...", 3))
				return false;

			cur++;
			continue;
		}

		if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
		    (c >= '0' && c <= '9') || c == '_' || c == ':') {
			cur++;
			continue;
		}

		return false;
	}

	return true;
}
173 | |||
174 | /* | ||
175 | * INPUT: | ||
176 | * sym: symbol for which rust_is_mangled(sym) returns true | ||
177 | * | ||
178 | * The input is demangled in-place because the mangled name is always longer | ||
179 | * than the demangled one. | ||
180 | */ | ||
181 | void | ||
182 | rust_demangle_sym(char *sym) | ||
183 | { | ||
184 | const char *in; | ||
185 | char *out; | ||
186 | const char *end; | ||
187 | |||
188 | if (!sym) | ||
189 | return; | ||
190 | |||
191 | in = sym; | ||
192 | out = sym; | ||
193 | end = sym + strlen(sym) - (hash_prefix_len + hash_len); | ||
194 | |||
195 | while (in < end) | ||
196 | switch (*in) { | ||
197 | case '$': | ||
198 | if (!(unescape(&in, &out, "$C$", ',') | ||
199 | || unescape(&in, &out, "$SP$", '@') | ||
200 | || unescape(&in, &out, "$BP$", '*') | ||
201 | || unescape(&in, &out, "$RF$", '&') | ||
202 | || unescape(&in, &out, "$LT$", '<') | ||
203 | || unescape(&in, &out, "$GT$", '>') | ||
204 | || unescape(&in, &out, "$LP$", '(') | ||
205 | || unescape(&in, &out, "$RP$", ')') | ||
206 | || unescape(&in, &out, "$u20$", ' ') | ||
207 | || unescape(&in, &out, "$u27$", '\'') | ||
208 | || unescape(&in, &out, "$u5b$", '[') | ||
209 | || unescape(&in, &out, "$u5d$", ']') | ||
210 | || unescape(&in, &out, "$u7e$", '~'))) { | ||
211 | pr_err("demangle-rust: unexpected escape sequence"); | ||
212 | goto done; | ||
213 | } | ||
214 | break; | ||
215 | case '_': | ||
216 | /* | ||
217 | * If this is the start of a path component and the next | ||
218 | * character is an escape sequence, ignore the | ||
219 | * underscore. The mangler inserts an underscore to make | ||
220 | * sure the path component begins with a XID_Start | ||
221 | * character. | ||
222 | */ | ||
223 | if ((in == sym || in[-1] == ':') && in[1] == '$') | ||
224 | in++; | ||
225 | else | ||
226 | *out++ = *in++; | ||
227 | break; | ||
228 | case '.': | ||
229 | if (in[1] == '.') { | ||
230 | /* ".." becomes "::" */ | ||
231 | *out++ = ':'; | ||
232 | *out++ = ':'; | ||
233 | in += 2; | ||
234 | } else { | ||
235 | /* "." becomes "-" */ | ||
236 | *out++ = '-'; | ||
237 | in++; | ||
238 | } | ||
239 | break; | ||
240 | case 'a' ... 'z': | ||
241 | case 'A' ... 'Z': | ||
242 | case '0' ... '9': | ||
243 | case ':': | ||
244 | *out++ = *in++; | ||
245 | break; | ||
246 | default: | ||
247 | pr_err("demangle-rust: unexpected character '%c' in symbol\n", | ||
248 | *in); | ||
249 | goto done; | ||
250 | } | ||
251 | |||
252 | done: | ||
253 | *out = '\0'; | ||
254 | } | ||
255 | |||
/*
 * If the input at *in starts with the escape sequence seq, store value at
 * *out, advance *in past the sequence and *out by one byte, and return true.
 * Otherwise leave both cursors alone and return false.
 */
static bool unescape(const char **in, char **out, const char *seq, char value)
{
	const size_t seq_len = strlen(seq);
	const char *src = *in;
	char *dst = *out;

	if (strncmp(src, seq, seq_len) != 0)
		return false;

	*dst = value;

	*in = src + seq_len;
	*out = dst + 1;

	return true;
}
diff --git a/tools/perf/util/demangle-rust.h b/tools/perf/util/demangle-rust.h new file mode 100644 index 000000000000..7b41ead7e0dd --- /dev/null +++ b/tools/perf/util/demangle-rust.h | |||
@@ -0,0 +1,7 @@ | |||
#ifndef __PERF_DEMANGLE_RUST
#define __PERF_DEMANGLE_RUST 1

/* For bool, so this header can be included on its own */
#include <stdbool.h>

/* Report whether str looks like a mangled Rust symbol; see demangle-rust.c */
bool rust_is_mangled(const char *str);

/* Demangle str in place; str must satisfy rust_is_mangled(str) */
void rust_demangle_sym(char *str);

#endif /* __PERF_DEMANGLE_RUST */
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index cebf98ec27bc..a34321e9b44d 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c | |||
@@ -7,6 +7,7 @@ | |||
7 | 7 | ||
8 | #include "symbol.h" | 8 | #include "symbol.h" |
9 | #include "demangle-java.h" | 9 | #include "demangle-java.h" |
10 | #include "demangle-rust.h" | ||
10 | #include "machine.h" | 11 | #include "machine.h" |
11 | #include "vdso.h" | 12 | #include "vdso.h" |
12 | #include <symbol/kallsyms.h> | 13 | #include <symbol/kallsyms.h> |
@@ -1081,6 +1082,13 @@ new_symbol: | |||
1081 | demangled = bfd_demangle(NULL, elf_name, demangle_flags); | 1082 | demangled = bfd_demangle(NULL, elf_name, demangle_flags); |
1082 | if (demangled == NULL) | 1083 | if (demangled == NULL) |
1083 | demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET); | 1084 | demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET); |
1085 | else if (rust_is_mangled(demangled)) | ||
1086 | /* | ||
1087 | * Input to Rust demangling is the BFD-demangled | ||
1088 | * name which it Rust-demangles in place. | ||
1089 | */ | ||
1090 | rust_demangle_sym(demangled); | ||
1091 | |||
1084 | if (demangled != NULL) | 1092 | if (demangled != NULL) |
1085 | elf_name = demangled; | 1093 | elf_name = demangled; |
1086 | } | 1094 | } |