diff options
author | Namhyung Kim <namhyung.kim@lge.com> | 2013-10-11 01:15:36 -0400 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2013-10-21 16:33:23 -0400 |
commit | e369517ce5f796945c6af047b4e8b1d650e03458 (patch) | |
tree | 99078554225df0f002f754335185299b55943f9e /tools/perf/util | |
parent | f11cfc6f294dbd83b0d58037404df2bd16066238 (diff) |
perf callchain: Convert children list to rbtree
Current collapse stage has a scalability problem which can be reproduced
easily with a parallel kernel build.
This is because it needs to traverse every children of callchains
linearly during the collapse/merge stage.
Converting it to a rbtree reduced the overhead significantly.
On my 400MB perf.data file which recorded with make -j32 kernel build:
$ time perf --no-pager report --stdio > /dev/null
before:
real 6m22.073s
user 6m18.683s
sys 0m0.706s
after:
real 0m20.780s
user 0m19.962s
sys 0m0.689s
During the perf report the overhead on append_chain_children went down
from 96.69% to 18.16%:
- 18.16% perf perf [.] append_chain_children
- append_chain_children
- 77.48% append_chain_children
+ 69.79% merge_chain_branch
- 22.96% append_chain_children
+ 67.44% merge_chain_branch
+ 30.15% append_chain_children
+ 2.41% callchain_append
+ 7.25% callchain_append
+ 12.26% callchain_append
+ 10.22% merge_chain_branch
+ 11.58% perf perf [.] dso__find_symbol
+ 8.02% perf perf [.] sort__comm_cmp
+ 5.48% perf libc-2.17.so [.] malloc_consolidate
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1381468543-25334-2-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/util')
-rw-r--r-- | tools/perf/util/callchain.c | 147 | ||||
-rw-r--r-- | tools/perf/util/callchain.h | 11 |
2 files changed, 116 insertions, 42 deletions
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 482f68081cd8..e3970e3eaacf 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c | |||
@@ -21,12 +21,6 @@ | |||
21 | 21 | ||
22 | __thread struct callchain_cursor callchain_cursor; | 22 | __thread struct callchain_cursor callchain_cursor; |
23 | 23 | ||
24 | #define chain_for_each_child(child, parent) \ | ||
25 | list_for_each_entry(child, &parent->children, siblings) | ||
26 | |||
27 | #define chain_for_each_child_safe(child, next, parent) \ | ||
28 | list_for_each_entry_safe(child, next, &parent->children, siblings) | ||
29 | |||
30 | static void | 24 | static void |
31 | rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, | 25 | rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, |
32 | enum chain_mode mode) | 26 | enum chain_mode mode) |
@@ -71,10 +65,16 @@ static void | |||
71 | __sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, | 65 | __sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, |
72 | u64 min_hit) | 66 | u64 min_hit) |
73 | { | 67 | { |
68 | struct rb_node *n; | ||
74 | struct callchain_node *child; | 69 | struct callchain_node *child; |
75 | 70 | ||
76 | chain_for_each_child(child, node) | 71 | n = rb_first(&node->rb_root_in); |
72 | while (n) { | ||
73 | child = rb_entry(n, struct callchain_node, rb_node_in); | ||
74 | n = rb_next(n); | ||
75 | |||
77 | __sort_chain_flat(rb_root, child, min_hit); | 76 | __sort_chain_flat(rb_root, child, min_hit); |
77 | } | ||
78 | 78 | ||
79 | if (node->hit && node->hit >= min_hit) | 79 | if (node->hit && node->hit >= min_hit) |
80 | rb_insert_callchain(rb_root, node, CHAIN_FLAT); | 80 | rb_insert_callchain(rb_root, node, CHAIN_FLAT); |
@@ -94,11 +94,16 @@ sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root, | |||
94 | static void __sort_chain_graph_abs(struct callchain_node *node, | 94 | static void __sort_chain_graph_abs(struct callchain_node *node, |
95 | u64 min_hit) | 95 | u64 min_hit) |
96 | { | 96 | { |
97 | struct rb_node *n; | ||
97 | struct callchain_node *child; | 98 | struct callchain_node *child; |
98 | 99 | ||
99 | node->rb_root = RB_ROOT; | 100 | node->rb_root = RB_ROOT; |
101 | n = rb_first(&node->rb_root_in); | ||
102 | |||
103 | while (n) { | ||
104 | child = rb_entry(n, struct callchain_node, rb_node_in); | ||
105 | n = rb_next(n); | ||
100 | 106 | ||
101 | chain_for_each_child(child, node) { | ||
102 | __sort_chain_graph_abs(child, min_hit); | 107 | __sort_chain_graph_abs(child, min_hit); |
103 | if (callchain_cumul_hits(child) >= min_hit) | 108 | if (callchain_cumul_hits(child) >= min_hit) |
104 | rb_insert_callchain(&node->rb_root, child, | 109 | rb_insert_callchain(&node->rb_root, child, |
@@ -117,13 +122,18 @@ sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root, | |||
117 | static void __sort_chain_graph_rel(struct callchain_node *node, | 122 | static void __sort_chain_graph_rel(struct callchain_node *node, |
118 | double min_percent) | 123 | double min_percent) |
119 | { | 124 | { |
125 | struct rb_node *n; | ||
120 | struct callchain_node *child; | 126 | struct callchain_node *child; |
121 | u64 min_hit; | 127 | u64 min_hit; |
122 | 128 | ||
123 | node->rb_root = RB_ROOT; | 129 | node->rb_root = RB_ROOT; |
124 | min_hit = ceil(node->children_hit * min_percent); | 130 | min_hit = ceil(node->children_hit * min_percent); |
125 | 131 | ||
126 | chain_for_each_child(child, node) { | 132 | n = rb_first(&node->rb_root_in); |
133 | while (n) { | ||
134 | child = rb_entry(n, struct callchain_node, rb_node_in); | ||
135 | n = rb_next(n); | ||
136 | |||
127 | __sort_chain_graph_rel(child, min_percent); | 137 | __sort_chain_graph_rel(child, min_percent); |
128 | if (callchain_cumul_hits(child) >= min_hit) | 138 | if (callchain_cumul_hits(child) >= min_hit) |
129 | rb_insert_callchain(&node->rb_root, child, | 139 | rb_insert_callchain(&node->rb_root, child, |
@@ -173,19 +183,26 @@ create_child(struct callchain_node *parent, bool inherit_children) | |||
173 | return NULL; | 183 | return NULL; |
174 | } | 184 | } |
175 | new->parent = parent; | 185 | new->parent = parent; |
176 | INIT_LIST_HEAD(&new->children); | ||
177 | INIT_LIST_HEAD(&new->val); | 186 | INIT_LIST_HEAD(&new->val); |
178 | 187 | ||
179 | if (inherit_children) { | 188 | if (inherit_children) { |
180 | struct callchain_node *next; | 189 | struct rb_node *n; |
190 | struct callchain_node *child; | ||
191 | |||
192 | new->rb_root_in = parent->rb_root_in; | ||
193 | parent->rb_root_in = RB_ROOT; | ||
181 | 194 | ||
182 | list_splice(&parent->children, &new->children); | 195 | n = rb_first(&new->rb_root_in); |
183 | INIT_LIST_HEAD(&parent->children); | 196 | while (n) { |
197 | child = rb_entry(n, struct callchain_node, rb_node_in); | ||
198 | child->parent = new; | ||
199 | n = rb_next(n); | ||
200 | } | ||
184 | 201 | ||
185 | chain_for_each_child(next, new) | 202 | /* make it the first child */ |
186 | next->parent = new; | 203 | rb_link_node(&new->rb_node_in, NULL, &parent->rb_root_in.rb_node); |
204 | rb_insert_color(&new->rb_node_in, &parent->rb_root_in); | ||
187 | } | 205 | } |
188 | list_add_tail(&new->siblings, &parent->children); | ||
189 | 206 | ||
190 | return new; | 207 | return new; |
191 | } | 208 | } |
@@ -223,7 +240,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) | |||
223 | } | 240 | } |
224 | } | 241 | } |
225 | 242 | ||
226 | static void | 243 | static struct callchain_node * |
227 | add_child(struct callchain_node *parent, | 244 | add_child(struct callchain_node *parent, |
228 | struct callchain_cursor *cursor, | 245 | struct callchain_cursor *cursor, |
229 | u64 period) | 246 | u64 period) |
@@ -235,6 +252,19 @@ add_child(struct callchain_node *parent, | |||
235 | 252 | ||
236 | new->children_hit = 0; | 253 | new->children_hit = 0; |
237 | new->hit = period; | 254 | new->hit = period; |
255 | return new; | ||
256 | } | ||
257 | |||
258 | static s64 match_chain(struct callchain_cursor_node *node, | ||
259 | struct callchain_list *cnode) | ||
260 | { | ||
261 | struct symbol *sym = node->sym; | ||
262 | |||
263 | if (cnode->ms.sym && sym && | ||
264 | callchain_param.key == CCKEY_FUNCTION) | ||
265 | return cnode->ms.sym->start - sym->start; | ||
266 | else | ||
267 | return cnode->ip - node->ip; | ||
238 | } | 268 | } |
239 | 269 | ||
240 | /* | 270 | /* |
@@ -272,9 +302,33 @@ split_add_child(struct callchain_node *parent, | |||
272 | 302 | ||
273 | /* create a new child for the new branch if any */ | 303 | /* create a new child for the new branch if any */ |
274 | if (idx_total < cursor->nr) { | 304 | if (idx_total < cursor->nr) { |
305 | struct callchain_node *first; | ||
306 | struct callchain_list *cnode; | ||
307 | struct callchain_cursor_node *node; | ||
308 | struct rb_node *p, **pp; | ||
309 | |||
275 | parent->hit = 0; | 310 | parent->hit = 0; |
276 | add_child(parent, cursor, period); | ||
277 | parent->children_hit += period; | 311 | parent->children_hit += period; |
312 | |||
313 | node = callchain_cursor_current(cursor); | ||
314 | new = add_child(parent, cursor, period); | ||
315 | |||
316 | /* | ||
317 | * This is second child since we moved parent's children | ||
318 | * to new (first) child above. | ||
319 | */ | ||
320 | p = parent->rb_root_in.rb_node; | ||
321 | first = rb_entry(p, struct callchain_node, rb_node_in); | ||
322 | cnode = list_first_entry(&first->val, struct callchain_list, | ||
323 | list); | ||
324 | |||
325 | if (match_chain(node, cnode) < 0) | ||
326 | pp = &p->rb_left; | ||
327 | else | ||
328 | pp = &p->rb_right; | ||
329 | |||
330 | rb_link_node(&new->rb_node_in, p, pp); | ||
331 | rb_insert_color(&new->rb_node_in, &parent->rb_root_in); | ||
278 | } else { | 332 | } else { |
279 | parent->hit = period; | 333 | parent->hit = period; |
280 | } | 334 | } |
@@ -291,16 +345,40 @@ append_chain_children(struct callchain_node *root, | |||
291 | u64 period) | 345 | u64 period) |
292 | { | 346 | { |
293 | struct callchain_node *rnode; | 347 | struct callchain_node *rnode; |
348 | struct callchain_cursor_node *node; | ||
349 | struct rb_node **p = &root->rb_root_in.rb_node; | ||
350 | struct rb_node *parent = NULL; | ||
351 | |||
352 | node = callchain_cursor_current(cursor); | ||
353 | if (!node) | ||
354 | return; | ||
294 | 355 | ||
295 | /* lookup in childrens */ | 356 | /* lookup in childrens */ |
296 | chain_for_each_child(rnode, root) { | 357 | while (*p) { |
297 | unsigned int ret = append_chain(rnode, cursor, period); | 358 | s64 ret; |
359 | struct callchain_list *cnode; | ||
298 | 360 | ||
299 | if (!ret) | 361 | parent = *p; |
362 | rnode = rb_entry(parent, struct callchain_node, rb_node_in); | ||
363 | cnode = list_first_entry(&rnode->val, struct callchain_list, | ||
364 | list); | ||
365 | |||
366 | /* just check first entry */ | ||
367 | ret = match_chain(node, cnode); | ||
368 | if (ret == 0) { | ||
369 | append_chain(rnode, cursor, period); | ||
300 | goto inc_children_hit; | 370 | goto inc_children_hit; |
371 | } | ||
372 | |||
373 | if (ret < 0) | ||
374 | p = &parent->rb_left; | ||
375 | else | ||
376 | p = &parent->rb_right; | ||
301 | } | 377 | } |
302 | /* nothing in children, add to the current node */ | 378 | /* nothing in children, add to the current node */ |
303 | add_child(root, cursor, period); | 379 | rnode = add_child(root, cursor, period); |
380 | rb_link_node(&rnode->rb_node_in, parent, p); | ||
381 | rb_insert_color(&rnode->rb_node_in, &root->rb_root_in); | ||
304 | 382 | ||
305 | inc_children_hit: | 383 | inc_children_hit: |
306 | root->children_hit += period; | 384 | root->children_hit += period; |
@@ -325,28 +403,20 @@ append_chain(struct callchain_node *root, | |||
325 | */ | 403 | */ |
326 | list_for_each_entry(cnode, &root->val, list) { | 404 | list_for_each_entry(cnode, &root->val, list) { |
327 | struct callchain_cursor_node *node; | 405 | struct callchain_cursor_node *node; |
328 | struct symbol *sym; | ||
329 | 406 | ||
330 | node = callchain_cursor_current(cursor); | 407 | node = callchain_cursor_current(cursor); |
331 | if (!node) | 408 | if (!node) |
332 | break; | 409 | break; |
333 | 410 | ||
334 | sym = node->sym; | 411 | if (match_chain(node, cnode) != 0) |
335 | |||
336 | if (cnode->ms.sym && sym && | ||
337 | callchain_param.key == CCKEY_FUNCTION) { | ||
338 | if (cnode->ms.sym->start != sym->start) | ||
339 | break; | ||
340 | } else if (cnode->ip != node->ip) | ||
341 | break; | 412 | break; |
342 | 413 | ||
343 | if (!found) | 414 | found = true; |
344 | found = true; | ||
345 | 415 | ||
346 | callchain_cursor_advance(cursor); | 416 | callchain_cursor_advance(cursor); |
347 | } | 417 | } |
348 | 418 | ||
349 | /* matches not, relay on the parent */ | 419 | /* matches not, relay no the parent */ |
350 | if (!found) { | 420 | if (!found) { |
351 | cursor->curr = curr_snap; | 421 | cursor->curr = curr_snap; |
352 | cursor->pos = start; | 422 | cursor->pos = start; |
@@ -395,8 +465,9 @@ merge_chain_branch(struct callchain_cursor *cursor, | |||
395 | struct callchain_node *dst, struct callchain_node *src) | 465 | struct callchain_node *dst, struct callchain_node *src) |
396 | { | 466 | { |
397 | struct callchain_cursor_node **old_last = cursor->last; | 467 | struct callchain_cursor_node **old_last = cursor->last; |
398 | struct callchain_node *child, *next_child; | 468 | struct callchain_node *child; |
399 | struct callchain_list *list, *next_list; | 469 | struct callchain_list *list, *next_list; |
470 | struct rb_node *n; | ||
400 | int old_pos = cursor->nr; | 471 | int old_pos = cursor->nr; |
401 | int err = 0; | 472 | int err = 0; |
402 | 473 | ||
@@ -412,12 +483,16 @@ merge_chain_branch(struct callchain_cursor *cursor, | |||
412 | append_chain_children(dst, cursor, src->hit); | 483 | append_chain_children(dst, cursor, src->hit); |
413 | } | 484 | } |
414 | 485 | ||
415 | chain_for_each_child_safe(child, next_child, src) { | 486 | n = rb_first(&src->rb_root_in); |
487 | while (n) { | ||
488 | child = container_of(n, struct callchain_node, rb_node_in); | ||
489 | n = rb_next(n); | ||
490 | rb_erase(&child->rb_node_in, &src->rb_root_in); | ||
491 | |||
416 | err = merge_chain_branch(cursor, dst, child); | 492 | err = merge_chain_branch(cursor, dst, child); |
417 | if (err) | 493 | if (err) |
418 | break; | 494 | break; |
419 | 495 | ||
420 | list_del(&child->siblings); | ||
421 | free(child); | 496 | free(child); |
422 | } | 497 | } |
423 | 498 | ||
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 2b585bc308cf..7bb36022377f 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h | |||
@@ -21,11 +21,11 @@ enum chain_order { | |||
21 | 21 | ||
22 | struct callchain_node { | 22 | struct callchain_node { |
23 | struct callchain_node *parent; | 23 | struct callchain_node *parent; |
24 | struct list_head siblings; | ||
25 | struct list_head children; | ||
26 | struct list_head val; | 24 | struct list_head val; |
27 | struct rb_node rb_node; /* to sort nodes in an rbtree */ | 25 | struct rb_node rb_node_in; /* to insert nodes in an rbtree */ |
28 | struct rb_root rb_root; /* sorted tree of children */ | 26 | struct rb_node rb_node; /* to sort nodes in an output tree */ |
27 | struct rb_root rb_root_in; /* input tree of children */ | ||
28 | struct rb_root rb_root; /* sorted output tree of children */ | ||
29 | unsigned int val_nr; | 29 | unsigned int val_nr; |
30 | u64 hit; | 30 | u64 hit; |
31 | u64 children_hit; | 31 | u64 children_hit; |
@@ -86,13 +86,12 @@ extern __thread struct callchain_cursor callchain_cursor; | |||
86 | 86 | ||
87 | static inline void callchain_init(struct callchain_root *root) | 87 | static inline void callchain_init(struct callchain_root *root) |
88 | { | 88 | { |
89 | INIT_LIST_HEAD(&root->node.siblings); | ||
90 | INIT_LIST_HEAD(&root->node.children); | ||
91 | INIT_LIST_HEAD(&root->node.val); | 89 | INIT_LIST_HEAD(&root->node.val); |
92 | 90 | ||
93 | root->node.parent = NULL; | 91 | root->node.parent = NULL; |
94 | root->node.hit = 0; | 92 | root->node.hit = 0; |
95 | root->node.children_hit = 0; | 93 | root->node.children_hit = 0; |
94 | root->node.rb_root_in = RB_ROOT; | ||
96 | root->max_depth = 0; | 95 | root->max_depth = 0; |
97 | } | 96 | } |
98 | 97 | ||