aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/util
diff options
context:
space:
mode:
authorNamhyung Kim <namhyung.kim@lge.com>2013-10-11 01:15:36 -0400
committerArnaldo Carvalho de Melo <acme@redhat.com>2013-10-21 16:33:23 -0400
commite369517ce5f796945c6af047b4e8b1d650e03458 (patch)
tree99078554225df0f002f754335185299b55943f9e /tools/perf/util
parentf11cfc6f294dbd83b0d58037404df2bd16066238 (diff)
perf callchain: Convert children list to rbtree
Current collapse stage has a scalability problem which can be reproduced easily with a parallel kernel build. This is because it needs to traverse every children of callchains linearly during the collapse/merge stage. Converting it to a rbtree reduced the overhead significantly. On my 400MB perf.data file which recorded with make -j32 kernel build: $ time perf --no-pager report --stdio > /dev/null before: real 6m22.073s user 6m18.683s sys 0m0.706s after: real 0m20.780s user 0m19.962s sys 0m0.689s During the perf report the overhead on append_chain_children went down from 96.69% to 18.16%: - 18.16% perf perf [.] append_chain_children - append_chain_children - 77.48% append_chain_children + 69.79% merge_chain_branch - 22.96% append_chain_children + 67.44% merge_chain_branch + 30.15% append_chain_children + 2.41% callchain_append + 7.25% callchain_append + 12.26% callchain_append + 10.22% merge_chain_branch + 11.58% perf perf [.] dso__find_symbol + 8.02% perf perf [.] sort__comm_cmp + 5.48% perf libc-2.17.so [.] malloc_consolidate Reported-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/1381468543-25334-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/util')
-rw-r--r--tools/perf/util/callchain.c147
-rw-r--r--tools/perf/util/callchain.h11
2 files changed, 116 insertions, 42 deletions
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 482f68081cd8..e3970e3eaacf 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -21,12 +21,6 @@
21 21
22__thread struct callchain_cursor callchain_cursor; 22__thread struct callchain_cursor callchain_cursor;
23 23
24#define chain_for_each_child(child, parent) \
25 list_for_each_entry(child, &parent->children, siblings)
26
27#define chain_for_each_child_safe(child, next, parent) \
28 list_for_each_entry_safe(child, next, &parent->children, siblings)
29
30static void 24static void
31rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, 25rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
32 enum chain_mode mode) 26 enum chain_mode mode)
@@ -71,10 +65,16 @@ static void
71__sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, 65__sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node,
72 u64 min_hit) 66 u64 min_hit)
73{ 67{
68 struct rb_node *n;
74 struct callchain_node *child; 69 struct callchain_node *child;
75 70
76 chain_for_each_child(child, node) 71 n = rb_first(&node->rb_root_in);
72 while (n) {
73 child = rb_entry(n, struct callchain_node, rb_node_in);
74 n = rb_next(n);
75
77 __sort_chain_flat(rb_root, child, min_hit); 76 __sort_chain_flat(rb_root, child, min_hit);
77 }
78 78
79 if (node->hit && node->hit >= min_hit) 79 if (node->hit && node->hit >= min_hit)
80 rb_insert_callchain(rb_root, node, CHAIN_FLAT); 80 rb_insert_callchain(rb_root, node, CHAIN_FLAT);
@@ -94,11 +94,16 @@ sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root,
94static void __sort_chain_graph_abs(struct callchain_node *node, 94static void __sort_chain_graph_abs(struct callchain_node *node,
95 u64 min_hit) 95 u64 min_hit)
96{ 96{
97 struct rb_node *n;
97 struct callchain_node *child; 98 struct callchain_node *child;
98 99
99 node->rb_root = RB_ROOT; 100 node->rb_root = RB_ROOT;
101 n = rb_first(&node->rb_root_in);
102
103 while (n) {
104 child = rb_entry(n, struct callchain_node, rb_node_in);
105 n = rb_next(n);
100 106
101 chain_for_each_child(child, node) {
102 __sort_chain_graph_abs(child, min_hit); 107 __sort_chain_graph_abs(child, min_hit);
103 if (callchain_cumul_hits(child) >= min_hit) 108 if (callchain_cumul_hits(child) >= min_hit)
104 rb_insert_callchain(&node->rb_root, child, 109 rb_insert_callchain(&node->rb_root, child,
@@ -117,13 +122,18 @@ sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root,
117static void __sort_chain_graph_rel(struct callchain_node *node, 122static void __sort_chain_graph_rel(struct callchain_node *node,
118 double min_percent) 123 double min_percent)
119{ 124{
125 struct rb_node *n;
120 struct callchain_node *child; 126 struct callchain_node *child;
121 u64 min_hit; 127 u64 min_hit;
122 128
123 node->rb_root = RB_ROOT; 129 node->rb_root = RB_ROOT;
124 min_hit = ceil(node->children_hit * min_percent); 130 min_hit = ceil(node->children_hit * min_percent);
125 131
126 chain_for_each_child(child, node) { 132 n = rb_first(&node->rb_root_in);
133 while (n) {
134 child = rb_entry(n, struct callchain_node, rb_node_in);
135 n = rb_next(n);
136
127 __sort_chain_graph_rel(child, min_percent); 137 __sort_chain_graph_rel(child, min_percent);
128 if (callchain_cumul_hits(child) >= min_hit) 138 if (callchain_cumul_hits(child) >= min_hit)
129 rb_insert_callchain(&node->rb_root, child, 139 rb_insert_callchain(&node->rb_root, child,
@@ -173,19 +183,26 @@ create_child(struct callchain_node *parent, bool inherit_children)
173 return NULL; 183 return NULL;
174 } 184 }
175 new->parent = parent; 185 new->parent = parent;
176 INIT_LIST_HEAD(&new->children);
177 INIT_LIST_HEAD(&new->val); 186 INIT_LIST_HEAD(&new->val);
178 187
179 if (inherit_children) { 188 if (inherit_children) {
180 struct callchain_node *next; 189 struct rb_node *n;
190 struct callchain_node *child;
191
192 new->rb_root_in = parent->rb_root_in;
193 parent->rb_root_in = RB_ROOT;
181 194
182 list_splice(&parent->children, &new->children); 195 n = rb_first(&new->rb_root_in);
183 INIT_LIST_HEAD(&parent->children); 196 while (n) {
197 child = rb_entry(n, struct callchain_node, rb_node_in);
198 child->parent = new;
199 n = rb_next(n);
200 }
184 201
185 chain_for_each_child(next, new) 202 /* make it the first child */
186 next->parent = new; 203 rb_link_node(&new->rb_node_in, NULL, &parent->rb_root_in.rb_node);
204 rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
187 } 205 }
188 list_add_tail(&new->siblings, &parent->children);
189 206
190 return new; 207 return new;
191} 208}
@@ -223,7 +240,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
223 } 240 }
224} 241}
225 242
226static void 243static struct callchain_node *
227add_child(struct callchain_node *parent, 244add_child(struct callchain_node *parent,
228 struct callchain_cursor *cursor, 245 struct callchain_cursor *cursor,
229 u64 period) 246 u64 period)
@@ -235,6 +252,19 @@ add_child(struct callchain_node *parent,
235 252
236 new->children_hit = 0; 253 new->children_hit = 0;
237 new->hit = period; 254 new->hit = period;
255 return new;
256}
257
258static s64 match_chain(struct callchain_cursor_node *node,
259 struct callchain_list *cnode)
260{
261 struct symbol *sym = node->sym;
262
263 if (cnode->ms.sym && sym &&
264 callchain_param.key == CCKEY_FUNCTION)
265 return cnode->ms.sym->start - sym->start;
266 else
267 return cnode->ip - node->ip;
238} 268}
239 269
240/* 270/*
@@ -272,9 +302,33 @@ split_add_child(struct callchain_node *parent,
272 302
273 /* create a new child for the new branch if any */ 303 /* create a new child for the new branch if any */
274 if (idx_total < cursor->nr) { 304 if (idx_total < cursor->nr) {
305 struct callchain_node *first;
306 struct callchain_list *cnode;
307 struct callchain_cursor_node *node;
308 struct rb_node *p, **pp;
309
275 parent->hit = 0; 310 parent->hit = 0;
276 add_child(parent, cursor, period);
277 parent->children_hit += period; 311 parent->children_hit += period;
312
313 node = callchain_cursor_current(cursor);
314 new = add_child(parent, cursor, period);
315
316 /*
317 * This is second child since we moved parent's children
318 * to new (first) child above.
319 */
320 p = parent->rb_root_in.rb_node;
321 first = rb_entry(p, struct callchain_node, rb_node_in);
322 cnode = list_first_entry(&first->val, struct callchain_list,
323 list);
324
325 if (match_chain(node, cnode) < 0)
326 pp = &p->rb_left;
327 else
328 pp = &p->rb_right;
329
330 rb_link_node(&new->rb_node_in, p, pp);
331 rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
278 } else { 332 } else {
279 parent->hit = period; 333 parent->hit = period;
280 } 334 }
@@ -291,16 +345,40 @@ append_chain_children(struct callchain_node *root,
291 u64 period) 345 u64 period)
292{ 346{
293 struct callchain_node *rnode; 347 struct callchain_node *rnode;
348 struct callchain_cursor_node *node;
349 struct rb_node **p = &root->rb_root_in.rb_node;
350 struct rb_node *parent = NULL;
351
352 node = callchain_cursor_current(cursor);
353 if (!node)
354 return;
294 355
295 /* lookup in childrens */ 356 /* lookup in childrens */
296 chain_for_each_child(rnode, root) { 357 while (*p) {
297 unsigned int ret = append_chain(rnode, cursor, period); 358 s64 ret;
359 struct callchain_list *cnode;
298 360
299 if (!ret) 361 parent = *p;
362 rnode = rb_entry(parent, struct callchain_node, rb_node_in);
363 cnode = list_first_entry(&rnode->val, struct callchain_list,
364 list);
365
366 /* just check first entry */
367 ret = match_chain(node, cnode);
368 if (ret == 0) {
369 append_chain(rnode, cursor, period);
300 goto inc_children_hit; 370 goto inc_children_hit;
371 }
372
373 if (ret < 0)
374 p = &parent->rb_left;
375 else
376 p = &parent->rb_right;
301 } 377 }
302 /* nothing in children, add to the current node */ 378 /* nothing in children, add to the current node */
303 add_child(root, cursor, period); 379 rnode = add_child(root, cursor, period);
380 rb_link_node(&rnode->rb_node_in, parent, p);
381 rb_insert_color(&rnode->rb_node_in, &root->rb_root_in);
304 382
305inc_children_hit: 383inc_children_hit:
306 root->children_hit += period; 384 root->children_hit += period;
@@ -325,28 +403,20 @@ append_chain(struct callchain_node *root,
325 */ 403 */
326 list_for_each_entry(cnode, &root->val, list) { 404 list_for_each_entry(cnode, &root->val, list) {
327 struct callchain_cursor_node *node; 405 struct callchain_cursor_node *node;
328 struct symbol *sym;
329 406
330 node = callchain_cursor_current(cursor); 407 node = callchain_cursor_current(cursor);
331 if (!node) 408 if (!node)
332 break; 409 break;
333 410
334 sym = node->sym; 411 if (match_chain(node, cnode) != 0)
335
336 if (cnode->ms.sym && sym &&
337 callchain_param.key == CCKEY_FUNCTION) {
338 if (cnode->ms.sym->start != sym->start)
339 break;
340 } else if (cnode->ip != node->ip)
341 break; 412 break;
342 413
343 if (!found) 414 found = true;
344 found = true;
345 415
346 callchain_cursor_advance(cursor); 416 callchain_cursor_advance(cursor);
347 } 417 }
348 418
349 /* matches not, relay on the parent */ 419 /* matches not, relay no the parent */
350 if (!found) { 420 if (!found) {
351 cursor->curr = curr_snap; 421 cursor->curr = curr_snap;
352 cursor->pos = start; 422 cursor->pos = start;
@@ -395,8 +465,9 @@ merge_chain_branch(struct callchain_cursor *cursor,
395 struct callchain_node *dst, struct callchain_node *src) 465 struct callchain_node *dst, struct callchain_node *src)
396{ 466{
397 struct callchain_cursor_node **old_last = cursor->last; 467 struct callchain_cursor_node **old_last = cursor->last;
398 struct callchain_node *child, *next_child; 468 struct callchain_node *child;
399 struct callchain_list *list, *next_list; 469 struct callchain_list *list, *next_list;
470 struct rb_node *n;
400 int old_pos = cursor->nr; 471 int old_pos = cursor->nr;
401 int err = 0; 472 int err = 0;
402 473
@@ -412,12 +483,16 @@ merge_chain_branch(struct callchain_cursor *cursor,
412 append_chain_children(dst, cursor, src->hit); 483 append_chain_children(dst, cursor, src->hit);
413 } 484 }
414 485
415 chain_for_each_child_safe(child, next_child, src) { 486 n = rb_first(&src->rb_root_in);
487 while (n) {
488 child = container_of(n, struct callchain_node, rb_node_in);
489 n = rb_next(n);
490 rb_erase(&child->rb_node_in, &src->rb_root_in);
491
416 err = merge_chain_branch(cursor, dst, child); 492 err = merge_chain_branch(cursor, dst, child);
417 if (err) 493 if (err)
418 break; 494 break;
419 495
420 list_del(&child->siblings);
421 free(child); 496 free(child);
422 } 497 }
423 498
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 2b585bc308cf..7bb36022377f 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -21,11 +21,11 @@ enum chain_order {
21 21
22struct callchain_node { 22struct callchain_node {
23 struct callchain_node *parent; 23 struct callchain_node *parent;
24 struct list_head siblings;
25 struct list_head children;
26 struct list_head val; 24 struct list_head val;
27 struct rb_node rb_node; /* to sort nodes in an rbtree */ 25 struct rb_node rb_node_in; /* to insert nodes in an rbtree */
28 struct rb_root rb_root; /* sorted tree of children */ 26 struct rb_node rb_node; /* to sort nodes in an output tree */
27 struct rb_root rb_root_in; /* input tree of children */
28 struct rb_root rb_root; /* sorted output tree of children */
29 unsigned int val_nr; 29 unsigned int val_nr;
30 u64 hit; 30 u64 hit;
31 u64 children_hit; 31 u64 children_hit;
@@ -86,13 +86,12 @@ extern __thread struct callchain_cursor callchain_cursor;
86 86
87static inline void callchain_init(struct callchain_root *root) 87static inline void callchain_init(struct callchain_root *root)
88{ 88{
89 INIT_LIST_HEAD(&root->node.siblings);
90 INIT_LIST_HEAD(&root->node.children);
91 INIT_LIST_HEAD(&root->node.val); 89 INIT_LIST_HEAD(&root->node.val);
92 90
93 root->node.parent = NULL; 91 root->node.parent = NULL;
94 root->node.hit = 0; 92 root->node.hit = 0;
95 root->node.children_hit = 0; 93 root->node.children_hit = 0;
94 root->node.rb_root_in = RB_ROOT;
96 root->max_depth = 0; 95 root->max_depth = 0;
97} 96}
98 97