diff options
author | Frederic Weisbecker <fweisbec@gmail.com> | 2011-01-13 22:51:58 -0500 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2011-01-22 16:56:31 -0500 |
commit | 1b3a0e9592ebf174af934b3908a2bf6a6fa86169 (patch) | |
tree | 22de930ec03920ea7a4d602d9a582f5125595916 /tools/perf/util/callchain.c | |
parent | de5fa3a8a05cd60f59622e88cfeb90416760d78e (diff) |
perf callchain: Feed callchains into a cursor
The callchains are fed with an array of a fixed size.
As a result we iterate over each callchains three times:
- 1st to resolve symbols
- 2nd to filter out context boundaries
- 3rd for the insertion into the tree
This also involves some pairs of memory allocation/deallocation
everytime we insert a callchain, for the filtered out array of
addresses and for the array of symbols that comes along.
Instead, feed the callchains through a linked list with persistent
allocations. It brings several pros like:
- Merge the 1st and 2nd iterations in one. That was possible before
but in a way that would involve allocating an array slightly taller
than necessary because we don't know in advance the number of context
boundaries to filter out.
- Much lesser allocations/deallocations. The linked list keeps
persistent empty entries for the next usages and is extendable at
will.
- Makes it easier for multiple sources of callchains to feed a
stacktrace together. This is deemed to pave the way for cfi based
callchains wherein traditional frame pointer based kernel
stacktraces will precede cfi based user ones, producing an overall
callchain which size is hardly predictable. This requirement
makes the static array obsolete and makes a linked list based
iterator a much more flexible fit.
Basic testing on a big perf file containing callchains (~ 176 MB)
has shown a throughput gain of about 11% with perf report.
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1294977121-5700-2-git-send-email-fweisbec@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/util/callchain.c')
-rw-r--r-- | tools/perf/util/callchain.c | 204 |
1 files changed, 100 insertions, 104 deletions
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index e12d539417b2..53a49e0cfc6c 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2009-2010, Frederic Weisbecker <fweisbec@gmail.com> | 2 | * Copyright (C) 2009-2011, Frederic Weisbecker <fweisbec@gmail.com> |
3 | * | 3 | * |
4 | * Handle the callchains from the stream in an ad-hoc radix tree and then | 4 | * Handle the callchains from the stream in an ad-hoc radix tree and then |
5 | * sort them in an rbtree. | 5 | * sort them in an rbtree. |
@@ -195,26 +195,21 @@ create_child(struct callchain_node *parent, bool inherit_children) | |||
195 | } | 195 | } |
196 | 196 | ||
197 | 197 | ||
198 | struct resolved_ip { | ||
199 | u64 ip; | ||
200 | struct map_symbol ms; | ||
201 | }; | ||
202 | |||
203 | struct resolved_chain { | ||
204 | u64 nr; | ||
205 | struct resolved_ip ips[0]; | ||
206 | }; | ||
207 | |||
208 | |||
209 | /* | 198 | /* |
210 | * Fill the node with callchain values | 199 | * Fill the node with callchain values |
211 | */ | 200 | */ |
212 | static void | 201 | static void |
213 | fill_node(struct callchain_node *node, struct resolved_chain *chain, int start) | 202 | fill_node(struct callchain_node *node, struct callchain_cursor *cursor) |
214 | { | 203 | { |
215 | unsigned int i; | 204 | struct callchain_cursor_node *cursor_node; |
205 | |||
206 | node->val_nr = cursor->nr - cursor->pos; | ||
207 | if (!node->val_nr) | ||
208 | pr_warning("Warning: empty node in callchain tree\n"); | ||
216 | 209 | ||
217 | for (i = start; i < chain->nr; i++) { | 210 | cursor_node = callchain_cursor_current(cursor); |
211 | |||
212 | while (cursor_node) { | ||
218 | struct callchain_list *call; | 213 | struct callchain_list *call; |
219 | 214 | ||
220 | call = zalloc(sizeof(*call)); | 215 | call = zalloc(sizeof(*call)); |
@@ -222,23 +217,25 @@ fill_node(struct callchain_node *node, struct resolved_chain *chain, int start) | |||
222 | perror("not enough memory for the code path tree"); | 217 | perror("not enough memory for the code path tree"); |
223 | return; | 218 | return; |
224 | } | 219 | } |
225 | call->ip = chain->ips[i].ip; | 220 | call->ip = cursor_node->ip; |
226 | call->ms = chain->ips[i].ms; | 221 | call->ms.sym = cursor_node->sym; |
222 | call->ms.map = cursor_node->map; | ||
227 | list_add_tail(&call->list, &node->val); | 223 | list_add_tail(&call->list, &node->val); |
224 | |||
225 | callchain_cursor_advance(cursor); | ||
226 | cursor_node = callchain_cursor_current(cursor); | ||
228 | } | 227 | } |
229 | node->val_nr = chain->nr - start; | ||
230 | if (!node->val_nr) | ||
231 | pr_warning("Warning: empty node in callchain tree\n"); | ||
232 | } | 228 | } |
233 | 229 | ||
234 | static void | 230 | static void |
235 | add_child(struct callchain_node *parent, struct resolved_chain *chain, | 231 | add_child(struct callchain_node *parent, |
236 | int start, u64 period) | 232 | struct callchain_cursor *cursor, |
233 | u64 period) | ||
237 | { | 234 | { |
238 | struct callchain_node *new; | 235 | struct callchain_node *new; |
239 | 236 | ||
240 | new = create_child(parent, false); | 237 | new = create_child(parent, false); |
241 | fill_node(new, chain, start); | 238 | fill_node(new, cursor); |
242 | 239 | ||
243 | new->children_hit = 0; | 240 | new->children_hit = 0; |
244 | new->hit = period; | 241 | new->hit = period; |
@@ -250,9 +247,10 @@ add_child(struct callchain_node *parent, struct resolved_chain *chain, | |||
250 | * Then create another child to host the given callchain of new branch | 247 | * Then create another child to host the given callchain of new branch |
251 | */ | 248 | */ |
252 | static void | 249 | static void |
253 | split_add_child(struct callchain_node *parent, struct resolved_chain *chain, | 250 | split_add_child(struct callchain_node *parent, |
254 | struct callchain_list *to_split, int idx_parents, int idx_local, | 251 | struct callchain_cursor *cursor, |
255 | u64 period) | 252 | struct callchain_list *to_split, |
253 | u64 idx_parents, u64 idx_local, u64 period) | ||
256 | { | 254 | { |
257 | struct callchain_node *new; | 255 | struct callchain_node *new; |
258 | struct list_head *old_tail; | 256 | struct list_head *old_tail; |
@@ -277,9 +275,9 @@ split_add_child(struct callchain_node *parent, struct resolved_chain *chain, | |||
277 | parent->val_nr = idx_local; | 275 | parent->val_nr = idx_local; |
278 | 276 | ||
279 | /* create a new child for the new branch if any */ | 277 | /* create a new child for the new branch if any */ |
280 | if (idx_total < chain->nr) { | 278 | if (idx_total < cursor->nr) { |
281 | parent->hit = 0; | 279 | parent->hit = 0; |
282 | add_child(parent, chain, idx_total, period); | 280 | add_child(parent, cursor, period); |
283 | parent->children_hit += period; | 281 | parent->children_hit += period; |
284 | } else { | 282 | } else { |
285 | parent->hit = period; | 283 | parent->hit = period; |
@@ -287,36 +285,41 @@ split_add_child(struct callchain_node *parent, struct resolved_chain *chain, | |||
287 | } | 285 | } |
288 | 286 | ||
289 | static int | 287 | static int |
290 | append_chain(struct callchain_node *root, struct resolved_chain *chain, | 288 | append_chain(struct callchain_node *root, |
291 | unsigned int start, u64 period); | 289 | struct callchain_cursor *cursor, |
290 | u64 period); | ||
292 | 291 | ||
293 | static void | 292 | static void |
294 | append_chain_children(struct callchain_node *root, struct resolved_chain *chain, | 293 | append_chain_children(struct callchain_node *root, |
295 | unsigned int start, u64 period) | 294 | struct callchain_cursor *cursor, |
295 | u64 period) | ||
296 | { | 296 | { |
297 | struct callchain_node *rnode; | 297 | struct callchain_node *rnode; |
298 | 298 | ||
299 | /* lookup in childrens */ | 299 | /* lookup in childrens */ |
300 | chain_for_each_child(rnode, root) { | 300 | chain_for_each_child(rnode, root) { |
301 | unsigned int ret = append_chain(rnode, chain, start, period); | 301 | unsigned int ret = append_chain(rnode, cursor, period); |
302 | 302 | ||
303 | if (!ret) | 303 | if (!ret) |
304 | goto inc_children_hit; | 304 | goto inc_children_hit; |
305 | } | 305 | } |
306 | /* nothing in children, add to the current node */ | 306 | /* nothing in children, add to the current node */ |
307 | add_child(root, chain, start, period); | 307 | add_child(root, cursor, period); |
308 | 308 | ||
309 | inc_children_hit: | 309 | inc_children_hit: |
310 | root->children_hit += period; | 310 | root->children_hit += period; |
311 | } | 311 | } |
312 | 312 | ||
313 | static int | 313 | static int |
314 | append_chain(struct callchain_node *root, struct resolved_chain *chain, | 314 | append_chain(struct callchain_node *root, |
315 | unsigned int start, u64 period) | 315 | struct callchain_cursor *cursor, |
316 | u64 period) | ||
316 | { | 317 | { |
318 | struct callchain_cursor_node *curr_snap = cursor->curr; | ||
317 | struct callchain_list *cnode; | 319 | struct callchain_list *cnode; |
318 | unsigned int i = start; | 320 | u64 start = cursor->pos; |
319 | bool found = false; | 321 | bool found = false; |
322 | u64 matches; | ||
320 | 323 | ||
321 | /* | 324 | /* |
322 | * Lookup in the current node | 325 | * Lookup in the current node |
@@ -324,114 +327,95 @@ append_chain(struct callchain_node *root, struct resolved_chain *chain, | |||
324 | * anywhere inside a function. | 327 | * anywhere inside a function. |
325 | */ | 328 | */ |
326 | list_for_each_entry(cnode, &root->val, list) { | 329 | list_for_each_entry(cnode, &root->val, list) { |
330 | struct callchain_cursor_node *node; | ||
327 | struct symbol *sym; | 331 | struct symbol *sym; |
328 | 332 | ||
329 | if (i == chain->nr) | 333 | node = callchain_cursor_current(cursor); |
334 | if (!node) | ||
330 | break; | 335 | break; |
331 | 336 | ||
332 | sym = chain->ips[i].ms.sym; | 337 | sym = node->sym; |
333 | 338 | ||
334 | if (cnode->ms.sym && sym) { | 339 | if (cnode->ms.sym && sym) { |
335 | if (cnode->ms.sym->start != sym->start) | 340 | if (cnode->ms.sym->start != sym->start) |
336 | break; | 341 | break; |
337 | } else if (cnode->ip != chain->ips[i].ip) | 342 | } else if (cnode->ip != node->ip) |
338 | break; | 343 | break; |
339 | 344 | ||
340 | if (!found) | 345 | if (!found) |
341 | found = true; | 346 | found = true; |
342 | i++; | 347 | |
348 | callchain_cursor_advance(cursor); | ||
343 | } | 349 | } |
344 | 350 | ||
345 | /* matches not, relay on the parent */ | 351 | /* matches not, relay on the parent */ |
346 | if (!found) | 352 | if (!found) { |
353 | cursor->curr = curr_snap; | ||
354 | cursor->pos = start; | ||
347 | return -1; | 355 | return -1; |
356 | } | ||
357 | |||
358 | matches = cursor->pos - start; | ||
348 | 359 | ||
349 | /* we match only a part of the node. Split it and add the new chain */ | 360 | /* we match only a part of the node. Split it and add the new chain */ |
350 | if (i - start < root->val_nr) { | 361 | if (matches < root->val_nr) { |
351 | split_add_child(root, chain, cnode, start, i - start, period); | 362 | split_add_child(root, cursor, cnode, start, matches, period); |
352 | return 0; | 363 | return 0; |
353 | } | 364 | } |
354 | 365 | ||
355 | /* we match 100% of the path, increment the hit */ | 366 | /* we match 100% of the path, increment the hit */ |
356 | if (i - start == root->val_nr && i == chain->nr) { | 367 | if (matches == root->val_nr && cursor->pos == cursor->nr) { |
357 | root->hit += period; | 368 | root->hit += period; |
358 | return 0; | 369 | return 0; |
359 | } | 370 | } |
360 | 371 | ||
361 | /* We match the node and still have a part remaining */ | 372 | /* We match the node and still have a part remaining */ |
362 | append_chain_children(root, chain, i, period); | 373 | append_chain_children(root, cursor, period); |
363 | 374 | ||
364 | return 0; | 375 | return 0; |
365 | } | 376 | } |
366 | 377 | ||
367 | static void filter_context(struct ip_callchain *old, struct resolved_chain *new, | 378 | int callchain_append(struct callchain_root *root, |
368 | struct map_symbol *syms) | 379 | struct callchain_cursor *cursor, |
369 | { | 380 | u64 period) |
370 | int i, j = 0; | ||
371 | |||
372 | for (i = 0; i < (int)old->nr; i++) { | ||
373 | if (old->ips[i] >= PERF_CONTEXT_MAX) | ||
374 | continue; | ||
375 | |||
376 | new->ips[j].ip = old->ips[i]; | ||
377 | new->ips[j].ms = syms[i]; | ||
378 | j++; | ||
379 | } | ||
380 | |||
381 | new->nr = j; | ||
382 | } | ||
383 | |||
384 | |||
385 | int callchain_append(struct callchain_root *root, struct ip_callchain *chain, | ||
386 | struct map_symbol *syms, u64 period) | ||
387 | { | 381 | { |
388 | struct resolved_chain *filtered; | 382 | if (!cursor->nr) |
389 | |||
390 | if (!chain->nr) | ||
391 | return 0; | 383 | return 0; |
392 | 384 | ||
393 | filtered = zalloc(sizeof(*filtered) + | 385 | callchain_cursor_commit(cursor); |
394 | chain->nr * sizeof(struct resolved_ip)); | ||
395 | if (!filtered) | ||
396 | return -ENOMEM; | ||
397 | |||
398 | filter_context(chain, filtered, syms); | ||
399 | |||
400 | if (!filtered->nr) | ||
401 | goto end; | ||
402 | 386 | ||
403 | append_chain_children(&root->node, filtered, 0, period); | 387 | append_chain_children(&root->node, cursor, period); |
404 | 388 | ||
405 | if (filtered->nr > root->max_depth) | 389 | if (cursor->nr > root->max_depth) |
406 | root->max_depth = filtered->nr; | 390 | root->max_depth = cursor->nr; |
407 | end: | ||
408 | free(filtered); | ||
409 | 391 | ||
410 | return 0; | 392 | return 0; |
411 | } | 393 | } |
412 | 394 | ||
413 | static int | 395 | static int |
414 | merge_chain_branch(struct callchain_node *dst, struct callchain_node *src, | 396 | merge_chain_branch(struct callchain_cursor *cursor, |
415 | struct resolved_chain *chain) | 397 | struct callchain_node *dst, struct callchain_node *src) |
416 | { | 398 | { |
399 | struct callchain_cursor_node **old_last = cursor->last; | ||
417 | struct callchain_node *child, *next_child; | 400 | struct callchain_node *child, *next_child; |
418 | struct callchain_list *list, *next_list; | 401 | struct callchain_list *list, *next_list; |
419 | int old_pos = chain->nr; | 402 | int old_pos = cursor->nr; |
420 | int err = 0; | 403 | int err = 0; |
421 | 404 | ||
422 | list_for_each_entry_safe(list, next_list, &src->val, list) { | 405 | list_for_each_entry_safe(list, next_list, &src->val, list) { |
423 | chain->ips[chain->nr].ip = list->ip; | 406 | callchain_cursor_append(cursor, list->ip, |
424 | chain->ips[chain->nr].ms = list->ms; | 407 | list->ms.map, list->ms.sym); |
425 | chain->nr++; | ||
426 | list_del(&list->list); | 408 | list_del(&list->list); |
427 | free(list); | 409 | free(list); |
428 | } | 410 | } |
429 | 411 | ||
430 | if (src->hit) | 412 | if (src->hit) { |
431 | append_chain_children(dst, chain, 0, src->hit); | 413 | callchain_cursor_commit(cursor); |
414 | append_chain_children(dst, cursor, src->hit); | ||
415 | } | ||
432 | 416 | ||
433 | chain_for_each_child_safe(child, next_child, src) { | 417 | chain_for_each_child_safe(child, next_child, src) { |
434 | err = merge_chain_branch(dst, child, chain); | 418 | err = merge_chain_branch(cursor, dst, child); |
435 | if (err) | 419 | if (err) |
436 | break; | 420 | break; |
437 | 421 | ||
@@ -439,26 +423,38 @@ merge_chain_branch(struct callchain_node *dst, struct callchain_node *src, | |||
439 | free(child); | 423 | free(child); |
440 | } | 424 | } |
441 | 425 | ||
442 | chain->nr = old_pos; | 426 | cursor->nr = old_pos; |
427 | cursor->last = old_last; | ||
443 | 428 | ||
444 | return err; | 429 | return err; |
445 | } | 430 | } |
446 | 431 | ||
447 | int callchain_merge(struct callchain_root *dst, struct callchain_root *src) | 432 | int callchain_merge(struct callchain_cursor *cursor, |
433 | struct callchain_root *dst, struct callchain_root *src) | ||
434 | { | ||
435 | return merge_chain_branch(cursor, &dst->node, &src->node); | ||
436 | } | ||
437 | |||
438 | int callchain_cursor_append(struct callchain_cursor *cursor, | ||
439 | u64 ip, struct map *map, struct symbol *sym) | ||
448 | { | 440 | { |
449 | struct resolved_chain *chain; | 441 | struct callchain_cursor_node *node = *cursor->last; |
450 | int err; | ||
451 | 442 | ||
452 | chain = malloc(sizeof(*chain) + | 443 | if (!node) { |
453 | src->max_depth * sizeof(struct resolved_ip)); | 444 | node = calloc(sizeof(*node), 1); |
454 | if (!chain) | 445 | if (!node) |
455 | return -ENOMEM; | 446 | return -ENOMEM; |
456 | 447 | ||
457 | chain->nr = 0; | 448 | *cursor->last = node; |
449 | } | ||
458 | 450 | ||
459 | err = merge_chain_branch(&dst->node, &src->node, chain); | 451 | node->ip = ip; |
452 | node->map = map; | ||
453 | node->sym = sym; | ||
460 | 454 | ||
461 | free(chain); | 455 | cursor->nr++; |
462 | 456 | ||
463 | return err; | 457 | cursor->last = &node->next; |
458 | |||
459 | return 0; | ||
464 | } | 460 | } |