diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-18 11:26:46 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-18 11:26:46 -0400 |
commit | 96b90f27bcf22f1d06cc16d9475cefa6ea4c4718 (patch) | |
tree | a886ad5f611dea36c6d4b615dfdcdbbcf5bd3135 /tools | |
parent | 396c9df2231865ef55aa031e3f5df9d99e036869 (diff) | |
parent | 0c99241c93b8060441f3c8434848e54b5338f922 (diff) |
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
"This update has mostly fixes, but also other bits:
- perf tooling fixes
- PMU driver fixes
- Intel Broadwell PMU driver HW-enablement for LBR callstacks
- a late-coming 'perf kmem' tool update that enables it to also
analyze page allocation data. Note, this comes with MM tracepoint
changes that we believe do not break anything: because it changes
the formerly opaque 'struct page *' field that uniquely identifies
pages to 'pfn' which identifies pages uniquely too, but isn't as
opaque and can be used for other purposes as well"
* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf/x86/intel/pt: Fix and clean up error handling in pt_event_add()
perf/x86/intel: Add Broadwell support for the LBR callstack
perf/x86/intel/rapl: Fix energy counter measurements by supporting per domain energy units
perf/x86/intel: Fix Core2,Atom,NHM,WSM cycles:pp events
perf/x86: Fix hw_perf_event::flags collision
perf probe: Fix segfault when probe with lazy_line to file
perf probe: Find compilation directory path for lazy matching
perf probe: Set retprobe flag when probe in address-based alternative mode
perf kmem: Analyze page allocator events also
tracing, mm: Record pfn instead of pointer to struct page
Diffstat (limited to 'tools')
-rw-r--r-- | tools/perf/Documentation/perf-kmem.txt | 8 | ||||
-rw-r--r-- | tools/perf/builtin-kmem.c | 500 | ||||
-rw-r--r-- | tools/perf/util/probe-event.c | 60 | ||||
-rw-r--r-- | tools/perf/util/probe-finder.c | 73 | ||||
-rw-r--r-- | tools/perf/util/probe-finder.h | 4 |
5 files changed, 567 insertions, 78 deletions
diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt index 150253cc3c97..23219c65c16f 100644 --- a/tools/perf/Documentation/perf-kmem.txt +++ b/tools/perf/Documentation/perf-kmem.txt | |||
@@ -3,7 +3,7 @@ perf-kmem(1) | |||
3 | 3 | ||
4 | NAME | 4 | NAME |
5 | ---- | 5 | ---- |
6 | perf-kmem - Tool to trace/measure kernel memory(slab) properties | 6 | perf-kmem - Tool to trace/measure kernel memory properties |
7 | 7 | ||
8 | SYNOPSIS | 8 | SYNOPSIS |
9 | -------- | 9 | -------- |
@@ -46,6 +46,12 @@ OPTIONS | |||
46 | --raw-ip:: | 46 | --raw-ip:: |
47 | Print raw ip instead of symbol | 47 | Print raw ip instead of symbol |
48 | 48 | ||
49 | --slab:: | ||
50 | Analyze SLAB allocator events. | ||
51 | |||
52 | --page:: | ||
53 | Analyze page allocator events | ||
54 | |||
49 | SEE ALSO | 55 | SEE ALSO |
50 | -------- | 56 | -------- |
51 | linkperf:perf-record[1] | 57 | linkperf:perf-record[1] |
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 4ebf65c79434..63ea01349b6e 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c | |||
@@ -22,6 +22,11 @@ | |||
22 | #include <linux/string.h> | 22 | #include <linux/string.h> |
23 | #include <locale.h> | 23 | #include <locale.h> |
24 | 24 | ||
25 | static int kmem_slab; | ||
26 | static int kmem_page; | ||
27 | |||
28 | static long kmem_page_size; | ||
29 | |||
25 | struct alloc_stat; | 30 | struct alloc_stat; |
26 | typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *); | 31 | typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *); |
27 | 32 | ||
@@ -226,6 +231,244 @@ static int perf_evsel__process_free_event(struct perf_evsel *evsel, | |||
226 | return 0; | 231 | return 0; |
227 | } | 232 | } |
228 | 233 | ||
234 | static u64 total_page_alloc_bytes; | ||
235 | static u64 total_page_free_bytes; | ||
236 | static u64 total_page_nomatch_bytes; | ||
237 | static u64 total_page_fail_bytes; | ||
238 | static unsigned long nr_page_allocs; | ||
239 | static unsigned long nr_page_frees; | ||
240 | static unsigned long nr_page_fails; | ||
241 | static unsigned long nr_page_nomatch; | ||
242 | |||
243 | static bool use_pfn; | ||
244 | |||
245 | #define MAX_MIGRATE_TYPES 6 | ||
246 | #define MAX_PAGE_ORDER 11 | ||
247 | |||
248 | static int order_stats[MAX_PAGE_ORDER][MAX_MIGRATE_TYPES]; | ||
249 | |||
250 | struct page_stat { | ||
251 | struct rb_node node; | ||
252 | u64 page; | ||
253 | int order; | ||
254 | unsigned gfp_flags; | ||
255 | unsigned migrate_type; | ||
256 | u64 alloc_bytes; | ||
257 | u64 free_bytes; | ||
258 | int nr_alloc; | ||
259 | int nr_free; | ||
260 | }; | ||
261 | |||
262 | static struct rb_root page_tree; | ||
263 | static struct rb_root page_alloc_tree; | ||
264 | static struct rb_root page_alloc_sorted; | ||
265 | |||
266 | static struct page_stat *search_page(unsigned long page, bool create) | ||
267 | { | ||
268 | struct rb_node **node = &page_tree.rb_node; | ||
269 | struct rb_node *parent = NULL; | ||
270 | struct page_stat *data; | ||
271 | |||
272 | while (*node) { | ||
273 | s64 cmp; | ||
274 | |||
275 | parent = *node; | ||
276 | data = rb_entry(*node, struct page_stat, node); | ||
277 | |||
278 | cmp = data->page - page; | ||
279 | if (cmp < 0) | ||
280 | node = &parent->rb_left; | ||
281 | else if (cmp > 0) | ||
282 | node = &parent->rb_right; | ||
283 | else | ||
284 | return data; | ||
285 | } | ||
286 | |||
287 | if (!create) | ||
288 | return NULL; | ||
289 | |||
290 | data = zalloc(sizeof(*data)); | ||
291 | if (data != NULL) { | ||
292 | data->page = page; | ||
293 | |||
294 | rb_link_node(&data->node, parent, node); | ||
295 | rb_insert_color(&data->node, &page_tree); | ||
296 | } | ||
297 | |||
298 | return data; | ||
299 | } | ||
300 | |||
301 | static int page_stat_cmp(struct page_stat *a, struct page_stat *b) | ||
302 | { | ||
303 | if (a->page > b->page) | ||
304 | return -1; | ||
305 | if (a->page < b->page) | ||
306 | return 1; | ||
307 | if (a->order > b->order) | ||
308 | return -1; | ||
309 | if (a->order < b->order) | ||
310 | return 1; | ||
311 | if (a->migrate_type > b->migrate_type) | ||
312 | return -1; | ||
313 | if (a->migrate_type < b->migrate_type) | ||
314 | return 1; | ||
315 | if (a->gfp_flags > b->gfp_flags) | ||
316 | return -1; | ||
317 | if (a->gfp_flags < b->gfp_flags) | ||
318 | return 1; | ||
319 | return 0; | ||
320 | } | ||
321 | |||
322 | static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool create) | ||
323 | { | ||
324 | struct rb_node **node = &page_alloc_tree.rb_node; | ||
325 | struct rb_node *parent = NULL; | ||
326 | struct page_stat *data; | ||
327 | |||
328 | while (*node) { | ||
329 | s64 cmp; | ||
330 | |||
331 | parent = *node; | ||
332 | data = rb_entry(*node, struct page_stat, node); | ||
333 | |||
334 | cmp = page_stat_cmp(data, stat); | ||
335 | if (cmp < 0) | ||
336 | node = &parent->rb_left; | ||
337 | else if (cmp > 0) | ||
338 | node = &parent->rb_right; | ||
339 | else | ||
340 | return data; | ||
341 | } | ||
342 | |||
343 | if (!create) | ||
344 | return NULL; | ||
345 | |||
346 | data = zalloc(sizeof(*data)); | ||
347 | if (data != NULL) { | ||
348 | data->page = stat->page; | ||
349 | data->order = stat->order; | ||
350 | data->gfp_flags = stat->gfp_flags; | ||
351 | data->migrate_type = stat->migrate_type; | ||
352 | |||
353 | rb_link_node(&data->node, parent, node); | ||
354 | rb_insert_color(&data->node, &page_alloc_tree); | ||
355 | } | ||
356 | |||
357 | return data; | ||
358 | } | ||
359 | |||
360 | static bool valid_page(u64 pfn_or_page) | ||
361 | { | ||
362 | if (use_pfn && pfn_or_page == -1UL) | ||
363 | return false; | ||
364 | if (!use_pfn && pfn_or_page == 0) | ||
365 | return false; | ||
366 | return true; | ||
367 | } | ||
368 | |||
369 | static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel, | ||
370 | struct perf_sample *sample) | ||
371 | { | ||
372 | u64 page; | ||
373 | unsigned int order = perf_evsel__intval(evsel, sample, "order"); | ||
374 | unsigned int gfp_flags = perf_evsel__intval(evsel, sample, "gfp_flags"); | ||
375 | unsigned int migrate_type = perf_evsel__intval(evsel, sample, | ||
376 | "migratetype"); | ||
377 | u64 bytes = kmem_page_size << order; | ||
378 | struct page_stat *stat; | ||
379 | struct page_stat this = { | ||
380 | .order = order, | ||
381 | .gfp_flags = gfp_flags, | ||
382 | .migrate_type = migrate_type, | ||
383 | }; | ||
384 | |||
385 | if (use_pfn) | ||
386 | page = perf_evsel__intval(evsel, sample, "pfn"); | ||
387 | else | ||
388 | page = perf_evsel__intval(evsel, sample, "page"); | ||
389 | |||
390 | nr_page_allocs++; | ||
391 | total_page_alloc_bytes += bytes; | ||
392 | |||
393 | if (!valid_page(page)) { | ||
394 | nr_page_fails++; | ||
395 | total_page_fail_bytes += bytes; | ||
396 | |||
397 | return 0; | ||
398 | } | ||
399 | |||
400 | /* | ||
401 | * This is to find the current page (with correct gfp flags and | ||
402 | * migrate type) at free event. | ||
403 | */ | ||
404 | stat = search_page(page, true); | ||
405 | if (stat == NULL) | ||
406 | return -ENOMEM; | ||
407 | |||
408 | stat->order = order; | ||
409 | stat->gfp_flags = gfp_flags; | ||
410 | stat->migrate_type = migrate_type; | ||
411 | |||
412 | this.page = page; | ||
413 | stat = search_page_alloc_stat(&this, true); | ||
414 | if (stat == NULL) | ||
415 | return -ENOMEM; | ||
416 | |||
417 | stat->nr_alloc++; | ||
418 | stat->alloc_bytes += bytes; | ||
419 | |||
420 | order_stats[order][migrate_type]++; | ||
421 | |||
422 | return 0; | ||
423 | } | ||
424 | |||
425 | static int perf_evsel__process_page_free_event(struct perf_evsel *evsel, | ||
426 | struct perf_sample *sample) | ||
427 | { | ||
428 | u64 page; | ||
429 | unsigned int order = perf_evsel__intval(evsel, sample, "order"); | ||
430 | u64 bytes = kmem_page_size << order; | ||
431 | struct page_stat *stat; | ||
432 | struct page_stat this = { | ||
433 | .order = order, | ||
434 | }; | ||
435 | |||
436 | if (use_pfn) | ||
437 | page = perf_evsel__intval(evsel, sample, "pfn"); | ||
438 | else | ||
439 | page = perf_evsel__intval(evsel, sample, "page"); | ||
440 | |||
441 | nr_page_frees++; | ||
442 | total_page_free_bytes += bytes; | ||
443 | |||
444 | stat = search_page(page, false); | ||
445 | if (stat == NULL) { | ||
446 | pr_debug2("missing free at page %"PRIx64" (order: %d)\n", | ||
447 | page, order); | ||
448 | |||
449 | nr_page_nomatch++; | ||
450 | total_page_nomatch_bytes += bytes; | ||
451 | |||
452 | return 0; | ||
453 | } | ||
454 | |||
455 | this.page = page; | ||
456 | this.gfp_flags = stat->gfp_flags; | ||
457 | this.migrate_type = stat->migrate_type; | ||
458 | |||
459 | rb_erase(&stat->node, &page_tree); | ||
460 | free(stat); | ||
461 | |||
462 | stat = search_page_alloc_stat(&this, false); | ||
463 | if (stat == NULL) | ||
464 | return -ENOENT; | ||
465 | |||
466 | stat->nr_free++; | ||
467 | stat->free_bytes += bytes; | ||
468 | |||
469 | return 0; | ||
470 | } | ||
471 | |||
229 | typedef int (*tracepoint_handler)(struct perf_evsel *evsel, | 472 | typedef int (*tracepoint_handler)(struct perf_evsel *evsel, |
230 | struct perf_sample *sample); | 473 | struct perf_sample *sample); |
231 | 474 | ||
@@ -270,8 +513,9 @@ static double fragmentation(unsigned long n_req, unsigned long n_alloc) | |||
270 | return 100.0 - (100.0 * n_req / n_alloc); | 513 | return 100.0 - (100.0 * n_req / n_alloc); |
271 | } | 514 | } |
272 | 515 | ||
273 | static void __print_result(struct rb_root *root, struct perf_session *session, | 516 | static void __print_slab_result(struct rb_root *root, |
274 | int n_lines, int is_caller) | 517 | struct perf_session *session, |
518 | int n_lines, int is_caller) | ||
275 | { | 519 | { |
276 | struct rb_node *next; | 520 | struct rb_node *next; |
277 | struct machine *machine = &session->machines.host; | 521 | struct machine *machine = &session->machines.host; |
@@ -323,9 +567,56 @@ static void __print_result(struct rb_root *root, struct perf_session *session, | |||
323 | printf("%.105s\n", graph_dotted_line); | 567 | printf("%.105s\n", graph_dotted_line); |
324 | } | 568 | } |
325 | 569 | ||
326 | static void print_summary(void) | 570 | static const char * const migrate_type_str[] = { |
571 | "UNMOVABL", | ||
572 | "RECLAIM", | ||
573 | "MOVABLE", | ||
574 | "RESERVED", | ||
575 | "CMA/ISLT", | ||
576 | "UNKNOWN", | ||
577 | }; | ||
578 | |||
579 | static void __print_page_result(struct rb_root *root, | ||
580 | struct perf_session *session __maybe_unused, | ||
581 | int n_lines) | ||
582 | { | ||
583 | struct rb_node *next = rb_first(root); | ||
584 | const char *format; | ||
585 | |||
586 | printf("\n%.80s\n", graph_dotted_line); | ||
587 | printf(" %-16s | Total alloc (KB) | Hits | Order | Mig.type | GFP flags\n", | ||
588 | use_pfn ? "PFN" : "Page"); | ||
589 | printf("%.80s\n", graph_dotted_line); | ||
590 | |||
591 | if (use_pfn) | ||
592 | format = " %16llu | %'16llu | %'9d | %5d | %8s | %08lx\n"; | ||
593 | else | ||
594 | format = " %016llx | %'16llu | %'9d | %5d | %8s | %08lx\n"; | ||
595 | |||
596 | while (next && n_lines--) { | ||
597 | struct page_stat *data; | ||
598 | |||
599 | data = rb_entry(next, struct page_stat, node); | ||
600 | |||
601 | printf(format, (unsigned long long)data->page, | ||
602 | (unsigned long long)data->alloc_bytes / 1024, | ||
603 | data->nr_alloc, data->order, | ||
604 | migrate_type_str[data->migrate_type], | ||
605 | (unsigned long)data->gfp_flags); | ||
606 | |||
607 | next = rb_next(next); | ||
608 | } | ||
609 | |||
610 | if (n_lines == -1) | ||
611 | printf(" ... | ... | ... | ... | ... | ... \n"); | ||
612 | |||
613 | printf("%.80s\n", graph_dotted_line); | ||
614 | } | ||
615 | |||
616 | static void print_slab_summary(void) | ||
327 | { | 617 | { |
328 | printf("\nSUMMARY\n=======\n"); | 618 | printf("\nSUMMARY (SLAB allocator)"); |
619 | printf("\n========================\n"); | ||
329 | printf("Total bytes requested: %'lu\n", total_requested); | 620 | printf("Total bytes requested: %'lu\n", total_requested); |
330 | printf("Total bytes allocated: %'lu\n", total_allocated); | 621 | printf("Total bytes allocated: %'lu\n", total_allocated); |
331 | printf("Total bytes wasted on internal fragmentation: %'lu\n", | 622 | printf("Total bytes wasted on internal fragmentation: %'lu\n", |
@@ -335,13 +626,73 @@ static void print_summary(void) | |||
335 | printf("Cross CPU allocations: %'lu/%'lu\n", nr_cross_allocs, nr_allocs); | 626 | printf("Cross CPU allocations: %'lu/%'lu\n", nr_cross_allocs, nr_allocs); |
336 | } | 627 | } |
337 | 628 | ||
338 | static void print_result(struct perf_session *session) | 629 | static void print_page_summary(void) |
630 | { | ||
631 | int o, m; | ||
632 | u64 nr_alloc_freed = nr_page_frees - nr_page_nomatch; | ||
633 | u64 total_alloc_freed_bytes = total_page_free_bytes - total_page_nomatch_bytes; | ||
634 | |||
635 | printf("\nSUMMARY (page allocator)"); | ||
636 | printf("\n========================\n"); | ||
637 | printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total allocation requests", | ||
638 | nr_page_allocs, total_page_alloc_bytes / 1024); | ||
639 | printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total free requests", | ||
640 | nr_page_frees, total_page_free_bytes / 1024); | ||
641 | printf("\n"); | ||
642 | |||
643 | printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests", | ||
644 | nr_alloc_freed, (total_alloc_freed_bytes) / 1024); | ||
645 | printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total alloc-only requests", | ||
646 | nr_page_allocs - nr_alloc_freed, | ||
647 | (total_page_alloc_bytes - total_alloc_freed_bytes) / 1024); | ||
648 | printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total free-only requests", | ||
649 | nr_page_nomatch, total_page_nomatch_bytes / 1024); | ||
650 | printf("\n"); | ||
651 | |||
652 | printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total allocation failures", | ||
653 | nr_page_fails, total_page_fail_bytes / 1024); | ||
654 | printf("\n"); | ||
655 | |||
656 | printf("%5s %12s %12s %12s %12s %12s\n", "Order", "Unmovable", | ||
657 | "Reclaimable", "Movable", "Reserved", "CMA/Isolated"); | ||
658 | printf("%.5s %.12s %.12s %.12s %.12s %.12s\n", graph_dotted_line, | ||
659 | graph_dotted_line, graph_dotted_line, graph_dotted_line, | ||
660 | graph_dotted_line, graph_dotted_line); | ||
661 | |||
662 | for (o = 0; o < MAX_PAGE_ORDER; o++) { | ||
663 | printf("%5d", o); | ||
664 | for (m = 0; m < MAX_MIGRATE_TYPES - 1; m++) { | ||
665 | if (order_stats[o][m]) | ||
666 | printf(" %'12d", order_stats[o][m]); | ||
667 | else | ||
668 | printf(" %12c", '.'); | ||
669 | } | ||
670 | printf("\n"); | ||
671 | } | ||
672 | } | ||
673 | |||
674 | static void print_slab_result(struct perf_session *session) | ||
339 | { | 675 | { |
340 | if (caller_flag) | 676 | if (caller_flag) |
341 | __print_result(&root_caller_sorted, session, caller_lines, 1); | 677 | __print_slab_result(&root_caller_sorted, session, caller_lines, 1); |
678 | if (alloc_flag) | ||
679 | __print_slab_result(&root_alloc_sorted, session, alloc_lines, 0); | ||
680 | print_slab_summary(); | ||
681 | } | ||
682 | |||
683 | static void print_page_result(struct perf_session *session) | ||
684 | { | ||
342 | if (alloc_flag) | 685 | if (alloc_flag) |
343 | __print_result(&root_alloc_sorted, session, alloc_lines, 0); | 686 | __print_page_result(&page_alloc_sorted, session, alloc_lines); |
344 | print_summary(); | 687 | print_page_summary(); |
688 | } | ||
689 | |||
690 | static void print_result(struct perf_session *session) | ||
691 | { | ||
692 | if (kmem_slab) | ||
693 | print_slab_result(session); | ||
694 | if (kmem_page) | ||
695 | print_page_result(session); | ||
345 | } | 696 | } |
346 | 697 | ||
347 | struct sort_dimension { | 698 | struct sort_dimension { |
@@ -353,8 +704,8 @@ struct sort_dimension { | |||
353 | static LIST_HEAD(caller_sort); | 704 | static LIST_HEAD(caller_sort); |
354 | static LIST_HEAD(alloc_sort); | 705 | static LIST_HEAD(alloc_sort); |
355 | 706 | ||
356 | static void sort_insert(struct rb_root *root, struct alloc_stat *data, | 707 | static void sort_slab_insert(struct rb_root *root, struct alloc_stat *data, |
357 | struct list_head *sort_list) | 708 | struct list_head *sort_list) |
358 | { | 709 | { |
359 | struct rb_node **new = &(root->rb_node); | 710 | struct rb_node **new = &(root->rb_node); |
360 | struct rb_node *parent = NULL; | 711 | struct rb_node *parent = NULL; |
@@ -383,8 +734,8 @@ static void sort_insert(struct rb_root *root, struct alloc_stat *data, | |||
383 | rb_insert_color(&data->node, root); | 734 | rb_insert_color(&data->node, root); |
384 | } | 735 | } |
385 | 736 | ||
386 | static void __sort_result(struct rb_root *root, struct rb_root *root_sorted, | 737 | static void __sort_slab_result(struct rb_root *root, struct rb_root *root_sorted, |
387 | struct list_head *sort_list) | 738 | struct list_head *sort_list) |
388 | { | 739 | { |
389 | struct rb_node *node; | 740 | struct rb_node *node; |
390 | struct alloc_stat *data; | 741 | struct alloc_stat *data; |
@@ -396,26 +747,79 @@ static void __sort_result(struct rb_root *root, struct rb_root *root_sorted, | |||
396 | 747 | ||
397 | rb_erase(node, root); | 748 | rb_erase(node, root); |
398 | data = rb_entry(node, struct alloc_stat, node); | 749 | data = rb_entry(node, struct alloc_stat, node); |
399 | sort_insert(root_sorted, data, sort_list); | 750 | sort_slab_insert(root_sorted, data, sort_list); |
751 | } | ||
752 | } | ||
753 | |||
754 | static void sort_page_insert(struct rb_root *root, struct page_stat *data) | ||
755 | { | ||
756 | struct rb_node **new = &root->rb_node; | ||
757 | struct rb_node *parent = NULL; | ||
758 | |||
759 | while (*new) { | ||
760 | struct page_stat *this; | ||
761 | int cmp = 0; | ||
762 | |||
763 | this = rb_entry(*new, struct page_stat, node); | ||
764 | parent = *new; | ||
765 | |||
766 | /* TODO: support more sort key */ | ||
767 | cmp = data->alloc_bytes - this->alloc_bytes; | ||
768 | |||
769 | if (cmp > 0) | ||
770 | new = &parent->rb_left; | ||
771 | else | ||
772 | new = &parent->rb_right; | ||
773 | } | ||
774 | |||
775 | rb_link_node(&data->node, parent, new); | ||
776 | rb_insert_color(&data->node, root); | ||
777 | } | ||
778 | |||
779 | static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted) | ||
780 | { | ||
781 | struct rb_node *node; | ||
782 | struct page_stat *data; | ||
783 | |||
784 | for (;;) { | ||
785 | node = rb_first(root); | ||
786 | if (!node) | ||
787 | break; | ||
788 | |||
789 | rb_erase(node, root); | ||
790 | data = rb_entry(node, struct page_stat, node); | ||
791 | sort_page_insert(root_sorted, data); | ||
400 | } | 792 | } |
401 | } | 793 | } |
402 | 794 | ||
403 | static void sort_result(void) | 795 | static void sort_result(void) |
404 | { | 796 | { |
405 | __sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort); | 797 | if (kmem_slab) { |
406 | __sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort); | 798 | __sort_slab_result(&root_alloc_stat, &root_alloc_sorted, |
799 | &alloc_sort); | ||
800 | __sort_slab_result(&root_caller_stat, &root_caller_sorted, | ||
801 | &caller_sort); | ||
802 | } | ||
803 | if (kmem_page) { | ||
804 | __sort_page_result(&page_alloc_tree, &page_alloc_sorted); | ||
805 | } | ||
407 | } | 806 | } |
408 | 807 | ||
409 | static int __cmd_kmem(struct perf_session *session) | 808 | static int __cmd_kmem(struct perf_session *session) |
410 | { | 809 | { |
411 | int err = -EINVAL; | 810 | int err = -EINVAL; |
811 | struct perf_evsel *evsel; | ||
412 | const struct perf_evsel_str_handler kmem_tracepoints[] = { | 812 | const struct perf_evsel_str_handler kmem_tracepoints[] = { |
813 | /* slab allocator */ | ||
413 | { "kmem:kmalloc", perf_evsel__process_alloc_event, }, | 814 | { "kmem:kmalloc", perf_evsel__process_alloc_event, }, |
414 | { "kmem:kmem_cache_alloc", perf_evsel__process_alloc_event, }, | 815 | { "kmem:kmem_cache_alloc", perf_evsel__process_alloc_event, }, |
415 | { "kmem:kmalloc_node", perf_evsel__process_alloc_node_event, }, | 816 | { "kmem:kmalloc_node", perf_evsel__process_alloc_node_event, }, |
416 | { "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, }, | 817 | { "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, }, |
417 | { "kmem:kfree", perf_evsel__process_free_event, }, | 818 | { "kmem:kfree", perf_evsel__process_free_event, }, |
418 | { "kmem:kmem_cache_free", perf_evsel__process_free_event, }, | 819 | { "kmem:kmem_cache_free", perf_evsel__process_free_event, }, |
820 | /* page allocator */ | ||
821 | { "kmem:mm_page_alloc", perf_evsel__process_page_alloc_event, }, | ||
822 | { "kmem:mm_page_free", perf_evsel__process_page_free_event, }, | ||
419 | }; | 823 | }; |
420 | 824 | ||
421 | if (!perf_session__has_traces(session, "kmem record")) | 825 | if (!perf_session__has_traces(session, "kmem record")) |
@@ -426,10 +830,20 @@ static int __cmd_kmem(struct perf_session *session) | |||
426 | goto out; | 830 | goto out; |
427 | } | 831 | } |
428 | 832 | ||
833 | evlist__for_each(session->evlist, evsel) { | ||
834 | if (!strcmp(perf_evsel__name(evsel), "kmem:mm_page_alloc") && | ||
835 | perf_evsel__field(evsel, "pfn")) { | ||
836 | use_pfn = true; | ||
837 | break; | ||
838 | } | ||
839 | } | ||
840 | |||
429 | setup_pager(); | 841 | setup_pager(); |
430 | err = perf_session__process_events(session); | 842 | err = perf_session__process_events(session); |
431 | if (err != 0) | 843 | if (err != 0) { |
844 | pr_err("error during process events: %d\n", err); | ||
432 | goto out; | 845 | goto out; |
846 | } | ||
433 | sort_result(); | 847 | sort_result(); |
434 | print_result(session); | 848 | print_result(session); |
435 | out: | 849 | out: |
@@ -612,6 +1026,22 @@ static int parse_alloc_opt(const struct option *opt __maybe_unused, | |||
612 | return 0; | 1026 | return 0; |
613 | } | 1027 | } |
614 | 1028 | ||
1029 | static int parse_slab_opt(const struct option *opt __maybe_unused, | ||
1030 | const char *arg __maybe_unused, | ||
1031 | int unset __maybe_unused) | ||
1032 | { | ||
1033 | kmem_slab = (kmem_page + 1); | ||
1034 | return 0; | ||
1035 | } | ||
1036 | |||
1037 | static int parse_page_opt(const struct option *opt __maybe_unused, | ||
1038 | const char *arg __maybe_unused, | ||
1039 | int unset __maybe_unused) | ||
1040 | { | ||
1041 | kmem_page = (kmem_slab + 1); | ||
1042 | return 0; | ||
1043 | } | ||
1044 | |||
615 | static int parse_line_opt(const struct option *opt __maybe_unused, | 1045 | static int parse_line_opt(const struct option *opt __maybe_unused, |
616 | const char *arg, int unset __maybe_unused) | 1046 | const char *arg, int unset __maybe_unused) |
617 | { | 1047 | { |
@@ -634,6 +1064,8 @@ static int __cmd_record(int argc, const char **argv) | |||
634 | { | 1064 | { |
635 | const char * const record_args[] = { | 1065 | const char * const record_args[] = { |
636 | "record", "-a", "-R", "-c", "1", | 1066 | "record", "-a", "-R", "-c", "1", |
1067 | }; | ||
1068 | const char * const slab_events[] = { | ||
637 | "-e", "kmem:kmalloc", | 1069 | "-e", "kmem:kmalloc", |
638 | "-e", "kmem:kmalloc_node", | 1070 | "-e", "kmem:kmalloc_node", |
639 | "-e", "kmem:kfree", | 1071 | "-e", "kmem:kfree", |
@@ -641,10 +1073,19 @@ static int __cmd_record(int argc, const char **argv) | |||
641 | "-e", "kmem:kmem_cache_alloc_node", | 1073 | "-e", "kmem:kmem_cache_alloc_node", |
642 | "-e", "kmem:kmem_cache_free", | 1074 | "-e", "kmem:kmem_cache_free", |
643 | }; | 1075 | }; |
1076 | const char * const page_events[] = { | ||
1077 | "-e", "kmem:mm_page_alloc", | ||
1078 | "-e", "kmem:mm_page_free", | ||
1079 | }; | ||
644 | unsigned int rec_argc, i, j; | 1080 | unsigned int rec_argc, i, j; |
645 | const char **rec_argv; | 1081 | const char **rec_argv; |
646 | 1082 | ||
647 | rec_argc = ARRAY_SIZE(record_args) + argc - 1; | 1083 | rec_argc = ARRAY_SIZE(record_args) + argc - 1; |
1084 | if (kmem_slab) | ||
1085 | rec_argc += ARRAY_SIZE(slab_events); | ||
1086 | if (kmem_page) | ||
1087 | rec_argc += ARRAY_SIZE(page_events); | ||
1088 | |||
648 | rec_argv = calloc(rec_argc + 1, sizeof(char *)); | 1089 | rec_argv = calloc(rec_argc + 1, sizeof(char *)); |
649 | 1090 | ||
650 | if (rec_argv == NULL) | 1091 | if (rec_argv == NULL) |
@@ -653,6 +1094,15 @@ static int __cmd_record(int argc, const char **argv) | |||
653 | for (i = 0; i < ARRAY_SIZE(record_args); i++) | 1094 | for (i = 0; i < ARRAY_SIZE(record_args); i++) |
654 | rec_argv[i] = strdup(record_args[i]); | 1095 | rec_argv[i] = strdup(record_args[i]); |
655 | 1096 | ||
1097 | if (kmem_slab) { | ||
1098 | for (j = 0; j < ARRAY_SIZE(slab_events); j++, i++) | ||
1099 | rec_argv[i] = strdup(slab_events[j]); | ||
1100 | } | ||
1101 | if (kmem_page) { | ||
1102 | for (j = 0; j < ARRAY_SIZE(page_events); j++, i++) | ||
1103 | rec_argv[i] = strdup(page_events[j]); | ||
1104 | } | ||
1105 | |||
656 | for (j = 1; j < (unsigned int)argc; j++, i++) | 1106 | for (j = 1; j < (unsigned int)argc; j++, i++) |
657 | rec_argv[i] = argv[j]; | 1107 | rec_argv[i] = argv[j]; |
658 | 1108 | ||
@@ -679,6 +1129,10 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) | |||
679 | OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt), | 1129 | OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt), |
680 | OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"), | 1130 | OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"), |
681 | OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), | 1131 | OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), |
1132 | OPT_CALLBACK_NOOPT(0, "slab", NULL, NULL, "Analyze slab allocator", | ||
1133 | parse_slab_opt), | ||
1134 | OPT_CALLBACK_NOOPT(0, "page", NULL, NULL, "Analyze page allocator", | ||
1135 | parse_page_opt), | ||
682 | OPT_END() | 1136 | OPT_END() |
683 | }; | 1137 | }; |
684 | const char *const kmem_subcommands[] = { "record", "stat", NULL }; | 1138 | const char *const kmem_subcommands[] = { "record", "stat", NULL }; |
@@ -695,6 +1149,9 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) | |||
695 | if (!argc) | 1149 | if (!argc) |
696 | usage_with_options(kmem_usage, kmem_options); | 1150 | usage_with_options(kmem_usage, kmem_options); |
697 | 1151 | ||
1152 | if (kmem_slab == 0 && kmem_page == 0) | ||
1153 | kmem_slab = 1; /* for backward compatibility */ | ||
1154 | |||
698 | if (!strncmp(argv[0], "rec", 3)) { | 1155 | if (!strncmp(argv[0], "rec", 3)) { |
699 | symbol__init(NULL); | 1156 | symbol__init(NULL); |
700 | return __cmd_record(argc, argv); | 1157 | return __cmd_record(argc, argv); |
@@ -706,6 +1163,17 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) | |||
706 | if (session == NULL) | 1163 | if (session == NULL) |
707 | return -1; | 1164 | return -1; |
708 | 1165 | ||
1166 | if (kmem_page) { | ||
1167 | struct perf_evsel *evsel = perf_evlist__first(session->evlist); | ||
1168 | |||
1169 | if (evsel == NULL || evsel->tp_format == NULL) { | ||
1170 | pr_err("invalid event found.. aborting\n"); | ||
1171 | return -1; | ||
1172 | } | ||
1173 | |||
1174 | kmem_page_size = pevent_get_page_size(evsel->tp_format->pevent); | ||
1175 | } | ||
1176 | |||
709 | symbol__init(&session->header.env); | 1177 | symbol__init(&session->header.env); |
710 | 1178 | ||
711 | if (!strcmp(argv[0], "stat")) { | 1179 | if (!strcmp(argv[0], "stat")) { |
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 30545ce2c712..d8bb616ff57c 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c | |||
@@ -332,6 +332,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo, | |||
332 | else { | 332 | else { |
333 | result->offset += pp->offset; | 333 | result->offset += pp->offset; |
334 | result->line += pp->line; | 334 | result->line += pp->line; |
335 | result->retprobe = pp->retprobe; | ||
335 | ret = 0; | 336 | ret = 0; |
336 | } | 337 | } |
337 | 338 | ||
@@ -654,65 +655,6 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, | |||
654 | return ntevs; | 655 | return ntevs; |
655 | } | 656 | } |
656 | 657 | ||
657 | /* | ||
658 | * Find a src file from a DWARF tag path. Prepend optional source path prefix | ||
659 | * and chop off leading directories that do not exist. Result is passed back as | ||
660 | * a newly allocated path on success. | ||
661 | * Return 0 if file was found and readable, -errno otherwise. | ||
662 | */ | ||
663 | static int get_real_path(const char *raw_path, const char *comp_dir, | ||
664 | char **new_path) | ||
665 | { | ||
666 | const char *prefix = symbol_conf.source_prefix; | ||
667 | |||
668 | if (!prefix) { | ||
669 | if (raw_path[0] != '/' && comp_dir) | ||
670 | /* If not an absolute path, try to use comp_dir */ | ||
671 | prefix = comp_dir; | ||
672 | else { | ||
673 | if (access(raw_path, R_OK) == 0) { | ||
674 | *new_path = strdup(raw_path); | ||
675 | return *new_path ? 0 : -ENOMEM; | ||
676 | } else | ||
677 | return -errno; | ||
678 | } | ||
679 | } | ||
680 | |||
681 | *new_path = malloc((strlen(prefix) + strlen(raw_path) + 2)); | ||
682 | if (!*new_path) | ||
683 | return -ENOMEM; | ||
684 | |||
685 | for (;;) { | ||
686 | sprintf(*new_path, "%s/%s", prefix, raw_path); | ||
687 | |||
688 | if (access(*new_path, R_OK) == 0) | ||
689 | return 0; | ||
690 | |||
691 | if (!symbol_conf.source_prefix) { | ||
692 | /* In case of searching comp_dir, don't retry */ | ||
693 | zfree(new_path); | ||
694 | return -errno; | ||
695 | } | ||
696 | |||
697 | switch (errno) { | ||
698 | case ENAMETOOLONG: | ||
699 | case ENOENT: | ||
700 | case EROFS: | ||
701 | case EFAULT: | ||
702 | raw_path = strchr(++raw_path, '/'); | ||
703 | if (!raw_path) { | ||
704 | zfree(new_path); | ||
705 | return -ENOENT; | ||
706 | } | ||
707 | continue; | ||
708 | |||
709 | default: | ||
710 | zfree(new_path); | ||
711 | return -errno; | ||
712 | } | ||
713 | } | ||
714 | } | ||
715 | |||
716 | #define LINEBUF_SIZE 256 | 658 | #define LINEBUF_SIZE 256 |
717 | #define NR_ADDITIONAL_LINES 2 | 659 | #define NR_ADDITIONAL_LINES 2 |
718 | 660 | ||
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index e3074230f236..b5bf9d5efeaf 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c | |||
@@ -855,11 +855,22 @@ static int probe_point_lazy_walker(const char *fname, int lineno, | |||
855 | static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) | 855 | static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) |
856 | { | 856 | { |
857 | int ret = 0; | 857 | int ret = 0; |
858 | char *fpath; | ||
858 | 859 | ||
859 | if (intlist__empty(pf->lcache)) { | 860 | if (intlist__empty(pf->lcache)) { |
861 | const char *comp_dir; | ||
862 | |||
863 | comp_dir = cu_get_comp_dir(&pf->cu_die); | ||
864 | ret = get_real_path(pf->fname, comp_dir, &fpath); | ||
865 | if (ret < 0) { | ||
866 | pr_warning("Failed to find source file path.\n"); | ||
867 | return ret; | ||
868 | } | ||
869 | |||
860 | /* Matching lazy line pattern */ | 870 | /* Matching lazy line pattern */ |
861 | ret = find_lazy_match_lines(pf->lcache, pf->fname, | 871 | ret = find_lazy_match_lines(pf->lcache, fpath, |
862 | pf->pev->point.lazy_line); | 872 | pf->pev->point.lazy_line); |
873 | free(fpath); | ||
863 | if (ret <= 0) | 874 | if (ret <= 0) |
864 | return ret; | 875 | return ret; |
865 | } | 876 | } |
@@ -1055,7 +1066,7 @@ static int debuginfo__find_probes(struct debuginfo *dbg, | |||
1055 | if (pp->function) | 1066 | if (pp->function) |
1056 | ret = find_probe_point_by_func(pf); | 1067 | ret = find_probe_point_by_func(pf); |
1057 | else if (pp->lazy_line) | 1068 | else if (pp->lazy_line) |
1058 | ret = find_probe_point_lazy(NULL, pf); | 1069 | ret = find_probe_point_lazy(&pf->cu_die, pf); |
1059 | else { | 1070 | else { |
1060 | pf->lno = pp->line; | 1071 | pf->lno = pp->line; |
1061 | ret = find_probe_point_by_line(pf); | 1072 | ret = find_probe_point_by_line(pf); |
@@ -1622,3 +1633,61 @@ found: | |||
1622 | return (ret < 0) ? ret : lf.found; | 1633 | return (ret < 0) ? ret : lf.found; |
1623 | } | 1634 | } |
1624 | 1635 | ||
1636 | /* | ||
1637 | * Find a src file from a DWARF tag path. Prepend optional source path prefix | ||
1638 | * and chop off leading directories that do not exist. Result is passed back as | ||
1639 | * a newly allocated path on success. | ||
1640 | * Return 0 if file was found and readable, -errno otherwise. | ||
1641 | */ | ||
1642 | int get_real_path(const char *raw_path, const char *comp_dir, | ||
1643 | char **new_path) | ||
1644 | { | ||
1645 | const char *prefix = symbol_conf.source_prefix; | ||
1646 | |||
1647 | if (!prefix) { | ||
1648 | if (raw_path[0] != '/' && comp_dir) | ||
1649 | /* If not an absolute path, try to use comp_dir */ | ||
1650 | prefix = comp_dir; | ||
1651 | else { | ||
1652 | if (access(raw_path, R_OK) == 0) { | ||
1653 | *new_path = strdup(raw_path); | ||
1654 | return *new_path ? 0 : -ENOMEM; | ||
1655 | } else | ||
1656 | return -errno; | ||
1657 | } | ||
1658 | } | ||
1659 | |||
1660 | *new_path = malloc((strlen(prefix) + strlen(raw_path) + 2)); | ||
1661 | if (!*new_path) | ||
1662 | return -ENOMEM; | ||
1663 | |||
1664 | for (;;) { | ||
1665 | sprintf(*new_path, "%s/%s", prefix, raw_path); | ||
1666 | |||
1667 | if (access(*new_path, R_OK) == 0) | ||
1668 | return 0; | ||
1669 | |||
1670 | if (!symbol_conf.source_prefix) { | ||
1671 | /* In case of searching comp_dir, don't retry */ | ||
1672 | zfree(new_path); | ||
1673 | return -errno; | ||
1674 | } | ||
1675 | |||
1676 | switch (errno) { | ||
1677 | case ENAMETOOLONG: | ||
1678 | case ENOENT: | ||
1679 | case EROFS: | ||
1680 | case EFAULT: | ||
1681 | raw_path = strchr(++raw_path, '/'); | ||
1682 | if (!raw_path) { | ||
1683 | zfree(new_path); | ||
1684 | return -ENOENT; | ||
1685 | } | ||
1686 | continue; | ||
1687 | |||
1688 | default: | ||
1689 | zfree(new_path); | ||
1690 | return -errno; | ||
1691 | } | ||
1692 | } | ||
1693 | } | ||
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 92590b2c7e1c..ebf8c8c81453 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h | |||
@@ -55,6 +55,10 @@ extern int debuginfo__find_available_vars_at(struct debuginfo *dbg, | |||
55 | struct variable_list **vls, | 55 | struct variable_list **vls, |
56 | int max_points, bool externs); | 56 | int max_points, bool externs); |
57 | 57 | ||
58 | /* Find a src file from a DWARF tag path */ | ||
59 | int get_real_path(const char *raw_path, const char *comp_dir, | ||
60 | char **new_path); | ||
61 | |||
58 | struct probe_finder { | 62 | struct probe_finder { |
59 | struct perf_probe_event *pev; /* Target probe event */ | 63 | struct perf_probe_event *pev; /* Target probe event */ |
60 | 64 | ||