aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWaiman Long <Waiman.Long@hp.com>2014-09-30 13:36:15 -0400
committerArnaldo Carvalho de Melo <acme@redhat.com>2014-10-01 13:39:57 -0400
commit4598a0a6d22fadfb7b37f2b44ee7fdcb24632fcf (patch)
treefa1e74148d31382b71588a7fbbc679f165e4a6cd
parent8fa7d87f91479f7124142ca4ad93a37b80f8c1c0 (diff)
perf symbols: Improve DSO long names lookup speed with rbtree
With workload that spawns and destroys many threads and processes, it was found that perf-mem could took a long time to post-process the perf data after the target workload had completed its operation. The performance bottleneck was found to be the lookup and insertion of the new DSO structures (thousands of them in this case). In a dual-socket Ivy-Bridge E7-4890 v2 machine (30-core, 60-thread), the perf profile below shows what perf was doing after the profiled AIM7 shared workload completed: - 83.94% perf libc-2.11.3.so [.] __strcmp_sse42 - __strcmp_sse42 - 99.82% map__new machine__process_mmap_event perf_session_deliver_event perf_session__process_event __perf_session__process_events cmd_record cmd_mem run_builtin main __libc_start_main - 13.17% perf perf [.] __dsos__findnew __dsos__findnew map__new machine__process_mmap_event perf_session_deliver_event perf_session__process_event __perf_session__process_events cmd_record cmd_mem run_builtin main __libc_start_main So about 97% of CPU times were spent in the map__new() function trying to insert new DSO entry into the DSO linked list. The whole post-processing step took about 9 minutes. The DSO structures are currently searched linearly. So the total processing time will be proportional to n^2. To overcome this performance problem, the DSO code is modified to also put the DSO structures in a RB tree sorted by its long name in additional to being in a simple linked list. With this change, the processing time will become proportional to n*log(n) which will be much quicker for large n. However, the short name will still be searched using the old linear searching method. With that patch in place, the same perf-mem post-processing step took less than 30 seconds to complete. Signed-off-by: Waiman Long <Waiman.Long@hp.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Don Zickus <dzickus@redhat.com> Cc: Douglas Hatch <doug.hatch@hp.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Scott J Norton <scott.norton@hp.com> Link: http://lkml.kernel.org/r/1412098575-27863-3-git-send-email-Waiman.Long@hp.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r--tools/perf/util/dso.c70
-rw-r--r--tools/perf/util/dso.h5
-rw-r--r--tools/perf/util/machine.c1
3 files changed, 71 insertions, 5 deletions
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 901a58fa3f22..0247acfdfaca 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -653,6 +653,65 @@ struct dso *dso__kernel_findnew(struct machine *machine, const char *name,
653 return dso; 653 return dso;
654} 654}
655 655
656/*
657 * Find a matching entry and/or link current entry to RB tree.
658 * Either one of the dso or name parameter must be non-NULL or the
659 * function will not work.
660 */
661static struct dso *dso__findlink_by_longname(struct rb_root *root,
662 struct dso *dso, const char *name)
663{
664 struct rb_node **p = &root->rb_node;
665 struct rb_node *parent = NULL;
666
667 if (!name)
668 name = dso->long_name;
669 /*
670 * Find node with the matching name
671 */
672 while (*p) {
673 struct dso *this = rb_entry(*p, struct dso, rb_node);
674 int rc = strcmp(name, this->long_name);
675
676 parent = *p;
677 if (rc == 0) {
678 /*
679 * In case the new DSO is a duplicate of an existing
680 * one, print an one-time warning & put the new entry
681 * at the end of the list of duplicates.
682 */
683 if (!dso || (dso == this))
684 return this; /* Find matching dso */
685 /*
686 * The core kernel DSOs may have duplicated long name.
687 * In this case, the short name should be different.
688 * Comparing the short names to differentiate the DSOs.
689 */
690 rc = strcmp(dso->short_name, this->short_name);
691 if (rc == 0) {
692 pr_err("Duplicated dso name: %s\n", name);
693 return NULL;
694 }
695 }
696 if (rc < 0)
697 p = &parent->rb_left;
698 else
699 p = &parent->rb_right;
700 }
701 if (dso) {
702 /* Add new node and rebalance tree */
703 rb_link_node(&dso->rb_node, parent, p);
704 rb_insert_color(&dso->rb_node, root);
705 }
706 return NULL;
707}
708
709static inline struct dso *
710dso__find_by_longname(const struct rb_root *root, const char *name)
711{
712 return dso__findlink_by_longname((struct rb_root *)root, NULL, name);
713}
714
656void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated) 715void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated)
657{ 716{
658 if (name == NULL) 717 if (name == NULL)
@@ -755,6 +814,7 @@ struct dso *dso__new(const char *name)
755 dso->a2l_fails = 1; 814 dso->a2l_fails = 1;
756 dso->kernel = DSO_TYPE_USER; 815 dso->kernel = DSO_TYPE_USER;
757 dso->needs_swap = DSO_SWAP__UNSET; 816 dso->needs_swap = DSO_SWAP__UNSET;
817 RB_CLEAR_NODE(&dso->rb_node);
758 INIT_LIST_HEAD(&dso->node); 818 INIT_LIST_HEAD(&dso->node);
759 INIT_LIST_HEAD(&dso->data.open_entry); 819 INIT_LIST_HEAD(&dso->data.open_entry);
760 } 820 }
@@ -765,6 +825,10 @@ struct dso *dso__new(const char *name)
765void dso__delete(struct dso *dso) 825void dso__delete(struct dso *dso)
766{ 826{
767 int i; 827 int i;
828
829 if (!RB_EMPTY_NODE(&dso->rb_node))
830 pr_err("DSO %s is still in rbtree when being deleted!\n",
831 dso->long_name);
768 for (i = 0; i < MAP__NR_TYPES; ++i) 832 for (i = 0; i < MAP__NR_TYPES; ++i)
769 symbols__delete(&dso->symbols[i]); 833 symbols__delete(&dso->symbols[i]);
770 834
@@ -854,6 +918,7 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
854void dsos__add(struct dsos *dsos, struct dso *dso) 918void dsos__add(struct dsos *dsos, struct dso *dso)
855{ 919{
856 list_add_tail(&dso->node, &dsos->head); 920 list_add_tail(&dso->node, &dsos->head);
921 dso__findlink_by_longname(&dsos->root, dso, NULL);
857} 922}
858 923
859struct dso *dsos__find(const struct dsos *dsos, const char *name, 924struct dso *dsos__find(const struct dsos *dsos, const char *name,
@@ -867,10 +932,7 @@ struct dso *dsos__find(const struct dsos *dsos, const char *name,
867 return pos; 932 return pos;
868 return NULL; 933 return NULL;
869 } 934 }
870 list_for_each_entry(pos, &dsos->head, node) 935 return dso__find_by_longname(&dsos->root, name);
871 if (strcmp(pos->long_name, name) == 0)
872 return pos;
873 return NULL;
874} 936}
875 937
876struct dso *__dsos__findnew(struct dsos *dsos, const char *name) 938struct dso *__dsos__findnew(struct dsos *dsos, const char *name)
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index b63dc98ad71d..acb651acc7fd 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -91,14 +91,17 @@ struct dso_cache {
91}; 91};
92 92
93/* 93/*
94 * DSOs are put into a list for fast iteration. 94 * DSOs are put into both a list for fast iteration and rbtree for fast
95 * long name lookup.
95 */ 96 */
96struct dsos { 97struct dsos {
97 struct list_head head; 98 struct list_head head;
99 struct rb_root root; /* rbtree root sorted by long name */
98}; 100};
99 101
100struct dso { 102struct dso {
101 struct list_head node; 103 struct list_head node;
104 struct rb_node rb_node; /* rbtree node sorted by long name */
102 struct rb_root symbols[MAP__NR_TYPES]; 105 struct rb_root symbols[MAP__NR_TYPES];
103 struct rb_root symbol_names[MAP__NR_TYPES]; 106 struct rb_root symbol_names[MAP__NR_TYPES];
104 void *a2l; 107 void *a2l;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 49a75ec4c47b..b7d477fbda02 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -77,6 +77,7 @@ static void dsos__delete(struct dsos *dsos)
77 struct dso *pos, *n; 77 struct dso *pos, *n;
78 78
79 list_for_each_entry_safe(pos, n, &dsos->head, node) { 79 list_for_each_entry_safe(pos, n, &dsos->head, node) {
80 RB_CLEAR_NODE(&pos->rb_node);
80 list_del(&pos->node); 81 list_del(&pos->node);
81 dso__delete(pos); 82 dso__delete(pos);
82 } 83 }