aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorDon Zickus <dzickus@redhat.com>2014-06-01 09:38:29 -0400
committerJiri Olsa <jolsa@kernel.org>2014-06-09 07:34:49 -0400
commit9b32ba71ba905b90610fc2aad77cb98a373c5624 (patch)
tree452c7fd4bd5d494d8b170fd5e840a5ae004e65ba /tools
parent2b1b71003ea809e619bd73e74dfc2a73069de66f (diff)
perf tools: Add dcacheline sort
In perf's 'mem-mode', one can get access to a whole bunch of details specific to a particular sample instruction. A bunch of those details relate to the data address. One interesting thing you can do with data addresses is to convert them into a unique cacheline they belong too. Organizing these data cachelines into similar groups and sorting them can reveal cache contention. This patch creates an alogorithm based on various sample details that can help group entries together into data cachelines and allows 'perf report' to sort on it. The algorithm relies on having proper mmap2 support in the kernel to help determine if the memory map the data address belongs to is private to a pid or globally shared. The alogortithm is as follows: o group cpumodes together o group entries with discovered maps together o sort on major, minor, inode and inode generation numbers o if userspace anon, then sort on pid o sort on cachelines based on data addresses The 'dcacheline' sort option in 'perf report' only works in 'mem-mode'. Sample output: # # Samples: 206 of event 'cpu/mem-loads/pp' # Total weight : 2534 # Sort order : dcacheline,pid # # Overhead Samples Data Cacheline Command: Pid # ........ ............ ...................................................................... .................. # 13.22% 1 [k] 0xffff88042f08ebc0 swapper: 0 9.27% 1 [k] 0xffff88082e8cea80 swapper: 0 3.59% 2 [k] 0xffffffff819ba180 swapper: 0 0.32% 1 [k] arch_trigger_all_cpu_backtrace_handler_na.23901+0xffffffffffffffe0 swapper: 0 0.32% 1 [k] timekeeper_seq+0xfffffffffffffff8 swapper: 0 Note: Added a '+1' to symlen size in hists__calc_col_len to prevent the next column from prematurely tabbing over and mis-aligning. Not sure what the problem is. Signed-off-by: Don Zickus <dzickus@redhat.com> Link: http://lkml.kernel.org/r/1401208087-181977-8-git-send-email-dzickus@redhat.com Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Diffstat (limited to 'tools')
-rw-r--r--tools/perf/Documentation/perf-report.txt3
-rw-r--r--tools/perf/util/hist.c2
-rw-r--r--tools/perf/util/hist.h1
-rw-r--r--tools/perf/util/sort.c107
-rw-r--r--tools/perf/util/sort.h1
5 files changed, 113 insertions, 1 deletions
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 00fbfb6d7f14..d2b59af62bc0 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -119,7 +119,7 @@ OPTIONS
119 119
120 If --mem-mode option is used, following sort keys are also available 120 If --mem-mode option is used, following sort keys are also available
121 (incompatible with --branch-stack): 121 (incompatible with --branch-stack):
122 symbol_daddr, dso_daddr, locked, tlb, mem, snoop. 122 symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline.
123 123
124 - symbol_daddr: name of data symbol being executed on at the time of sample 124 - symbol_daddr: name of data symbol being executed on at the time of sample
125 - dso_daddr: name of library or module containing the data being executed 125 - dso_daddr: name of library or module containing the data being executed
@@ -128,6 +128,7 @@ OPTIONS
128 - tlb: type of tlb access for the data at the time of sample 128 - tlb: type of tlb access for the data at the time of sample
129 - mem: type of memory access for the data at the time of sample 129 - mem: type of memory access for the data at the time of sample
130 - snoop: type of snoop (if any) for the data at the time of sample 130 - snoop: type of snoop (if any) for the data at the time of sample
131 - dcacheline: the cacheline the data address is on at the time of sample
131 132
132 And default sort keys are changed to local_weight, mem, sym, dso, 133 And default sort keys are changed to local_weight, mem, sym, dso,
133 symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'. 134 symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'.
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index d5f47a47c4bf..30df6187ee02 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -128,6 +128,8 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
128 + unresolved_col_width + 2; 128 + unresolved_col_width + 2;
129 hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, 129 hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL,
130 symlen); 130 symlen);
131 hists__new_col_len(hists, HISTC_MEM_DCACHELINE,
132 symlen + 1);
131 } else { 133 } else {
132 symlen = unresolved_col_width + 4 + 2; 134 symlen = unresolved_col_width + 4 + 2;
133 hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, 135 hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL,
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index d2bf03575d5f..742f49a85725 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -72,6 +72,7 @@ enum hist_column {
72 HISTC_MEM_TLB, 72 HISTC_MEM_TLB,
73 HISTC_MEM_LVL, 73 HISTC_MEM_LVL,
74 HISTC_MEM_SNOOP, 74 HISTC_MEM_SNOOP,
75 HISTC_MEM_DCACHELINE,
75 HISTC_TRANSACTION, 76 HISTC_TRANSACTION,
76 HISTC_NR_COLS, /* Last entry */ 77 HISTC_NR_COLS, /* Last entry */
77}; 78};
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 45512baaab67..1ec57dd82284 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1,3 +1,4 @@
1#include <sys/mman.h>
1#include "sort.h" 2#include "sort.h"
2#include "hist.h" 3#include "hist.h"
3#include "comm.h" 4#include "comm.h"
@@ -784,6 +785,104 @@ static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf,
784 return repsep_snprintf(bf, size, "%-*s", width, out); 785 return repsep_snprintf(bf, size, "%-*s", width, out);
785} 786}
786 787
788static inline u64 cl_address(u64 address)
789{
790 /* return the cacheline of the address */
791 return (address & ~(cacheline_size - 1));
792}
793
794static int64_t
795sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right)
796{
797 u64 l, r;
798 struct map *l_map, *r_map;
799
800 if (!left->mem_info) return -1;
801 if (!right->mem_info) return 1;
802
803 /* group event types together */
804 if (left->cpumode > right->cpumode) return -1;
805 if (left->cpumode < right->cpumode) return 1;
806
807 l_map = left->mem_info->daddr.map;
808 r_map = right->mem_info->daddr.map;
809
810 /* if both are NULL, jump to sort on al_addr instead */
811 if (!l_map && !r_map)
812 goto addr;
813
814 if (!l_map) return -1;
815 if (!r_map) return 1;
816
817 if (l_map->maj > r_map->maj) return -1;
818 if (l_map->maj < r_map->maj) return 1;
819
820 if (l_map->min > r_map->min) return -1;
821 if (l_map->min < r_map->min) return 1;
822
823 if (l_map->ino > r_map->ino) return -1;
824 if (l_map->ino < r_map->ino) return 1;
825
826 if (l_map->ino_generation > r_map->ino_generation) return -1;
827 if (l_map->ino_generation < r_map->ino_generation) return 1;
828
829 /*
830 * Addresses with no major/minor numbers are assumed to be
831 * anonymous in userspace. Sort those on pid then address.
832 *
833 * The kernel and non-zero major/minor mapped areas are
834 * assumed to be unity mapped. Sort those on address.
835 */
836
837 if ((left->cpumode != PERF_RECORD_MISC_KERNEL) &&
838 (!(l_map->flags & MAP_SHARED)) &&
839 !l_map->maj && !l_map->min && !l_map->ino &&
840 !l_map->ino_generation) {
841 /* userspace anonymous */
842
843 if (left->thread->pid_ > right->thread->pid_) return -1;
844 if (left->thread->pid_ < right->thread->pid_) return 1;
845 }
846
847addr:
848 /* al_addr does all the right addr - start + offset calculations */
849 l = cl_address(left->mem_info->daddr.al_addr);
850 r = cl_address(right->mem_info->daddr.al_addr);
851
852 if (l > r) return -1;
853 if (l < r) return 1;
854
855 return 0;
856}
857
858static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf,
859 size_t size, unsigned int width)
860{
861
862 uint64_t addr = 0;
863 struct map *map = NULL;
864 struct symbol *sym = NULL;
865 char level = he->level;
866
867 if (he->mem_info) {
868 addr = cl_address(he->mem_info->daddr.al_addr);
869 map = he->mem_info->daddr.map;
870 sym = he->mem_info->daddr.sym;
871
872 /* print [s] for shared data mmaps */
873 if ((he->cpumode != PERF_RECORD_MISC_KERNEL) &&
874 map && (map->type == MAP__VARIABLE) &&
875 (map->flags & MAP_SHARED) &&
876 (map->maj || map->min || map->ino ||
877 map->ino_generation))
878 level = 's';
879 else if (!map)
880 level = 'X';
881 }
882 return _hist_entry__sym_snprintf(map, sym, addr, level, bf, size,
883 width);
884}
885
787struct sort_entry sort_mispredict = { 886struct sort_entry sort_mispredict = {
788 .se_header = "Branch Mispredicted", 887 .se_header = "Branch Mispredicted",
789 .se_cmp = sort__mispredict_cmp, 888 .se_cmp = sort__mispredict_cmp,
@@ -876,6 +975,13 @@ struct sort_entry sort_mem_snoop = {
876 .se_width_idx = HISTC_MEM_SNOOP, 975 .se_width_idx = HISTC_MEM_SNOOP,
877}; 976};
878 977
978struct sort_entry sort_mem_dcacheline = {
979 .se_header = "Data Cacheline",
980 .se_cmp = sort__dcacheline_cmp,
981 .se_snprintf = hist_entry__dcacheline_snprintf,
982 .se_width_idx = HISTC_MEM_DCACHELINE,
983};
984
879static int64_t 985static int64_t
880sort__abort_cmp(struct hist_entry *left, struct hist_entry *right) 986sort__abort_cmp(struct hist_entry *left, struct hist_entry *right)
881{ 987{
@@ -1043,6 +1149,7 @@ static struct sort_dimension memory_sort_dimensions[] = {
1043 DIM(SORT_MEM_TLB, "tlb", sort_mem_tlb), 1149 DIM(SORT_MEM_TLB, "tlb", sort_mem_tlb),
1044 DIM(SORT_MEM_LVL, "mem", sort_mem_lvl), 1150 DIM(SORT_MEM_LVL, "mem", sort_mem_lvl),
1045 DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop), 1151 DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop),
1152 DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline),
1046}; 1153};
1047 1154
1048#undef DIM 1155#undef DIM
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 6de22f838854..041f0c9cea2b 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -186,6 +186,7 @@ enum sort_type {
186 SORT_MEM_TLB, 186 SORT_MEM_TLB,
187 SORT_MEM_LVL, 187 SORT_MEM_LVL,
188 SORT_MEM_SNOOP, 188 SORT_MEM_SNOOP,
189 SORT_MEM_DCACHELINE,
189}; 190};
190 191
191/* 192/*