Diffstat (limited to 'tools/perf/builtin-kmem.c')
-rw-r--r--	tools/perf/builtin-kmem.c	234
1 file changed, 120 insertions, 114 deletions
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index ce35015f2dc6..bc912c68f49a 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -1,6 +1,8 @@
 #include "builtin.h"
 #include "perf.h"
 
+#include "util/evlist.h"
+#include "util/evsel.h"
 #include "util/util.h"
 #include "util/cache.h"
 #include "util/symbol.h"
@@ -57,46 +59,52 @@ static unsigned long nr_allocs, nr_cross_allocs;
 
 #define PATH_SYS_NODE "/sys/devices/system/node"
 
-struct perf_kmem {
-	struct perf_tool    tool;
-	struct perf_session *session;
-};
-
-static void init_cpunode_map(void)
+static int init_cpunode_map(void)
 {
 	FILE *fp;
-	int i;
+	int i, err = -1;
 
 	fp = fopen("/sys/devices/system/cpu/kernel_max", "r");
 	if (!fp) {
 		max_cpu_num = 4096;
-		return;
+		return 0;
+	}
+
+	if (fscanf(fp, "%d", &max_cpu_num) < 1) {
+		pr_err("Failed to read 'kernel_max' from sysfs");
+		goto out_close;
 	}
 
-	if (fscanf(fp, "%d", &max_cpu_num) < 1)
-		die("Failed to read 'kernel_max' from sysfs");
 	max_cpu_num++;
 
 	cpunode_map = calloc(max_cpu_num, sizeof(int));
-	if (!cpunode_map)
-		die("calloc");
+	if (!cpunode_map) {
+		pr_err("%s: calloc failed\n", __func__);
+		goto out_close;
+	}
+
 	for (i = 0; i < max_cpu_num; i++)
 		cpunode_map[i] = -1;
+
+	err = 0;
+out_close:
 	fclose(fp);
+	return err;
 }
 
-static void setup_cpunode_map(void)
+static int setup_cpunode_map(void)
 {
 	struct dirent *dent1, *dent2;
 	DIR *dir1, *dir2;
 	unsigned int cpu, mem;
 	char buf[PATH_MAX];
 
-	init_cpunode_map();
+	if (init_cpunode_map())
+		return -1;
 
 	dir1 = opendir(PATH_SYS_NODE);
 	if (!dir1)
-		return;
+		return -1;
 
 	while ((dent1 = readdir(dir1)) != NULL) {
 		if (dent1->d_type != DT_DIR ||
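[ Note: every error-path change in this patch follows the same conversion:
  die(), which printed a message and exited the whole process, becomes
  pr_err() plus an error return that each caller must check. A minimal,
  self-contained sketch of the pattern, mirroring the shape of the real
  init_cpunode_map() above (pr_err() here is a stand-in macro for perf's
  logging helper, and the names are illustrative, not the real code):

	#include <stdio.h>
	#include <stdlib.h>

	#define pr_err(...) fprintf(stderr, __VA_ARGS__)

	static int *cpunode_map;

	/* after the conversion: report and propagate instead of exiting */
	static int init_map(int nr_cpus)
	{
		cpunode_map = calloc(nr_cpus, sizeof(int));
		if (!cpunode_map) {
			pr_err("%s: calloc failed\n", __func__);
			return -1;	/* the caller decides how to recover */
		}
		return 0;
	}

	int main(void)
	{
		if (init_map(4096))
			return 1;
		free(cpunode_map);
		return 0;
	}

  This conversion is also what makes the out_close label above necessary:
  the function now has a single exit path that releases the FILE * on both
  success and failure. ]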
@@ -116,10 +124,11 @@ static void setup_cpunode_map(void)
 		closedir(dir2);
 	}
 	closedir(dir1);
+	return 0;
 }
 
-static void insert_alloc_stat(unsigned long call_site, unsigned long ptr,
+static int insert_alloc_stat(unsigned long call_site, unsigned long ptr,
 			      int bytes_req, int bytes_alloc, int cpu)
 {
 	struct rb_node **node = &root_alloc_stat.rb_node;
 	struct rb_node *parent = NULL;
@@ -143,8 +152,10 @@ static void insert_alloc_stat(unsigned long call_site, unsigned long ptr,
 		data->bytes_alloc += bytes_alloc;
 	} else {
 		data = malloc(sizeof(*data));
-		if (!data)
-			die("malloc");
+		if (!data) {
+			pr_err("%s: malloc failed\n", __func__);
+			return -1;
+		}
 		data->ptr = ptr;
 		data->pingpong = 0;
 		data->hit = 1;
@@ -156,9 +167,10 @@ static void insert_alloc_stat(unsigned long call_site, unsigned long ptr,
 	}
 	data->call_site = call_site;
 	data->alloc_cpu = cpu;
+	return 0;
 }
 
-static void insert_caller_stat(unsigned long call_site,
+static int insert_caller_stat(unsigned long call_site,
 			      int bytes_req, int bytes_alloc)
 {
 	struct rb_node **node = &root_caller_stat.rb_node;
@@ -183,8 +195,10 @@ static void insert_caller_stat(unsigned long call_site,
 		data->bytes_alloc += bytes_alloc;
 	} else {
 		data = malloc(sizeof(*data));
-		if (!data)
-			die("malloc");
+		if (!data) {
+			pr_err("%s: malloc failed\n", __func__);
+			return -1;
+		}
 		data->call_site = call_site;
 		data->pingpong = 0;
 		data->hit = 1;
@@ -194,39 +208,43 @@ static void insert_caller_stat(unsigned long call_site,
 		rb_link_node(&data->node, parent, node);
 		rb_insert_color(&data->node, &root_caller_stat);
 	}
+
+	return 0;
 }
 
-static void process_alloc_event(void *data,
-				struct event_format *event,
-				int cpu,
-				u64 timestamp __used,
-				struct thread *thread __used,
-				int node)
+static int perf_evsel__process_alloc_event(struct perf_evsel *evsel,
+					   struct perf_sample *sample)
 {
-	unsigned long call_site;
-	unsigned long ptr;
-	int bytes_req;
-	int bytes_alloc;
-	int node1, node2;
-
-	ptr = raw_field_value(event, "ptr", data);
-	call_site = raw_field_value(event, "call_site", data);
-	bytes_req = raw_field_value(event, "bytes_req", data);
-	bytes_alloc = raw_field_value(event, "bytes_alloc", data);
+	unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"),
+		      call_site = perf_evsel__intval(evsel, sample, "call_site");
+	int bytes_req = perf_evsel__intval(evsel, sample, "bytes_req"),
+	    bytes_alloc = perf_evsel__intval(evsel, sample, "bytes_alloc");
 
-	insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu);
-	insert_caller_stat(call_site, bytes_req, bytes_alloc);
+	if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, sample->cpu) ||
+	    insert_caller_stat(call_site, bytes_req, bytes_alloc))
+		return -1;
 
 	total_requested += bytes_req;
 	total_allocated += bytes_alloc;
 
-	if (node) {
-		node1 = cpunode_map[cpu];
-		node2 = raw_field_value(event, "node", data);
+	nr_allocs++;
+	return 0;
+}
+
+static int perf_evsel__process_alloc_node_event(struct perf_evsel *evsel,
+						struct perf_sample *sample)
+{
+	int ret = perf_evsel__process_alloc_event(evsel, sample);
+
+	if (!ret) {
+		int node1 = cpunode_map[sample->cpu],
+		    node2 = perf_evsel__intval(evsel, sample, "node");
+
 		if (node1 != node2)
 			nr_cross_allocs++;
 	}
-	nr_allocs++;
+
+	return ret;
 }
 
 static int ptr_cmp(struct alloc_stat *, struct alloc_stat *);
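[ Note: the handlers above stop parsing the raw event payload by hand.
  Where the old code pulled each field out of a void *data blob with a
  separately looked-up struct event_format:

	ptr = raw_field_value(event, "ptr", data);

  the new code asks the evsel itself, which already carries its
  tracepoint format:

	unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr");

  Both lines are taken from the hunk above. perf_evsel__intval() decodes
  the named field from sample->raw_data and returns it as a u64, which
  the handlers narrow to unsigned long/int for these particular fields. ]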
@@ -257,66 +275,37 @@ static struct alloc_stat *search_alloc_stat(unsigned long ptr,
 	return NULL;
 }
 
-static void process_free_event(void *data,
-			       struct event_format *event,
-			       int cpu,
-			       u64 timestamp __used,
-			       struct thread *thread __used)
+static int perf_evsel__process_free_event(struct perf_evsel *evsel,
+					  struct perf_sample *sample)
 {
-	unsigned long ptr;
+	unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr");
 	struct alloc_stat *s_alloc, *s_caller;
 
-	ptr = raw_field_value(event, "ptr", data);
-
 	s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
 	if (!s_alloc)
-		return;
+		return 0;
 
-	if (cpu != s_alloc->alloc_cpu) {
+	if ((short)sample->cpu != s_alloc->alloc_cpu) {
 		s_alloc->pingpong++;
 
 		s_caller = search_alloc_stat(0, s_alloc->call_site,
 					     &root_caller_stat, callsite_cmp);
-		assert(s_caller);
+		if (!s_caller)
+			return -1;
 		s_caller->pingpong++;
 	}
 	s_alloc->alloc_cpu = -1;
-}
 
-static void process_raw_event(struct perf_tool *tool,
-			      union perf_event *raw_event __used, void *data,
-			      int cpu, u64 timestamp, struct thread *thread)
-{
-	struct perf_kmem *kmem = container_of(tool, struct perf_kmem, tool);
-	struct event_format *event;
-	int type;
-
-	type = trace_parse_common_type(kmem->session->pevent, data);
-	event = pevent_find_event(kmem->session->pevent, type);
-
-	if (!strcmp(event->name, "kmalloc") ||
-	    !strcmp(event->name, "kmem_cache_alloc")) {
-		process_alloc_event(data, event, cpu, timestamp, thread, 0);
-		return;
-	}
-
-	if (!strcmp(event->name, "kmalloc_node") ||
-	    !strcmp(event->name, "kmem_cache_alloc_node")) {
-		process_alloc_event(data, event, cpu, timestamp, thread, 1);
-		return;
-	}
-
-	if (!strcmp(event->name, "kfree") ||
-	    !strcmp(event->name, "kmem_cache_free")) {
-		process_free_event(data, event, cpu, timestamp, thread);
-		return;
-	}
+	return 0;
 }
 
-static int process_sample_event(struct perf_tool *tool,
+typedef int (*tracepoint_handler)(struct perf_evsel *evsel,
+				  struct perf_sample *sample);
+
+static int process_sample_event(struct perf_tool *tool __maybe_unused,
 				union perf_event *event,
 				struct perf_sample *sample,
-				struct perf_evsel *evsel __used,
+				struct perf_evsel *evsel,
 				struct machine *machine)
 {
 	struct thread *thread = machine__findnew_thread(machine, event->ip.pid);
@@ -329,18 +318,18 @@ static int process_sample_event(struct perf_tool *tool,
 
 	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
 
-	process_raw_event(tool, event, sample->raw_data, sample->cpu,
-			  sample->time, thread);
+	if (evsel->handler.func != NULL) {
+		tracepoint_handler f = evsel->handler.func;
+		return f(evsel, sample);
+	}
 
 	return 0;
 }
 
-static struct perf_kmem perf_kmem = {
-	.tool = {
-		.sample			= process_sample_event,
-		.comm			= perf_event__process_comm,
-		.ordered_samples	= true,
-	},
+static struct perf_tool perf_kmem = {
+	.sample		 = process_sample_event,
+	.comm		 = perf_event__process_comm,
+	.ordered_samples = true,
 };
 
 static double fragmentation(unsigned long n_req, unsigned long n_alloc)
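[ Note: with the two hunks above, dispatch moves from a strcmp() chain
  over event names to a per-evsel function pointer: each evsel gets its
  handler assigned once at setup time, and process_sample_event() only
  does

	tracepoint_handler f = evsel->handler.func;
	return f(evsel, sample);

  (both lines quoted from the hunk above). Besides replacing a per-sample
  string-comparison walk with a single indirect call, this removes the
  need for the struct perf_kmem wrapper: nothing has to reach back into
  the session to look up the event format, so a plain struct perf_tool
  suffices, as the change to the perf_kmem definition shows. ]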
@@ -496,22 +485,32 @@ static int __cmd_kmem(void)
496{ 485{
497 int err = -EINVAL; 486 int err = -EINVAL;
498 struct perf_session *session; 487 struct perf_session *session;
499 488 const struct perf_evsel_str_handler kmem_tracepoints[] = {
500 session = perf_session__new(input_name, O_RDONLY, 0, false, 489 { "kmem:kmalloc", perf_evsel__process_alloc_event, },
501 &perf_kmem.tool); 490 { "kmem:kmem_cache_alloc", perf_evsel__process_alloc_event, },
491 { "kmem:kmalloc_node", perf_evsel__process_alloc_node_event, },
492 { "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, },
493 { "kmem:kfree", perf_evsel__process_free_event, },
494 { "kmem:kmem_cache_free", perf_evsel__process_free_event, },
495 };
496
497 session = perf_session__new(input_name, O_RDONLY, 0, false, &perf_kmem);
502 if (session == NULL) 498 if (session == NULL)
503 return -ENOMEM; 499 return -ENOMEM;
504 500
505 perf_kmem.session = session;
506
507 if (perf_session__create_kernel_maps(session) < 0) 501 if (perf_session__create_kernel_maps(session) < 0)
508 goto out_delete; 502 goto out_delete;
509 503
510 if (!perf_session__has_traces(session, "kmem record")) 504 if (!perf_session__has_traces(session, "kmem record"))
511 goto out_delete; 505 goto out_delete;
512 506
507 if (perf_session__set_tracepoints_handlers(session, kmem_tracepoints)) {
508 pr_err("Initializing perf session tracepoint handlers failed\n");
509 return -1;
510 }
511
513 setup_pager(); 512 setup_pager();
514 err = perf_session__process_events(session, &perf_kmem.tool); 513 err = perf_session__process_events(session, &perf_kmem);
515 if (err != 0) 514 if (err != 0)
516 goto out_delete; 515 goto out_delete;
517 sort_result(); 516 sort_result();
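[ Note: perf_session__set_tracepoints_handlers() matches each
  "subsystem:event" string in the table against the evsels found in the
  perf.data file and stores the function pointer in evsel->handler.func,
  where process_sample_event() picks it up. Handling one more tracepoint
  is now local to this table. A hypothetical extension, purely to show
  the shape: the tracepoint kmem:mm_page_alloc, its "order" field, the
  handler name, and both counters below are illustrative and not part of
  this patch:

	static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
							struct perf_sample *sample)
	{
		/* decode a field by name, as the real handlers above do */
		unsigned int order = perf_evsel__intval(evsel, sample, "order");

		nr_page_allocs++;		/* hypothetical counter */
		total_page_order += order;	/* hypothetical counter */
		return 0;
	}

  plus one more row in kmem_tracepoints[]:

	{ "kmem:mm_page_alloc",	perf_evsel__process_page_alloc_event, },

  No change to process_sample_event() or to the dispatch path is needed. ]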
@@ -635,8 +634,10 @@ static int sort_dimension__add(const char *tok, struct list_head *list)
 	for (i = 0; i < NUM_AVAIL_SORTS; i++) {
 		if (!strcmp(avail_sorts[i]->name, tok)) {
 			sort = malloc(sizeof(*sort));
-			if (!sort)
-				die("malloc");
+			if (!sort) {
+				pr_err("%s: malloc failed\n", __func__);
+				return -1;
+			}
 			memcpy(sort, avail_sorts[i], sizeof(*sort));
 			list_add_tail(&sort->list, list);
 			return 0;
@@ -651,8 +652,10 @@ static int setup_sorting(struct list_head *sort_list, const char *arg)
 	char *tok;
 	char *str = strdup(arg);
 
-	if (!str)
-		die("strdup");
+	if (!str) {
+		pr_err("%s: strdup failed\n", __func__);
+		return -1;
+	}
 
 	while (true) {
 		tok = strsep(&str, ",");
@@ -669,8 +672,8 @@ static int setup_sorting(struct list_head *sort_list, const char *arg)
 	return 0;
 }
 
-static int parse_sort_opt(const struct option *opt __used,
-			  const char *arg, int unset __used)
+static int parse_sort_opt(const struct option *opt __maybe_unused,
+			  const char *arg, int unset __maybe_unused)
 {
 	if (!arg)
 		return -1;
@@ -683,22 +686,24 @@ static int parse_sort_opt(const struct option *opt __used,
 	return 0;
 }
 
-static int parse_caller_opt(const struct option *opt __used,
-			  const char *arg __used, int unset __used)
+static int parse_caller_opt(const struct option *opt __maybe_unused,
+			    const char *arg __maybe_unused,
+			    int unset __maybe_unused)
 {
 	caller_flag = (alloc_flag + 1);
 	return 0;
 }
 
-static int parse_alloc_opt(const struct option *opt __used,
-			  const char *arg __used, int unset __used)
+static int parse_alloc_opt(const struct option *opt __maybe_unused,
+			   const char *arg __maybe_unused,
+			   int unset __maybe_unused)
 {
 	alloc_flag = (caller_flag + 1);
 	return 0;
 }
 
-static int parse_line_opt(const struct option *opt __used,
-			  const char *arg, int unset __used)
+static int parse_line_opt(const struct option *opt __maybe_unused,
+			  const char *arg, int unset __maybe_unused)
 {
 	int lines;
 
@@ -768,7 +773,7 @@ static int __cmd_record(int argc, const char **argv)
 	return cmd_record(i, rec_argv, NULL);
 }
 
-int cmd_kmem(int argc, const char **argv, const char *prefix __used)
+int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	argc = parse_options(argc, argv, kmem_options, kmem_usage, 0);
 
@@ -780,7 +785,8 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __used)
 	if (!strncmp(argv[0], "rec", 3)) {
 		return __cmd_record(argc, argv);
 	} else if (!strcmp(argv[0], "stat")) {
-		setup_cpunode_map();
+		if (setup_cpunode_map())
+			return -1;
 
 		if (list_empty(&caller_sort))
 			setup_sorting(&caller_sort, default_sort_order);
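[ Note: the net effect of the return-value plumbing is visible here at
  the top level: a failure in setup_cpunode_map(), and through it in
  init_cpunode_map(), now makes the "stat" subcommand return an error to
  cmd_kmem()'s caller, so the tool exits with a nonzero status instead of
  die() terminating it partway through processing. ]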