Diffstat (limited to 'tools/perf/builtin-kmem.c')
 tools/perf/builtin-kmem.c | 234
 1 file changed, 120 insertions(+), 114 deletions(-)
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index ce35015f2dc6..bc912c68f49a 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -1,6 +1,8 @@
 #include "builtin.h"
 #include "perf.h"
 
+#include "util/evlist.h"
+#include "util/evsel.h"
 #include "util/util.h"
 #include "util/cache.h"
 #include "util/symbol.h"
@@ -57,46 +59,52 @@ static unsigned long nr_allocs, nr_cross_allocs;
 
 #define PATH_SYS_NODE "/sys/devices/system/node"
 
-struct perf_kmem {
-        struct perf_tool tool;
-        struct perf_session *session;
-};
-
-static void init_cpunode_map(void)
+static int init_cpunode_map(void)
 {
         FILE *fp;
-        int i;
+        int i, err = -1;
 
         fp = fopen("/sys/devices/system/cpu/kernel_max", "r");
         if (!fp) {
                 max_cpu_num = 4096;
-                return;
+                return 0;
+        }
+
+        if (fscanf(fp, "%d", &max_cpu_num) < 1) {
+                pr_err("Failed to read 'kernel_max' from sysfs");
+                goto out_close;
         }
 
-        if (fscanf(fp, "%d", &max_cpu_num) < 1)
-                die("Failed to read 'kernel_max' from sysfs");
         max_cpu_num++;
 
         cpunode_map = calloc(max_cpu_num, sizeof(int));
-        if (!cpunode_map)
-                die("calloc");
+        if (!cpunode_map) {
+                pr_err("%s: calloc failed\n", __func__);
+                goto out_close;
+        }
+
         for (i = 0; i < max_cpu_num; i++)
                 cpunode_map[i] = -1;
+
+        err = 0;
+out_close:
         fclose(fp);
+        return err;
 }
 
-static void setup_cpunode_map(void)
+static int setup_cpunode_map(void)
 {
         struct dirent *dent1, *dent2;
         DIR *dir1, *dir2;
         unsigned int cpu, mem;
         char buf[PATH_MAX];
 
-        init_cpunode_map();
+        if (init_cpunode_map())
+                return -1;
 
         dir1 = opendir(PATH_SYS_NODE);
         if (!dir1)
-                return;
+                return -1;
 
         while ((dent1 = readdir(dir1)) != NULL) {
                 if (dent1->d_type != DT_DIR ||
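
The init_cpunode_map() conversion above swaps die() for an error return with a single cleanup label, so the FILE handle is closed on both the success and failure paths. A minimal standalone sketch of that goto-cleanup idiom (the read_kernel_max() wrapper is illustrative, not part of the patch):

    #include <stdio.h>

    /* Returns 0 on success, -1 on failure; fp is closed on every path. */
    static int read_kernel_max(int *value)
    {
            int err = -1;
            FILE *fp = fopen("/sys/devices/system/cpu/kernel_max", "r");

            if (!fp)
                    return -1;

            if (fscanf(fp, "%d", value) < 1)
                    goto out_close;         /* parse error: fall through to cleanup */

            err = 0;
    out_close:
            fclose(fp);
            return err;
    }

    int main(void)
    {
            int max;

            if (read_kernel_max(&max) == 0)
                    printf("kernel_max: %d\n", max);
            return 0;
    }

The caller can now decide whether a missing sysfs file is fatal, which die() never allowed.
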
@@ -116,10 +124,11 @@ static void setup_cpunode_map(void)
                 closedir(dir2);
         }
         closedir(dir1);
+        return 0;
 }
 
-static void insert_alloc_stat(unsigned long call_site, unsigned long ptr,
-                              int bytes_req, int bytes_alloc, int cpu)
+static int insert_alloc_stat(unsigned long call_site, unsigned long ptr,
+                             int bytes_req, int bytes_alloc, int cpu)
 {
         struct rb_node **node = &root_alloc_stat.rb_node;
         struct rb_node *parent = NULL;
@@ -143,8 +152,10 @@ static void insert_alloc_stat(unsigned long call_site, unsigned long ptr,
                 data->bytes_alloc += bytes_alloc;
         } else {
                 data = malloc(sizeof(*data));
-                if (!data)
-                        die("malloc");
+                if (!data) {
+                        pr_err("%s: malloc failed\n", __func__);
+                        return -1;
+                }
                 data->ptr = ptr;
                 data->pingpong = 0;
                 data->hit = 1;
@@ -156,9 +167,10 @@ static void insert_alloc_stat(unsigned long call_site, unsigned long ptr,
         }
         data->call_site = call_site;
         data->alloc_cpu = cpu;
+        return 0;
 }
 
-static void insert_caller_stat(unsigned long call_site,
+static int insert_caller_stat(unsigned long call_site,
                               int bytes_req, int bytes_alloc)
 {
         struct rb_node **node = &root_caller_stat.rb_node;
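
The context lines show the usual rbtree insertion walk that both insert_alloc_stat() and insert_caller_stat() keep using: descend from the root while tracking the parent link, then splice with rb_link_node() and rebalance with rb_insert_color(). A skeleton of that idiom over a hypothetical my_stat type (assumes the kernel's rbtree API from <linux/rbtree.h>; the key field stands in for the real comparison):

    struct my_stat {
            struct rb_node node;
            unsigned long key;
    };

    static void stat_insert(struct rb_root *root, struct my_stat *data)
    {
            struct rb_node **p = &root->rb_node;
            struct rb_node *parent = NULL;

            while (*p) {
                    struct my_stat *cur = rb_entry(*p, struct my_stat, node);

                    parent = *p;
                    if (data->key < cur->key)
                            p = &(*p)->rb_left;
                    else
                            p = &(*p)->rb_right;
            }

            rb_link_node(&data->node, parent, p);   /* link under parent */
            rb_insert_color(&data->node, root);     /* rebalance/recolor */
    }
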
@@ -183,8 +195,10 @@ static void insert_caller_stat(unsigned long call_site,
                 data->bytes_alloc += bytes_alloc;
         } else {
                 data = malloc(sizeof(*data));
-                if (!data)
-                        die("malloc");
+                if (!data) {
+                        pr_err("%s: malloc failed\n", __func__);
+                        return -1;
+                }
                 data->call_site = call_site;
                 data->pingpong = 0;
                 data->hit = 1;
@@ -194,39 +208,43 @@ static void insert_caller_stat(unsigned long call_site,
                 rb_link_node(&data->node, parent, node);
                 rb_insert_color(&data->node, &root_caller_stat);
         }
+
+        return 0;
 }
 
-static void process_alloc_event(void *data,
-                                struct event_format *event,
-                                int cpu,
-                                u64 timestamp __used,
-                                struct thread *thread __used,
-                                int node)
+static int perf_evsel__process_alloc_event(struct perf_evsel *evsel,
+                                           struct perf_sample *sample)
 {
-        unsigned long call_site;
-        unsigned long ptr;
-        int bytes_req;
-        int bytes_alloc;
-        int node1, node2;
-
-        ptr = raw_field_value(event, "ptr", data);
-        call_site = raw_field_value(event, "call_site", data);
-        bytes_req = raw_field_value(event, "bytes_req", data);
-        bytes_alloc = raw_field_value(event, "bytes_alloc", data);
+        unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"),
+                      call_site = perf_evsel__intval(evsel, sample, "call_site");
+        int bytes_req = perf_evsel__intval(evsel, sample, "bytes_req"),
+            bytes_alloc = perf_evsel__intval(evsel, sample, "bytes_alloc");
 
-        insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu);
-        insert_caller_stat(call_site, bytes_req, bytes_alloc);
+        if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, sample->cpu) ||
+            insert_caller_stat(call_site, bytes_req, bytes_alloc))
+                return -1;
 
         total_requested += bytes_req;
         total_allocated += bytes_alloc;
 
-        if (node) {
-                node1 = cpunode_map[cpu];
-                node2 = raw_field_value(event, "node", data);
+        nr_allocs++;
+        return 0;
+}
+
+static int perf_evsel__process_alloc_node_event(struct perf_evsel *evsel,
+                                                struct perf_sample *sample)
+{
+        int ret = perf_evsel__process_alloc_event(evsel, sample);
+
+        if (!ret) {
+                int node1 = cpunode_map[sample->cpu],
+                    node2 = perf_evsel__intval(evsel, sample, "node");
+
                 if (node1 != node2)
                         nr_cross_allocs++;
         }
-        nr_allocs++;
+
+        return ret;
 }
 
 static int ptr_cmp(struct alloc_stat *, struct alloc_stat *);
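
Two shape changes in the hunk above: tracepoint fields are now read with perf_evsel__intval(evsel, sample, "<field>") instead of raw_field_value() on the raw buffer, and the NUMA-aware handler is expressed by composition, calling the common alloc handler and doing its extra node check only on success. The composition pattern in isolation (hypothetical handle_* names, not the patch's functions):

    /* Common bookkeeping shared by all allocation events. */
    static int handle_alloc(void *sample)
    {
            (void)sample;
            return 0;
    }

    /* The node-aware variant runs the shared path first and adds its
     * extra accounting only when that path succeeded. */
    static int handle_alloc_node(void *sample)
    {
            int ret = handle_alloc(sample);

            if (!ret) {
                    /* per-node (NUMA) accounting would go here */
            }
            return ret;
    }

This removes the old 'int node' flag parameter: each variant is its own entry point, which is what makes the per-tracepoint handler table possible later in the patch.
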
@@ -257,66 +275,37 @@ static struct alloc_stat *search_alloc_stat(unsigned long ptr,
         return NULL;
 }
 
-static void process_free_event(void *data,
-                               struct event_format *event,
-                               int cpu,
-                               u64 timestamp __used,
-                               struct thread *thread __used)
+static int perf_evsel__process_free_event(struct perf_evsel *evsel,
+                                          struct perf_sample *sample)
 {
-        unsigned long ptr;
+        unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr");
         struct alloc_stat *s_alloc, *s_caller;
 
-        ptr = raw_field_value(event, "ptr", data);
-
         s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
         if (!s_alloc)
-                return;
+                return 0;
 
-        if (cpu != s_alloc->alloc_cpu) {
+        if ((short)sample->cpu != s_alloc->alloc_cpu) {
                 s_alloc->pingpong++;
 
                 s_caller = search_alloc_stat(0, s_alloc->call_site,
                                              &root_caller_stat, callsite_cmp);
-                assert(s_caller);
+                if (!s_caller)
+                        return -1;
                 s_caller->pingpong++;
         }
         s_alloc->alloc_cpu = -1;
-}
 
-static void process_raw_event(struct perf_tool *tool,
-                              union perf_event *raw_event __used, void *data,
-                              int cpu, u64 timestamp, struct thread *thread)
-{
-        struct perf_kmem *kmem = container_of(tool, struct perf_kmem, tool);
-        struct event_format *event;
-        int type;
-
-        type = trace_parse_common_type(kmem->session->pevent, data);
-        event = pevent_find_event(kmem->session->pevent, type);
-
-        if (!strcmp(event->name, "kmalloc") ||
-            !strcmp(event->name, "kmem_cache_alloc")) {
-                process_alloc_event(data, event, cpu, timestamp, thread, 0);
-                return;
-        }
-
-        if (!strcmp(event->name, "kmalloc_node") ||
-            !strcmp(event->name, "kmem_cache_alloc_node")) {
-                process_alloc_event(data, event, cpu, timestamp, thread, 1);
-                return;
-        }
-
-        if (!strcmp(event->name, "kfree") ||
-            !strcmp(event->name, "kmem_cache_free")) {
-                process_free_event(data, event, cpu, timestamp, thread);
-                return;
-        }
+        return 0;
 }
 
-static int process_sample_event(struct perf_tool *tool,
+typedef int (*tracepoint_handler)(struct perf_evsel *evsel,
+                                  struct perf_sample *sample);
+
+static int process_sample_event(struct perf_tool *tool __maybe_unused,
                                 union perf_event *event,
                                 struct perf_sample *sample,
-                                struct perf_evsel *evsel __used,
+                                struct perf_evsel *evsel,
                                 struct machine *machine)
 {
         struct thread *thread = machine__findnew_thread(machine, event->ip.pid);
@@ -329,18 +318,18 @@ static int process_sample_event(struct perf_tool *tool,
 
         dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
 
-        process_raw_event(tool, event, sample->raw_data, sample->cpu,
-                          sample->time, thread);
+        if (evsel->handler.func != NULL) {
+                tracepoint_handler f = evsel->handler.func;
+                return f(evsel, sample);
+        }
 
         return 0;
 }
 
-static struct perf_kmem perf_kmem = {
-        .tool = {
-                .sample          = process_sample_event,
-                .comm            = perf_event__process_comm,
-                .ordered_samples = true,
-        },
+static struct perf_tool perf_kmem = {
+        .sample          = process_sample_event,
+        .comm            = perf_event__process_comm,
+        .ordered_samples = true,
 };
 
 static double fragmentation(unsigned long n_req, unsigned long n_alloc)
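
With the handlers attached to each evsel, process_sample_event() shrinks to a dispatch through the tracepoint_handler pointer stored in evsel->handler.func, and the per-sample strcmp() chain of process_raw_event() disappears entirely. The same pattern in miniature (self-contained sketch with hypothetical types, not the real perf structs):

    #include <stdio.h>

    struct sample { int cpu; };

    typedef int (*tracepoint_handler)(struct sample *s);

    struct evsel {
            const char *name;
            tracepoint_handler handler;     /* set once at setup time */
    };

    static int handle_free(struct sample *s)
    {
            printf("free on cpu %d\n", s->cpu);
            return 0;
    }

    /* Dispatch without string comparisons on the hot path. */
    static int process_sample(struct evsel *evsel, struct sample *s)
    {
            if (evsel->handler != NULL)
                    return evsel->handler(s);
            return 0;
    }

    int main(void)
    {
            struct evsel ev = { .name = "kmem:kfree", .handler = handle_free };
            struct sample s = { .cpu = 2 };
            return process_sample(&ev, &s);
    }

This is also why struct perf_kmem goes away: with no string lookup there is no need to reach back into the session from the tool, so a plain struct perf_tool suffices.
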
@@ -496,22 +485,32 @@ static int __cmd_kmem(void)
 {
         int err = -EINVAL;
         struct perf_session *session;
-
-        session = perf_session__new(input_name, O_RDONLY, 0, false,
-                                    &perf_kmem.tool);
+        const struct perf_evsel_str_handler kmem_tracepoints[] = {
+                { "kmem:kmalloc",               perf_evsel__process_alloc_event, },
+                { "kmem:kmem_cache_alloc",      perf_evsel__process_alloc_event, },
+                { "kmem:kmalloc_node",          perf_evsel__process_alloc_node_event, },
+                { "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, },
+                { "kmem:kfree",                 perf_evsel__process_free_event, },
+                { "kmem:kmem_cache_free",       perf_evsel__process_free_event, },
+        };
+
+        session = perf_session__new(input_name, O_RDONLY, 0, false, &perf_kmem);
         if (session == NULL)
                 return -ENOMEM;
 
-        perf_kmem.session = session;
-
         if (perf_session__create_kernel_maps(session) < 0)
                 goto out_delete;
 
         if (!perf_session__has_traces(session, "kmem record"))
                 goto out_delete;
 
+        if (perf_session__set_tracepoints_handlers(session, kmem_tracepoints)) {
+                pr_err("Initializing perf session tracepoint handlers failed\n");
+                return -1;
+        }
+
         setup_pager();
-        err = perf_session__process_events(session, &perf_kmem.tool);
+        err = perf_session__process_events(session, &perf_kmem);
         if (err != 0)
                 goto out_delete;
         sort_result();
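
The kmem_tracepoints[] table above pairs each tracepoint name with its handler, and perf_session__set_tracepoints_handlers() walks it once at setup to attach each handler to the matching evsel. A reduced model of such a name-to-handler table (toy types and names, not the perf API):

    #include <stddef.h>
    #include <string.h>

    typedef int (*handler_t)(void);

    struct str_handler {
            const char *name;
            handler_t handler;
    };

    static int on_kmalloc(void) { return 0; }
    static int on_kfree(void)   { return 0; }

    static const struct str_handler handlers[] = {
            { "kmem:kmalloc", on_kmalloc },
            { "kmem:kfree",   on_kfree },
    };

    /* Resolve an event name to its handler once, at setup time. */
    static handler_t lookup_handler(const char *name)
    {
            for (size_t i = 0; i < sizeof(handlers) / sizeof(handlers[0]); i++)
                    if (!strcmp(handlers[i].name, name))
                            return handlers[i].handler;
            return NULL;
    }

Resolving names once at initialization keeps all string comparisons off the per-sample path.
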
@@ -635,8 +634,10 @@ static int sort_dimension__add(const char *tok, struct list_head *list)
         for (i = 0; i < NUM_AVAIL_SORTS; i++) {
                 if (!strcmp(avail_sorts[i]->name, tok)) {
                         sort = malloc(sizeof(*sort));
-                        if (!sort)
-                                die("malloc");
+                        if (!sort) {
+                                pr_err("%s: malloc failed\n", __func__);
+                                return -1;
+                        }
                         memcpy(sort, avail_sorts[i], sizeof(*sort));
                         list_add_tail(&sort->list, list);
                         return 0;
@@ -651,8 +652,10 @@ static int setup_sorting(struct list_head *sort_list, const char *arg)
         char *tok;
         char *str = strdup(arg);
 
-        if (!str)
-                die("strdup");
+        if (!str) {
+                pr_err("%s: strdup failed\n", __func__);
+                return -1;
+        }
 
         while (true) {
                 tok = strsep(&str, ",");
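
setup_sorting() keeps its strdup()/strsep() tokenizing loop; only the out-of-memory case changes from die() to a reported error. Since strsep() writes NUL bytes into the copy and advances its cursor argument, the original pointer must be retained for free(). The idiom in isolation (hypothetical for_each_token() helper; "frag,hit,bytes" mirrors the command's default sort keys):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Print each comma-separated token of 'arg'; returns -1 on OOM. */
    static int for_each_token(const char *arg)
    {
            char *str = strdup(arg);        /* strsep() writes NULs into this copy */
            char *pos = str, *tok;

            if (!str)
                    return -1;

            while ((tok = strsep(&pos, ",")) != NULL)
                    printf("token: %s\n", tok);

            free(str);                      /* free the original pointer, not pos */
            return 0;
    }

    int main(void)
    {
            return for_each_token("frag,hit,bytes");
    }
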
@@ -669,8 +672,8 @@ static int setup_sorting(struct list_head *sort_list, const char *arg)
         return 0;
 }
 
-static int parse_sort_opt(const struct option *opt __used,
-                          const char *arg, int unset __used)
+static int parse_sort_opt(const struct option *opt __maybe_unused,
+                          const char *arg, int unset __maybe_unused)
 {
         if (!arg)
                 return -1;
@@ -683,22 +686,24 @@ static int parse_sort_opt(const struct option *opt __used,
         return 0;
 }
 
-static int parse_caller_opt(const struct option *opt __used,
-                            const char *arg __used, int unset __used)
+static int parse_caller_opt(const struct option *opt __maybe_unused,
+                            const char *arg __maybe_unused,
+                            int unset __maybe_unused)
 {
         caller_flag = (alloc_flag + 1);
         return 0;
 }
 
-static int parse_alloc_opt(const struct option *opt __used,
-                           const char *arg __used, int unset __used)
+static int parse_alloc_opt(const struct option *opt __maybe_unused,
+                           const char *arg __maybe_unused,
+                           int unset __maybe_unused)
 {
         alloc_flag = (caller_flag + 1);
         return 0;
 }
 
-static int parse_line_opt(const struct option *opt __used,
-                          const char *arg, int unset __used)
+static int parse_line_opt(const struct option *opt __maybe_unused,
+                          const char *arg, int unset __maybe_unused)
 {
         int lines;
 
@@ -768,7 +773,7 @@ static int __cmd_record(int argc, const char **argv)
         return cmd_record(i, rec_argv, NULL);
 }
 
-int cmd_kmem(int argc, const char **argv, const char *prefix __used)
+int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
 {
         argc = parse_options(argc, argv, kmem_options, kmem_usage, 0);
 
@@ -780,7 +785,8 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __used)
         if (!strncmp(argv[0], "rec", 3)) {
                 return __cmd_record(argc, argv);
         } else if (!strcmp(argv[0], "stat")) {
-                setup_cpunode_map();
+                if (setup_cpunode_map())
+                        return -1;
 
                 if (list_empty(&caller_sort))
                         setup_sorting(&caller_sort, default_sort_order);