diff options
author | Li Zefan <lizf@cn.fujitsu.com> | 2009-11-24 00:26:55 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-11-24 02:49:50 -0500 |
commit | 079d3f653134e2f2ac99dae28b08c0cc64268103 (patch) | |
tree | f2b5b4a7ca276f7db6c96c14eaa87d1fe01aef93 | |
parent | 7d0d39459dab20bf60cac30a1a7d50b286c60cc1 (diff) |
perf kmem: Measure kmalloc/kfree CPU ping-pong call-sites
Show statistics for allocations and frees that occur on different CPUs:
------------------------------------------------------------------------------------------------------
Callsite | Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag
------------------------------------------------------------------------------------------------------
perf_event_alloc.clone.0+0 | 7504/682 | 7128/648 | 11 | 0 | 5.011%
alloc_buffer_head+16 | 288/57 | 280/56 | 5 | 0 | 2.778%
radix_tree_preload+51 | 296/296 | 288/288 | 1 | 0 | 2.703%
tracepoint_add_probe+32e | 157/31 | 154/30 | 5 | 0 | 1.911%
do_maps_open+0 | 796/12 | 792/12 | 66 | 0 | 0.503%
sock_alloc_send_pskb+16e | 23780/495 | 23744/494 | 48 | 38 | 0.151%
anon_vma_prepare+9a | 3744/44 | 3740/44 | 85 | 0 | 0.107%
d_alloc+21 | 64948/164 | 64944/164 | 396 | 0 | 0.006%
proc_alloc_inode+23 | 262292/676 | 262288/676 | 388 | 0 | 0.002%
create_object+28 | 459600/200 | 459600/200 | 2298 | 71 | 0.000%
journal_start+67 | 14440/40 | 14440/40 | 361 | 0 | 0.000%
get_empty_filp+df | 53504/256 | 53504/256 | 209 | 0 | 0.000%
getname+2a | 823296/4096 | 823296/4096 | 201 | 0 | 0.000%
seq_read+2b0 | 544768/4096 | 544768/4096 | 133 | 0 | 0.000%
seq_open+6d | 17024/128 | 17024/128 | 133 | 0 | 0.000%
mmap_region+2e6 | 11704/88 | 11704/88 | 133 | 0 | 0.000%
single_open+0 | 1072/16 | 1072/16 | 67 | 0 | 0.000%
__alloc_skb+2e | 12544/256 | 12544/256 | 49 | 38 | 0.000%
__sigqueue_alloc+4a | 1296/144 | 1296/144 | 9 | 8 | 0.000%
tracepoint_add_probe+6f | 80/16 | 80/16 | 5 | 0 | 0.000%
------------------------------------------------------------------------------------------------------
...
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: linux-mm@kvack.org <linux-mm@kvack.org>
LKML-Reference: <4B0B6E9F.6020309@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | tools/perf/builtin-kmem.c | 122 |
1 files changed, 94 insertions, 28 deletions
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 1ecf3f4415ce..173d6db42ecb 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -40,13 +40,14 @@ static int *cpunode_map; | |||
40 | static int max_cpu_num; | 40 | static int max_cpu_num; |
41 | 41 | ||
42 | struct alloc_stat { | 42 | struct alloc_stat { |
43 | union { | 43 | u64 call_site; |
44 | u64 call_site; | 44 | u64 ptr; |
45 | u64 ptr; | ||
46 | }; | ||
47 | u64 bytes_req; | 45 | u64 bytes_req; |
48 | u64 bytes_alloc; | 46 | u64 bytes_alloc; |
49 | u32 hit; | 47 | u32 hit; |
48 | u32 pingpong; | ||
49 | |||
50 | short alloc_cpu; | ||
50 | 51 | ||
51 | struct rb_node node; | 52 | struct rb_node node; |
52 | }; | 53 | }; |
@@ -144,16 +145,13 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) | |||
144 | return 0; | 145 | return 0; |
145 | } | 146 | } |
146 | 147 | ||
147 | static void insert_alloc_stat(unsigned long ptr, | 148 | static void insert_alloc_stat(unsigned long call_site, unsigned long ptr, |
148 | int bytes_req, int bytes_alloc) | 149 | int bytes_req, int bytes_alloc, int cpu) |
149 | { | 150 | { |
150 | struct rb_node **node = &root_alloc_stat.rb_node; | 151 | struct rb_node **node = &root_alloc_stat.rb_node; |
151 | struct rb_node *parent = NULL; | 152 | struct rb_node *parent = NULL; |
152 | struct alloc_stat *data = NULL; | 153 | struct alloc_stat *data = NULL; |
153 | 154 | ||
154 | if (!alloc_flag) | ||
155 | return; | ||
156 | |||
157 | while (*node) { | 155 | while (*node) { |
158 | parent = *node; | 156 | parent = *node; |
159 | data = rb_entry(*node, struct alloc_stat, node); | 157 | data = rb_entry(*node, struct alloc_stat, node); |
@@ -172,7 +170,10 @@ static void insert_alloc_stat(unsigned long ptr, | |||
172 | data->bytes_alloc += bytes_req; | 170 | data->bytes_alloc += bytes_req; |
173 | } else { | 171 | } else { |
174 | data = malloc(sizeof(*data)); | 172 | data = malloc(sizeof(*data)); |
173 | if (!data) | ||
174 | die("malloc"); | ||
175 | data->ptr = ptr; | 175 | data->ptr = ptr; |
176 | data->pingpong = 0; | ||
176 | data->hit = 1; | 177 | data->hit = 1; |
177 | data->bytes_req = bytes_req; | 178 | data->bytes_req = bytes_req; |
178 | data->bytes_alloc = bytes_alloc; | 179 | data->bytes_alloc = bytes_alloc; |
@@ -180,6 +181,8 @@ static void insert_alloc_stat(unsigned long ptr, | |||
180 | rb_link_node(&data->node, parent, node); | 181 | rb_link_node(&data->node, parent, node); |
181 | rb_insert_color(&data->node, &root_alloc_stat); | 182 | rb_insert_color(&data->node, &root_alloc_stat); |
182 | } | 183 | } |
184 | data->call_site = call_site; | ||
185 | data->alloc_cpu = cpu; | ||
183 | } | 186 | } |
184 | 187 | ||
185 | static void insert_caller_stat(unsigned long call_site, | 188 | static void insert_caller_stat(unsigned long call_site, |
@@ -189,9 +192,6 @@ static void insert_caller_stat(unsigned long call_site, | |||
189 | struct rb_node *parent = NULL; | 192 | struct rb_node *parent = NULL; |
190 | struct alloc_stat *data = NULL; | 193 | struct alloc_stat *data = NULL; |
191 | 194 | ||
192 | if (!caller_flag) | ||
193 | return; | ||
194 | |||
195 | while (*node) { | 195 | while (*node) { |
196 | parent = *node; | 196 | parent = *node; |
197 | data = rb_entry(*node, struct alloc_stat, node); | 197 | data = rb_entry(*node, struct alloc_stat, node); |
@@ -210,7 +210,10 @@ static void insert_caller_stat(unsigned long call_site, | |||
210 | data->bytes_alloc += bytes_req; | 210 | data->bytes_alloc += bytes_req; |
211 | } else { | 211 | } else { |
212 | data = malloc(sizeof(*data)); | 212 | data = malloc(sizeof(*data)); |
213 | if (!data) | ||
214 | die("malloc"); | ||
213 | data->call_site = call_site; | 215 | data->call_site = call_site; |
216 | data->pingpong = 0; | ||
214 | data->hit = 1; | 217 | data->hit = 1; |
215 | data->bytes_req = bytes_req; | 218 | data->bytes_req = bytes_req; |
216 | data->bytes_alloc = bytes_alloc; | 219 | data->bytes_alloc = bytes_alloc; |
@@ -238,7 +241,7 @@ static void process_alloc_event(struct raw_event_sample *raw, | |||
238 | bytes_req = raw_field_value(event, "bytes_req", raw->data); | 241 | bytes_req = raw_field_value(event, "bytes_req", raw->data); |
239 | bytes_alloc = raw_field_value(event, "bytes_alloc", raw->data); | 242 | bytes_alloc = raw_field_value(event, "bytes_alloc", raw->data); |
240 | 243 | ||
241 | insert_alloc_stat(ptr, bytes_req, bytes_alloc); | 244 | insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu); |
242 | insert_caller_stat(call_site, bytes_req, bytes_alloc); | 245 | insert_caller_stat(call_site, bytes_req, bytes_alloc); |
243 | 246 | ||
244 | total_requested += bytes_req; | 247 | total_requested += bytes_req; |
@@ -253,12 +256,58 @@ static void process_alloc_event(struct raw_event_sample *raw, | |||
253 | nr_allocs++; | 256 | nr_allocs++; |
254 | } | 257 | } |
255 | 258 | ||
256 | static void process_free_event(struct raw_event_sample *raw __used, | 259 | static int ptr_cmp(struct alloc_stat *, struct alloc_stat *); |
257 | struct event *event __used, | 260 | static int callsite_cmp(struct alloc_stat *, struct alloc_stat *); |
258 | int cpu __used, | 261 | |
262 | static struct alloc_stat *search_alloc_stat(unsigned long ptr, | ||
263 | unsigned long call_site, | ||
264 | struct rb_root *root, | ||
265 | sort_fn_t sort_fn) | ||
266 | { | ||
267 | struct rb_node *node = root->rb_node; | ||
268 | struct alloc_stat key = { .ptr = ptr, .call_site = call_site }; | ||
269 | |||
270 | while (node) { | ||
271 | struct alloc_stat *data; | ||
272 | int cmp; | ||
273 | |||
274 | data = rb_entry(node, struct alloc_stat, node); | ||
275 | |||
276 | cmp = sort_fn(&key, data); | ||
277 | if (cmp < 0) | ||
278 | node = node->rb_left; | ||
279 | else if (cmp > 0) | ||
280 | node = node->rb_right; | ||
281 | else | ||
282 | return data; | ||
283 | } | ||
284 | return NULL; | ||
285 | } | ||
286 | |||
287 | static void process_free_event(struct raw_event_sample *raw, | ||
288 | struct event *event, | ||
289 | int cpu, | ||
259 | u64 timestamp __used, | 290 | u64 timestamp __used, |
260 | struct thread *thread __used) | 291 | struct thread *thread __used) |
261 | { | 292 | { |
293 | unsigned long ptr; | ||
294 | struct alloc_stat *s_alloc, *s_caller; | ||
295 | |||
296 | ptr = raw_field_value(event, "ptr", raw->data); | ||
297 | |||
298 | s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp); | ||
299 | if (!s_alloc) | ||
300 | return; | ||
301 | |||
302 | if (cpu != s_alloc->alloc_cpu) { | ||
303 | s_alloc->pingpong++; | ||
304 | |||
305 | s_caller = search_alloc_stat(0, s_alloc->call_site, | ||
306 | &root_caller_stat, callsite_cmp); | ||
307 | assert(s_caller); | ||
308 | s_caller->pingpong++; | ||
309 | } | ||
310 | s_alloc->alloc_cpu = -1; | ||
262 | } | 311 | } |
263 | 312 | ||
264 | static void | 313 | static void |
@@ -379,10 +428,10 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller) | |||
379 | { | 428 | { |
380 | struct rb_node *next; | 429 | struct rb_node *next; |
381 | 430 | ||
382 | printf("%.78s\n", graph_dotted_line); | 431 | printf("%.102s\n", graph_dotted_line); |
383 | printf("%-28s|", is_caller ? "Callsite": "Alloc Ptr"); | 432 | printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr"); |
384 | printf("Total_alloc/Per | Total_req/Per | Hit | Frag\n"); | 433 | printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n"); |
385 | printf("%.78s\n", graph_dotted_line); | 434 | printf("%.102s\n", graph_dotted_line); |
386 | 435 | ||
387 | next = rb_first(root); | 436 | next = rb_first(root); |
388 | 437 | ||
@@ -390,7 +439,7 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller) | |||
390 | struct alloc_stat *data = rb_entry(next, struct alloc_stat, | 439 | struct alloc_stat *data = rb_entry(next, struct alloc_stat, |
391 | node); | 440 | node); |
392 | struct symbol *sym = NULL; | 441 | struct symbol *sym = NULL; |
393 | char bf[BUFSIZ]; | 442 | char buf[BUFSIZ]; |
394 | u64 addr; | 443 | u64 addr; |
395 | 444 | ||
396 | if (is_caller) { | 445 | if (is_caller) { |
@@ -402,26 +451,28 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller) | |||
402 | addr = data->ptr; | 451 | addr = data->ptr; |
403 | 452 | ||
404 | if (sym != NULL) | 453 | if (sym != NULL) |
405 | snprintf(bf, sizeof(bf), "%s+%Lx", sym->name, | 454 | snprintf(buf, sizeof(buf), "%s+%Lx", sym->name, |
406 | addr - sym->start); | 455 | addr - sym->start); |
407 | else | 456 | else |
408 | snprintf(bf, sizeof(bf), "%#Lx", addr); | 457 | snprintf(buf, sizeof(buf), "%#Lx", addr); |
458 | printf(" %-34s |", buf); | ||
409 | 459 | ||
410 | printf("%-28s|%8llu/%-6lu |%8llu/%-6lu|%6lu|%8.3f%%\n", | 460 | printf(" %9llu/%-5lu | %9llu/%-5lu | %6lu | %8lu | %6.3f%%\n", |
411 | bf, (unsigned long long)data->bytes_alloc, | 461 | (unsigned long long)data->bytes_alloc, |
412 | (unsigned long)data->bytes_alloc / data->hit, | 462 | (unsigned long)data->bytes_alloc / data->hit, |
413 | (unsigned long long)data->bytes_req, | 463 | (unsigned long long)data->bytes_req, |
414 | (unsigned long)data->bytes_req / data->hit, | 464 | (unsigned long)data->bytes_req / data->hit, |
415 | (unsigned long)data->hit, | 465 | (unsigned long)data->hit, |
466 | (unsigned long)data->pingpong, | ||
416 | fragmentation(data->bytes_req, data->bytes_alloc)); | 467 | fragmentation(data->bytes_req, data->bytes_alloc)); |
417 | 468 | ||
418 | next = rb_next(next); | 469 | next = rb_next(next); |
419 | } | 470 | } |
420 | 471 | ||
421 | if (n_lines == -1) | 472 | if (n_lines == -1) |
422 | printf(" ... | ... | ... | ... | ... \n"); | 473 | printf(" ... | ... | ... | ... | ... | ... \n"); |
423 | 474 | ||
424 | printf("%.78s\n", graph_dotted_line); | 475 | printf("%.102s\n", graph_dotted_line); |
425 | } | 476 | } |
426 | 477 | ||
427 | static void print_summary(void) | 478 | static void print_summary(void) |
@@ -597,12 +648,27 @@ static struct sort_dimension frag_sort_dimension = { | |||
597 | .cmp = frag_cmp, | 648 | .cmp = frag_cmp, |
598 | }; | 649 | }; |
599 | 650 | ||
651 | static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r) | ||
652 | { | ||
653 | if (l->pingpong < r->pingpong) | ||
654 | return -1; | ||
655 | else if (l->pingpong > r->pingpong) | ||
656 | return 1; | ||
657 | return 0; | ||
658 | } | ||
659 | |||
660 | static struct sort_dimension pingpong_sort_dimension = { | ||
661 | .name = "pingpong", | ||
662 | .cmp = pingpong_cmp, | ||
663 | }; | ||
664 | |||
600 | static struct sort_dimension *avail_sorts[] = { | 665 | static struct sort_dimension *avail_sorts[] = { |
601 | &ptr_sort_dimension, | 666 | &ptr_sort_dimension, |
602 | &callsite_sort_dimension, | 667 | &callsite_sort_dimension, |
603 | &hit_sort_dimension, | 668 | &hit_sort_dimension, |
604 | &bytes_sort_dimension, | 669 | &bytes_sort_dimension, |
605 | &frag_sort_dimension, | 670 | &frag_sort_dimension, |
671 | &pingpong_sort_dimension, | ||
606 | }; | 672 | }; |
607 | 673 | ||
608 | #define NUM_AVAIL_SORTS \ | 674 | #define NUM_AVAIL_SORTS \ |
@@ -703,7 +769,7 @@ static const struct option kmem_options[] = { | |||
703 | "stat selector, Pass 'alloc' or 'caller'.", | 769 | "stat selector, Pass 'alloc' or 'caller'.", |
704 | parse_stat_opt), | 770 | parse_stat_opt), |
705 | OPT_CALLBACK('s', "sort", NULL, "key[,key2...]", | 771 | OPT_CALLBACK('s', "sort", NULL, "key[,key2...]", |
706 | "sort by key(s): ptr, call_site, bytes, hit, frag", | 772 | "sort by keys: ptr, call_site, bytes, hit, pingpong, frag", |
707 | parse_sort_opt), | 773 | parse_sort_opt), |
708 | OPT_CALLBACK('l', "line", NULL, "num", | 774 | OPT_CALLBACK('l', "line", NULL, "num", |
709 | "show n lins", | 775 | "show n lins", |