aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Li Zefan <lizf@cn.fujitsu.com> 2009-11-24 00:26:55 -0500
committer Ingo Molnar <mingo@elte.hu> 2009-11-24 02:49:50 -0500
commit 079d3f653134e2f2ac99dae28b08c0cc64268103 (patch)
tree f2b5b4a7ca276f7db6c96c14eaa87d1fe01aef93
parent 7d0d39459dab20bf60cac30a1a7d50b286c60cc1 (diff)
perf kmem: Measure kmalloc/kfree CPU ping-pong call-sites
Show statistics for allocations and frees on different cpus:

------------------------------------------------------------------------------------------------------
 Callsite                           | Total_alloc/Per | Total_req/Per   | Hit    | Ping-pong | Frag
------------------------------------------------------------------------------------------------------
 perf_event_alloc.clone.0+0         |      7504/682   |      7128/648   |     11 |        0 |  5.011%
 alloc_buffer_head+16               |       288/57    |       280/56    |      5 |        0 |  2.778%
 radix_tree_preload+51              |       296/296   |       288/288   |      1 |        0 |  2.703%
 tracepoint_add_probe+32e           |       157/31    |       154/30    |      5 |        0 |  1.911%
 do_maps_open+0                     |       796/12    |       792/12    |     66 |        0 |  0.503%
 sock_alloc_send_pskb+16e           |     23780/495   |     23744/494   |     48 |       38 |  0.151%
 anon_vma_prepare+9a                |      3744/44    |      3740/44    |     85 |        0 |  0.107%
 d_alloc+21                         |     64948/164   |     64944/164   |    396 |        0 |  0.006%
 proc_alloc_inode+23                |    262292/676   |    262288/676   |    388 |        0 |  0.002%
 create_object+28                   |    459600/200   |    459600/200   |   2298 |       71 |  0.000%
 journal_start+67                   |     14440/40    |     14440/40    |    361 |        0 |  0.000%
 get_empty_filp+df                  |     53504/256   |     53504/256   |    209 |        0 |  0.000%
 getname+2a                         |    823296/4096  |    823296/4096  |    201 |        0 |  0.000%
 seq_read+2b0                       |    544768/4096  |    544768/4096  |    133 |        0 |  0.000%
 seq_open+6d                        |     17024/128   |     17024/128   |    133 |        0 |  0.000%
 mmap_region+2e6                    |     11704/88    |     11704/88    |    133 |        0 |  0.000%
 single_open+0                      |      1072/16    |      1072/16    |     67 |        0 |  0.000%
 __alloc_skb+2e                     |     12544/256   |     12544/256   |     49 |       38 |  0.000%
 __sigqueue_alloc+4a                |      1296/144   |      1296/144   |      9 |        8 |  0.000%
 tracepoint_add_probe+6f            |        80/16    |        80/16    |      5 |        0 |  0.000%
------------------------------------------------------------------------------------------------------
...

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: linux-mm@kvack.org <linux-mm@kvack.org>
LKML-Reference: <4B0B6E9F.6020309@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- tools/perf/builtin-kmem.c 122
1 files changed, 94 insertions, 28 deletions
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 1ecf3f4415ce..173d6db42ecb 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -40,13 +40,14 @@ static int *cpunode_map;
40static int max_cpu_num; 40static int max_cpu_num;
41 41
42struct alloc_stat { 42struct alloc_stat {
43 union { 43 u64 call_site;
44 u64 call_site; 44 u64 ptr;
45 u64 ptr;
46 };
47 u64 bytes_req; 45 u64 bytes_req;
48 u64 bytes_alloc; 46 u64 bytes_alloc;
49 u32 hit; 47 u32 hit;
48 u32 pingpong;
49
50 short alloc_cpu;
50 51
51 struct rb_node node; 52 struct rb_node node;
52}; 53};
@@ -144,16 +145,13 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head)
144 return 0; 145 return 0;
145} 146}
146 147
147static void insert_alloc_stat(unsigned long ptr, 148static void insert_alloc_stat(unsigned long call_site, unsigned long ptr,
148 int bytes_req, int bytes_alloc) 149 int bytes_req, int bytes_alloc, int cpu)
149{ 150{
150 struct rb_node **node = &root_alloc_stat.rb_node; 151 struct rb_node **node = &root_alloc_stat.rb_node;
151 struct rb_node *parent = NULL; 152 struct rb_node *parent = NULL;
152 struct alloc_stat *data = NULL; 153 struct alloc_stat *data = NULL;
153 154
154 if (!alloc_flag)
155 return;
156
157 while (*node) { 155 while (*node) {
158 parent = *node; 156 parent = *node;
159 data = rb_entry(*node, struct alloc_stat, node); 157 data = rb_entry(*node, struct alloc_stat, node);
@@ -172,7 +170,10 @@ static void insert_alloc_stat(unsigned long ptr,
172 data->bytes_alloc += bytes_req; 170 data->bytes_alloc += bytes_req;
173 } else { 171 } else {
174 data = malloc(sizeof(*data)); 172 data = malloc(sizeof(*data));
173 if (!data)
174 die("malloc");
175 data->ptr = ptr; 175 data->ptr = ptr;
176 data->pingpong = 0;
176 data->hit = 1; 177 data->hit = 1;
177 data->bytes_req = bytes_req; 178 data->bytes_req = bytes_req;
178 data->bytes_alloc = bytes_alloc; 179 data->bytes_alloc = bytes_alloc;
@@ -180,6 +181,8 @@ static void insert_alloc_stat(unsigned long ptr,
180 rb_link_node(&data->node, parent, node); 181 rb_link_node(&data->node, parent, node);
181 rb_insert_color(&data->node, &root_alloc_stat); 182 rb_insert_color(&data->node, &root_alloc_stat);
182 } 183 }
184 data->call_site = call_site;
185 data->alloc_cpu = cpu;
183} 186}
184 187
185static void insert_caller_stat(unsigned long call_site, 188static void insert_caller_stat(unsigned long call_site,
@@ -189,9 +192,6 @@ static void insert_caller_stat(unsigned long call_site,
189 struct rb_node *parent = NULL; 192 struct rb_node *parent = NULL;
190 struct alloc_stat *data = NULL; 193 struct alloc_stat *data = NULL;
191 194
192 if (!caller_flag)
193 return;
194
195 while (*node) { 195 while (*node) {
196 parent = *node; 196 parent = *node;
197 data = rb_entry(*node, struct alloc_stat, node); 197 data = rb_entry(*node, struct alloc_stat, node);
@@ -210,7 +210,10 @@ static void insert_caller_stat(unsigned long call_site,
210 data->bytes_alloc += bytes_req; 210 data->bytes_alloc += bytes_req;
211 } else { 211 } else {
212 data = malloc(sizeof(*data)); 212 data = malloc(sizeof(*data));
213 if (!data)
214 die("malloc");
213 data->call_site = call_site; 215 data->call_site = call_site;
216 data->pingpong = 0;
214 data->hit = 1; 217 data->hit = 1;
215 data->bytes_req = bytes_req; 218 data->bytes_req = bytes_req;
216 data->bytes_alloc = bytes_alloc; 219 data->bytes_alloc = bytes_alloc;
@@ -238,7 +241,7 @@ static void process_alloc_event(struct raw_event_sample *raw,
238 bytes_req = raw_field_value(event, "bytes_req", raw->data); 241 bytes_req = raw_field_value(event, "bytes_req", raw->data);
239 bytes_alloc = raw_field_value(event, "bytes_alloc", raw->data); 242 bytes_alloc = raw_field_value(event, "bytes_alloc", raw->data);
240 243
241 insert_alloc_stat(ptr, bytes_req, bytes_alloc); 244 insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu);
242 insert_caller_stat(call_site, bytes_req, bytes_alloc); 245 insert_caller_stat(call_site, bytes_req, bytes_alloc);
243 246
244 total_requested += bytes_req; 247 total_requested += bytes_req;
@@ -253,12 +256,58 @@ static void process_alloc_event(struct raw_event_sample *raw,
253 nr_allocs++; 256 nr_allocs++;
254} 257}
255 258
256static void process_free_event(struct raw_event_sample *raw __used, 259static int ptr_cmp(struct alloc_stat *, struct alloc_stat *);
257 struct event *event __used, 260static int callsite_cmp(struct alloc_stat *, struct alloc_stat *);
258 int cpu __used, 261
262static struct alloc_stat *search_alloc_stat(unsigned long ptr,
263 unsigned long call_site,
264 struct rb_root *root,
265 sort_fn_t sort_fn)
266{
267 struct rb_node *node = root->rb_node;
268 struct alloc_stat key = { .ptr = ptr, .call_site = call_site };
269
270 while (node) {
271 struct alloc_stat *data;
272 int cmp;
273
274 data = rb_entry(node, struct alloc_stat, node);
275
276 cmp = sort_fn(&key, data);
277 if (cmp < 0)
278 node = node->rb_left;
279 else if (cmp > 0)
280 node = node->rb_right;
281 else
282 return data;
283 }
284 return NULL;
285}
286
287static void process_free_event(struct raw_event_sample *raw,
288 struct event *event,
289 int cpu,
259 u64 timestamp __used, 290 u64 timestamp __used,
260 struct thread *thread __used) 291 struct thread *thread __used)
261{ 292{
293 unsigned long ptr;
294 struct alloc_stat *s_alloc, *s_caller;
295
296 ptr = raw_field_value(event, "ptr", raw->data);
297
298 s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
299 if (!s_alloc)
300 return;
301
302 if (cpu != s_alloc->alloc_cpu) {
303 s_alloc->pingpong++;
304
305 s_caller = search_alloc_stat(0, s_alloc->call_site,
306 &root_caller_stat, callsite_cmp);
307 assert(s_caller);
308 s_caller->pingpong++;
309 }
310 s_alloc->alloc_cpu = -1;
262} 311}
263 312
264static void 313static void
@@ -379,10 +428,10 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller)
379{ 428{
380 struct rb_node *next; 429 struct rb_node *next;
381 430
382 printf("%.78s\n", graph_dotted_line); 431 printf("%.102s\n", graph_dotted_line);
383 printf("%-28s|", is_caller ? "Callsite": "Alloc Ptr"); 432 printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr");
384 printf("Total_alloc/Per | Total_req/Per | Hit | Frag\n"); 433 printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n");
385 printf("%.78s\n", graph_dotted_line); 434 printf("%.102s\n", graph_dotted_line);
386 435
387 next = rb_first(root); 436 next = rb_first(root);
388 437
@@ -390,7 +439,7 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller)
390 struct alloc_stat *data = rb_entry(next, struct alloc_stat, 439 struct alloc_stat *data = rb_entry(next, struct alloc_stat,
391 node); 440 node);
392 struct symbol *sym = NULL; 441 struct symbol *sym = NULL;
393 char bf[BUFSIZ]; 442 char buf[BUFSIZ];
394 u64 addr; 443 u64 addr;
395 444
396 if (is_caller) { 445 if (is_caller) {
@@ -402,26 +451,28 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller)
402 addr = data->ptr; 451 addr = data->ptr;
403 452
404 if (sym != NULL) 453 if (sym != NULL)
405 snprintf(bf, sizeof(bf), "%s+%Lx", sym->name, 454 snprintf(buf, sizeof(buf), "%s+%Lx", sym->name,
406 addr - sym->start); 455 addr - sym->start);
407 else 456 else
408 snprintf(bf, sizeof(bf), "%#Lx", addr); 457 snprintf(buf, sizeof(buf), "%#Lx", addr);
458 printf(" %-34s |", buf);
409 459
410 printf("%-28s|%8llu/%-6lu |%8llu/%-6lu|%6lu|%8.3f%%\n", 460 printf(" %9llu/%-5lu | %9llu/%-5lu | %6lu | %8lu | %6.3f%%\n",
411 bf, (unsigned long long)data->bytes_alloc, 461 (unsigned long long)data->bytes_alloc,
412 (unsigned long)data->bytes_alloc / data->hit, 462 (unsigned long)data->bytes_alloc / data->hit,
413 (unsigned long long)data->bytes_req, 463 (unsigned long long)data->bytes_req,
414 (unsigned long)data->bytes_req / data->hit, 464 (unsigned long)data->bytes_req / data->hit,
415 (unsigned long)data->hit, 465 (unsigned long)data->hit,
466 (unsigned long)data->pingpong,
416 fragmentation(data->bytes_req, data->bytes_alloc)); 467 fragmentation(data->bytes_req, data->bytes_alloc));
417 468
418 next = rb_next(next); 469 next = rb_next(next);
419 } 470 }
420 471
421 if (n_lines == -1) 472 if (n_lines == -1)
422 printf(" ... | ... | ... | ... | ... \n"); 473 printf(" ... | ... | ... | ... | ... | ... \n");
423 474
424 printf("%.78s\n", graph_dotted_line); 475 printf("%.102s\n", graph_dotted_line);
425} 476}
426 477
427static void print_summary(void) 478static void print_summary(void)
@@ -597,12 +648,27 @@ static struct sort_dimension frag_sort_dimension = {
597 .cmp = frag_cmp, 648 .cmp = frag_cmp,
598}; 649};
599 650
651static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r)
652{
653 if (l->pingpong < r->pingpong)
654 return -1;
655 else if (l->pingpong > r->pingpong)
656 return 1;
657 return 0;
658}
659
660static struct sort_dimension pingpong_sort_dimension = {
661 .name = "pingpong",
662 .cmp = pingpong_cmp,
663};
664
600static struct sort_dimension *avail_sorts[] = { 665static struct sort_dimension *avail_sorts[] = {
601 &ptr_sort_dimension, 666 &ptr_sort_dimension,
602 &callsite_sort_dimension, 667 &callsite_sort_dimension,
603 &hit_sort_dimension, 668 &hit_sort_dimension,
604 &bytes_sort_dimension, 669 &bytes_sort_dimension,
605 &frag_sort_dimension, 670 &frag_sort_dimension,
671 &pingpong_sort_dimension,
606}; 672};
607 673
608#define NUM_AVAIL_SORTS \ 674#define NUM_AVAIL_SORTS \
@@ -703,7 +769,7 @@ static const struct option kmem_options[] = {
703 "stat selector, Pass 'alloc' or 'caller'.", 769 "stat selector, Pass 'alloc' or 'caller'.",
704 parse_stat_opt), 770 parse_stat_opt),
705 OPT_CALLBACK('s', "sort", NULL, "key[,key2...]", 771 OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
706 "sort by key(s): ptr, call_site, bytes, hit, frag", 772 "sort by keys: ptr, call_site, bytes, hit, pingpong, frag",
707 parse_sort_opt), 773 parse_sort_opt),
708 OPT_CALLBACK('l', "line", NULL, "num", 774 OPT_CALLBACK('l', "line", NULL, "num",
709 "show n lins", 775 "show n lins",