diff options
Diffstat (limited to 'tools/perf/builtin-lock.c')
-rw-r--r-- | tools/perf/builtin-lock.c | 693 |
1 files changed, 438 insertions, 255 deletions
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index e12c844df1e2..821c1586a22b 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c | |||
@@ -23,6 +23,8 @@ | |||
23 | #include <linux/list.h> | 23 | #include <linux/list.h> |
24 | #include <linux/hash.h> | 24 | #include <linux/hash.h> |
25 | 25 | ||
26 | static struct perf_session *session; | ||
27 | |||
26 | /* based on kernel/lockdep.c */ | 28 | /* based on kernel/lockdep.c */ |
27 | #define LOCKHASH_BITS 12 | 29 | #define LOCKHASH_BITS 12 |
28 | #define LOCKHASH_SIZE (1UL << LOCKHASH_BITS) | 30 | #define LOCKHASH_SIZE (1UL << LOCKHASH_BITS) |
@@ -32,9 +34,6 @@ static struct list_head lockhash_table[LOCKHASH_SIZE]; | |||
32 | #define __lockhashfn(key) hash_long((unsigned long)key, LOCKHASH_BITS) | 34 | #define __lockhashfn(key) hash_long((unsigned long)key, LOCKHASH_BITS) |
33 | #define lockhashentry(key) (lockhash_table + __lockhashfn((key))) | 35 | #define lockhashentry(key) (lockhash_table + __lockhashfn((key))) |
34 | 36 | ||
35 | #define LOCK_STATE_UNLOCKED 0 /* initial state */ | ||
36 | #define LOCK_STATE_LOCKED 1 | ||
37 | |||
38 | struct lock_stat { | 37 | struct lock_stat { |
39 | struct list_head hash_entry; | 38 | struct list_head hash_entry; |
40 | struct rb_node rb; /* used for sorting */ | 39 | struct rb_node rb; /* used for sorting */ |
@@ -47,20 +46,151 @@ struct lock_stat { | |||
47 | void *addr; /* address of lockdep_map, used as ID */ | 46 | void *addr; /* address of lockdep_map, used as ID */ |
48 | char *name; /* for strcpy(), we cannot use const */ | 47 | char *name; /* for strcpy(), we cannot use const */ |
49 | 48 | ||
50 | int state; | ||
51 | u64 prev_event_time; /* timestamp of previous event */ | ||
52 | |||
53 | unsigned int nr_acquired; | ||
54 | unsigned int nr_acquire; | 49 | unsigned int nr_acquire; |
50 | unsigned int nr_acquired; | ||
55 | unsigned int nr_contended; | 51 | unsigned int nr_contended; |
56 | unsigned int nr_release; | 52 | unsigned int nr_release; |
57 | 53 | ||
54 | unsigned int nr_readlock; | ||
55 | unsigned int nr_trylock; | ||
58 | /* these times are in nano sec. */ | 56 | /* these times are in nano sec. */ |
59 | u64 wait_time_total; | 57 | u64 wait_time_total; |
60 | u64 wait_time_min; | 58 | u64 wait_time_min; |
61 | u64 wait_time_max; | 59 | u64 wait_time_max; |
60 | |||
61 | int discard; /* flag of blacklist */ | ||
62 | }; | 62 | }; |
63 | 63 | ||
64 | /* | ||
65 | * States of lock_seq_stat | ||
66 | * | ||
67 | * UNINITIALIZED is required for detecting first event of acquire. | ||
68 | * As the nature of lock events, there is no guarantee | ||
69 | * that the first event for the locks are acquire, | ||
70 | * it can be acquired, contended or release. | ||
71 | */ | ||
72 | #define SEQ_STATE_UNINITIALIZED 0 /* initial state */ | ||
73 | #define SEQ_STATE_RELEASED 1 | ||
74 | #define SEQ_STATE_ACQUIRING 2 | ||
75 | #define SEQ_STATE_ACQUIRED 3 | ||
76 | #define SEQ_STATE_READ_ACQUIRED 4 | ||
77 | #define SEQ_STATE_CONTENDED 5 | ||
78 | |||
79 | /* | ||
80 | * MAX_LOCK_DEPTH | ||
81 | * Imported from include/linux/sched.h. | ||
82 | * Should this be synchronized? | ||
83 | */ | ||
84 | #define MAX_LOCK_DEPTH 48 | ||
85 | |||
86 | /* | ||
87 | * struct lock_seq_stat: | ||
88 | * Place to put on state of one lock sequence | ||
89 | * 1) acquire -> acquired -> release | ||
90 | * 2) acquire -> contended -> acquired -> release | ||
91 | * 3) acquire (with read or try) -> release | ||
92 | * 4) Are there other patterns? | ||
93 | */ | ||
94 | struct lock_seq_stat { | ||
95 | struct list_head list; | ||
96 | int state; | ||
97 | u64 prev_event_time; | ||
98 | void *addr; | ||
99 | |||
100 | int read_count; | ||
101 | }; | ||
102 | |||
103 | struct thread_stat { | ||
104 | struct rb_node rb; | ||
105 | |||
106 | u32 tid; | ||
107 | struct list_head seq_list; | ||
108 | }; | ||
109 | |||
110 | static struct rb_root thread_stats; | ||
111 | |||
112 | static struct thread_stat *thread_stat_find(u32 tid) | ||
113 | { | ||
114 | struct rb_node *node; | ||
115 | struct thread_stat *st; | ||
116 | |||
117 | node = thread_stats.rb_node; | ||
118 | while (node) { | ||
119 | st = container_of(node, struct thread_stat, rb); | ||
120 | if (st->tid == tid) | ||
121 | return st; | ||
122 | else if (tid < st->tid) | ||
123 | node = node->rb_left; | ||
124 | else | ||
125 | node = node->rb_right; | ||
126 | } | ||
127 | |||
128 | return NULL; | ||
129 | } | ||
130 | |||
131 | static void thread_stat_insert(struct thread_stat *new) | ||
132 | { | ||
133 | struct rb_node **rb = &thread_stats.rb_node; | ||
134 | struct rb_node *parent = NULL; | ||
135 | struct thread_stat *p; | ||
136 | |||
137 | while (*rb) { | ||
138 | p = container_of(*rb, struct thread_stat, rb); | ||
139 | parent = *rb; | ||
140 | |||
141 | if (new->tid < p->tid) | ||
142 | rb = &(*rb)->rb_left; | ||
143 | else if (new->tid > p->tid) | ||
144 | rb = &(*rb)->rb_right; | ||
145 | else | ||
146 | BUG_ON("inserting invalid thread_stat\n"); | ||
147 | } | ||
148 | |||
149 | rb_link_node(&new->rb, parent, rb); | ||
150 | rb_insert_color(&new->rb, &thread_stats); | ||
151 | } | ||
152 | |||
153 | static struct thread_stat *thread_stat_findnew_after_first(u32 tid) | ||
154 | { | ||
155 | struct thread_stat *st; | ||
156 | |||
157 | st = thread_stat_find(tid); | ||
158 | if (st) | ||
159 | return st; | ||
160 | |||
161 | st = zalloc(sizeof(struct thread_stat)); | ||
162 | if (!st) | ||
163 | die("memory allocation failed\n"); | ||
164 | |||
165 | st->tid = tid; | ||
166 | INIT_LIST_HEAD(&st->seq_list); | ||
167 | |||
168 | thread_stat_insert(st); | ||
169 | |||
170 | return st; | ||
171 | } | ||
172 | |||
173 | static struct thread_stat *thread_stat_findnew_first(u32 tid); | ||
174 | static struct thread_stat *(*thread_stat_findnew)(u32 tid) = | ||
175 | thread_stat_findnew_first; | ||
176 | |||
177 | static struct thread_stat *thread_stat_findnew_first(u32 tid) | ||
178 | { | ||
179 | struct thread_stat *st; | ||
180 | |||
181 | st = zalloc(sizeof(struct thread_stat)); | ||
182 | if (!st) | ||
183 | die("memory allocation failed\n"); | ||
184 | st->tid = tid; | ||
185 | INIT_LIST_HEAD(&st->seq_list); | ||
186 | |||
187 | rb_link_node(&st->rb, NULL, &thread_stats.rb_node); | ||
188 | rb_insert_color(&st->rb, &thread_stats); | ||
189 | |||
190 | thread_stat_findnew = thread_stat_findnew_after_first; | ||
191 | return st; | ||
192 | } | ||
193 | |||
64 | /* build simple key function one is bigger than two */ | 194 | /* build simple key function one is bigger than two */ |
65 | #define SINGLE_KEY(member) \ | 195 | #define SINGLE_KEY(member) \ |
66 | static int lock_stat_key_ ## member(struct lock_stat *one, \ | 196 | static int lock_stat_key_ ## member(struct lock_stat *one, \ |
@@ -175,8 +305,6 @@ static struct lock_stat *lock_stat_findnew(void *addr, const char *name) | |||
175 | goto alloc_failed; | 305 | goto alloc_failed; |
176 | strcpy(new->name, name); | 306 | strcpy(new->name, name); |
177 | 307 | ||
178 | /* LOCK_STATE_UNLOCKED == 0 isn't guaranteed forever */ | ||
179 | new->state = LOCK_STATE_UNLOCKED; | ||
180 | new->wait_time_min = ULLONG_MAX; | 308 | new->wait_time_min = ULLONG_MAX; |
181 | 309 | ||
182 | list_add(&new->hash_entry, entry); | 310 | list_add(&new->hash_entry, entry); |
@@ -188,8 +316,6 @@ alloc_failed: | |||
188 | 316 | ||
189 | static char const *input_name = "perf.data"; | 317 | static char const *input_name = "perf.data"; |
190 | 318 | ||
191 | static int profile_cpu = -1; | ||
192 | |||
193 | struct raw_event_sample { | 319 | struct raw_event_sample { |
194 | u32 size; | 320 | u32 size; |
195 | char data[0]; | 321 | char data[0]; |
@@ -198,6 +324,7 @@ struct raw_event_sample { | |||
198 | struct trace_acquire_event { | 324 | struct trace_acquire_event { |
199 | void *addr; | 325 | void *addr; |
200 | const char *name; | 326 | const char *name; |
327 | int flag; | ||
201 | }; | 328 | }; |
202 | 329 | ||
203 | struct trace_acquired_event { | 330 | struct trace_acquired_event { |
@@ -241,120 +368,258 @@ struct trace_lock_handler { | |||
241 | struct thread *thread); | 368 | struct thread *thread); |
242 | }; | 369 | }; |
243 | 370 | ||
371 | static struct lock_seq_stat *get_seq(struct thread_stat *ts, void *addr) | ||
372 | { | ||
373 | struct lock_seq_stat *seq; | ||
374 | |||
375 | list_for_each_entry(seq, &ts->seq_list, list) { | ||
376 | if (seq->addr == addr) | ||
377 | return seq; | ||
378 | } | ||
379 | |||
380 | seq = zalloc(sizeof(struct lock_seq_stat)); | ||
381 | if (!seq) | ||
382 | die("Not enough memory\n"); | ||
383 | seq->state = SEQ_STATE_UNINITIALIZED; | ||
384 | seq->addr = addr; | ||
385 | |||
386 | list_add(&seq->list, &ts->seq_list); | ||
387 | return seq; | ||
388 | } | ||
389 | |||
390 | enum broken_state { | ||
391 | BROKEN_ACQUIRE, | ||
392 | BROKEN_ACQUIRED, | ||
393 | BROKEN_CONTENDED, | ||
394 | BROKEN_RELEASE, | ||
395 | BROKEN_MAX, | ||
396 | }; | ||
397 | |||
398 | static int bad_hist[BROKEN_MAX]; | ||
399 | |||
400 | enum acquire_flags { | ||
401 | TRY_LOCK = 1, | ||
402 | READ_LOCK = 2, | ||
403 | }; | ||
404 | |||
244 | static void | 405 | static void |
245 | report_lock_acquire_event(struct trace_acquire_event *acquire_event, | 406 | report_lock_acquire_event(struct trace_acquire_event *acquire_event, |
246 | struct event *__event __used, | 407 | struct event *__event __used, |
247 | int cpu __used, | 408 | int cpu __used, |
248 | u64 timestamp, | 409 | u64 timestamp __used, |
249 | struct thread *thread __used) | 410 | struct thread *thread __used) |
250 | { | 411 | { |
251 | struct lock_stat *st; | 412 | struct lock_stat *ls; |
413 | struct thread_stat *ts; | ||
414 | struct lock_seq_stat *seq; | ||
415 | |||
416 | ls = lock_stat_findnew(acquire_event->addr, acquire_event->name); | ||
417 | if (ls->discard) | ||
418 | return; | ||
252 | 419 | ||
253 | st = lock_stat_findnew(acquire_event->addr, acquire_event->name); | 420 | ts = thread_stat_findnew(thread->pid); |
421 | seq = get_seq(ts, acquire_event->addr); | ||
254 | 422 | ||
255 | switch (st->state) { | 423 | switch (seq->state) { |
256 | case LOCK_STATE_UNLOCKED: | 424 | case SEQ_STATE_UNINITIALIZED: |
425 | case SEQ_STATE_RELEASED: | ||
426 | if (!acquire_event->flag) { | ||
427 | seq->state = SEQ_STATE_ACQUIRING; | ||
428 | } else { | ||
429 | if (acquire_event->flag & TRY_LOCK) | ||
430 | ls->nr_trylock++; | ||
431 | if (acquire_event->flag & READ_LOCK) | ||
432 | ls->nr_readlock++; | ||
433 | seq->state = SEQ_STATE_READ_ACQUIRED; | ||
434 | seq->read_count = 1; | ||
435 | ls->nr_acquired++; | ||
436 | } | ||
437 | break; | ||
438 | case SEQ_STATE_READ_ACQUIRED: | ||
439 | if (acquire_event->flag & READ_LOCK) { | ||
440 | seq->read_count++; | ||
441 | ls->nr_acquired++; | ||
442 | goto end; | ||
443 | } else { | ||
444 | goto broken; | ||
445 | } | ||
257 | break; | 446 | break; |
258 | case LOCK_STATE_LOCKED: | 447 | case SEQ_STATE_ACQUIRED: |
448 | case SEQ_STATE_ACQUIRING: | ||
449 | case SEQ_STATE_CONTENDED: | ||
450 | broken: | ||
451 | /* broken lock sequence, discard it */ | ||
452 | ls->discard = 1; | ||
453 | bad_hist[BROKEN_ACQUIRE]++; | ||
454 | list_del(&seq->list); | ||
455 | free(seq); | ||
456 | goto end; | ||
259 | break; | 457 | break; |
260 | default: | 458 | default: |
261 | BUG_ON(1); | 459 | BUG_ON("Unknown state of lock sequence found!\n"); |
262 | break; | 460 | break; |
263 | } | 461 | } |
264 | 462 | ||
265 | st->prev_event_time = timestamp; | 463 | ls->nr_acquire++; |
464 | seq->prev_event_time = timestamp; | ||
465 | end: | ||
466 | return; | ||
266 | } | 467 | } |
267 | 468 | ||
268 | static void | 469 | static void |
269 | report_lock_acquired_event(struct trace_acquired_event *acquired_event, | 470 | report_lock_acquired_event(struct trace_acquired_event *acquired_event, |
270 | struct event *__event __used, | 471 | struct event *__event __used, |
271 | int cpu __used, | 472 | int cpu __used, |
272 | u64 timestamp, | 473 | u64 timestamp __used, |
273 | struct thread *thread __used) | 474 | struct thread *thread __used) |
274 | { | 475 | { |
275 | struct lock_stat *st; | 476 | struct lock_stat *ls; |
477 | struct thread_stat *ts; | ||
478 | struct lock_seq_stat *seq; | ||
479 | u64 contended_term; | ||
480 | |||
481 | ls = lock_stat_findnew(acquired_event->addr, acquired_event->name); | ||
482 | if (ls->discard) | ||
483 | return; | ||
276 | 484 | ||
277 | st = lock_stat_findnew(acquired_event->addr, acquired_event->name); | 485 | ts = thread_stat_findnew(thread->pid); |
486 | seq = get_seq(ts, acquired_event->addr); | ||
278 | 487 | ||
279 | switch (st->state) { | 488 | switch (seq->state) { |
280 | case LOCK_STATE_UNLOCKED: | 489 | case SEQ_STATE_UNINITIALIZED: |
281 | st->state = LOCK_STATE_LOCKED; | 490 | /* orphan event, do nothing */ |
282 | st->nr_acquired++; | 491 | return; |
492 | case SEQ_STATE_ACQUIRING: | ||
493 | break; | ||
494 | case SEQ_STATE_CONTENDED: | ||
495 | contended_term = timestamp - seq->prev_event_time; | ||
496 | ls->wait_time_total += contended_term; | ||
497 | if (contended_term < ls->wait_time_min) | ||
498 | ls->wait_time_min = contended_term; | ||
499 | if (ls->wait_time_max < contended_term) | ||
500 | ls->wait_time_max = contended_term; | ||
283 | break; | 501 | break; |
284 | case LOCK_STATE_LOCKED: | 502 | case SEQ_STATE_RELEASED: |
503 | case SEQ_STATE_ACQUIRED: | ||
504 | case SEQ_STATE_READ_ACQUIRED: | ||
505 | /* broken lock sequence, discard it */ | ||
506 | ls->discard = 1; | ||
507 | bad_hist[BROKEN_ACQUIRED]++; | ||
508 | list_del(&seq->list); | ||
509 | free(seq); | ||
510 | goto end; | ||
285 | break; | 511 | break; |
512 | |||
286 | default: | 513 | default: |
287 | BUG_ON(1); | 514 | BUG_ON("Unknown state of lock sequence found!\n"); |
288 | break; | 515 | break; |
289 | } | 516 | } |
290 | 517 | ||
291 | st->prev_event_time = timestamp; | 518 | seq->state = SEQ_STATE_ACQUIRED; |
519 | ls->nr_acquired++; | ||
520 | seq->prev_event_time = timestamp; | ||
521 | end: | ||
522 | return; | ||
292 | } | 523 | } |
293 | 524 | ||
294 | static void | 525 | static void |
295 | report_lock_contended_event(struct trace_contended_event *contended_event, | 526 | report_lock_contended_event(struct trace_contended_event *contended_event, |
296 | struct event *__event __used, | 527 | struct event *__event __used, |
297 | int cpu __used, | 528 | int cpu __used, |
298 | u64 timestamp, | 529 | u64 timestamp __used, |
299 | struct thread *thread __used) | 530 | struct thread *thread __used) |
300 | { | 531 | { |
301 | struct lock_stat *st; | 532 | struct lock_stat *ls; |
533 | struct thread_stat *ts; | ||
534 | struct lock_seq_stat *seq; | ||
302 | 535 | ||
303 | st = lock_stat_findnew(contended_event->addr, contended_event->name); | 536 | ls = lock_stat_findnew(contended_event->addr, contended_event->name); |
537 | if (ls->discard) | ||
538 | return; | ||
304 | 539 | ||
305 | switch (st->state) { | 540 | ts = thread_stat_findnew(thread->pid); |
306 | case LOCK_STATE_UNLOCKED: | 541 | seq = get_seq(ts, contended_event->addr); |
542 | |||
543 | switch (seq->state) { | ||
544 | case SEQ_STATE_UNINITIALIZED: | ||
545 | /* orphan event, do nothing */ | ||
546 | return; | ||
547 | case SEQ_STATE_ACQUIRING: | ||
307 | break; | 548 | break; |
308 | case LOCK_STATE_LOCKED: | 549 | case SEQ_STATE_RELEASED: |
309 | st->nr_contended++; | 550 | case SEQ_STATE_ACQUIRED: |
551 | case SEQ_STATE_READ_ACQUIRED: | ||
552 | case SEQ_STATE_CONTENDED: | ||
553 | /* broken lock sequence, discard it */ | ||
554 | ls->discard = 1; | ||
555 | bad_hist[BROKEN_CONTENDED]++; | ||
556 | list_del(&seq->list); | ||
557 | free(seq); | ||
558 | goto end; | ||
310 | break; | 559 | break; |
311 | default: | 560 | default: |
312 | BUG_ON(1); | 561 | BUG_ON("Unknown state of lock sequence found!\n"); |
313 | break; | 562 | break; |
314 | } | 563 | } |
315 | 564 | ||
316 | st->prev_event_time = timestamp; | 565 | seq->state = SEQ_STATE_CONTENDED; |
566 | ls->nr_contended++; | ||
567 | seq->prev_event_time = timestamp; | ||
568 | end: | ||
569 | return; | ||
317 | } | 570 | } |
318 | 571 | ||
319 | static void | 572 | static void |
320 | report_lock_release_event(struct trace_release_event *release_event, | 573 | report_lock_release_event(struct trace_release_event *release_event, |
321 | struct event *__event __used, | 574 | struct event *__event __used, |
322 | int cpu __used, | 575 | int cpu __used, |
323 | u64 timestamp, | 576 | u64 timestamp __used, |
324 | struct thread *thread __used) | 577 | struct thread *thread __used) |
325 | { | 578 | { |
326 | struct lock_stat *st; | 579 | struct lock_stat *ls; |
327 | u64 hold_time; | 580 | struct thread_stat *ts; |
581 | struct lock_seq_stat *seq; | ||
328 | 582 | ||
329 | st = lock_stat_findnew(release_event->addr, release_event->name); | 583 | ls = lock_stat_findnew(release_event->addr, release_event->name); |
584 | if (ls->discard) | ||
585 | return; | ||
330 | 586 | ||
331 | switch (st->state) { | 587 | ts = thread_stat_findnew(thread->pid); |
332 | case LOCK_STATE_UNLOCKED: | 588 | seq = get_seq(ts, release_event->addr); |
333 | break; | ||
334 | case LOCK_STATE_LOCKED: | ||
335 | st->state = LOCK_STATE_UNLOCKED; | ||
336 | hold_time = timestamp - st->prev_event_time; | ||
337 | 589 | ||
338 | if (timestamp < st->prev_event_time) { | 590 | switch (seq->state) { |
339 | /* terribly, this can happen... */ | 591 | case SEQ_STATE_UNINITIALIZED: |
592 | goto end; | ||
593 | break; | ||
594 | case SEQ_STATE_ACQUIRED: | ||
595 | break; | ||
596 | case SEQ_STATE_READ_ACQUIRED: | ||
597 | seq->read_count--; | ||
598 | BUG_ON(seq->read_count < 0); | ||
599 | if (!seq->read_count) { | ||
600 | ls->nr_release++; | ||
340 | goto end; | 601 | goto end; |
341 | } | 602 | } |
342 | 603 | break; | |
343 | if (st->wait_time_min > hold_time) | 604 | case SEQ_STATE_ACQUIRING: |
344 | st->wait_time_min = hold_time; | 605 | case SEQ_STATE_CONTENDED: |
345 | if (st->wait_time_max < hold_time) | 606 | case SEQ_STATE_RELEASED: |
346 | st->wait_time_max = hold_time; | 607 | /* broken lock sequence, discard it */ |
347 | st->wait_time_total += hold_time; | 608 | ls->discard = 1; |
348 | 609 | bad_hist[BROKEN_RELEASE]++; | |
349 | st->nr_release++; | 610 | goto free_seq; |
350 | break; | 611 | break; |
351 | default: | 612 | default: |
352 | BUG_ON(1); | 613 | BUG_ON("Unknown state of lock sequence found!\n"); |
353 | break; | 614 | break; |
354 | } | 615 | } |
355 | 616 | ||
617 | ls->nr_release++; | ||
618 | free_seq: | ||
619 | list_del(&seq->list); | ||
620 | free(seq); | ||
356 | end: | 621 | end: |
357 | st->prev_event_time = timestamp; | 622 | return; |
358 | } | 623 | } |
359 | 624 | ||
360 | /* lock oriented handlers */ | 625 | /* lock oriented handlers */ |
@@ -381,6 +646,7 @@ process_lock_acquire_event(void *data, | |||
381 | tmp = raw_field_value(event, "lockdep_addr", data); | 646 | tmp = raw_field_value(event, "lockdep_addr", data); |
382 | memcpy(&acquire_event.addr, &tmp, sizeof(void *)); | 647 | memcpy(&acquire_event.addr, &tmp, sizeof(void *)); |
383 | acquire_event.name = (char *)raw_field_ptr(event, "name", data); | 648 | acquire_event.name = (char *)raw_field_ptr(event, "name", data); |
649 | acquire_event.flag = (int)raw_field_value(event, "flag", data); | ||
384 | 650 | ||
385 | if (trace_handler->acquire_event) | 651 | if (trace_handler->acquire_event) |
386 | trace_handler->acquire_event(&acquire_event, event, cpu, timestamp, thread); | 652 | trace_handler->acquire_event(&acquire_event, event, cpu, timestamp, thread); |
@@ -441,8 +707,7 @@ process_lock_release_event(void *data, | |||
441 | } | 707 | } |
442 | 708 | ||
443 | static void | 709 | static void |
444 | process_raw_event(void *data, int cpu, | 710 | process_raw_event(void *data, int cpu, u64 timestamp, struct thread *thread) |
445 | u64 timestamp, struct thread *thread) | ||
446 | { | 711 | { |
447 | struct event *event; | 712 | struct event *event; |
448 | int type; | 713 | int type; |
@@ -460,173 +725,19 @@ process_raw_event(void *data, int cpu, | |||
460 | process_lock_release_event(data, event, cpu, timestamp, thread); | 725 | process_lock_release_event(data, event, cpu, timestamp, thread); |
461 | } | 726 | } |
462 | 727 | ||
463 | struct raw_event_queue { | 728 | static void print_bad_events(int bad, int total) |
464 | u64 timestamp; | ||
465 | int cpu; | ||
466 | void *data; | ||
467 | struct thread *thread; | ||
468 | struct list_head list; | ||
469 | }; | ||
470 | |||
471 | static LIST_HEAD(raw_event_head); | ||
472 | |||
473 | #define FLUSH_PERIOD (5 * NSEC_PER_SEC) | ||
474 | |||
475 | static u64 flush_limit = ULLONG_MAX; | ||
476 | static u64 last_flush = 0; | ||
477 | struct raw_event_queue *last_inserted; | ||
478 | |||
479 | static void flush_raw_event_queue(u64 limit) | ||
480 | { | ||
481 | struct raw_event_queue *tmp, *iter; | ||
482 | |||
483 | list_for_each_entry_safe(iter, tmp, &raw_event_head, list) { | ||
484 | if (iter->timestamp > limit) | ||
485 | return; | ||
486 | |||
487 | if (iter == last_inserted) | ||
488 | last_inserted = NULL; | ||
489 | |||
490 | process_raw_event(iter->data, iter->cpu, iter->timestamp, | ||
491 | iter->thread); | ||
492 | |||
493 | last_flush = iter->timestamp; | ||
494 | list_del(&iter->list); | ||
495 | free(iter->data); | ||
496 | free(iter); | ||
497 | } | ||
498 | } | ||
499 | |||
500 | static void __queue_raw_event_end(struct raw_event_queue *new) | ||
501 | { | ||
502 | struct raw_event_queue *iter; | ||
503 | |||
504 | list_for_each_entry_reverse(iter, &raw_event_head, list) { | ||
505 | if (iter->timestamp < new->timestamp) { | ||
506 | list_add(&new->list, &iter->list); | ||
507 | return; | ||
508 | } | ||
509 | } | ||
510 | |||
511 | list_add(&new->list, &raw_event_head); | ||
512 | } | ||
513 | |||
514 | static void __queue_raw_event_before(struct raw_event_queue *new, | ||
515 | struct raw_event_queue *iter) | ||
516 | { | 729 | { |
517 | list_for_each_entry_continue_reverse(iter, &raw_event_head, list) { | 730 | /* Output for debug, this have to be removed */ |
518 | if (iter->timestamp < new->timestamp) { | 731 | int i; |
519 | list_add(&new->list, &iter->list); | 732 | const char *name[4] = |
520 | return; | 733 | { "acquire", "acquired", "contended", "release" }; |
521 | } | 734 | |
522 | } | 735 | pr_info("\n=== output for debug===\n\n"); |
523 | 736 | pr_info("bad: %d, total: %d\n", bad, total); | |
524 | list_add(&new->list, &raw_event_head); | 737 | pr_info("bad rate: %f %%\n", (double)bad / (double)total * 100); |
525 | } | 738 | pr_info("histogram of events caused bad sequence\n"); |
526 | 739 | for (i = 0; i < BROKEN_MAX; i++) | |
527 | static void __queue_raw_event_after(struct raw_event_queue *new, | 740 | pr_info(" %10s: %d\n", name[i], bad_hist[i]); |
528 | struct raw_event_queue *iter) | ||
529 | { | ||
530 | list_for_each_entry_continue(iter, &raw_event_head, list) { | ||
531 | if (iter->timestamp > new->timestamp) { | ||
532 | list_add_tail(&new->list, &iter->list); | ||
533 | return; | ||
534 | } | ||
535 | } | ||
536 | list_add_tail(&new->list, &raw_event_head); | ||
537 | } | ||
538 | |||
539 | /* The queue is ordered by time */ | ||
540 | static void __queue_raw_event(struct raw_event_queue *new) | ||
541 | { | ||
542 | if (!last_inserted) { | ||
543 | __queue_raw_event_end(new); | ||
544 | return; | ||
545 | } | ||
546 | |||
547 | /* | ||
548 | * Most of the time the current event has a timestamp | ||
549 | * very close to the last event inserted, unless we just switched | ||
550 | * to another event buffer. Having a sorting based on a list and | ||
551 | * on the last inserted event that is close to the current one is | ||
552 | * probably more efficient than an rbtree based sorting. | ||
553 | */ | ||
554 | if (last_inserted->timestamp >= new->timestamp) | ||
555 | __queue_raw_event_before(new, last_inserted); | ||
556 | else | ||
557 | __queue_raw_event_after(new, last_inserted); | ||
558 | } | ||
559 | |||
560 | static void queue_raw_event(void *data, int raw_size, int cpu, | ||
561 | u64 timestamp, struct thread *thread) | ||
562 | { | ||
563 | struct raw_event_queue *new; | ||
564 | |||
565 | if (flush_limit == ULLONG_MAX) | ||
566 | flush_limit = timestamp + FLUSH_PERIOD; | ||
567 | |||
568 | if (timestamp < last_flush) { | ||
569 | printf("Warning: Timestamp below last timeslice flush\n"); | ||
570 | return; | ||
571 | } | ||
572 | |||
573 | new = malloc(sizeof(*new)); | ||
574 | if (!new) | ||
575 | die("Not enough memory\n"); | ||
576 | |||
577 | new->timestamp = timestamp; | ||
578 | new->cpu = cpu; | ||
579 | new->thread = thread; | ||
580 | |||
581 | new->data = malloc(raw_size); | ||
582 | if (!new->data) | ||
583 | die("Not enough memory\n"); | ||
584 | |||
585 | memcpy(new->data, data, raw_size); | ||
586 | |||
587 | __queue_raw_event(new); | ||
588 | last_inserted = new; | ||
589 | |||
590 | /* | ||
591 | * We want to have a slice of events covering 2 * FLUSH_PERIOD | ||
592 | * If FLUSH_PERIOD is big enough, it ensures every events that occured | ||
593 | * in the first half of the timeslice have all been buffered and there | ||
594 | * are none remaining (we need that because of the weakly ordered | ||
595 | * event recording we have). Then once we reach the 2 * FLUSH_PERIOD | ||
596 | * timeslice, we flush the first half to be gentle with the memory | ||
597 | * (the second half can still get new events in the middle, so wait | ||
598 | * another period to flush it) | ||
599 | */ | ||
600 | if (new->timestamp > flush_limit && | ||
601 | new->timestamp - flush_limit > FLUSH_PERIOD) { | ||
602 | flush_limit += FLUSH_PERIOD; | ||
603 | flush_raw_event_queue(flush_limit); | ||
604 | } | ||
605 | } | ||
606 | |||
607 | static int process_sample_event(event_t *event, struct perf_session *session) | ||
608 | { | ||
609 | struct thread *thread; | ||
610 | struct sample_data data; | ||
611 | |||
612 | bzero(&data, sizeof(struct sample_data)); | ||
613 | event__parse_sample(event, session->sample_type, &data); | ||
614 | thread = perf_session__findnew(session, data.pid); | ||
615 | |||
616 | if (thread == NULL) { | ||
617 | pr_debug("problem processing %d event, skipping it.\n", | ||
618 | event->header.type); | ||
619 | return -1; | ||
620 | } | ||
621 | |||
622 | dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); | ||
623 | |||
624 | if (profile_cpu != -1 && profile_cpu != (int) data.cpu) | ||
625 | return 0; | ||
626 | |||
627 | queue_raw_event(data.raw_data, data.raw_size, data.cpu, data.time, thread); | ||
628 | |||
629 | return 0; | ||
630 | } | 741 | } |
631 | 742 | ||
632 | /* TODO: various way to print, coloring, nano or milli sec */ | 743 | /* TODO: various way to print, coloring, nano or milli sec */ |
@@ -634,26 +745,30 @@ static void print_result(void) | |||
634 | { | 745 | { |
635 | struct lock_stat *st; | 746 | struct lock_stat *st; |
636 | char cut_name[20]; | 747 | char cut_name[20]; |
748 | int bad, total; | ||
637 | 749 | ||
638 | printf("%18s ", "ID"); | 750 | pr_info("%20s ", "Name"); |
639 | printf("%20s ", "Name"); | 751 | pr_info("%10s ", "acquired"); |
640 | printf("%10s ", "acquired"); | 752 | pr_info("%10s ", "contended"); |
641 | printf("%10s ", "contended"); | ||
642 | 753 | ||
643 | printf("%15s ", "total wait (ns)"); | 754 | pr_info("%15s ", "total wait (ns)"); |
644 | printf("%15s ", "max wait (ns)"); | 755 | pr_info("%15s ", "max wait (ns)"); |
645 | printf("%15s ", "min wait (ns)"); | 756 | pr_info("%15s ", "min wait (ns)"); |
646 | 757 | ||
647 | printf("\n\n"); | 758 | pr_info("\n\n"); |
648 | 759 | ||
760 | bad = total = 0; | ||
649 | while ((st = pop_from_result())) { | 761 | while ((st = pop_from_result())) { |
762 | total++; | ||
763 | if (st->discard) { | ||
764 | bad++; | ||
765 | continue; | ||
766 | } | ||
650 | bzero(cut_name, 20); | 767 | bzero(cut_name, 20); |
651 | 768 | ||
652 | printf("%p ", st->addr); | ||
653 | |||
654 | if (strlen(st->name) < 16) { | 769 | if (strlen(st->name) < 16) { |
655 | /* output raw name */ | 770 | /* output raw name */ |
656 | printf("%20s ", st->name); | 771 | pr_info("%20s ", st->name); |
657 | } else { | 772 | } else { |
658 | strncpy(cut_name, st->name, 16); | 773 | strncpy(cut_name, st->name, 16); |
659 | cut_name[16] = '.'; | 774 | cut_name[16] = '.'; |
@@ -661,18 +776,39 @@ static void print_result(void) | |||
661 | cut_name[18] = '.'; | 776 | cut_name[18] = '.'; |
662 | cut_name[19] = '\0'; | 777 | cut_name[19] = '\0'; |
663 | /* cut off name for saving output style */ | 778 | /* cut off name for saving output style */ |
664 | printf("%20s ", cut_name); | 779 | pr_info("%20s ", cut_name); |
665 | } | 780 | } |
666 | 781 | ||
667 | printf("%10u ", st->nr_acquired); | 782 | pr_info("%10u ", st->nr_acquired); |
668 | printf("%10u ", st->nr_contended); | 783 | pr_info("%10u ", st->nr_contended); |
669 | 784 | ||
670 | printf("%15llu ", st->wait_time_total); | 785 | pr_info("%15llu ", st->wait_time_total); |
671 | printf("%15llu ", st->wait_time_max); | 786 | pr_info("%15llu ", st->wait_time_max); |
672 | printf("%15llu ", st->wait_time_min == ULLONG_MAX ? | 787 | pr_info("%15llu ", st->wait_time_min == ULLONG_MAX ? |
673 | 0 : st->wait_time_min); | 788 | 0 : st->wait_time_min); |
674 | printf("\n"); | 789 | pr_info("\n"); |
675 | } | 790 | } |
791 | |||
792 | print_bad_events(bad, total); | ||
793 | } | ||
794 | |||
795 | static bool info_threads, info_map; | ||
796 | |||
797 | static void dump_threads(void) | ||
798 | { | ||
799 | struct thread_stat *st; | ||
800 | struct rb_node *node; | ||
801 | struct thread *t; | ||
802 | |||
803 | pr_info("%10s: comm\n", "Thread ID"); | ||
804 | |||
805 | node = rb_first(&thread_stats); | ||
806 | while (node) { | ||
807 | st = container_of(node, struct thread_stat, rb); | ||
808 | t = perf_session__findnew(session, st->tid); | ||
809 | pr_info("%10d: %s\n", st->tid, t->comm); | ||
810 | node = rb_next(node); | ||
811 | }; | ||
676 | } | 812 | } |
677 | 813 | ||
678 | static void dump_map(void) | 814 | static void dump_map(void) |
@@ -680,23 +816,53 @@ static void dump_map(void) | |||
680 | unsigned int i; | 816 | unsigned int i; |
681 | struct lock_stat *st; | 817 | struct lock_stat *st; |
682 | 818 | ||
819 | pr_info("Address of instance: name of class\n"); | ||
683 | for (i = 0; i < LOCKHASH_SIZE; i++) { | 820 | for (i = 0; i < LOCKHASH_SIZE; i++) { |
684 | list_for_each_entry(st, &lockhash_table[i], hash_entry) { | 821 | list_for_each_entry(st, &lockhash_table[i], hash_entry) { |
685 | printf("%p: %s\n", st->addr, st->name); | 822 | pr_info(" %p: %s\n", st->addr, st->name); |
686 | } | 823 | } |
687 | } | 824 | } |
688 | } | 825 | } |
689 | 826 | ||
827 | static void dump_info(void) | ||
828 | { | ||
829 | if (info_threads) | ||
830 | dump_threads(); | ||
831 | else if (info_map) | ||
832 | dump_map(); | ||
833 | else | ||
834 | die("Unknown type of information\n"); | ||
835 | } | ||
836 | |||
837 | static int process_sample_event(event_t *self, struct perf_session *s) | ||
838 | { | ||
839 | struct sample_data data; | ||
840 | struct thread *thread; | ||
841 | |||
842 | bzero(&data, sizeof(data)); | ||
843 | event__parse_sample(self, s->sample_type, &data); | ||
844 | |||
845 | thread = perf_session__findnew(s, data.tid); | ||
846 | if (thread == NULL) { | ||
847 | pr_debug("problem processing %d event, skipping it.\n", | ||
848 | self->header.type); | ||
849 | return -1; | ||
850 | } | ||
851 | |||
852 | process_raw_event(data.raw_data, data.cpu, data.time, thread); | ||
853 | |||
854 | return 0; | ||
855 | } | ||
856 | |||
690 | static struct perf_event_ops eops = { | 857 | static struct perf_event_ops eops = { |
691 | .sample = process_sample_event, | 858 | .sample = process_sample_event, |
692 | .comm = event__process_comm, | 859 | .comm = event__process_comm, |
860 | .ordered_samples = true, | ||
693 | }; | 861 | }; |
694 | 862 | ||
695 | static struct perf_session *session; | ||
696 | |||
697 | static int read_events(void) | 863 | static int read_events(void) |
698 | { | 864 | { |
699 | session = perf_session__new(input_name, O_RDONLY, 0); | 865 | session = perf_session__new(input_name, O_RDONLY, 0, false); |
700 | if (!session) | 866 | if (!session) |
701 | die("Initializing perf session failed\n"); | 867 | die("Initializing perf session failed\n"); |
702 | 868 | ||
@@ -720,7 +886,6 @@ static void __cmd_report(void) | |||
720 | setup_pager(); | 886 | setup_pager(); |
721 | select_key(); | 887 | select_key(); |
722 | read_events(); | 888 | read_events(); |
723 | flush_raw_event_queue(ULLONG_MAX); | ||
724 | sort_result(); | 889 | sort_result(); |
725 | print_result(); | 890 | print_result(); |
726 | } | 891 | } |
@@ -737,6 +902,19 @@ static const struct option report_options[] = { | |||
737 | OPT_END() | 902 | OPT_END() |
738 | }; | 903 | }; |
739 | 904 | ||
905 | static const char * const info_usage[] = { | ||
906 | "perf lock info [<options>]", | ||
907 | NULL | ||
908 | }; | ||
909 | |||
910 | static const struct option info_options[] = { | ||
911 | OPT_BOOLEAN('t', "threads", &info_threads, | ||
912 | "dump thread list in perf.data"), | ||
913 | OPT_BOOLEAN('m', "map", &info_map, | ||
914 | "map of lock instances (name:address table)"), | ||
915 | OPT_END() | ||
916 | }; | ||
917 | |||
740 | static const char * const lock_usage[] = { | 918 | static const char * const lock_usage[] = { |
741 | "perf lock [<options>] {record|trace|report}", | 919 | "perf lock [<options>] {record|trace|report}", |
742 | NULL | 920 | NULL |
@@ -744,14 +922,13 @@ static const char * const lock_usage[] = { | |||
744 | 922 | ||
745 | static const struct option lock_options[] = { | 923 | static const struct option lock_options[] = { |
746 | OPT_STRING('i', "input", &input_name, "file", "input file name"), | 924 | OPT_STRING('i', "input", &input_name, "file", "input file name"), |
747 | OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), | 925 | OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), |
748 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), | 926 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), |
749 | OPT_END() | 927 | OPT_END() |
750 | }; | 928 | }; |
751 | 929 | ||
752 | static const char *record_args[] = { | 930 | static const char *record_args[] = { |
753 | "record", | 931 | "record", |
754 | "-a", | ||
755 | "-R", | 932 | "-R", |
756 | "-f", | 933 | "-f", |
757 | "-m", "1024", | 934 | "-m", "1024", |
@@ -808,12 +985,18 @@ int cmd_lock(int argc, const char **argv, const char *prefix __used) | |||
808 | } else if (!strcmp(argv[0], "trace")) { | 985 | } else if (!strcmp(argv[0], "trace")) { |
809 | /* Aliased to 'perf trace' */ | 986 | /* Aliased to 'perf trace' */ |
810 | return cmd_trace(argc, argv, prefix); | 987 | return cmd_trace(argc, argv, prefix); |
811 | } else if (!strcmp(argv[0], "map")) { | 988 | } else if (!strcmp(argv[0], "info")) { |
989 | if (argc) { | ||
990 | argc = parse_options(argc, argv, | ||
991 | info_options, info_usage, 0); | ||
992 | if (argc) | ||
993 | usage_with_options(info_usage, info_options); | ||
994 | } | ||
812 | /* recycling report_lock_ops */ | 995 | /* recycling report_lock_ops */ |
813 | trace_handler = &report_lock_ops; | 996 | trace_handler = &report_lock_ops; |
814 | setup_pager(); | 997 | setup_pager(); |
815 | read_events(); | 998 | read_events(); |
816 | dump_map(); | 999 | dump_info(); |
817 | } else { | 1000 | } else { |
818 | usage_with_options(lock_usage, lock_options); | 1001 | usage_with_options(lock_usage, lock_options); |
819 | } | 1002 | } |