author     mingo <mingo@europe.(none)>                2009-09-14 14:04:48 -0400
committer  Ingo Molnar <mingo@elte.hu>                2009-09-14 14:08:23 -0400
commit     39aeb52f99f2380c1f16036deed2f7bb8b2e0559 (patch)
tree       5d9c6dbc12da0bceb9776be4d1d7082d21850095 /tools
parent     08f69e6c2e59b3d73343f8c9ecf758e0133dbc22 (diff)
perf sched: Add support for sched:sched_stat_runtime events
This allows more precise 'perf sched latency' output:
 ---------------------------------------------------------------------------------------
  Task                  |  Runtime ms | Switches | Average delay ms | Maximum delay ms |
 ---------------------------------------------------------------------------------------
  ksoftirqd/0-4         |    0.010 ms |        2 | avg:    2.476 ms | max:    2.977 ms |
  perf-12328            |   15.844 ms |       66 | avg:    1.118 ms | max:    9.979 ms |
  bdi-default-235       |    0.009 ms |        1 | avg:    0.998 ms | max:    0.998 ms |
  events/1-8            |    0.020 ms |        2 | avg:    0.998 ms | max:    0.998 ms |
  events/0-7            |    0.018 ms |        2 | avg:    0.992 ms | max:    0.996 ms |
  sleep-12329           |    0.742 ms |        3 | avg:    0.906 ms | max:    2.289 ms |
  sshd-12122            |    0.163 ms |        2 | avg:    0.283 ms | max:    0.562 ms |
  loop-getpid-lon-12322 | 1023.636 ms |       69 | avg:    0.208 ms | max:    5.996 ms |
  loop-getpid-lon-12321 | 1038.638 ms |        5 | avg:    0.073 ms | max:    0.171 ms |
  migration/1-5         |    0.000 ms |        1 | avg:    0.006 ms | max:    0.006 ms |
 ---------------------------------------------------------------------------------------
  TOTAL:                | 2079.078 ms |      153 |
 -------------------------------------------------
Also, streamline the code a bit more, add asserts for various state-machine
failures (they should be debugged if they occur) and tidy up a few odds
and ends.
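A typical way to produce such a report (shown here as a hypothetical
invocation; the exact options depend on the perf version in use) is to
record the scheduling tracepoints for a short workload and then run the
latency report:

  # record sched tracepoints (including sched_stat_runtime) while a workload runs
  perf sched record sleep 5
  # summarize per-task runtime, context switches and wakeup latencies
  perf sched latency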
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools')
 -rw-r--r--  tools/perf/builtin-sched.c  264
 1 files changed, 173 insertions, 91 deletions
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 93ef7b215aba..adcb563ec4d2 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -50,7 +50,7 @@ static u64 sleep_measurement_overhead;
 
 static unsigned long nr_tasks;
 
-struct sched_event;
+struct sched_atom;
 
 struct task_desc {
 	unsigned long nr;
@@ -59,7 +59,7 @@ struct task_desc {
 
 	unsigned long nr_events;
 	unsigned long curr_event;
-	struct sched_event **events;
+	struct sched_atom **atoms;
 
 	pthread_t thread;
 	sem_t sleep_sem;
@@ -76,7 +76,7 @@ enum sched_event_type {
 	SCHED_EVENT_WAKEUP,
 };
 
-struct sched_event {
+struct sched_atom {
 	enum sched_event_type type;
 	u64 timestamp;
 	u64 duration;
@@ -137,8 +137,8 @@ struct work_atom {
 	u64 runtime;
 };
 
-struct task_atoms {
-	struct list_head atom_list;
+struct work_atoms {
+	struct list_head work_list;
 	struct thread *thread;
 	struct rb_node node;
 	u64 max_lat;
@@ -147,7 +147,7 @@ struct task_atoms {
 	u64 total_runtime;
 };
 
-typedef int (*sort_fn_t)(struct task_atoms *, struct task_atoms *);
+typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);
 
 static struct rb_root atom_root, sorted_atom_root;
 
@@ -220,10 +220,10 @@ static void calibrate_sleep_measurement_overhead(void)
 	printf("sleep measurement overhead: %Ld nsecs\n", min_delta);
 }
 
-static struct sched_event *
+static struct sched_atom *
 get_new_event(struct task_desc *task, u64 timestamp)
 {
-	struct sched_event *event = calloc(1, sizeof(*event));
+	struct sched_atom *event = calloc(1, sizeof(*event));
 	unsigned long idx = task->nr_events;
 	size_t size;
 
@@ -231,27 +231,27 @@ get_new_event(struct task_desc *task, u64 timestamp)
 	event->nr = idx;
 
 	task->nr_events++;
-	size = sizeof(struct sched_event *) * task->nr_events;
-	task->events = realloc(task->events, size);
-	BUG_ON(!task->events);
+	size = sizeof(struct sched_atom *) * task->nr_events;
+	task->atoms = realloc(task->atoms, size);
+	BUG_ON(!task->atoms);
 
-	task->events[idx] = event;
+	task->atoms[idx] = event;
 
 	return event;
 }
 
-static struct sched_event *last_event(struct task_desc *task)
+static struct sched_atom *last_event(struct task_desc *task)
 {
 	if (!task->nr_events)
 		return NULL;
 
-	return task->events[task->nr_events - 1];
+	return task->atoms[task->nr_events - 1];
 }
 
 static void
 add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration)
 {
-	struct sched_event *event, *curr_event = last_event(task);
+	struct sched_atom *event, *curr_event = last_event(task);
 
 	/*
 	 * optimize an existing RUN event by merging this one
@@ -275,7 +275,7 @@ static void
 add_sched_event_wakeup(struct task_desc *task, u64 timestamp,
 		       struct task_desc *wakee)
 {
-	struct sched_event *event, *wakee_event;
+	struct sched_atom *event, *wakee_event;
 
 	event = get_new_event(task, timestamp);
 	event->type = SCHED_EVENT_WAKEUP;
@@ -303,7 +303,7 @@ static void
 add_sched_event_sleep(struct task_desc *task, u64 timestamp,
 		      u64 task_state __used)
 {
-	struct sched_event *event = get_new_event(task, timestamp);
+	struct sched_atom *event = get_new_event(task, timestamp);
 
 	event->type = SCHED_EVENT_SLEEP;
 
@@ -372,27 +372,27 @@ static void add_cross_task_wakeups(void)
 }
 
 static void
-process_sched_event(struct task_desc *this_task __used, struct sched_event *event)
+process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom)
 {
 	int ret = 0;
 	u64 now;
 	long long delta;
 
 	now = get_nsecs();
-	delta = start_time + event->timestamp - now;
+	delta = start_time + atom->timestamp - now;
 
-	switch (event->type) {
+	switch (atom->type) {
 		case SCHED_EVENT_RUN:
-			burn_nsecs(event->duration);
+			burn_nsecs(atom->duration);
 			break;
 		case SCHED_EVENT_SLEEP:
-			if (event->wait_sem)
-				ret = sem_wait(event->wait_sem);
+			if (atom->wait_sem)
+				ret = sem_wait(atom->wait_sem);
 			BUG_ON(ret);
 			break;
 		case SCHED_EVENT_WAKEUP:
-			if (event->wait_sem)
-				ret = sem_post(event->wait_sem);
+			if (atom->wait_sem)
+				ret = sem_post(atom->wait_sem);
 			BUG_ON(ret);
 			break;
 		default:
@@ -467,7 +467,7 @@ again:
 
 	for (i = 0; i < this_task->nr_events; i++) {
 		this_task->curr_event = i;
-		process_sched_event(this_task, this_task->events[i]);
+		process_sched_event(this_task, this_task->atoms[i]);
 	}
 
 	cpu_usage_1 = get_cpu_usage_nsec_self();
@@ -649,7 +649,7 @@ static void __cmd_replay(void)
 	if (multitarget_wakeups)
 		printf("multi-target wakeups: %ld\n", multitarget_wakeups);
 	if (nr_run_events_optimized)
-		printf("run events optimized: %ld\n",
+		printf("run atoms optimized: %ld\n",
 			nr_run_events_optimized);
 
 	print_task_traces();
@@ -727,6 +727,20 @@ struct trace_switch_event {
 	u32 next_prio;
 };
 
+struct trace_runtime_event {
+	u32 size;
+
+	u16 common_type;
+	u8 common_flags;
+	u8 common_preempt_count;
+	u32 common_pid;
+	u32 common_tgid;
+
+	char comm[16];
+	u32 pid;
+	u64 runtime;
+	u64 vruntime;
+};
 
 struct trace_wakeup_event {
 	u32 size;
@@ -767,6 +781,12 @@ struct trace_sched_handler {
 			     u64 timestamp,
 			     struct thread *thread);
 
+	void (*runtime_event)(struct trace_runtime_event *,
+			      struct event *,
+			      int cpu,
+			      u64 timestamp,
+			      struct thread *thread);
+
 	void (*wakeup_event)(struct trace_wakeup_event *,
 			     struct event *,
 			     int cpu,
@@ -881,7 +901,7 @@ struct sort_dimension {
 static LIST_HEAD(cmp_pid);
 
 static int
-thread_lat_cmp(struct list_head *list, struct task_atoms *l, struct task_atoms *r)
+thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r)
 {
 	struct sort_dimension *sort;
 	int ret = 0;
@@ -897,18 +917,18 @@ thread_lat_cmp(struct list_head *list, struct task_atoms *l, struct task_atoms *
 	return ret;
 }
 
-static struct task_atoms *
+static struct work_atoms *
 thread_atoms_search(struct rb_root *root, struct thread *thread,
 		    struct list_head *sort_list)
 {
 	struct rb_node *node = root->rb_node;
-	struct task_atoms key = { .thread = thread };
+	struct work_atoms key = { .thread = thread };
 
 	while (node) {
-		struct task_atoms *atoms;
+		struct work_atoms *atoms;
 		int cmp;
 
-		atoms = container_of(node, struct task_atoms, node);
+		atoms = container_of(node, struct work_atoms, node);
 
 		cmp = thread_lat_cmp(sort_list, &key, atoms);
 		if (cmp > 0)
@@ -924,16 +944,16 @@ thread_atoms_search(struct rb_root *root, struct thread *thread,
 }
 
 static void
-__thread_latency_insert(struct rb_root *root, struct task_atoms *data,
+__thread_latency_insert(struct rb_root *root, struct work_atoms *data,
 			struct list_head *sort_list)
 {
 	struct rb_node **new = &(root->rb_node), *parent = NULL;
 
 	while (*new) {
-		struct task_atoms *this;
+		struct work_atoms *this;
 		int cmp;
 
-		this = container_of(*new, struct task_atoms, node);
+		this = container_of(*new, struct work_atoms, node);
 		parent = *new;
 
 		cmp = thread_lat_cmp(sort_list, data, this);
@@ -950,14 +970,14 @@ __thread_latency_insert(struct rb_root *root, struct task_atoms *data,
 
 static void thread_atoms_insert(struct thread *thread)
 {
-	struct task_atoms *atoms;
+	struct work_atoms *atoms;
 
 	atoms = calloc(sizeof(*atoms), 1);
 	if (!atoms)
 		die("No memory");
 
 	atoms->thread = thread;
-	INIT_LIST_HEAD(&atoms->atom_list);
+	INIT_LIST_HEAD(&atoms->work_list);
 	__thread_latency_insert(&atom_root, atoms, &cmp_pid);
 }
 
@@ -980,10 +1000,9 @@ static char sched_out_state(struct trace_switch_event *switch_event)
 }
 
 static void
-lat_sched_out(struct task_atoms *atoms,
-	      struct trace_switch_event *switch_event __used,
-	      u64 delta,
-	      u64 timestamp)
+add_sched_out_event(struct work_atoms *atoms,
+		    char run_state,
+		    u64 timestamp)
 {
 	struct work_atom *atom;
 
@@ -993,25 +1012,37 @@ lat_sched_out(struct task_atoms *atoms,
 
 	atom->sched_out_time = timestamp;
 
-	if (sched_out_state(switch_event) == 'R') {
+	if (run_state == 'R') {
 		atom->state = THREAD_WAIT_CPU;
 		atom->wake_up_time = atom->sched_out_time;
 	}
 
-	atom->runtime = delta;
-	list_add_tail(&atom->list, &atoms->atom_list);
+	list_add_tail(&atom->list, &atoms->work_list);
+}
+
+static void
+add_runtime_event(struct work_atoms *atoms, u64 delta, u64 timestamp __used)
+{
+	struct work_atom *atom;
+
+	BUG_ON(list_empty(&atoms->work_list));
+
+	atom = list_entry(atoms->work_list.prev, struct work_atom, list);
+
+	atom->runtime += delta;
+	atoms->total_runtime += delta;
 }
 
 static void
-lat_sched_in(struct task_atoms *atoms, u64 timestamp)
+add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
 {
 	struct work_atom *atom;
 	u64 delta;
 
-	if (list_empty(&atoms->atom_list))
+	if (list_empty(&atoms->work_list))
 		return;
 
-	atom = list_entry(atoms->atom_list.prev, struct work_atom, list);
+	atom = list_entry(atoms->work_list.prev, struct work_atom, list);
 
 	if (atom->state != THREAD_WAIT_CPU)
 		return;
@@ -1029,7 +1060,6 @@ lat_sched_in(struct task_atoms *atoms, u64 timestamp)
 	if (delta > atoms->max_lat)
 		atoms->max_lat = delta;
 	atoms->nb_atoms++;
-	atoms->total_runtime += atom->runtime;
 }
 
 static void
@@ -1039,13 +1069,12 @@ latency_switch_event(struct trace_switch_event *switch_event,
 		     u64 timestamp,
 		     struct thread *thread __used)
 {
-	struct task_atoms *out_atoms, *in_atoms;
+	struct work_atoms *out_events, *in_events;
 	struct thread *sched_out, *sched_in;
 	u64 timestamp0;
 	s64 delta;
 
-	if (cpu >= MAX_CPUS || cpu < 0)
-		return;
+	BUG_ON(cpu >= MAX_CPUS || cpu < 0);
 
 	timestamp0 = cpu_last_switched[cpu];
 	cpu_last_switched[cpu] = timestamp;
@@ -1061,34 +1090,63 @@ latency_switch_event(struct trace_switch_event *switch_event,
 	sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
 	sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);
 
-	in_atoms = thread_atoms_search(&atom_root, sched_in, &cmp_pid);
-	if (!in_atoms) {
+	out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
+	if (!out_events) {
+		thread_atoms_insert(sched_out);
+		out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
+		if (!out_events)
+			die("out-event: Internal tree error");
+	}
+	add_sched_out_event(out_events, sched_out_state(switch_event), timestamp);
+
+	in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid);
+	if (!in_events) {
 		thread_atoms_insert(sched_in);
-		in_atoms = thread_atoms_search(&atom_root, sched_in, &cmp_pid);
-		if (!in_atoms)
-			die("in-atom: Internal tree error");
+		in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid);
+		if (!in_events)
+			die("in-event: Internal tree error");
+		/*
+		 * Take came in we have not heard about yet,
+		 * add in an initial atom in runnable state:
+		 */
+		add_sched_out_event(in_events, 'R', timestamp);
 	}
+	add_sched_in_event(in_events, timestamp);
+}
 
-	out_atoms = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
-	if (!out_atoms) {
-		thread_atoms_insert(sched_out);
-		out_atoms = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
-		if (!out_atoms)
-			die("out-atom: Internal tree error");
+static void
+latency_runtime_event(struct trace_runtime_event *runtime_event,
+		      struct event *event __used,
+		      int cpu,
+		      u64 timestamp,
+		      struct thread *this_thread __used)
+{
+	struct work_atoms *atoms;
+	struct thread *thread;
+
+	BUG_ON(cpu >= MAX_CPUS || cpu < 0);
+
+	thread = threads__findnew(runtime_event->pid, &threads, &last_match);
+	atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
+	if (!atoms) {
+		thread_atoms_insert(thread);
+		atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
+		if (!atoms)
+			die("in-event: Internal tree error");
+		add_sched_out_event(atoms, 'R', timestamp);
 	}
 
-	lat_sched_in(in_atoms, timestamp);
-	lat_sched_out(out_atoms, switch_event, delta, timestamp);
+	add_runtime_event(atoms, runtime_event->runtime, timestamp);
 }
 
 static void
 latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
-		     struct event *event __used,
+		     struct event *__event __used,
 		     int cpu __used,
 		     u64 timestamp,
 		     struct thread *thread __used)
 {
-	struct task_atoms *atoms;
+	struct work_atoms *atoms;
 	struct work_atom *atom;
 	struct thread *wakee;
 
@@ -1100,16 +1158,20 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
 	atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
 	if (!atoms) {
 		thread_atoms_insert(wakee);
-		return;
+		atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
+		if (!atoms)
+			die("wakeup-event: Internal tree error");
+		add_sched_out_event(atoms, 'S', timestamp);
 	}
 
-	if (list_empty(&atoms->atom_list))
-		return;
+	BUG_ON(list_empty(&atoms->work_list));
 
-	atom = list_entry(atoms->atom_list.prev, struct work_atom, list);
+	atom = list_entry(atoms->work_list.prev, struct work_atom, list);
 
-	if (atom->state != THREAD_SLEEPING)
+	if (atom->state != THREAD_SLEEPING) {
+		printf("boo2\n");
 		return;
+	}
 
 	nr_timestamps++;
 	if (atom->sched_out_time > timestamp) {
@@ -1124,40 +1186,41 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
 static struct trace_sched_handler lat_ops = {
 	.wakeup_event = latency_wakeup_event,
 	.switch_event = latency_switch_event,
+	.runtime_event = latency_runtime_event,
 	.fork_event = latency_fork_event,
 };
 
-static void output_lat_thread(struct task_atoms *atom_list)
+static void output_lat_thread(struct work_atoms *work_list)
 {
 	int i;
 	int ret;
 	u64 avg;
 
-	if (!atom_list->nb_atoms)
+	if (!work_list->nb_atoms)
 		return;
 	/*
 	 * Ignore idle threads:
 	 */
-	if (!atom_list->thread->pid)
+	if (!work_list->thread->pid)
 		return;
 
-	all_runtime += atom_list->total_runtime;
-	all_count += atom_list->nb_atoms;
+	all_runtime += work_list->total_runtime;
+	all_count += work_list->nb_atoms;
 
-	ret = printf(" %s-%d ", atom_list->thread->comm, atom_list->thread->pid);
+	ret = printf(" %s-%d ", work_list->thread->comm, work_list->thread->pid);
 
 	for (i = 0; i < 24 - ret; i++)
 		printf(" ");
 
-	avg = atom_list->total_lat / atom_list->nb_atoms;
+	avg = work_list->total_lat / work_list->nb_atoms;
 
 	printf("|%9.3f ms |%9llu | avg:%9.3f ms | max:%9.3f ms |\n",
-		(double)atom_list->total_runtime / 1e6,
-		atom_list->nb_atoms, (double)avg / 1e6,
-		(double)atom_list->max_lat / 1e6);
+		(double)work_list->total_runtime / 1e6,
+		work_list->nb_atoms, (double)avg / 1e6,
+		(double)work_list->max_lat / 1e6);
 }
 
-static int pid_cmp(struct task_atoms *l, struct task_atoms *r)
+static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
 {
 	if (l->thread->pid < r->thread->pid)
 		return -1;
@@ -1172,7 +1235,7 @@ static struct sort_dimension pid_sort_dimension = {
 	.cmp = pid_cmp,
 };
 
-static int avg_cmp(struct task_atoms *l, struct task_atoms *r)
+static int avg_cmp(struct work_atoms *l, struct work_atoms *r)
 {
 	u64 avgl, avgr;
 
@@ -1198,7 +1261,7 @@ static struct sort_dimension avg_sort_dimension = {
 	.cmp = avg_cmp,
 };
 
-static int max_cmp(struct task_atoms *l, struct task_atoms *r)
+static int max_cmp(struct work_atoms *l, struct work_atoms *r)
 {
 	if (l->max_lat < r->max_lat)
 		return -1;
@@ -1213,7 +1276,7 @@ static struct sort_dimension max_sort_dimension = {
 	.cmp = max_cmp,
 };
 
-static int switch_cmp(struct task_atoms *l, struct task_atoms *r)
+static int switch_cmp(struct work_atoms *l, struct work_atoms *r)
 {
 	if (l->nb_atoms < r->nb_atoms)
 		return -1;
@@ -1228,7 +1291,7 @@ static struct sort_dimension switch_sort_dimension = {
 	.cmp = switch_cmp,
 };
 
-static int runtime_cmp(struct task_atoms *l, struct task_atoms *r)
+static int runtime_cmp(struct work_atoms *l, struct work_atoms *r)
 {
 	if (l->total_runtime < r->total_runtime)
 		return -1;
@@ -1277,13 +1340,13 @@ static void sort_lat(void)
 	struct rb_node *node;
 
 	for (;;) {
-		struct task_atoms *data;
+		struct work_atoms *data;
 		node = rb_first(&atom_root);
 		if (!node)
 			break;
 
 		rb_erase(node, &atom_root);
-		data = rb_entry(node, struct task_atoms, node);
+		data = rb_entry(node, struct work_atoms, node);
 		__thread_latency_insert(&sorted_atom_root, data, &sort_list);
 	}
 }
@@ -1303,10 +1366,10 @@ static void __cmd_lat(void)
 	next = rb_first(&sorted_atom_root);
 
 	while (next) {
-		struct task_atoms *atom_list;
+		struct work_atoms *work_list;
 
-		atom_list = rb_entry(next, struct task_atoms, node);
-		output_lat_thread(atom_list);
+		work_list = rb_entry(next, struct work_atoms, node);
+		output_lat_thread(work_list);
 		next = rb_next(next);
 	}
 
@@ -1369,6 +1432,23 @@ process_sched_switch_event(struct raw_event_sample *raw,
 }
 
 static void
+process_sched_runtime_event(struct raw_event_sample *raw,
+			    struct event *event,
+			    int cpu __used,
+			    u64 timestamp __used,
+			    struct thread *thread __used)
+{
+	struct trace_runtime_event runtime_event;
+
+	FILL_ARRAY(runtime_event, comm, event, raw->data);
+	FILL_FIELD(runtime_event, pid, event, raw->data);
+	FILL_FIELD(runtime_event, runtime, event, raw->data);
+	FILL_FIELD(runtime_event, vruntime, event, raw->data);
+
+	trace_handler->runtime_event(&runtime_event, event, cpu, timestamp, thread);
+}
+
+static void
 process_sched_fork_event(struct raw_event_sample *raw,
 			 struct event *event,
 			 int cpu __used,
@@ -1410,6 +1490,8 @@ process_raw_event(event_t *raw_event __used, void *more_data,
 
 	if (!strcmp(event->name, "sched_switch"))
 		process_sched_switch_event(raw, event, cpu, timestamp, thread);
+	if (!strcmp(event->name, "sched_stat_runtime"))
+		process_sched_runtime_event(raw, event, cpu, timestamp, thread);
 	if (!strcmp(event->name, "sched_wakeup"))
 		process_sched_wakeup_event(raw, event, cpu, timestamp, thread);
 	if (!strcmp(event->name, "sched_wakeup_new"))