diff options
| author | mingo <mingo@europe.(none)> | 2009-09-14 14:04:48 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2009-09-14 14:08:23 -0400 |
| commit | 39aeb52f99f2380c1f16036deed2f7bb8b2e0559 (patch) | |
| tree | 5d9c6dbc12da0bceb9776be4d1d7082d21850095 /tools | |
| parent | 08f69e6c2e59b3d73343f8c9ecf758e0133dbc22 (diff) | |
perf sched: Add support for sched:sched_stat_runtime events
This allows more precise 'perf sched latency' output:
---------------------------------------------------------------------------------------
Task | Runtime ms | Switches | Average delay ms | Maximum delay ms |
---------------------------------------------------------------------------------------
ksoftirqd/0-4 | 0.010 ms | 2 | avg: 2.476 ms | max: 2.977 ms |
perf-12328 | 15.844 ms | 66 | avg: 1.118 ms | max: 9.979 ms |
bdi-default-235 | 0.009 ms | 1 | avg: 0.998 ms | max: 0.998 ms |
events/1-8 | 0.020 ms | 2 | avg: 0.998 ms | max: 0.998 ms |
events/0-7 | 0.018 ms | 2 | avg: 0.992 ms | max: 0.996 ms |
sleep-12329 | 0.742 ms | 3 | avg: 0.906 ms | max: 2.289 ms |
sshd-12122 | 0.163 ms | 2 | avg: 0.283 ms | max: 0.562 ms |
loop-getpid-lon-12322 | 1023.636 ms | 69 | avg: 0.208 ms | max: 5.996 ms |
loop-getpid-lon-12321 | 1038.638 ms | 5 | avg: 0.073 ms | max: 0.171 ms |
migration/1-5 | 0.000 ms | 1 | avg: 0.006 ms | max: 0.006 ms |
---------------------------------------------------------------------------------------
TOTAL: | 2079.078 ms | 153 |
-------------------------------------------------
Also, streamline the code a bit more, add asserts for various state
machine failures (they should be debugged if they occur) and fix
a few odds and ends.
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/perf/builtin-sched.c | 264 |
1 files changed, 173 insertions, 91 deletions
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 93ef7b215aba..adcb563ec4d2 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c | |||
| @@ -50,7 +50,7 @@ static u64 sleep_measurement_overhead; | |||
| 50 | 50 | ||
| 51 | static unsigned long nr_tasks; | 51 | static unsigned long nr_tasks; |
| 52 | 52 | ||
| 53 | struct sched_event; | 53 | struct sched_atom; |
| 54 | 54 | ||
| 55 | struct task_desc { | 55 | struct task_desc { |
| 56 | unsigned long nr; | 56 | unsigned long nr; |
| @@ -59,7 +59,7 @@ struct task_desc { | |||
| 59 | 59 | ||
| 60 | unsigned long nr_events; | 60 | unsigned long nr_events; |
| 61 | unsigned long curr_event; | 61 | unsigned long curr_event; |
| 62 | struct sched_event **events; | 62 | struct sched_atom **atoms; |
| 63 | 63 | ||
| 64 | pthread_t thread; | 64 | pthread_t thread; |
| 65 | sem_t sleep_sem; | 65 | sem_t sleep_sem; |
| @@ -76,7 +76,7 @@ enum sched_event_type { | |||
| 76 | SCHED_EVENT_WAKEUP, | 76 | SCHED_EVENT_WAKEUP, |
| 77 | }; | 77 | }; |
| 78 | 78 | ||
| 79 | struct sched_event { | 79 | struct sched_atom { |
| 80 | enum sched_event_type type; | 80 | enum sched_event_type type; |
| 81 | u64 timestamp; | 81 | u64 timestamp; |
| 82 | u64 duration; | 82 | u64 duration; |
| @@ -137,8 +137,8 @@ struct work_atom { | |||
| 137 | u64 runtime; | 137 | u64 runtime; |
| 138 | }; | 138 | }; |
| 139 | 139 | ||
| 140 | struct task_atoms { | 140 | struct work_atoms { |
| 141 | struct list_head atom_list; | 141 | struct list_head work_list; |
| 142 | struct thread *thread; | 142 | struct thread *thread; |
| 143 | struct rb_node node; | 143 | struct rb_node node; |
| 144 | u64 max_lat; | 144 | u64 max_lat; |
| @@ -147,7 +147,7 @@ struct task_atoms { | |||
| 147 | u64 total_runtime; | 147 | u64 total_runtime; |
| 148 | }; | 148 | }; |
| 149 | 149 | ||
| 150 | typedef int (*sort_fn_t)(struct task_atoms *, struct task_atoms *); | 150 | typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *); |
| 151 | 151 | ||
| 152 | static struct rb_root atom_root, sorted_atom_root; | 152 | static struct rb_root atom_root, sorted_atom_root; |
| 153 | 153 | ||
| @@ -220,10 +220,10 @@ static void calibrate_sleep_measurement_overhead(void) | |||
| 220 | printf("sleep measurement overhead: %Ld nsecs\n", min_delta); | 220 | printf("sleep measurement overhead: %Ld nsecs\n", min_delta); |
| 221 | } | 221 | } |
| 222 | 222 | ||
| 223 | static struct sched_event * | 223 | static struct sched_atom * |
| 224 | get_new_event(struct task_desc *task, u64 timestamp) | 224 | get_new_event(struct task_desc *task, u64 timestamp) |
| 225 | { | 225 | { |
| 226 | struct sched_event *event = calloc(1, sizeof(*event)); | 226 | struct sched_atom *event = calloc(1, sizeof(*event)); |
| 227 | unsigned long idx = task->nr_events; | 227 | unsigned long idx = task->nr_events; |
| 228 | size_t size; | 228 | size_t size; |
| 229 | 229 | ||
| @@ -231,27 +231,27 @@ get_new_event(struct task_desc *task, u64 timestamp) | |||
| 231 | event->nr = idx; | 231 | event->nr = idx; |
| 232 | 232 | ||
| 233 | task->nr_events++; | 233 | task->nr_events++; |
| 234 | size = sizeof(struct sched_event *) * task->nr_events; | 234 | size = sizeof(struct sched_atom *) * task->nr_events; |
| 235 | task->events = realloc(task->events, size); | 235 | task->atoms = realloc(task->atoms, size); |
| 236 | BUG_ON(!task->events); | 236 | BUG_ON(!task->atoms); |
| 237 | 237 | ||
| 238 | task->events[idx] = event; | 238 | task->atoms[idx] = event; |
| 239 | 239 | ||
| 240 | return event; | 240 | return event; |
| 241 | } | 241 | } |
| 242 | 242 | ||
| 243 | static struct sched_event *last_event(struct task_desc *task) | 243 | static struct sched_atom *last_event(struct task_desc *task) |
| 244 | { | 244 | { |
| 245 | if (!task->nr_events) | 245 | if (!task->nr_events) |
| 246 | return NULL; | 246 | return NULL; |
| 247 | 247 | ||
| 248 | return task->events[task->nr_events - 1]; | 248 | return task->atoms[task->nr_events - 1]; |
| 249 | } | 249 | } |
| 250 | 250 | ||
| 251 | static void | 251 | static void |
| 252 | add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration) | 252 | add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration) |
| 253 | { | 253 | { |
| 254 | struct sched_event *event, *curr_event = last_event(task); | 254 | struct sched_atom *event, *curr_event = last_event(task); |
| 255 | 255 | ||
| 256 | /* | 256 | /* |
| 257 | * optimize an existing RUN event by merging this one | 257 | * optimize an existing RUN event by merging this one |
| @@ -275,7 +275,7 @@ static void | |||
| 275 | add_sched_event_wakeup(struct task_desc *task, u64 timestamp, | 275 | add_sched_event_wakeup(struct task_desc *task, u64 timestamp, |
| 276 | struct task_desc *wakee) | 276 | struct task_desc *wakee) |
| 277 | { | 277 | { |
| 278 | struct sched_event *event, *wakee_event; | 278 | struct sched_atom *event, *wakee_event; |
| 279 | 279 | ||
| 280 | event = get_new_event(task, timestamp); | 280 | event = get_new_event(task, timestamp); |
| 281 | event->type = SCHED_EVENT_WAKEUP; | 281 | event->type = SCHED_EVENT_WAKEUP; |
| @@ -303,7 +303,7 @@ static void | |||
| 303 | add_sched_event_sleep(struct task_desc *task, u64 timestamp, | 303 | add_sched_event_sleep(struct task_desc *task, u64 timestamp, |
| 304 | u64 task_state __used) | 304 | u64 task_state __used) |
| 305 | { | 305 | { |
| 306 | struct sched_event *event = get_new_event(task, timestamp); | 306 | struct sched_atom *event = get_new_event(task, timestamp); |
| 307 | 307 | ||
| 308 | event->type = SCHED_EVENT_SLEEP; | 308 | event->type = SCHED_EVENT_SLEEP; |
| 309 | 309 | ||
| @@ -372,27 +372,27 @@ static void add_cross_task_wakeups(void) | |||
| 372 | } | 372 | } |
| 373 | 373 | ||
| 374 | static void | 374 | static void |
| 375 | process_sched_event(struct task_desc *this_task __used, struct sched_event *event) | 375 | process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom) |
| 376 | { | 376 | { |
| 377 | int ret = 0; | 377 | int ret = 0; |
| 378 | u64 now; | 378 | u64 now; |
| 379 | long long delta; | 379 | long long delta; |
| 380 | 380 | ||
| 381 | now = get_nsecs(); | 381 | now = get_nsecs(); |
| 382 | delta = start_time + event->timestamp - now; | 382 | delta = start_time + atom->timestamp - now; |
| 383 | 383 | ||
| 384 | switch (event->type) { | 384 | switch (atom->type) { |
| 385 | case SCHED_EVENT_RUN: | 385 | case SCHED_EVENT_RUN: |
| 386 | burn_nsecs(event->duration); | 386 | burn_nsecs(atom->duration); |
| 387 | break; | 387 | break; |
| 388 | case SCHED_EVENT_SLEEP: | 388 | case SCHED_EVENT_SLEEP: |
| 389 | if (event->wait_sem) | 389 | if (atom->wait_sem) |
| 390 | ret = sem_wait(event->wait_sem); | 390 | ret = sem_wait(atom->wait_sem); |
| 391 | BUG_ON(ret); | 391 | BUG_ON(ret); |
| 392 | break; | 392 | break; |
| 393 | case SCHED_EVENT_WAKEUP: | 393 | case SCHED_EVENT_WAKEUP: |
| 394 | if (event->wait_sem) | 394 | if (atom->wait_sem) |
| 395 | ret = sem_post(event->wait_sem); | 395 | ret = sem_post(atom->wait_sem); |
| 396 | BUG_ON(ret); | 396 | BUG_ON(ret); |
| 397 | break; | 397 | break; |
| 398 | default: | 398 | default: |
| @@ -467,7 +467,7 @@ again: | |||
| 467 | 467 | ||
| 468 | for (i = 0; i < this_task->nr_events; i++) { | 468 | for (i = 0; i < this_task->nr_events; i++) { |
| 469 | this_task->curr_event = i; | 469 | this_task->curr_event = i; |
| 470 | process_sched_event(this_task, this_task->events[i]); | 470 | process_sched_event(this_task, this_task->atoms[i]); |
| 471 | } | 471 | } |
| 472 | 472 | ||
| 473 | cpu_usage_1 = get_cpu_usage_nsec_self(); | 473 | cpu_usage_1 = get_cpu_usage_nsec_self(); |
| @@ -649,7 +649,7 @@ static void __cmd_replay(void) | |||
| 649 | if (multitarget_wakeups) | 649 | if (multitarget_wakeups) |
| 650 | printf("multi-target wakeups: %ld\n", multitarget_wakeups); | 650 | printf("multi-target wakeups: %ld\n", multitarget_wakeups); |
| 651 | if (nr_run_events_optimized) | 651 | if (nr_run_events_optimized) |
| 652 | printf("run events optimized: %ld\n", | 652 | printf("run atoms optimized: %ld\n", |
| 653 | nr_run_events_optimized); | 653 | nr_run_events_optimized); |
| 654 | 654 | ||
| 655 | print_task_traces(); | 655 | print_task_traces(); |
| @@ -727,6 +727,20 @@ struct trace_switch_event { | |||
| 727 | u32 next_prio; | 727 | u32 next_prio; |
| 728 | }; | 728 | }; |
| 729 | 729 | ||
| 730 | struct trace_runtime_event { | ||
| 731 | u32 size; | ||
| 732 | |||
| 733 | u16 common_type; | ||
| 734 | u8 common_flags; | ||
| 735 | u8 common_preempt_count; | ||
| 736 | u32 common_pid; | ||
| 737 | u32 common_tgid; | ||
| 738 | |||
| 739 | char comm[16]; | ||
| 740 | u32 pid; | ||
| 741 | u64 runtime; | ||
| 742 | u64 vruntime; | ||
| 743 | }; | ||
| 730 | 744 | ||
| 731 | struct trace_wakeup_event { | 745 | struct trace_wakeup_event { |
| 732 | u32 size; | 746 | u32 size; |
| @@ -767,6 +781,12 @@ struct trace_sched_handler { | |||
| 767 | u64 timestamp, | 781 | u64 timestamp, |
| 768 | struct thread *thread); | 782 | struct thread *thread); |
| 769 | 783 | ||
| 784 | void (*runtime_event)(struct trace_runtime_event *, | ||
| 785 | struct event *, | ||
| 786 | int cpu, | ||
| 787 | u64 timestamp, | ||
| 788 | struct thread *thread); | ||
| 789 | |||
| 770 | void (*wakeup_event)(struct trace_wakeup_event *, | 790 | void (*wakeup_event)(struct trace_wakeup_event *, |
| 771 | struct event *, | 791 | struct event *, |
| 772 | int cpu, | 792 | int cpu, |
| @@ -881,7 +901,7 @@ struct sort_dimension { | |||
| 881 | static LIST_HEAD(cmp_pid); | 901 | static LIST_HEAD(cmp_pid); |
| 882 | 902 | ||
| 883 | static int | 903 | static int |
| 884 | thread_lat_cmp(struct list_head *list, struct task_atoms *l, struct task_atoms *r) | 904 | thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r) |
| 885 | { | 905 | { |
| 886 | struct sort_dimension *sort; | 906 | struct sort_dimension *sort; |
| 887 | int ret = 0; | 907 | int ret = 0; |
| @@ -897,18 +917,18 @@ thread_lat_cmp(struct list_head *list, struct task_atoms *l, struct task_atoms * | |||
| 897 | return ret; | 917 | return ret; |
| 898 | } | 918 | } |
| 899 | 919 | ||
| 900 | static struct task_atoms * | 920 | static struct work_atoms * |
| 901 | thread_atoms_search(struct rb_root *root, struct thread *thread, | 921 | thread_atoms_search(struct rb_root *root, struct thread *thread, |
| 902 | struct list_head *sort_list) | 922 | struct list_head *sort_list) |
| 903 | { | 923 | { |
| 904 | struct rb_node *node = root->rb_node; | 924 | struct rb_node *node = root->rb_node; |
| 905 | struct task_atoms key = { .thread = thread }; | 925 | struct work_atoms key = { .thread = thread }; |
| 906 | 926 | ||
| 907 | while (node) { | 927 | while (node) { |
| 908 | struct task_atoms *atoms; | 928 | struct work_atoms *atoms; |
| 909 | int cmp; | 929 | int cmp; |
| 910 | 930 | ||
| 911 | atoms = container_of(node, struct task_atoms, node); | 931 | atoms = container_of(node, struct work_atoms, node); |
| 912 | 932 | ||
| 913 | cmp = thread_lat_cmp(sort_list, &key, atoms); | 933 | cmp = thread_lat_cmp(sort_list, &key, atoms); |
| 914 | if (cmp > 0) | 934 | if (cmp > 0) |
| @@ -924,16 +944,16 @@ thread_atoms_search(struct rb_root *root, struct thread *thread, | |||
| 924 | } | 944 | } |
| 925 | 945 | ||
| 926 | static void | 946 | static void |
| 927 | __thread_latency_insert(struct rb_root *root, struct task_atoms *data, | 947 | __thread_latency_insert(struct rb_root *root, struct work_atoms *data, |
| 928 | struct list_head *sort_list) | 948 | struct list_head *sort_list) |
| 929 | { | 949 | { |
| 930 | struct rb_node **new = &(root->rb_node), *parent = NULL; | 950 | struct rb_node **new = &(root->rb_node), *parent = NULL; |
| 931 | 951 | ||
| 932 | while (*new) { | 952 | while (*new) { |
| 933 | struct task_atoms *this; | 953 | struct work_atoms *this; |
| 934 | int cmp; | 954 | int cmp; |
| 935 | 955 | ||
| 936 | this = container_of(*new, struct task_atoms, node); | 956 | this = container_of(*new, struct work_atoms, node); |
| 937 | parent = *new; | 957 | parent = *new; |
| 938 | 958 | ||
| 939 | cmp = thread_lat_cmp(sort_list, data, this); | 959 | cmp = thread_lat_cmp(sort_list, data, this); |
| @@ -950,14 +970,14 @@ __thread_latency_insert(struct rb_root *root, struct task_atoms *data, | |||
| 950 | 970 | ||
| 951 | static void thread_atoms_insert(struct thread *thread) | 971 | static void thread_atoms_insert(struct thread *thread) |
| 952 | { | 972 | { |
| 953 | struct task_atoms *atoms; | 973 | struct work_atoms *atoms; |
| 954 | 974 | ||
| 955 | atoms = calloc(sizeof(*atoms), 1); | 975 | atoms = calloc(sizeof(*atoms), 1); |
| 956 | if (!atoms) | 976 | if (!atoms) |
| 957 | die("No memory"); | 977 | die("No memory"); |
| 958 | 978 | ||
| 959 | atoms->thread = thread; | 979 | atoms->thread = thread; |
| 960 | INIT_LIST_HEAD(&atoms->atom_list); | 980 | INIT_LIST_HEAD(&atoms->work_list); |
| 961 | __thread_latency_insert(&atom_root, atoms, &cmp_pid); | 981 | __thread_latency_insert(&atom_root, atoms, &cmp_pid); |
| 962 | } | 982 | } |
| 963 | 983 | ||
| @@ -980,10 +1000,9 @@ static char sched_out_state(struct trace_switch_event *switch_event) | |||
| 980 | } | 1000 | } |
| 981 | 1001 | ||
| 982 | static void | 1002 | static void |
| 983 | lat_sched_out(struct task_atoms *atoms, | 1003 | add_sched_out_event(struct work_atoms *atoms, |
| 984 | struct trace_switch_event *switch_event __used, | 1004 | char run_state, |
| 985 | u64 delta, | 1005 | u64 timestamp) |
| 986 | u64 timestamp) | ||
| 987 | { | 1006 | { |
| 988 | struct work_atom *atom; | 1007 | struct work_atom *atom; |
| 989 | 1008 | ||
| @@ -993,25 +1012,37 @@ lat_sched_out(struct task_atoms *atoms, | |||
| 993 | 1012 | ||
| 994 | atom->sched_out_time = timestamp; | 1013 | atom->sched_out_time = timestamp; |
| 995 | 1014 | ||
| 996 | if (sched_out_state(switch_event) == 'R') { | 1015 | if (run_state == 'R') { |
| 997 | atom->state = THREAD_WAIT_CPU; | 1016 | atom->state = THREAD_WAIT_CPU; |
| 998 | atom->wake_up_time = atom->sched_out_time; | 1017 | atom->wake_up_time = atom->sched_out_time; |
| 999 | } | 1018 | } |
| 1000 | 1019 | ||
| 1001 | atom->runtime = delta; | 1020 | list_add_tail(&atom->list, &atoms->work_list); |
| 1002 | list_add_tail(&atom->list, &atoms->atom_list); | 1021 | } |
| 1022 | |||
| 1023 | static void | ||
| 1024 | add_runtime_event(struct work_atoms *atoms, u64 delta, u64 timestamp __used) | ||
| 1025 | { | ||
| 1026 | struct work_atom *atom; | ||
| 1027 | |||
| 1028 | BUG_ON(list_empty(&atoms->work_list)); | ||
| 1029 | |||
| 1030 | atom = list_entry(atoms->work_list.prev, struct work_atom, list); | ||
| 1031 | |||
| 1032 | atom->runtime += delta; | ||
| 1033 | atoms->total_runtime += delta; | ||
| 1003 | } | 1034 | } |
| 1004 | 1035 | ||
| 1005 | static void | 1036 | static void |
| 1006 | lat_sched_in(struct task_atoms *atoms, u64 timestamp) | 1037 | add_sched_in_event(struct work_atoms *atoms, u64 timestamp) |
| 1007 | { | 1038 | { |
| 1008 | struct work_atom *atom; | 1039 | struct work_atom *atom; |
| 1009 | u64 delta; | 1040 | u64 delta; |
| 1010 | 1041 | ||
| 1011 | if (list_empty(&atoms->atom_list)) | 1042 | if (list_empty(&atoms->work_list)) |
| 1012 | return; | 1043 | return; |
| 1013 | 1044 | ||
| 1014 | atom = list_entry(atoms->atom_list.prev, struct work_atom, list); | 1045 | atom = list_entry(atoms->work_list.prev, struct work_atom, list); |
| 1015 | 1046 | ||
| 1016 | if (atom->state != THREAD_WAIT_CPU) | 1047 | if (atom->state != THREAD_WAIT_CPU) |
| 1017 | return; | 1048 | return; |
| @@ -1029,7 +1060,6 @@ lat_sched_in(struct task_atoms *atoms, u64 timestamp) | |||
| 1029 | if (delta > atoms->max_lat) | 1060 | if (delta > atoms->max_lat) |
| 1030 | atoms->max_lat = delta; | 1061 | atoms->max_lat = delta; |
| 1031 | atoms->nb_atoms++; | 1062 | atoms->nb_atoms++; |
| 1032 | atoms->total_runtime += atom->runtime; | ||
| 1033 | } | 1063 | } |
| 1034 | 1064 | ||
| 1035 | static void | 1065 | static void |
| @@ -1039,13 +1069,12 @@ latency_switch_event(struct trace_switch_event *switch_event, | |||
| 1039 | u64 timestamp, | 1069 | u64 timestamp, |
| 1040 | struct thread *thread __used) | 1070 | struct thread *thread __used) |
| 1041 | { | 1071 | { |
| 1042 | struct task_atoms *out_atoms, *in_atoms; | 1072 | struct work_atoms *out_events, *in_events; |
| 1043 | struct thread *sched_out, *sched_in; | 1073 | struct thread *sched_out, *sched_in; |
| 1044 | u64 timestamp0; | 1074 | u64 timestamp0; |
| 1045 | s64 delta; | 1075 | s64 delta; |
| 1046 | 1076 | ||
| 1047 | if (cpu >= MAX_CPUS || cpu < 0) | 1077 | BUG_ON(cpu >= MAX_CPUS || cpu < 0); |
| 1048 | return; | ||
| 1049 | 1078 | ||
| 1050 | timestamp0 = cpu_last_switched[cpu]; | 1079 | timestamp0 = cpu_last_switched[cpu]; |
| 1051 | cpu_last_switched[cpu] = timestamp; | 1080 | cpu_last_switched[cpu] = timestamp; |
| @@ -1061,34 +1090,63 @@ latency_switch_event(struct trace_switch_event *switch_event, | |||
| 1061 | sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match); | 1090 | sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match); |
| 1062 | sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match); | 1091 | sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match); |
| 1063 | 1092 | ||
| 1064 | in_atoms = thread_atoms_search(&atom_root, sched_in, &cmp_pid); | 1093 | out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); |
| 1065 | if (!in_atoms) { | 1094 | if (!out_events) { |
| 1095 | thread_atoms_insert(sched_out); | ||
| 1096 | out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); | ||
| 1097 | if (!out_events) | ||
| 1098 | die("out-event: Internal tree error"); | ||
| 1099 | } | ||
| 1100 | add_sched_out_event(out_events, sched_out_state(switch_event), timestamp); | ||
| 1101 | |||
| 1102 | in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid); | ||
| 1103 | if (!in_events) { | ||
| 1066 | thread_atoms_insert(sched_in); | 1104 | thread_atoms_insert(sched_in); |
| 1067 | in_atoms = thread_atoms_search(&atom_root, sched_in, &cmp_pid); | 1105 | in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid); |
| 1068 | if (!in_atoms) | 1106 | if (!in_events) |
| 1069 | die("in-atom: Internal tree error"); | 1107 | die("in-event: Internal tree error"); |
| 1108 | /* | ||
| 1109 | * Task came in that we have not heard about yet, | ||
| 1110 | * add in an initial atom in runnable state: | ||
| 1111 | */ | ||
| 1112 | add_sched_out_event(in_events, 'R', timestamp); | ||
| 1070 | } | 1113 | } |
| 1114 | add_sched_in_event(in_events, timestamp); | ||
| 1115 | } | ||
| 1071 | 1116 | ||
| 1072 | out_atoms = thread_atoms_search(&atom_root, sched_out, &cmp_pid); | 1117 | static void |
| 1073 | if (!out_atoms) { | 1118 | latency_runtime_event(struct trace_runtime_event *runtime_event, |
| 1074 | thread_atoms_insert(sched_out); | 1119 | struct event *event __used, |
| 1075 | out_atoms = thread_atoms_search(&atom_root, sched_out, &cmp_pid); | 1120 | int cpu, |
| 1076 | if (!out_atoms) | 1121 | u64 timestamp, |
| 1077 | die("out-atom: Internal tree error"); | 1122 | struct thread *this_thread __used) |
| 1123 | { | ||
| 1124 | struct work_atoms *atoms; | ||
| 1125 | struct thread *thread; | ||
| 1126 | |||
| 1127 | BUG_ON(cpu >= MAX_CPUS || cpu < 0); | ||
| 1128 | |||
| 1129 | thread = threads__findnew(runtime_event->pid, &threads, &last_match); | ||
| 1130 | atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); | ||
| 1131 | if (!atoms) { | ||
| 1132 | thread_atoms_insert(thread); | ||
| 1133 | atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); | ||
| 1134 | if (!atoms) | ||
| 1135 | die("in-event: Internal tree error"); | ||
| 1136 | add_sched_out_event(atoms, 'R', timestamp); | ||
| 1078 | } | 1137 | } |
| 1079 | 1138 | ||
| 1080 | lat_sched_in(in_atoms, timestamp); | 1139 | add_runtime_event(atoms, runtime_event->runtime, timestamp); |
| 1081 | lat_sched_out(out_atoms, switch_event, delta, timestamp); | ||
| 1082 | } | 1140 | } |
| 1083 | 1141 | ||
| 1084 | static void | 1142 | static void |
| 1085 | latency_wakeup_event(struct trace_wakeup_event *wakeup_event, | 1143 | latency_wakeup_event(struct trace_wakeup_event *wakeup_event, |
| 1086 | struct event *event __used, | 1144 | struct event *__event __used, |
| 1087 | int cpu __used, | 1145 | int cpu __used, |
| 1088 | u64 timestamp, | 1146 | u64 timestamp, |
| 1089 | struct thread *thread __used) | 1147 | struct thread *thread __used) |
| 1090 | { | 1148 | { |
| 1091 | struct task_atoms *atoms; | 1149 | struct work_atoms *atoms; |
| 1092 | struct work_atom *atom; | 1150 | struct work_atom *atom; |
| 1093 | struct thread *wakee; | 1151 | struct thread *wakee; |
| 1094 | 1152 | ||
| @@ -1100,16 +1158,20 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, | |||
| 1100 | atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); | 1158 | atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); |
| 1101 | if (!atoms) { | 1159 | if (!atoms) { |
| 1102 | thread_atoms_insert(wakee); | 1160 | thread_atoms_insert(wakee); |
| 1103 | return; | 1161 | atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); |
| 1162 | if (!atoms) | ||
| 1163 | die("wakeup-event: Internal tree error"); | ||
| 1164 | add_sched_out_event(atoms, 'S', timestamp); | ||
| 1104 | } | 1165 | } |
| 1105 | 1166 | ||
| 1106 | if (list_empty(&atoms->atom_list)) | 1167 | BUG_ON(list_empty(&atoms->work_list)); |
| 1107 | return; | ||
| 1108 | 1168 | ||
| 1109 | atom = list_entry(atoms->atom_list.prev, struct work_atom, list); | 1169 | atom = list_entry(atoms->work_list.prev, struct work_atom, list); |
| 1110 | 1170 | ||
| 1111 | if (atom->state != THREAD_SLEEPING) | 1171 | if (atom->state != THREAD_SLEEPING) { |
| 1172 | printf("boo2\n"); | ||
| 1112 | return; | 1173 | return; |
| 1174 | } | ||
| 1113 | 1175 | ||
| 1114 | nr_timestamps++; | 1176 | nr_timestamps++; |
| 1115 | if (atom->sched_out_time > timestamp) { | 1177 | if (atom->sched_out_time > timestamp) { |
| @@ -1124,40 +1186,41 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, | |||
| 1124 | static struct trace_sched_handler lat_ops = { | 1186 | static struct trace_sched_handler lat_ops = { |
| 1125 | .wakeup_event = latency_wakeup_event, | 1187 | .wakeup_event = latency_wakeup_event, |
| 1126 | .switch_event = latency_switch_event, | 1188 | .switch_event = latency_switch_event, |
| 1189 | .runtime_event = latency_runtime_event, | ||
| 1127 | .fork_event = latency_fork_event, | 1190 | .fork_event = latency_fork_event, |
| 1128 | }; | 1191 | }; |
| 1129 | 1192 | ||
| 1130 | static void output_lat_thread(struct task_atoms *atom_list) | 1193 | static void output_lat_thread(struct work_atoms *work_list) |
| 1131 | { | 1194 | { |
| 1132 | int i; | 1195 | int i; |
| 1133 | int ret; | 1196 | int ret; |
| 1134 | u64 avg; | 1197 | u64 avg; |
| 1135 | 1198 | ||
| 1136 | if (!atom_list->nb_atoms) | 1199 | if (!work_list->nb_atoms) |
| 1137 | return; | 1200 | return; |
| 1138 | /* | 1201 | /* |
| 1139 | * Ignore idle threads: | 1202 | * Ignore idle threads: |
| 1140 | */ | 1203 | */ |
| 1141 | if (!atom_list->thread->pid) | 1204 | if (!work_list->thread->pid) |
| 1142 | return; | 1205 | return; |
| 1143 | 1206 | ||
| 1144 | all_runtime += atom_list->total_runtime; | 1207 | all_runtime += work_list->total_runtime; |
| 1145 | all_count += atom_list->nb_atoms; | 1208 | all_count += work_list->nb_atoms; |
| 1146 | 1209 | ||
| 1147 | ret = printf(" %s-%d ", atom_list->thread->comm, atom_list->thread->pid); | 1210 | ret = printf(" %s-%d ", work_list->thread->comm, work_list->thread->pid); |
| 1148 | 1211 | ||
| 1149 | for (i = 0; i < 24 - ret; i++) | 1212 | for (i = 0; i < 24 - ret; i++) |
| 1150 | printf(" "); | 1213 | printf(" "); |
| 1151 | 1214 | ||
| 1152 | avg = atom_list->total_lat / atom_list->nb_atoms; | 1215 | avg = work_list->total_lat / work_list->nb_atoms; |
| 1153 | 1216 | ||
| 1154 | printf("|%9.3f ms |%9llu | avg:%9.3f ms | max:%9.3f ms |\n", | 1217 | printf("|%9.3f ms |%9llu | avg:%9.3f ms | max:%9.3f ms |\n", |
| 1155 | (double)atom_list->total_runtime / 1e6, | 1218 | (double)work_list->total_runtime / 1e6, |
| 1156 | atom_list->nb_atoms, (double)avg / 1e6, | 1219 | work_list->nb_atoms, (double)avg / 1e6, |
| 1157 | (double)atom_list->max_lat / 1e6); | 1220 | (double)work_list->max_lat / 1e6); |
| 1158 | } | 1221 | } |
| 1159 | 1222 | ||
| 1160 | static int pid_cmp(struct task_atoms *l, struct task_atoms *r) | 1223 | static int pid_cmp(struct work_atoms *l, struct work_atoms *r) |
| 1161 | { | 1224 | { |
| 1162 | if (l->thread->pid < r->thread->pid) | 1225 | if (l->thread->pid < r->thread->pid) |
| 1163 | return -1; | 1226 | return -1; |
| @@ -1172,7 +1235,7 @@ static struct sort_dimension pid_sort_dimension = { | |||
| 1172 | .cmp = pid_cmp, | 1235 | .cmp = pid_cmp, |
| 1173 | }; | 1236 | }; |
| 1174 | 1237 | ||
| 1175 | static int avg_cmp(struct task_atoms *l, struct task_atoms *r) | 1238 | static int avg_cmp(struct work_atoms *l, struct work_atoms *r) |
| 1176 | { | 1239 | { |
| 1177 | u64 avgl, avgr; | 1240 | u64 avgl, avgr; |
| 1178 | 1241 | ||
| @@ -1198,7 +1261,7 @@ static struct sort_dimension avg_sort_dimension = { | |||
| 1198 | .cmp = avg_cmp, | 1261 | .cmp = avg_cmp, |
| 1199 | }; | 1262 | }; |
| 1200 | 1263 | ||
| 1201 | static int max_cmp(struct task_atoms *l, struct task_atoms *r) | 1264 | static int max_cmp(struct work_atoms *l, struct work_atoms *r) |
| 1202 | { | 1265 | { |
| 1203 | if (l->max_lat < r->max_lat) | 1266 | if (l->max_lat < r->max_lat) |
| 1204 | return -1; | 1267 | return -1; |
| @@ -1213,7 +1276,7 @@ static struct sort_dimension max_sort_dimension = { | |||
| 1213 | .cmp = max_cmp, | 1276 | .cmp = max_cmp, |
| 1214 | }; | 1277 | }; |
| 1215 | 1278 | ||
| 1216 | static int switch_cmp(struct task_atoms *l, struct task_atoms *r) | 1279 | static int switch_cmp(struct work_atoms *l, struct work_atoms *r) |
| 1217 | { | 1280 | { |
| 1218 | if (l->nb_atoms < r->nb_atoms) | 1281 | if (l->nb_atoms < r->nb_atoms) |
| 1219 | return -1; | 1282 | return -1; |
| @@ -1228,7 +1291,7 @@ static struct sort_dimension switch_sort_dimension = { | |||
| 1228 | .cmp = switch_cmp, | 1291 | .cmp = switch_cmp, |
| 1229 | }; | 1292 | }; |
| 1230 | 1293 | ||
| 1231 | static int runtime_cmp(struct task_atoms *l, struct task_atoms *r) | 1294 | static int runtime_cmp(struct work_atoms *l, struct work_atoms *r) |
| 1232 | { | 1295 | { |
| 1233 | if (l->total_runtime < r->total_runtime) | 1296 | if (l->total_runtime < r->total_runtime) |
| 1234 | return -1; | 1297 | return -1; |
| @@ -1277,13 +1340,13 @@ static void sort_lat(void) | |||
| 1277 | struct rb_node *node; | 1340 | struct rb_node *node; |
| 1278 | 1341 | ||
| 1279 | for (;;) { | 1342 | for (;;) { |
| 1280 | struct task_atoms *data; | 1343 | struct work_atoms *data; |
| 1281 | node = rb_first(&atom_root); | 1344 | node = rb_first(&atom_root); |
| 1282 | if (!node) | 1345 | if (!node) |
| 1283 | break; | 1346 | break; |
| 1284 | 1347 | ||
| 1285 | rb_erase(node, &atom_root); | 1348 | rb_erase(node, &atom_root); |
| 1286 | data = rb_entry(node, struct task_atoms, node); | 1349 | data = rb_entry(node, struct work_atoms, node); |
| 1287 | __thread_latency_insert(&sorted_atom_root, data, &sort_list); | 1350 | __thread_latency_insert(&sorted_atom_root, data, &sort_list); |
| 1288 | } | 1351 | } |
| 1289 | } | 1352 | } |
| @@ -1303,10 +1366,10 @@ static void __cmd_lat(void) | |||
| 1303 | next = rb_first(&sorted_atom_root); | 1366 | next = rb_first(&sorted_atom_root); |
| 1304 | 1367 | ||
| 1305 | while (next) { | 1368 | while (next) { |
| 1306 | struct task_atoms *atom_list; | 1369 | struct work_atoms *work_list; |
| 1307 | 1370 | ||
| 1308 | atom_list = rb_entry(next, struct task_atoms, node); | 1371 | work_list = rb_entry(next, struct work_atoms, node); |
| 1309 | output_lat_thread(atom_list); | 1372 | output_lat_thread(work_list); |
| 1310 | next = rb_next(next); | 1373 | next = rb_next(next); |
| 1311 | } | 1374 | } |
| 1312 | 1375 | ||
| @@ -1369,6 +1432,23 @@ process_sched_switch_event(struct raw_event_sample *raw, | |||
| 1369 | } | 1432 | } |
| 1370 | 1433 | ||
| 1371 | static void | 1434 | static void |
| 1435 | process_sched_runtime_event(struct raw_event_sample *raw, | ||
| 1436 | struct event *event, | ||
| 1437 | int cpu __used, | ||
| 1438 | u64 timestamp __used, | ||
| 1439 | struct thread *thread __used) | ||
| 1440 | { | ||
| 1441 | struct trace_runtime_event runtime_event; | ||
| 1442 | |||
| 1443 | FILL_ARRAY(runtime_event, comm, event, raw->data); | ||
| 1444 | FILL_FIELD(runtime_event, pid, event, raw->data); | ||
| 1445 | FILL_FIELD(runtime_event, runtime, event, raw->data); | ||
| 1446 | FILL_FIELD(runtime_event, vruntime, event, raw->data); | ||
| 1447 | |||
| 1448 | trace_handler->runtime_event(&runtime_event, event, cpu, timestamp, thread); | ||
| 1449 | } | ||
| 1450 | |||
| 1451 | static void | ||
| 1372 | process_sched_fork_event(struct raw_event_sample *raw, | 1452 | process_sched_fork_event(struct raw_event_sample *raw, |
| 1373 | struct event *event, | 1453 | struct event *event, |
| 1374 | int cpu __used, | 1454 | int cpu __used, |
| @@ -1410,6 +1490,8 @@ process_raw_event(event_t *raw_event __used, void *more_data, | |||
| 1410 | 1490 | ||
| 1411 | if (!strcmp(event->name, "sched_switch")) | 1491 | if (!strcmp(event->name, "sched_switch")) |
| 1412 | process_sched_switch_event(raw, event, cpu, timestamp, thread); | 1492 | process_sched_switch_event(raw, event, cpu, timestamp, thread); |
| 1493 | if (!strcmp(event->name, "sched_stat_runtime")) | ||
| 1494 | process_sched_runtime_event(raw, event, cpu, timestamp, thread); | ||
| 1413 | if (!strcmp(event->name, "sched_wakeup")) | 1495 | if (!strcmp(event->name, "sched_wakeup")) |
| 1414 | process_sched_wakeup_event(raw, event, cpu, timestamp, thread); | 1496 | process_sched_wakeup_event(raw, event, cpu, timestamp, thread); |
| 1415 | if (!strcmp(event->name, "sched_wakeup_new")) | 1497 | if (!strcmp(event->name, "sched_wakeup_new")) |
