summary refs log tree commit diff stats
path: root/tools
diff options
context:
space:
mode:
author mingo <mingo@europe.(none)> 2009-09-14 14:04:48 -0400
committer Ingo Molnar <mingo@elte.hu> 2009-09-14 14:08:23 -0400
commit 39aeb52f99f2380c1f16036deed2f7bb8b2e0559 (patch)
tree 5d9c6dbc12da0bceb9776be4d1d7082d21850095 /tools
parent 08f69e6c2e59b3d73343f8c9ecf758e0133dbc22 (diff)
perf sched: Add support for sched:sched_stat_runtime events
This allows more precise 'perf sched latency' output:

 ---------------------------------------------------------------------------------------
 Task                   | Runtime ms  | Switches | Average delay ms | Maximum delay ms |
 ---------------------------------------------------------------------------------------
 ksoftirqd/0-4          |    0.010 ms |        2 | avg:    2.476 ms | max:    2.977 ms |
 perf-12328             |   15.844 ms |       66 | avg:    1.118 ms | max:    9.979 ms |
 bdi-default-235        |    0.009 ms |        1 | avg:    0.998 ms | max:    0.998 ms |
 events/1-8             |    0.020 ms |        2 | avg:    0.998 ms | max:    0.998 ms |
 events/0-7             |    0.018 ms |        2 | avg:    0.992 ms | max:    0.996 ms |
 sleep-12329            |    0.742 ms |        3 | avg:    0.906 ms | max:    2.289 ms |
 sshd-12122             |    0.163 ms |        2 | avg:    0.283 ms | max:    0.562 ms |
 loop-getpid-lon-12322  | 1023.636 ms |       69 | avg:    0.208 ms | max:    5.996 ms |
 loop-getpid-lon-12321  | 1038.638 ms |        5 | avg:    0.073 ms | max:    0.171 ms |
 migration/1-5          |    0.000 ms |        1 | avg:    0.006 ms | max:    0.006 ms |
 ---------------------------------------------------------------------------------------
 TOTAL:                 | 2079.078 ms |      153 |
 -------------------------------------------------

Also, streamline the code a bit more, add asserts for various state
machine failures (they should be debugged if they occur) and fix a
few odd ends.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools')
-rw-r--r-- tools/perf/builtin-sched.c 264
1 files changed, 173 insertions, 91 deletions
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 93ef7b215aba..adcb563ec4d2 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -50,7 +50,7 @@ static u64 sleep_measurement_overhead;
50 50
51static unsigned long nr_tasks; 51static unsigned long nr_tasks;
52 52
53struct sched_event; 53struct sched_atom;
54 54
55struct task_desc { 55struct task_desc {
56 unsigned long nr; 56 unsigned long nr;
@@ -59,7 +59,7 @@ struct task_desc {
59 59
60 unsigned long nr_events; 60 unsigned long nr_events;
61 unsigned long curr_event; 61 unsigned long curr_event;
62 struct sched_event **events; 62 struct sched_atom **atoms;
63 63
64 pthread_t thread; 64 pthread_t thread;
65 sem_t sleep_sem; 65 sem_t sleep_sem;
@@ -76,7 +76,7 @@ enum sched_event_type {
76 SCHED_EVENT_WAKEUP, 76 SCHED_EVENT_WAKEUP,
77}; 77};
78 78
79struct sched_event { 79struct sched_atom {
80 enum sched_event_type type; 80 enum sched_event_type type;
81 u64 timestamp; 81 u64 timestamp;
82 u64 duration; 82 u64 duration;
@@ -137,8 +137,8 @@ struct work_atom {
137 u64 runtime; 137 u64 runtime;
138}; 138};
139 139
140struct task_atoms { 140struct work_atoms {
141 struct list_head atom_list; 141 struct list_head work_list;
142 struct thread *thread; 142 struct thread *thread;
143 struct rb_node node; 143 struct rb_node node;
144 u64 max_lat; 144 u64 max_lat;
@@ -147,7 +147,7 @@ struct task_atoms {
147 u64 total_runtime; 147 u64 total_runtime;
148}; 148};
149 149
150typedef int (*sort_fn_t)(struct task_atoms *, struct task_atoms *); 150typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);
151 151
152static struct rb_root atom_root, sorted_atom_root; 152static struct rb_root atom_root, sorted_atom_root;
153 153
@@ -220,10 +220,10 @@ static void calibrate_sleep_measurement_overhead(void)
220 printf("sleep measurement overhead: %Ld nsecs\n", min_delta); 220 printf("sleep measurement overhead: %Ld nsecs\n", min_delta);
221} 221}
222 222
223static struct sched_event * 223static struct sched_atom *
224get_new_event(struct task_desc *task, u64 timestamp) 224get_new_event(struct task_desc *task, u64 timestamp)
225{ 225{
226 struct sched_event *event = calloc(1, sizeof(*event)); 226 struct sched_atom *event = calloc(1, sizeof(*event));
227 unsigned long idx = task->nr_events; 227 unsigned long idx = task->nr_events;
228 size_t size; 228 size_t size;
229 229
@@ -231,27 +231,27 @@ get_new_event(struct task_desc *task, u64 timestamp)
231 event->nr = idx; 231 event->nr = idx;
232 232
233 task->nr_events++; 233 task->nr_events++;
234 size = sizeof(struct sched_event *) * task->nr_events; 234 size = sizeof(struct sched_atom *) * task->nr_events;
235 task->events = realloc(task->events, size); 235 task->atoms = realloc(task->atoms, size);
236 BUG_ON(!task->events); 236 BUG_ON(!task->atoms);
237 237
238 task->events[idx] = event; 238 task->atoms[idx] = event;
239 239
240 return event; 240 return event;
241} 241}
242 242
243static struct sched_event *last_event(struct task_desc *task) 243static struct sched_atom *last_event(struct task_desc *task)
244{ 244{
245 if (!task->nr_events) 245 if (!task->nr_events)
246 return NULL; 246 return NULL;
247 247
248 return task->events[task->nr_events - 1]; 248 return task->atoms[task->nr_events - 1];
249} 249}
250 250
251static void 251static void
252add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration) 252add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration)
253{ 253{
254 struct sched_event *event, *curr_event = last_event(task); 254 struct sched_atom *event, *curr_event = last_event(task);
255 255
256 /* 256 /*
257 * optimize an existing RUN event by merging this one 257 * optimize an existing RUN event by merging this one
@@ -275,7 +275,7 @@ static void
275add_sched_event_wakeup(struct task_desc *task, u64 timestamp, 275add_sched_event_wakeup(struct task_desc *task, u64 timestamp,
276 struct task_desc *wakee) 276 struct task_desc *wakee)
277{ 277{
278 struct sched_event *event, *wakee_event; 278 struct sched_atom *event, *wakee_event;
279 279
280 event = get_new_event(task, timestamp); 280 event = get_new_event(task, timestamp);
281 event->type = SCHED_EVENT_WAKEUP; 281 event->type = SCHED_EVENT_WAKEUP;
@@ -303,7 +303,7 @@ static void
303add_sched_event_sleep(struct task_desc *task, u64 timestamp, 303add_sched_event_sleep(struct task_desc *task, u64 timestamp,
304 u64 task_state __used) 304 u64 task_state __used)
305{ 305{
306 struct sched_event *event = get_new_event(task, timestamp); 306 struct sched_atom *event = get_new_event(task, timestamp);
307 307
308 event->type = SCHED_EVENT_SLEEP; 308 event->type = SCHED_EVENT_SLEEP;
309 309
@@ -372,27 +372,27 @@ static void add_cross_task_wakeups(void)
372} 372}
373 373
374static void 374static void
375process_sched_event(struct task_desc *this_task __used, struct sched_event *event) 375process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom)
376{ 376{
377 int ret = 0; 377 int ret = 0;
378 u64 now; 378 u64 now;
379 long long delta; 379 long long delta;
380 380
381 now = get_nsecs(); 381 now = get_nsecs();
382 delta = start_time + event->timestamp - now; 382 delta = start_time + atom->timestamp - now;
383 383
384 switch (event->type) { 384 switch (atom->type) {
385 case SCHED_EVENT_RUN: 385 case SCHED_EVENT_RUN:
386 burn_nsecs(event->duration); 386 burn_nsecs(atom->duration);
387 break; 387 break;
388 case SCHED_EVENT_SLEEP: 388 case SCHED_EVENT_SLEEP:
389 if (event->wait_sem) 389 if (atom->wait_sem)
390 ret = sem_wait(event->wait_sem); 390 ret = sem_wait(atom->wait_sem);
391 BUG_ON(ret); 391 BUG_ON(ret);
392 break; 392 break;
393 case SCHED_EVENT_WAKEUP: 393 case SCHED_EVENT_WAKEUP:
394 if (event->wait_sem) 394 if (atom->wait_sem)
395 ret = sem_post(event->wait_sem); 395 ret = sem_post(atom->wait_sem);
396 BUG_ON(ret); 396 BUG_ON(ret);
397 break; 397 break;
398 default: 398 default:
@@ -467,7 +467,7 @@ again:
467 467
468 for (i = 0; i < this_task->nr_events; i++) { 468 for (i = 0; i < this_task->nr_events; i++) {
469 this_task->curr_event = i; 469 this_task->curr_event = i;
470 process_sched_event(this_task, this_task->events[i]); 470 process_sched_event(this_task, this_task->atoms[i]);
471 } 471 }
472 472
473 cpu_usage_1 = get_cpu_usage_nsec_self(); 473 cpu_usage_1 = get_cpu_usage_nsec_self();
@@ -649,7 +649,7 @@ static void __cmd_replay(void)
649 if (multitarget_wakeups) 649 if (multitarget_wakeups)
650 printf("multi-target wakeups: %ld\n", multitarget_wakeups); 650 printf("multi-target wakeups: %ld\n", multitarget_wakeups);
651 if (nr_run_events_optimized) 651 if (nr_run_events_optimized)
652 printf("run events optimized: %ld\n", 652 printf("run atoms optimized: %ld\n",
653 nr_run_events_optimized); 653 nr_run_events_optimized);
654 654
655 print_task_traces(); 655 print_task_traces();
@@ -727,6 +727,20 @@ struct trace_switch_event {
727 u32 next_prio; 727 u32 next_prio;
728}; 728};
729 729
730struct trace_runtime_event {
731 u32 size;
732
733 u16 common_type;
734 u8 common_flags;
735 u8 common_preempt_count;
736 u32 common_pid;
737 u32 common_tgid;
738
739 char comm[16];
740 u32 pid;
741 u64 runtime;
742 u64 vruntime;
743};
730 744
731struct trace_wakeup_event { 745struct trace_wakeup_event {
732 u32 size; 746 u32 size;
@@ -767,6 +781,12 @@ struct trace_sched_handler {
767 u64 timestamp, 781 u64 timestamp,
768 struct thread *thread); 782 struct thread *thread);
769 783
784 void (*runtime_event)(struct trace_runtime_event *,
785 struct event *,
786 int cpu,
787 u64 timestamp,
788 struct thread *thread);
789
770 void (*wakeup_event)(struct trace_wakeup_event *, 790 void (*wakeup_event)(struct trace_wakeup_event *,
771 struct event *, 791 struct event *,
772 int cpu, 792 int cpu,
@@ -881,7 +901,7 @@ struct sort_dimension {
881static LIST_HEAD(cmp_pid); 901static LIST_HEAD(cmp_pid);
882 902
883static int 903static int
884thread_lat_cmp(struct list_head *list, struct task_atoms *l, struct task_atoms *r) 904thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r)
885{ 905{
886 struct sort_dimension *sort; 906 struct sort_dimension *sort;
887 int ret = 0; 907 int ret = 0;
@@ -897,18 +917,18 @@ thread_lat_cmp(struct list_head *list, struct task_atoms *l, struct task_atoms *
897 return ret; 917 return ret;
898} 918}
899 919
900static struct task_atoms * 920static struct work_atoms *
901thread_atoms_search(struct rb_root *root, struct thread *thread, 921thread_atoms_search(struct rb_root *root, struct thread *thread,
902 struct list_head *sort_list) 922 struct list_head *sort_list)
903{ 923{
904 struct rb_node *node = root->rb_node; 924 struct rb_node *node = root->rb_node;
905 struct task_atoms key = { .thread = thread }; 925 struct work_atoms key = { .thread = thread };
906 926
907 while (node) { 927 while (node) {
908 struct task_atoms *atoms; 928 struct work_atoms *atoms;
909 int cmp; 929 int cmp;
910 930
911 atoms = container_of(node, struct task_atoms, node); 931 atoms = container_of(node, struct work_atoms, node);
912 932
913 cmp = thread_lat_cmp(sort_list, &key, atoms); 933 cmp = thread_lat_cmp(sort_list, &key, atoms);
914 if (cmp > 0) 934 if (cmp > 0)
@@ -924,16 +944,16 @@ thread_atoms_search(struct rb_root *root, struct thread *thread,
924} 944}
925 945
926static void 946static void
927__thread_latency_insert(struct rb_root *root, struct task_atoms *data, 947__thread_latency_insert(struct rb_root *root, struct work_atoms *data,
928 struct list_head *sort_list) 948 struct list_head *sort_list)
929{ 949{
930 struct rb_node **new = &(root->rb_node), *parent = NULL; 950 struct rb_node **new = &(root->rb_node), *parent = NULL;
931 951
932 while (*new) { 952 while (*new) {
933 struct task_atoms *this; 953 struct work_atoms *this;
934 int cmp; 954 int cmp;
935 955
936 this = container_of(*new, struct task_atoms, node); 956 this = container_of(*new, struct work_atoms, node);
937 parent = *new; 957 parent = *new;
938 958
939 cmp = thread_lat_cmp(sort_list, data, this); 959 cmp = thread_lat_cmp(sort_list, data, this);
@@ -950,14 +970,14 @@ __thread_latency_insert(struct rb_root *root, struct task_atoms *data,
950 970
951static void thread_atoms_insert(struct thread *thread) 971static void thread_atoms_insert(struct thread *thread)
952{ 972{
953 struct task_atoms *atoms; 973 struct work_atoms *atoms;
954 974
955 atoms = calloc(sizeof(*atoms), 1); 975 atoms = calloc(sizeof(*atoms), 1);
956 if (!atoms) 976 if (!atoms)
957 die("No memory"); 977 die("No memory");
958 978
959 atoms->thread = thread; 979 atoms->thread = thread;
960 INIT_LIST_HEAD(&atoms->atom_list); 980 INIT_LIST_HEAD(&atoms->work_list);
961 __thread_latency_insert(&atom_root, atoms, &cmp_pid); 981 __thread_latency_insert(&atom_root, atoms, &cmp_pid);
962} 982}
963 983
@@ -980,10 +1000,9 @@ static char sched_out_state(struct trace_switch_event *switch_event)
980} 1000}
981 1001
982static void 1002static void
983lat_sched_out(struct task_atoms *atoms, 1003add_sched_out_event(struct work_atoms *atoms,
984 struct trace_switch_event *switch_event __used, 1004 char run_state,
985 u64 delta, 1005 u64 timestamp)
986 u64 timestamp)
987{ 1006{
988 struct work_atom *atom; 1007 struct work_atom *atom;
989 1008
@@ -993,25 +1012,37 @@ lat_sched_out(struct task_atoms *atoms,
993 1012
994 atom->sched_out_time = timestamp; 1013 atom->sched_out_time = timestamp;
995 1014
996 if (sched_out_state(switch_event) == 'R') { 1015 if (run_state == 'R') {
997 atom->state = THREAD_WAIT_CPU; 1016 atom->state = THREAD_WAIT_CPU;
998 atom->wake_up_time = atom->sched_out_time; 1017 atom->wake_up_time = atom->sched_out_time;
999 } 1018 }
1000 1019
1001 atom->runtime = delta; 1020 list_add_tail(&atom->list, &atoms->work_list);
1002 list_add_tail(&atom->list, &atoms->atom_list); 1021}
1022
1023static void
1024add_runtime_event(struct work_atoms *atoms, u64 delta, u64 timestamp __used)
1025{
1026 struct work_atom *atom;
1027
1028 BUG_ON(list_empty(&atoms->work_list));
1029
1030 atom = list_entry(atoms->work_list.prev, struct work_atom, list);
1031
1032 atom->runtime += delta;
1033 atoms->total_runtime += delta;
1003} 1034}
1004 1035
1005static void 1036static void
1006lat_sched_in(struct task_atoms *atoms, u64 timestamp) 1037add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
1007{ 1038{
1008 struct work_atom *atom; 1039 struct work_atom *atom;
1009 u64 delta; 1040 u64 delta;
1010 1041
1011 if (list_empty(&atoms->atom_list)) 1042 if (list_empty(&atoms->work_list))
1012 return; 1043 return;
1013 1044
1014 atom = list_entry(atoms->atom_list.prev, struct work_atom, list); 1045 atom = list_entry(atoms->work_list.prev, struct work_atom, list);
1015 1046
1016 if (atom->state != THREAD_WAIT_CPU) 1047 if (atom->state != THREAD_WAIT_CPU)
1017 return; 1048 return;
@@ -1029,7 +1060,6 @@ lat_sched_in(struct task_atoms *atoms, u64 timestamp)
1029 if (delta > atoms->max_lat) 1060 if (delta > atoms->max_lat)
1030 atoms->max_lat = delta; 1061 atoms->max_lat = delta;
1031 atoms->nb_atoms++; 1062 atoms->nb_atoms++;
1032 atoms->total_runtime += atom->runtime;
1033} 1063}
1034 1064
1035static void 1065static void
@@ -1039,13 +1069,12 @@ latency_switch_event(struct trace_switch_event *switch_event,
1039 u64 timestamp, 1069 u64 timestamp,
1040 struct thread *thread __used) 1070 struct thread *thread __used)
1041{ 1071{
1042 struct task_atoms *out_atoms, *in_atoms; 1072 struct work_atoms *out_events, *in_events;
1043 struct thread *sched_out, *sched_in; 1073 struct thread *sched_out, *sched_in;
1044 u64 timestamp0; 1074 u64 timestamp0;
1045 s64 delta; 1075 s64 delta;
1046 1076
1047 if (cpu >= MAX_CPUS || cpu < 0) 1077 BUG_ON(cpu >= MAX_CPUS || cpu < 0);
1048 return;
1049 1078
1050 timestamp0 = cpu_last_switched[cpu]; 1079 timestamp0 = cpu_last_switched[cpu];
1051 cpu_last_switched[cpu] = timestamp; 1080 cpu_last_switched[cpu] = timestamp;
@@ -1061,34 +1090,63 @@ latency_switch_event(struct trace_switch_event *switch_event,
1061 sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match); 1090 sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
1062 sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match); 1091 sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);
1063 1092
1064 in_atoms = thread_atoms_search(&atom_root, sched_in, &cmp_pid); 1093 out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
1065 if (!in_atoms) { 1094 if (!out_events) {
1095 thread_atoms_insert(sched_out);
1096 out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
1097 if (!out_events)
1098 die("out-event: Internal tree error");
1099 }
1100 add_sched_out_event(out_events, sched_out_state(switch_event), timestamp);
1101
1102 in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid);
1103 if (!in_events) {
1066 thread_atoms_insert(sched_in); 1104 thread_atoms_insert(sched_in);
1067 in_atoms = thread_atoms_search(&atom_root, sched_in, &cmp_pid); 1105 in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid);
1068 if (!in_atoms) 1106 if (!in_events)
1069 die("in-atom: Internal tree error"); 1107 die("in-event: Internal tree error");
1108 /*
 1109 * Task came in we have not heard about yet,
1110 * add in an initial atom in runnable state:
1111 */
1112 add_sched_out_event(in_events, 'R', timestamp);
1070 } 1113 }
1114 add_sched_in_event(in_events, timestamp);
1115}
1071 1116
1072 out_atoms = thread_atoms_search(&atom_root, sched_out, &cmp_pid); 1117static void
1073 if (!out_atoms) { 1118latency_runtime_event(struct trace_runtime_event *runtime_event,
1074 thread_atoms_insert(sched_out); 1119 struct event *event __used,
1075 out_atoms = thread_atoms_search(&atom_root, sched_out, &cmp_pid); 1120 int cpu,
1076 if (!out_atoms) 1121 u64 timestamp,
1077 die("out-atom: Internal tree error"); 1122 struct thread *this_thread __used)
1123{
1124 struct work_atoms *atoms;
1125 struct thread *thread;
1126
1127 BUG_ON(cpu >= MAX_CPUS || cpu < 0);
1128
1129 thread = threads__findnew(runtime_event->pid, &threads, &last_match);
1130 atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
1131 if (!atoms) {
1132 thread_atoms_insert(thread);
1133 atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
1134 if (!atoms)
1135 die("in-event: Internal tree error");
1136 add_sched_out_event(atoms, 'R', timestamp);
1078 } 1137 }
1079 1138
1080 lat_sched_in(in_atoms, timestamp); 1139 add_runtime_event(atoms, runtime_event->runtime, timestamp);
1081 lat_sched_out(out_atoms, switch_event, delta, timestamp);
1082} 1140}
1083 1141
1084static void 1142static void
1085latency_wakeup_event(struct trace_wakeup_event *wakeup_event, 1143latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
1086 struct event *event __used, 1144 struct event *__event __used,
1087 int cpu __used, 1145 int cpu __used,
1088 u64 timestamp, 1146 u64 timestamp,
1089 struct thread *thread __used) 1147 struct thread *thread __used)
1090{ 1148{
1091 struct task_atoms *atoms; 1149 struct work_atoms *atoms;
1092 struct work_atom *atom; 1150 struct work_atom *atom;
1093 struct thread *wakee; 1151 struct thread *wakee;
1094 1152
@@ -1100,16 +1158,20 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
1100 atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); 1158 atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
1101 if (!atoms) { 1159 if (!atoms) {
1102 thread_atoms_insert(wakee); 1160 thread_atoms_insert(wakee);
1103 return; 1161 atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
1162 if (!atoms)
1163 die("wakeup-event: Internal tree error");
1164 add_sched_out_event(atoms, 'S', timestamp);
1104 } 1165 }
1105 1166
1106 if (list_empty(&atoms->atom_list)) 1167 BUG_ON(list_empty(&atoms->work_list));
1107 return;
1108 1168
1109 atom = list_entry(atoms->atom_list.prev, struct work_atom, list); 1169 atom = list_entry(atoms->work_list.prev, struct work_atom, list);
1110 1170
1111 if (atom->state != THREAD_SLEEPING) 1171 if (atom->state != THREAD_SLEEPING) {
1172 printf("boo2\n");
1112 return; 1173 return;
1174 }
1113 1175
1114 nr_timestamps++; 1176 nr_timestamps++;
1115 if (atom->sched_out_time > timestamp) { 1177 if (atom->sched_out_time > timestamp) {
@@ -1124,40 +1186,41 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
1124static struct trace_sched_handler lat_ops = { 1186static struct trace_sched_handler lat_ops = {
1125 .wakeup_event = latency_wakeup_event, 1187 .wakeup_event = latency_wakeup_event,
1126 .switch_event = latency_switch_event, 1188 .switch_event = latency_switch_event,
1189 .runtime_event = latency_runtime_event,
1127 .fork_event = latency_fork_event, 1190 .fork_event = latency_fork_event,
1128}; 1191};
1129 1192
1130static void output_lat_thread(struct task_atoms *atom_list) 1193static void output_lat_thread(struct work_atoms *work_list)
1131{ 1194{
1132 int i; 1195 int i;
1133 int ret; 1196 int ret;
1134 u64 avg; 1197 u64 avg;
1135 1198
1136 if (!atom_list->nb_atoms) 1199 if (!work_list->nb_atoms)
1137 return; 1200 return;
1138 /* 1201 /*
1139 * Ignore idle threads: 1202 * Ignore idle threads:
1140 */ 1203 */
1141 if (!atom_list->thread->pid) 1204 if (!work_list->thread->pid)
1142 return; 1205 return;
1143 1206
1144 all_runtime += atom_list->total_runtime; 1207 all_runtime += work_list->total_runtime;
1145 all_count += atom_list->nb_atoms; 1208 all_count += work_list->nb_atoms;
1146 1209
1147 ret = printf(" %s-%d ", atom_list->thread->comm, atom_list->thread->pid); 1210 ret = printf(" %s-%d ", work_list->thread->comm, work_list->thread->pid);
1148 1211
1149 for (i = 0; i < 24 - ret; i++) 1212 for (i = 0; i < 24 - ret; i++)
1150 printf(" "); 1213 printf(" ");
1151 1214
1152 avg = atom_list->total_lat / atom_list->nb_atoms; 1215 avg = work_list->total_lat / work_list->nb_atoms;
1153 1216
1154 printf("|%9.3f ms |%9llu | avg:%9.3f ms | max:%9.3f ms |\n", 1217 printf("|%9.3f ms |%9llu | avg:%9.3f ms | max:%9.3f ms |\n",
1155 (double)atom_list->total_runtime / 1e6, 1218 (double)work_list->total_runtime / 1e6,
1156 atom_list->nb_atoms, (double)avg / 1e6, 1219 work_list->nb_atoms, (double)avg / 1e6,
1157 (double)atom_list->max_lat / 1e6); 1220 (double)work_list->max_lat / 1e6);
1158} 1221}
1159 1222
1160static int pid_cmp(struct task_atoms *l, struct task_atoms *r) 1223static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
1161{ 1224{
1162 if (l->thread->pid < r->thread->pid) 1225 if (l->thread->pid < r->thread->pid)
1163 return -1; 1226 return -1;
@@ -1172,7 +1235,7 @@ static struct sort_dimension pid_sort_dimension = {
1172 .cmp = pid_cmp, 1235 .cmp = pid_cmp,
1173}; 1236};
1174 1237
1175static int avg_cmp(struct task_atoms *l, struct task_atoms *r) 1238static int avg_cmp(struct work_atoms *l, struct work_atoms *r)
1176{ 1239{
1177 u64 avgl, avgr; 1240 u64 avgl, avgr;
1178 1241
@@ -1198,7 +1261,7 @@ static struct sort_dimension avg_sort_dimension = {
1198 .cmp = avg_cmp, 1261 .cmp = avg_cmp,
1199}; 1262};
1200 1263
1201static int max_cmp(struct task_atoms *l, struct task_atoms *r) 1264static int max_cmp(struct work_atoms *l, struct work_atoms *r)
1202{ 1265{
1203 if (l->max_lat < r->max_lat) 1266 if (l->max_lat < r->max_lat)
1204 return -1; 1267 return -1;
@@ -1213,7 +1276,7 @@ static struct sort_dimension max_sort_dimension = {
1213 .cmp = max_cmp, 1276 .cmp = max_cmp,
1214}; 1277};
1215 1278
1216static int switch_cmp(struct task_atoms *l, struct task_atoms *r) 1279static int switch_cmp(struct work_atoms *l, struct work_atoms *r)
1217{ 1280{
1218 if (l->nb_atoms < r->nb_atoms) 1281 if (l->nb_atoms < r->nb_atoms)
1219 return -1; 1282 return -1;
@@ -1228,7 +1291,7 @@ static struct sort_dimension switch_sort_dimension = {
1228 .cmp = switch_cmp, 1291 .cmp = switch_cmp,
1229}; 1292};
1230 1293
1231static int runtime_cmp(struct task_atoms *l, struct task_atoms *r) 1294static int runtime_cmp(struct work_atoms *l, struct work_atoms *r)
1232{ 1295{
1233 if (l->total_runtime < r->total_runtime) 1296 if (l->total_runtime < r->total_runtime)
1234 return -1; 1297 return -1;
@@ -1277,13 +1340,13 @@ static void sort_lat(void)
1277 struct rb_node *node; 1340 struct rb_node *node;
1278 1341
1279 for (;;) { 1342 for (;;) {
1280 struct task_atoms *data; 1343 struct work_atoms *data;
1281 node = rb_first(&atom_root); 1344 node = rb_first(&atom_root);
1282 if (!node) 1345 if (!node)
1283 break; 1346 break;
1284 1347
1285 rb_erase(node, &atom_root); 1348 rb_erase(node, &atom_root);
1286 data = rb_entry(node, struct task_atoms, node); 1349 data = rb_entry(node, struct work_atoms, node);
1287 __thread_latency_insert(&sorted_atom_root, data, &sort_list); 1350 __thread_latency_insert(&sorted_atom_root, data, &sort_list);
1288 } 1351 }
1289} 1352}
@@ -1303,10 +1366,10 @@ static void __cmd_lat(void)
1303 next = rb_first(&sorted_atom_root); 1366 next = rb_first(&sorted_atom_root);
1304 1367
1305 while (next) { 1368 while (next) {
1306 struct task_atoms *atom_list; 1369 struct work_atoms *work_list;
1307 1370
1308 atom_list = rb_entry(next, struct task_atoms, node); 1371 work_list = rb_entry(next, struct work_atoms, node);
1309 output_lat_thread(atom_list); 1372 output_lat_thread(work_list);
1310 next = rb_next(next); 1373 next = rb_next(next);
1311 } 1374 }
1312 1375
@@ -1369,6 +1432,23 @@ process_sched_switch_event(struct raw_event_sample *raw,
1369} 1432}
1370 1433
1371static void 1434static void
1435process_sched_runtime_event(struct raw_event_sample *raw,
1436 struct event *event,
1437 int cpu __used,
1438 u64 timestamp __used,
1439 struct thread *thread __used)
1440{
1441 struct trace_runtime_event runtime_event;
1442
1443 FILL_ARRAY(runtime_event, comm, event, raw->data);
1444 FILL_FIELD(runtime_event, pid, event, raw->data);
1445 FILL_FIELD(runtime_event, runtime, event, raw->data);
1446 FILL_FIELD(runtime_event, vruntime, event, raw->data);
1447
1448 trace_handler->runtime_event(&runtime_event, event, cpu, timestamp, thread);
1449}
1450
1451static void
1372process_sched_fork_event(struct raw_event_sample *raw, 1452process_sched_fork_event(struct raw_event_sample *raw,
1373 struct event *event, 1453 struct event *event,
1374 int cpu __used, 1454 int cpu __used,
@@ -1410,6 +1490,8 @@ process_raw_event(event_t *raw_event __used, void *more_data,
1410 1490
1411 if (!strcmp(event->name, "sched_switch")) 1491 if (!strcmp(event->name, "sched_switch"))
1412 process_sched_switch_event(raw, event, cpu, timestamp, thread); 1492 process_sched_switch_event(raw, event, cpu, timestamp, thread);
1493 if (!strcmp(event->name, "sched_stat_runtime"))
1494 process_sched_runtime_event(raw, event, cpu, timestamp, thread);
1413 if (!strcmp(event->name, "sched_wakeup")) 1495 if (!strcmp(event->name, "sched_wakeup"))
1414 process_sched_wakeup_event(raw, event, cpu, timestamp, thread); 1496 process_sched_wakeup_event(raw, event, cpu, timestamp, thread);
1415 if (!strcmp(event->name, "sched_wakeup_new")) 1497 if (!strcmp(event->name, "sched_wakeup_new"))