aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf
diff options
context:
space:
mode:
authorFrederic Weisbecker <fweisbec@gmail.com>2009-09-12 02:06:14 -0400
committerIngo Molnar <mingo@elte.hu>2009-09-13 04:22:43 -0400
commitcdce9d738b91e1ec686e4ef6ca46d46e2891e208 (patch)
tree1fd64c4bef114ce44a3c96ad7183fa200bff3d12 /tools/perf
parent419ab0d6a959f41ec7fde807fe311aaafb05c3be (diff)
perf sched: Add sched latency profiling
Add the -l --latency option that reports statistics about the scheduler latencies. For now, the latencies are measured in the following sequence scope: - task A is sleeping (D or S state) - task B wakes up A ^ | | latency timeframe | | v - task A is scheduled in Start by recording every scheduler events: perf record -e sched:* and then fetch the results: perf sched -l Tasks count total avg max migration/0 2 39849 19924 28826 ksoftirqd/0 7 756383 108054 373014 migration/1 5 45391 9078 10452 ksoftirqd/1 2 399055 199527 359130 events/0 8 4780110 597513 4500250 events/1 9 6353057 705895 2986012 kblockd/0 42 37805097 900121 5077684 The snapshot are in nanoseconds. - Count: number of snapshots taken for the given task - Total: total latencies in nanosec - Avg : average of latency between wake up and sched in - Max : max snapshot latency Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/builtin-sched.c296
1 files changed, 285 insertions, 11 deletions
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 756fe62eb046..4f9e943181a3 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -31,6 +31,7 @@ static struct perf_header *header;
31static u64 sample_type; 31static u64 sample_type;
32 32
33static int replay_mode; 33static int replay_mode;
34static int lat_mode;
34 35
35 36
36/* 37/*
@@ -847,6 +848,269 @@ static struct trace_sched_handler replay_ops = {
847 .fork_event = replay_fork_event, 848 .fork_event = replay_fork_event,
848}; 849};
849 850
851#define TASK_STATE_TO_CHAR_STR "RSDTtZX"
852
853enum thread_state {
854 THREAD_SLEEPING,
855 THREAD_WAKED_UP,
856 THREAD_SCHED_IN,
857 THREAD_IGNORE
858};
859
860struct lat_snapshot {
861 struct list_head list;
862 enum thread_state state;
863 u64 wake_up_time;
864 u64 sched_in_time;
865};
866
867struct thread_latency {
868 struct list_head snapshot_list;
869 struct thread *thread;
870 struct rb_node node;
871};
872
873static struct rb_root lat_snapshot_root;
874
875static struct thread_latency *
876thread_latency_search(struct rb_root *root, struct thread *thread)
877{
878 struct rb_node *node = root->rb_node;
879
880 while (node) {
881 struct thread_latency *lat;
882
883 lat = container_of(node, struct thread_latency, node);
884 if (thread->pid < lat->thread->pid)
885 node = node->rb_left;
886 else if (thread->pid > lat->thread->pid)
887 node = node->rb_right;
888 else {
889 return lat;
890 }
891 }
892 return NULL;
893}
894
895static void
896__thread_latency_insert(struct rb_root *root, struct thread_latency *data)
897{
898 struct rb_node **new = &(root->rb_node), *parent = NULL;
899
900 while (*new) {
901 struct thread_latency *this;
902
903 this = container_of(*new, struct thread_latency, node);
904 parent = *new;
905 if (data->thread->pid < this->thread->pid)
906 new = &((*new)->rb_left);
907 else if (data->thread->pid > this->thread->pid)
908 new = &((*new)->rb_right);
909 else
910 die("Double thread insertion\n");
911 }
912
913 rb_link_node(&data->node, parent, new);
914 rb_insert_color(&data->node, root);
915}
916
917static void thread_latency_insert(struct thread *thread)
918{
919 struct thread_latency *lat;
920 lat = calloc(sizeof(*lat), 1);
921 if (!lat)
922 die("No memory");
923
924 lat->thread = thread;
925 INIT_LIST_HEAD(&lat->snapshot_list);
926 __thread_latency_insert(&lat_snapshot_root, lat);
927}
928
929static void
930latency_fork_event(struct trace_fork_event *fork_event __used,
931 struct event *event __used,
932 int cpu __used,
933 u64 timestamp __used,
934 struct thread *thread __used)
935{
936 /* should insert the newcomer */
937}
938
939static char sched_out_state(struct trace_switch_event *switch_event)
940{
941 const char *str = TASK_STATE_TO_CHAR_STR;
942
943 return str[switch_event->prev_state];
944}
945
946static void
947lat_sched_out(struct thread_latency *lat,
948 struct trace_switch_event *switch_event)
949{
950 struct lat_snapshot *snapshot;
951
952 if (sched_out_state(switch_event) == 'R')
953 return;
954
955 snapshot = calloc(sizeof(*snapshot), 1);
956 if (!snapshot)
957 die("Non memory");
958
959 list_add_tail(&snapshot->list, &lat->snapshot_list);
960}
961
962static void
963lat_sched_in(struct thread_latency *lat, u64 timestamp)
964{
965 struct lat_snapshot *snapshot;
966
967 if (list_empty(&lat->snapshot_list))
968 return;
969
970 snapshot = list_entry(lat->snapshot_list.prev, struct lat_snapshot,
971 list);
972
973 if (snapshot->state != THREAD_WAKED_UP)
974 return;
975
976 if (timestamp < snapshot->wake_up_time) {
977 snapshot->state = THREAD_IGNORE;
978 return;
979 }
980
981 snapshot->state = THREAD_SCHED_IN;
982 snapshot->sched_in_time = timestamp;
983}
984
985
986static void
987latency_switch_event(struct trace_switch_event *switch_event,
988 struct event *event __used,
989 int cpu __used,
990 u64 timestamp,
991 struct thread *thread __used)
992{
993 struct thread_latency *out_lat, *in_lat;
994 struct thread *sched_out, *sched_in;
995
996 sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
997 sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);
998
999 in_lat = thread_latency_search(&lat_snapshot_root, sched_in);
1000 if (!in_lat) {
1001 thread_latency_insert(sched_in);
1002 in_lat = thread_latency_search(&lat_snapshot_root, sched_in);
1003 if (!in_lat)
1004 die("Internal latency tree error");
1005 }
1006
1007 out_lat = thread_latency_search(&lat_snapshot_root, sched_out);
1008 if (!out_lat) {
1009 thread_latency_insert(sched_out);
1010 out_lat = thread_latency_search(&lat_snapshot_root, sched_out);
1011 if (!out_lat)
1012 die("Internal latency tree error");
1013 }
1014
1015 lat_sched_in(in_lat, timestamp);
1016 lat_sched_out(out_lat, switch_event);
1017}
1018
1019static void
1020latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
1021 struct event *event __used,
1022 int cpu __used,
1023 u64 timestamp,
1024 struct thread *thread __used)
1025{
1026 struct thread_latency *lat;
1027 struct lat_snapshot *snapshot;
1028 struct thread *wakee;
1029
1030 /* Note for later, it may be interesting to observe the failing cases */
1031 if (!wakeup_event->success)
1032 return;
1033
1034 wakee = threads__findnew(wakeup_event->pid, &threads, &last_match);
1035 lat = thread_latency_search(&lat_snapshot_root, wakee);
1036 if (!lat) {
1037 thread_latency_insert(wakee);
1038 return;
1039 }
1040
1041 if (list_empty(&lat->snapshot_list))
1042 return;
1043
1044 snapshot = list_entry(lat->snapshot_list.prev, struct lat_snapshot,
1045 list);
1046
1047 if (snapshot->state != THREAD_SLEEPING)
1048 return;
1049
1050 snapshot->state = THREAD_WAKED_UP;
1051 snapshot->wake_up_time = timestamp;
1052}
1053
1054static struct trace_sched_handler lat_ops = {
1055 .wakeup_event = latency_wakeup_event,
1056 .switch_event = latency_switch_event,
1057 .fork_event = latency_fork_event,
1058};
1059
1060static void output_lat_thread(struct thread_latency *lat)
1061{
1062 struct lat_snapshot *shot;
1063 int count = 0;
1064 int i;
1065 int ret;
1066 u64 max = 0, avg;
1067 u64 total = 0, delta;
1068
1069 list_for_each_entry(shot, &lat->snapshot_list, list) {
1070 if (shot->state != THREAD_SCHED_IN)
1071 continue;
1072
1073 count++;
1074
1075 delta = shot->sched_in_time - shot->wake_up_time;
1076 if (delta > max)
1077 max = delta;
1078 total += delta;
1079 }
1080
1081 if (!count)
1082 return;
1083
1084 ret = printf("%s", lat->thread->comm);
1085
1086 for (i = 0; i < 25 - ret; i++)
1087 printf(" ");
1088
1089 avg = total / count;
1090
1091 printf("%5d %10llu %10llu %10llu\n", count, total, avg, max);
1092}
1093
1094static void output_lat_results(void)
1095{
1096 struct rb_node *next;
1097
1098 printf(" Tasks");
1099 printf(" count");
1100 printf(" total");
1101 printf(" avg");
1102 printf(" max\n\n");
1103
1104 next = rb_first(&lat_snapshot_root);
1105
1106 while (next) {
1107 struct thread_latency *lat;
1108
1109 lat = rb_entry(next, struct thread_latency, node);
1110 output_lat_thread(lat);
1111 next = rb_next(next);
1112 }
1113}
850 1114
851static struct trace_sched_handler *trace_handler; 1115static struct trace_sched_handler *trace_handler;
852 1116
@@ -1154,6 +1418,8 @@ static const struct option options[] = {
1154 "dump raw trace in ASCII"), 1418 "dump raw trace in ASCII"),
1155 OPT_BOOLEAN('r', "replay", &replay_mode, 1419 OPT_BOOLEAN('r', "replay", &replay_mode,
1156 "replay sched behaviour from traces"), 1420 "replay sched behaviour from traces"),
1421 OPT_BOOLEAN('l', "latency", &lat_mode,
1422 "measure various latencies"),
1157 OPT_BOOLEAN('v', "verbose", &verbose, 1423 OPT_BOOLEAN('v', "verbose", &verbose,
1158 "be more verbose (show symbol address, etc)"), 1424 "be more verbose (show symbol address, etc)"),
1159 OPT_END() 1425 OPT_END()
@@ -1180,22 +1446,30 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used)
1180 1446
1181 if (replay_mode) 1447 if (replay_mode)
1182 trace_handler = &replay_ops; 1448 trace_handler = &replay_ops;
1183 else /* We may need a default subcommand */ 1449 else if (lat_mode)
1450 trace_handler = &lat_ops;
1451 else /* We may need a default subcommand (perf trace?) */
1184 die("Please select a sub command (-r)\n"); 1452 die("Please select a sub command (-r)\n");
1185 1453
1186 calibrate_run_measurement_overhead(); 1454 if (replay_mode) {
1187 calibrate_sleep_measurement_overhead(); 1455 calibrate_run_measurement_overhead();
1456 calibrate_sleep_measurement_overhead();
1188 1457
1189 test_calibrations(); 1458 test_calibrations();
1190 1459
1191 parse_trace(); 1460 parse_trace();
1192 print_task_traces(); 1461 print_task_traces();
1193 add_cross_task_wakeups(); 1462 add_cross_task_wakeups();
1194 1463
1195 create_tasks(); 1464 create_tasks();
1196 printf("------------------------------------------------------------\n"); 1465 printf("------------------------------------------------------------\n");
1197 for (i = 0; i < nr_iterations; i++) 1466 for (i = 0; i < nr_iterations; i++)
1198 run_one_test(); 1467 run_one_test();
1468 } else if (lat_mode) {
1469 setup_pager();
1470 __cmd_sched();
1471 output_lat_results();
1472 }
1199 1473
1200 return 0; 1474 return 0;
1201} 1475}