diff options
| author | Frederic Weisbecker <fweisbec@gmail.com> | 2009-09-12 02:06:14 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2009-09-13 04:22:43 -0400 |
| commit | cdce9d738b91e1ec686e4ef6ca46d46e2891e208 (patch) | |
| tree | 1fd64c4bef114ce44a3c96ad7183fa200bff3d12 /tools | |
| parent | 419ab0d6a959f41ec7fde807fe311aaafb05c3be (diff) | |
perf sched: Add sched latency profiling
Add the -l --latency option that reports statistics about the
scheduler latencies.
For now, the latencies are measured over the following
sequence:
- task A is sleeping (D or S state)
- task B wakes up A
^
|
|
latency timeframe
|
|
v
- task A is scheduled in
Start by recording all scheduler events:
perf record -e sched:*
and then fetch the results:
perf sched -l
Tasks count total avg max
migration/0 2 39849 19924 28826
ksoftirqd/0 7 756383 108054 373014
migration/1 5 45391 9078 10452
ksoftirqd/1 2 399055 199527 359130
events/0 8 4780110 597513 4500250
events/1 9 6353057 705895 2986012
kblockd/0 42 37805097 900121 5077684
The snapshots are in nanoseconds.
- Count: number of snapshots taken for the given task
- Total: total latencies in nanosec
- Avg : average of latency between wake up and sched in
- Max : max snapshot latency
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/perf/builtin-sched.c | 296 |
1 files changed, 285 insertions, 11 deletions
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 756fe62eb046..4f9e943181a3 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c | |||
| @@ -31,6 +31,7 @@ static struct perf_header *header; | |||
| 31 | static u64 sample_type; | 31 | static u64 sample_type; |
| 32 | 32 | ||
| 33 | static int replay_mode; | 33 | static int replay_mode; |
| 34 | static int lat_mode; | ||
| 34 | 35 | ||
| 35 | 36 | ||
| 36 | /* | 37 | /* |
| @@ -847,6 +848,269 @@ static struct trace_sched_handler replay_ops = { | |||
| 847 | .fork_event = replay_fork_event, | 848 | .fork_event = replay_fork_event, |
| 848 | }; | 849 | }; |
| 849 | 850 | ||
| 851 | #define TASK_STATE_TO_CHAR_STR "RSDTtZX" | ||
| 852 | |||
| 853 | enum thread_state { | ||
| 854 | THREAD_SLEEPING, | ||
| 855 | THREAD_WAKED_UP, | ||
| 856 | THREAD_SCHED_IN, | ||
| 857 | THREAD_IGNORE | ||
| 858 | }; | ||
| 859 | |||
| 860 | struct lat_snapshot { | ||
| 861 | struct list_head list; | ||
| 862 | enum thread_state state; | ||
| 863 | u64 wake_up_time; | ||
| 864 | u64 sched_in_time; | ||
| 865 | }; | ||
| 866 | |||
| 867 | struct thread_latency { | ||
| 868 | struct list_head snapshot_list; | ||
| 869 | struct thread *thread; | ||
| 870 | struct rb_node node; | ||
| 871 | }; | ||
| 872 | |||
| 873 | static struct rb_root lat_snapshot_root; | ||
| 874 | |||
| 875 | static struct thread_latency * | ||
| 876 | thread_latency_search(struct rb_root *root, struct thread *thread) | ||
| 877 | { | ||
| 878 | struct rb_node *node = root->rb_node; | ||
| 879 | |||
| 880 | while (node) { | ||
| 881 | struct thread_latency *lat; | ||
| 882 | |||
| 883 | lat = container_of(node, struct thread_latency, node); | ||
| 884 | if (thread->pid < lat->thread->pid) | ||
| 885 | node = node->rb_left; | ||
| 886 | else if (thread->pid > lat->thread->pid) | ||
| 887 | node = node->rb_right; | ||
| 888 | else { | ||
| 889 | return lat; | ||
| 890 | } | ||
| 891 | } | ||
| 892 | return NULL; | ||
| 893 | } | ||
| 894 | |||
| 895 | static void | ||
| 896 | __thread_latency_insert(struct rb_root *root, struct thread_latency *data) | ||
| 897 | { | ||
| 898 | struct rb_node **new = &(root->rb_node), *parent = NULL; | ||
| 899 | |||
| 900 | while (*new) { | ||
| 901 | struct thread_latency *this; | ||
| 902 | |||
| 903 | this = container_of(*new, struct thread_latency, node); | ||
| 904 | parent = *new; | ||
| 905 | if (data->thread->pid < this->thread->pid) | ||
| 906 | new = &((*new)->rb_left); | ||
| 907 | else if (data->thread->pid > this->thread->pid) | ||
| 908 | new = &((*new)->rb_right); | ||
| 909 | else | ||
| 910 | die("Double thread insertion\n"); | ||
| 911 | } | ||
| 912 | |||
| 913 | rb_link_node(&data->node, parent, new); | ||
| 914 | rb_insert_color(&data->node, root); | ||
| 915 | } | ||
| 916 | |||
| 917 | static void thread_latency_insert(struct thread *thread) | ||
| 918 | { | ||
| 919 | struct thread_latency *lat; | ||
| 920 | lat = calloc(sizeof(*lat), 1); | ||
| 921 | if (!lat) | ||
| 922 | die("No memory"); | ||
| 923 | |||
| 924 | lat->thread = thread; | ||
| 925 | INIT_LIST_HEAD(&lat->snapshot_list); | ||
| 926 | __thread_latency_insert(&lat_snapshot_root, lat); | ||
| 927 | } | ||
| 928 | |||
| 929 | static void | ||
| 930 | latency_fork_event(struct trace_fork_event *fork_event __used, | ||
| 931 | struct event *event __used, | ||
| 932 | int cpu __used, | ||
| 933 | u64 timestamp __used, | ||
| 934 | struct thread *thread __used) | ||
| 935 | { | ||
| 936 | /* should insert the newcomer */ | ||
| 937 | } | ||
| 938 | |||
| 939 | static char sched_out_state(struct trace_switch_event *switch_event) | ||
| 940 | { | ||
| 941 | const char *str = TASK_STATE_TO_CHAR_STR; | ||
| 942 | |||
| 943 | return str[switch_event->prev_state]; | ||
| 944 | } | ||
| 945 | |||
| 946 | static void | ||
| 947 | lat_sched_out(struct thread_latency *lat, | ||
| 948 | struct trace_switch_event *switch_event) | ||
| 949 | { | ||
| 950 | struct lat_snapshot *snapshot; | ||
| 951 | |||
| 952 | if (sched_out_state(switch_event) == 'R') | ||
| 953 | return; | ||
| 954 | |||
| 955 | snapshot = calloc(sizeof(*snapshot), 1); | ||
| 956 | if (!snapshot) | ||
| 957 | die("Non memory"); | ||
| 958 | |||
| 959 | list_add_tail(&snapshot->list, &lat->snapshot_list); | ||
| 960 | } | ||
| 961 | |||
| 962 | static void | ||
| 963 | lat_sched_in(struct thread_latency *lat, u64 timestamp) | ||
| 964 | { | ||
| 965 | struct lat_snapshot *snapshot; | ||
| 966 | |||
| 967 | if (list_empty(&lat->snapshot_list)) | ||
| 968 | return; | ||
| 969 | |||
| 970 | snapshot = list_entry(lat->snapshot_list.prev, struct lat_snapshot, | ||
| 971 | list); | ||
| 972 | |||
| 973 | if (snapshot->state != THREAD_WAKED_UP) | ||
| 974 | return; | ||
| 975 | |||
| 976 | if (timestamp < snapshot->wake_up_time) { | ||
| 977 | snapshot->state = THREAD_IGNORE; | ||
| 978 | return; | ||
| 979 | } | ||
| 980 | |||
| 981 | snapshot->state = THREAD_SCHED_IN; | ||
| 982 | snapshot->sched_in_time = timestamp; | ||
| 983 | } | ||
| 984 | |||
| 985 | |||
| 986 | static void | ||
| 987 | latency_switch_event(struct trace_switch_event *switch_event, | ||
| 988 | struct event *event __used, | ||
| 989 | int cpu __used, | ||
| 990 | u64 timestamp, | ||
| 991 | struct thread *thread __used) | ||
| 992 | { | ||
| 993 | struct thread_latency *out_lat, *in_lat; | ||
| 994 | struct thread *sched_out, *sched_in; | ||
| 995 | |||
| 996 | sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match); | ||
| 997 | sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match); | ||
| 998 | |||
| 999 | in_lat = thread_latency_search(&lat_snapshot_root, sched_in); | ||
| 1000 | if (!in_lat) { | ||
| 1001 | thread_latency_insert(sched_in); | ||
| 1002 | in_lat = thread_latency_search(&lat_snapshot_root, sched_in); | ||
| 1003 | if (!in_lat) | ||
| 1004 | die("Internal latency tree error"); | ||
| 1005 | } | ||
| 1006 | |||
| 1007 | out_lat = thread_latency_search(&lat_snapshot_root, sched_out); | ||
| 1008 | if (!out_lat) { | ||
| 1009 | thread_latency_insert(sched_out); | ||
| 1010 | out_lat = thread_latency_search(&lat_snapshot_root, sched_out); | ||
| 1011 | if (!out_lat) | ||
| 1012 | die("Internal latency tree error"); | ||
| 1013 | } | ||
| 1014 | |||
| 1015 | lat_sched_in(in_lat, timestamp); | ||
| 1016 | lat_sched_out(out_lat, switch_event); | ||
| 1017 | } | ||
| 1018 | |||
| 1019 | static void | ||
| 1020 | latency_wakeup_event(struct trace_wakeup_event *wakeup_event, | ||
| 1021 | struct event *event __used, | ||
| 1022 | int cpu __used, | ||
| 1023 | u64 timestamp, | ||
| 1024 | struct thread *thread __used) | ||
| 1025 | { | ||
| 1026 | struct thread_latency *lat; | ||
| 1027 | struct lat_snapshot *snapshot; | ||
| 1028 | struct thread *wakee; | ||
| 1029 | |||
| 1030 | /* Note for later, it may be interesting to observe the failing cases */ | ||
| 1031 | if (!wakeup_event->success) | ||
| 1032 | return; | ||
| 1033 | |||
| 1034 | wakee = threads__findnew(wakeup_event->pid, &threads, &last_match); | ||
| 1035 | lat = thread_latency_search(&lat_snapshot_root, wakee); | ||
| 1036 | if (!lat) { | ||
| 1037 | thread_latency_insert(wakee); | ||
| 1038 | return; | ||
| 1039 | } | ||
| 1040 | |||
| 1041 | if (list_empty(&lat->snapshot_list)) | ||
| 1042 | return; | ||
| 1043 | |||
| 1044 | snapshot = list_entry(lat->snapshot_list.prev, struct lat_snapshot, | ||
| 1045 | list); | ||
| 1046 | |||
| 1047 | if (snapshot->state != THREAD_SLEEPING) | ||
| 1048 | return; | ||
| 1049 | |||
| 1050 | snapshot->state = THREAD_WAKED_UP; | ||
| 1051 | snapshot->wake_up_time = timestamp; | ||
| 1052 | } | ||
| 1053 | |||
| 1054 | static struct trace_sched_handler lat_ops = { | ||
| 1055 | .wakeup_event = latency_wakeup_event, | ||
| 1056 | .switch_event = latency_switch_event, | ||
| 1057 | .fork_event = latency_fork_event, | ||
| 1058 | }; | ||
| 1059 | |||
| 1060 | static void output_lat_thread(struct thread_latency *lat) | ||
| 1061 | { | ||
| 1062 | struct lat_snapshot *shot; | ||
| 1063 | int count = 0; | ||
| 1064 | int i; | ||
| 1065 | int ret; | ||
| 1066 | u64 max = 0, avg; | ||
| 1067 | u64 total = 0, delta; | ||
| 1068 | |||
| 1069 | list_for_each_entry(shot, &lat->snapshot_list, list) { | ||
| 1070 | if (shot->state != THREAD_SCHED_IN) | ||
| 1071 | continue; | ||
| 1072 | |||
| 1073 | count++; | ||
| 1074 | |||
| 1075 | delta = shot->sched_in_time - shot->wake_up_time; | ||
| 1076 | if (delta > max) | ||
| 1077 | max = delta; | ||
| 1078 | total += delta; | ||
| 1079 | } | ||
| 1080 | |||
| 1081 | if (!count) | ||
| 1082 | return; | ||
| 1083 | |||
| 1084 | ret = printf("%s", lat->thread->comm); | ||
| 1085 | |||
| 1086 | for (i = 0; i < 25 - ret; i++) | ||
| 1087 | printf(" "); | ||
| 1088 | |||
| 1089 | avg = total / count; | ||
| 1090 | |||
| 1091 | printf("%5d %10llu %10llu %10llu\n", count, total, avg, max); | ||
| 1092 | } | ||
| 1093 | |||
| 1094 | static void output_lat_results(void) | ||
| 1095 | { | ||
| 1096 | struct rb_node *next; | ||
| 1097 | |||
| 1098 | printf(" Tasks"); | ||
| 1099 | printf(" count"); | ||
| 1100 | printf(" total"); | ||
| 1101 | printf(" avg"); | ||
| 1102 | printf(" max\n\n"); | ||
| 1103 | |||
| 1104 | next = rb_first(&lat_snapshot_root); | ||
| 1105 | |||
| 1106 | while (next) { | ||
| 1107 | struct thread_latency *lat; | ||
| 1108 | |||
| 1109 | lat = rb_entry(next, struct thread_latency, node); | ||
| 1110 | output_lat_thread(lat); | ||
| 1111 | next = rb_next(next); | ||
| 1112 | } | ||
| 1113 | } | ||
| 850 | 1114 | ||
| 851 | static struct trace_sched_handler *trace_handler; | 1115 | static struct trace_sched_handler *trace_handler; |
| 852 | 1116 | ||
| @@ -1154,6 +1418,8 @@ static const struct option options[] = { | |||
| 1154 | "dump raw trace in ASCII"), | 1418 | "dump raw trace in ASCII"), |
| 1155 | OPT_BOOLEAN('r', "replay", &replay_mode, | 1419 | OPT_BOOLEAN('r', "replay", &replay_mode, |
| 1156 | "replay sched behaviour from traces"), | 1420 | "replay sched behaviour from traces"), |
| 1421 | OPT_BOOLEAN('l', "latency", &lat_mode, | ||
| 1422 | "measure various latencies"), | ||
| 1157 | OPT_BOOLEAN('v', "verbose", &verbose, | 1423 | OPT_BOOLEAN('v', "verbose", &verbose, |
| 1158 | "be more verbose (show symbol address, etc)"), | 1424 | "be more verbose (show symbol address, etc)"), |
| 1159 | OPT_END() | 1425 | OPT_END() |
| @@ -1180,22 +1446,30 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used) | |||
| 1180 | 1446 | ||
| 1181 | if (replay_mode) | 1447 | if (replay_mode) |
| 1182 | trace_handler = &replay_ops; | 1448 | trace_handler = &replay_ops; |
| 1183 | else /* We may need a default subcommand */ | 1449 | else if (lat_mode) |
| 1450 | trace_handler = &lat_ops; | ||
| 1451 | else /* We may need a default subcommand (perf trace?) */ | ||
| 1184 | die("Please select a sub command (-r)\n"); | 1452 | die("Please select a sub command (-r)\n"); |
| 1185 | 1453 | ||
| 1186 | calibrate_run_measurement_overhead(); | 1454 | if (replay_mode) { |
| 1187 | calibrate_sleep_measurement_overhead(); | 1455 | calibrate_run_measurement_overhead(); |
| 1456 | calibrate_sleep_measurement_overhead(); | ||
| 1188 | 1457 | ||
| 1189 | test_calibrations(); | 1458 | test_calibrations(); |
| 1190 | 1459 | ||
| 1191 | parse_trace(); | 1460 | parse_trace(); |
| 1192 | print_task_traces(); | 1461 | print_task_traces(); |
| 1193 | add_cross_task_wakeups(); | 1462 | add_cross_task_wakeups(); |
| 1194 | 1463 | ||
| 1195 | create_tasks(); | 1464 | create_tasks(); |
| 1196 | printf("------------------------------------------------------------\n"); | 1465 | printf("------------------------------------------------------------\n"); |
| 1197 | for (i = 0; i < nr_iterations; i++) | 1466 | for (i = 0; i < nr_iterations; i++) |
| 1198 | run_one_test(); | 1467 | run_one_test(); |
| 1468 | } else if (lat_mode) { | ||
| 1469 | setup_pager(); | ||
| 1470 | __cmd_sched(); | ||
| 1471 | output_lat_results(); | ||
| 1472 | } | ||
| 1199 | 1473 | ||
| 1200 | return 0; | 1474 | return 0; |
| 1201 | } | 1475 | } |
