diff options
author:    Mike Galbraith <efault@gmx.de>  2009-10-10 08:46:04 -0400
committer: Ingo Molnar <mingo@elte.hu>     2009-10-12 03:10:55 -0400
commit:    55ffb7a6bd45d0083ffb132381cb46964a4afe01 (patch)
tree:      89853e721eda384aad61bf71960bfdaea92269f5 /tools/perf/builtin-sched.c
parent:    7e4ff9e3e8f88de8a8536f43294cd32b4e7d9123 (diff)
perf sched: Add -C option to measure on a specific CPU
To refresh, trying to sched record only one CPU results in bogus
latencies as below.
I fixed^Wmade it stop doing the bad thing today, by
following task migration events properly.
Before:
marge:/root/tmp # taskset -c 1 perf sched record -C 0 -- sleep 10
marge:/root/tmp # perf sched lat
-----------------------------------------------------------------------------------------
Task | Runtime ms | Switches | Average delay ms | Maximum delay ms |
-----------------------------------------------------------------------------------------
Xorg:4943 | 1.290 ms | 1 | avg: 1670.132 ms | max: 1670.132 ms |
hald-addon-stor:3569 | 0.091 ms | 3 | avg: 658.609 ms | max: 1975.797 ms |
hald-addon-stor:3573 | 0.209 ms | 4 | avg: 499.138 ms | max: 1990.565 ms |
audispd:4270 | 0.012 ms | 1 | avg: 0.015 ms | max: 0.015 ms |
....
marge:/root/tmp # perf sched trace|grep 'Xorg:4943'
swapper-0 [000] 401.184013288: sched_stat_runtime: task: Xorg:4943 runtime: 1233188 [ns], vruntime: 19105169779 [ns]
rt2870TimerQHan-4947 [000] 402.854140127: sched_stat_wait: task: Xorg:4943 wait: 580073 [ns]
rt2870TimerQHan-4947 [000] 402.854141770: sched_migrate_task: task Xorg:4943 [140] from: 1 to: 0
rt2870TimerQHan-4947 [000] 402.854143854: sched_stat_wait: task: Xorg:4943 wait: 0 [ns]
rt2870TimerQHan-4947 [000] 402.854145397: sched_switch: task rt2870TimerQHan:4947 [140] (D) ==> Xorg:4943 [140]
Xorg-4943 [000] 402.854193133: sched_stat_runtime: task: Xorg:4943 runtime: 56546 [ns], vruntime: 11766332500 [ns]
Xorg-4943 [000] 402.854196842: sched_switch: task Xorg:4943 [140] (S) ==> swapper:0 [140]
After:
marge:/root/tmp # taskset -c 1 perf sched record -C 0 -- sleep 10
marge:/root/tmp # perf sched lat
-----------------------------------------------------------------------------------------
Task | Runtime ms | Switches | Average delay ms | Maximum delay ms |
-----------------------------------------------------------------------------------------
amarokapp:11150 | 271.297 ms | 878 | avg: 0.130 ms | max: 1.057 ms |
konsole:5965 | 1.370 ms | 12 | avg: 0.092 ms | max: 0.855 ms |
Xorg:4943 | 179.980 ms | 1109 | avg: 0.087 ms | max: 1.206 ms |
hald-addon-stor:3574 | 0.212 ms | 9 | avg: 0.040 ms | max: 0.169 ms |
hald-addon-stor:3570 | 0.223 ms | 9 | avg: 0.037 ms | max: 0.223 ms |
klauncher:5864 | 0.550 ms | 8 | avg: 0.032 ms | max: 0.048 ms |
The 'Maximum delay ms' results are now sane.
Signed-off-by: Mike Galbraith <efault@gmx.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools/perf/builtin-sched.c')
-rw-r--r-- | tools/perf/builtin-sched.c | 101 |
1 file changed, 100 insertions(+), 1 deletion(-)
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 6b00529ce348..387a44234368 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -33,6 +33,8 @@ static u64 sample_type; | |||
33 | static char default_sort_order[] = "avg, max, switch, runtime"; | 33 | static char default_sort_order[] = "avg, max, switch, runtime"; |
34 | static char *sort_order = default_sort_order; | 34 | static char *sort_order = default_sort_order; |
35 | 35 | ||
36 | static int profile_cpu = -1; | ||
37 | |||
36 | static char *cwd; | 38 | static char *cwd; |
37 | static int cwdlen; | 39 | static int cwdlen; |
38 | 40 | ||
@@ -75,6 +77,7 @@ enum sched_event_type { | |||
75 | SCHED_EVENT_RUN, | 77 | SCHED_EVENT_RUN, |
76 | SCHED_EVENT_SLEEP, | 78 | SCHED_EVENT_SLEEP, |
77 | SCHED_EVENT_WAKEUP, | 79 | SCHED_EVENT_WAKEUP, |
80 | SCHED_EVENT_MIGRATION, | ||
78 | }; | 81 | }; |
79 | 82 | ||
80 | struct sched_atom { | 83 | struct sched_atom { |
@@ -399,6 +402,8 @@ process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom) | |||
399 | ret = sem_post(atom->wait_sem); | 402 | ret = sem_post(atom->wait_sem); |
400 | BUG_ON(ret); | 403 | BUG_ON(ret); |
401 | break; | 404 | break; |
405 | case SCHED_EVENT_MIGRATION: | ||
406 | break; | ||
402 | default: | 407 | default: |
403 | BUG_ON(1); | 408 | BUG_ON(1); |
404 | } | 409 | } |
@@ -746,6 +751,22 @@ struct trace_fork_event { | |||
746 | u32 child_pid; | 751 | u32 child_pid; |
747 | }; | 752 | }; |
748 | 753 | ||
754 | struct trace_migrate_task_event { | ||
755 | u32 size; | ||
756 | |||
757 | u16 common_type; | ||
758 | u8 common_flags; | ||
759 | u8 common_preempt_count; | ||
760 | u32 common_pid; | ||
761 | u32 common_tgid; | ||
762 | |||
763 | char comm[16]; | ||
764 | u32 pid; | ||
765 | |||
766 | u32 prio; | ||
767 | u32 cpu; | ||
768 | }; | ||
769 | |||
749 | struct trace_sched_handler { | 770 | struct trace_sched_handler { |
750 | void (*switch_event)(struct trace_switch_event *, | 771 | void (*switch_event)(struct trace_switch_event *, |
751 | struct event *, | 772 | struct event *, |
@@ -770,6 +791,12 @@ struct trace_sched_handler { | |||
770 | int cpu, | 791 | int cpu, |
771 | u64 timestamp, | 792 | u64 timestamp, |
772 | struct thread *thread); | 793 | struct thread *thread); |
794 | |||
795 | void (*migrate_task_event)(struct trace_migrate_task_event *, | ||
796 | struct event *, | ||
797 | int cpu, | ||
798 | u64 timestamp, | ||
799 | struct thread *thread); | ||
773 | }; | 800 | }; |
774 | 801 | ||
775 | 802 | ||
@@ -1140,7 +1167,12 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, | |||
1140 | 1167 | ||
1141 | atom = list_entry(atoms->work_list.prev, struct work_atom, list); | 1168 | atom = list_entry(atoms->work_list.prev, struct work_atom, list); |
1142 | 1169 | ||
1143 | if (atom->state != THREAD_SLEEPING) | 1170 | /* |
1171 | * You WILL be missing events if you've recorded only | ||
1172 | * one CPU, or are only looking at only one, so don't | ||
1173 | * make useless noise. | ||
1174 | */ | ||
1175 | if (profile_cpu == -1 && atom->state != THREAD_SLEEPING) | ||
1144 | nr_state_machine_bugs++; | 1176 | nr_state_machine_bugs++; |
1145 | 1177 | ||
1146 | nr_timestamps++; | 1178 | nr_timestamps++; |
@@ -1153,11 +1185,51 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, | |||
1153 | atom->wake_up_time = timestamp; | 1185 | atom->wake_up_time = timestamp; |
1154 | } | 1186 | } |
1155 | 1187 | ||
1188 | static void | ||
1189 | latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event, | ||
1190 | struct event *__event __used, | ||
1191 | int cpu __used, | ||
1192 | u64 timestamp, | ||
1193 | struct thread *thread __used) | ||
1194 | { | ||
1195 | struct work_atoms *atoms; | ||
1196 | struct work_atom *atom; | ||
1197 | struct thread *migrant; | ||
1198 | |||
1199 | /* | ||
1200 | * Only need to worry about migration when profiling one CPU. | ||
1201 | */ | ||
1202 | if (profile_cpu == -1) | ||
1203 | return; | ||
1204 | |||
1205 | migrant = threads__findnew(migrate_task_event->pid, &threads, &last_match); | ||
1206 | atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid); | ||
1207 | if (!atoms) { | ||
1208 | thread_atoms_insert(migrant); | ||
1209 | register_pid(migrant->pid, migrant->comm); | ||
1210 | atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid); | ||
1211 | if (!atoms) | ||
1212 | die("migration-event: Internal tree error"); | ||
1213 | add_sched_out_event(atoms, 'R', timestamp); | ||
1214 | } | ||
1215 | |||
1216 | BUG_ON(list_empty(&atoms->work_list)); | ||
1217 | |||
1218 | atom = list_entry(atoms->work_list.prev, struct work_atom, list); | ||
1219 | atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp; | ||
1220 | |||
1221 | nr_timestamps++; | ||
1222 | |||
1223 | if (atom->sched_out_time > timestamp) | ||
1224 | nr_unordered_timestamps++; | ||
1225 | } | ||
1226 | |||
1156 | static struct trace_sched_handler lat_ops = { | 1227 | static struct trace_sched_handler lat_ops = { |
1157 | .wakeup_event = latency_wakeup_event, | 1228 | .wakeup_event = latency_wakeup_event, |
1158 | .switch_event = latency_switch_event, | 1229 | .switch_event = latency_switch_event, |
1159 | .runtime_event = latency_runtime_event, | 1230 | .runtime_event = latency_runtime_event, |
1160 | .fork_event = latency_fork_event, | 1231 | .fork_event = latency_fork_event, |
1232 | .migrate_task_event = latency_migrate_task_event, | ||
1161 | }; | 1233 | }; |
1162 | 1234 | ||
1163 | static void output_lat_thread(struct work_atoms *work_list) | 1235 | static void output_lat_thread(struct work_atoms *work_list) |
@@ -1518,6 +1590,26 @@ process_sched_exit_event(struct event *event, | |||
1518 | } | 1590 | } |
1519 | 1591 | ||
1520 | static void | 1592 | static void |
1593 | process_sched_migrate_task_event(struct raw_event_sample *raw, | ||
1594 | struct event *event, | ||
1595 | int cpu __used, | ||
1596 | u64 timestamp __used, | ||
1597 | struct thread *thread __used) | ||
1598 | { | ||
1599 | struct trace_migrate_task_event migrate_task_event; | ||
1600 | |||
1601 | FILL_COMMON_FIELDS(migrate_task_event, event, raw->data); | ||
1602 | |||
1603 | FILL_ARRAY(migrate_task_event, comm, event, raw->data); | ||
1604 | FILL_FIELD(migrate_task_event, pid, event, raw->data); | ||
1605 | FILL_FIELD(migrate_task_event, prio, event, raw->data); | ||
1606 | FILL_FIELD(migrate_task_event, cpu, event, raw->data); | ||
1607 | |||
1608 | if (trace_handler->migrate_task_event) | ||
1609 | trace_handler->migrate_task_event(&migrate_task_event, event, cpu, timestamp, thread); | ||
1610 | } | ||
1611 | |||
1612 | static void | ||
1521 | process_raw_event(event_t *raw_event __used, void *more_data, | 1613 | process_raw_event(event_t *raw_event __used, void *more_data, |
1522 | int cpu, u64 timestamp, struct thread *thread) | 1614 | int cpu, u64 timestamp, struct thread *thread) |
1523 | { | 1615 | { |
@@ -1540,6 +1632,8 @@ process_raw_event(event_t *raw_event __used, void *more_data, | |||
1540 | process_sched_fork_event(raw, event, cpu, timestamp, thread); | 1632 | process_sched_fork_event(raw, event, cpu, timestamp, thread); |
1541 | if (!strcmp(event->name, "sched_process_exit")) | 1633 | if (!strcmp(event->name, "sched_process_exit")) |
1542 | process_sched_exit_event(event, cpu, timestamp, thread); | 1634 | process_sched_exit_event(event, cpu, timestamp, thread); |
1635 | if (!strcmp(event->name, "sched_migrate_task")) | ||
1636 | process_sched_migrate_task_event(raw, event, cpu, timestamp, thread); | ||
1543 | } | 1637 | } |
1544 | 1638 | ||
1545 | static int | 1639 | static int |
@@ -1589,6 +1683,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) | |||
1589 | return -1; | 1683 | return -1; |
1590 | } | 1684 | } |
1591 | 1685 | ||
1686 | if (profile_cpu != -1 && profile_cpu != (int) cpu) | ||
1687 | return 0; | ||
1688 | |||
1592 | process_raw_event(event, more_data, cpu, timestamp, thread); | 1689 | process_raw_event(event, more_data, cpu, timestamp, thread); |
1593 | 1690 | ||
1594 | return 0; | 1691 | return 0; |
@@ -1771,6 +1868,8 @@ static const struct option latency_options[] = { | |||
1771 | "sort by key(s): runtime, switch, avg, max"), | 1868 | "sort by key(s): runtime, switch, avg, max"), |
1772 | OPT_BOOLEAN('v', "verbose", &verbose, | 1869 | OPT_BOOLEAN('v', "verbose", &verbose, |
1773 | "be more verbose (show symbol address, etc)"), | 1870 | "be more verbose (show symbol address, etc)"), |
1871 | OPT_INTEGER('C', "CPU", &profile_cpu, | ||
1872 | "CPU to profile on"), | ||
1774 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, | 1873 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, |
1775 | "dump raw trace in ASCII"), | 1874 | "dump raw trace in ASCII"), |
1776 | OPT_END() | 1875 | OPT_END() |