aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf
diff options
context:
space:
mode:
authorMike Galbraith <efault@gmx.de>2009-10-10 08:46:04 -0400
committerIngo Molnar <mingo@elte.hu>2009-10-12 03:10:55 -0400
commit55ffb7a6bd45d0083ffb132381cb46964a4afe01 (patch)
tree89853e721eda384aad61bf71960bfdaea92269f5 /tools/perf
parent7e4ff9e3e8f88de8a8536f43294cd32b4e7d9123 (diff)
perf sched: Add -C option to measure on a specific CPU
To refresh, trying to sched record only one CPU results in bogus latencies as below. I fixed^Wmade it stop doing the bad thing today, by following task migration events properly. Before: marge:/root/tmp # taskset -c 1 perf sched record -C 0 -- sleep 10 marge:/root/tmp # perf sched lat ----------------------------------------------------------------------------------------- Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | ----------------------------------------------------------------------------------------- Xorg:4943 | 1.290 ms | 1 | avg: 1670.132 ms | max: 1670.132 ms | hald-addon-stor:3569 | 0.091 ms | 3 | avg: 658.609 ms | max: 1975.797 ms | hald-addon-stor:3573 | 0.209 ms | 4 | avg: 499.138 ms | max: 1990.565 ms | audispd:4270 | 0.012 ms | 1 | avg: 0.015 ms | max: 0.015 ms | .... marge:/root/tmp # perf sched trace|grep 'Xorg:4943' swapper-0 [000] 401.184013288: sched_stat_runtime: task: Xorg:4943 runtime: 1233188 [ns], vruntime: 19105169779 [ns] rt2870TimerQHan-4947 [000] 402.854140127: sched_stat_wait: task: Xorg:4943 wait: 580073 [ns] rt2870TimerQHan-4947 [000] 402.854141770: sched_migrate_task: task Xorg:4943 [140] from: 1 to: 0 rt2870TimerQHan-4947 [000] 402.854143854: sched_stat_wait: task: Xorg:4943 wait: 0 [ns] rt2870TimerQHan-4947 [000] 402.854145397: sched_switch: task rt2870TimerQHan:4947 [140] (D) ==> Xorg:4943 [140] Xorg-4943 [000] 402.854193133: sched_stat_runtime: task: Xorg:4943 runtime: 56546 [ns], vruntime: 11766332500 [ns] Xorg-4943 [000] 402.854196842: sched_switch: task Xorg:4943 [140] (S) ==> swapper:0 [140] After: marge:/root/tmp # taskset -c 1 perf sched record -C 0 -- sleep 10 marge:/root/tmp # perf sched lat ----------------------------------------------------------------------------------------- Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | ----------------------------------------------------------------------------------------- amarokapp:11150 | 271.297 ms | 878 | avg: 0.130 ms | max: 1.057 ms | konsole:5965 | 1.370 ms | 12 | avg: 0.092 ms | max: 0.855 ms | Xorg:4943 | 179.980 ms | 1109 | avg: 0.087 ms | max: 1.206 ms | hald-addon-stor:3574 | 0.212 ms | 9 | avg: 0.040 ms | max: 0.169 ms | hald-addon-stor:3570 | 0.223 ms | 9 | avg: 0.037 ms | max: 0.223 ms | klauncher:5864 | 0.550 ms | 8 | avg: 0.032 ms | max: 0.048 ms | The 'Maximum delay ms' results are now sane. Signed-off-by: Mike Galbraith <efault@gmx.de> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/builtin-sched.c101
1 files changed, 100 insertions, 1 deletions
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 6b00529ce348..387a44234368 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -33,6 +33,8 @@ static u64 sample_type;
33static char default_sort_order[] = "avg, max, switch, runtime"; 33static char default_sort_order[] = "avg, max, switch, runtime";
34static char *sort_order = default_sort_order; 34static char *sort_order = default_sort_order;
35 35
36static int profile_cpu = -1;
37
36static char *cwd; 38static char *cwd;
37static int cwdlen; 39static int cwdlen;
38 40
@@ -75,6 +77,7 @@ enum sched_event_type {
75 SCHED_EVENT_RUN, 77 SCHED_EVENT_RUN,
76 SCHED_EVENT_SLEEP, 78 SCHED_EVENT_SLEEP,
77 SCHED_EVENT_WAKEUP, 79 SCHED_EVENT_WAKEUP,
80 SCHED_EVENT_MIGRATION,
78}; 81};
79 82
80struct sched_atom { 83struct sched_atom {
@@ -399,6 +402,8 @@ process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom)
399 ret = sem_post(atom->wait_sem); 402 ret = sem_post(atom->wait_sem);
400 BUG_ON(ret); 403 BUG_ON(ret);
401 break; 404 break;
405 case SCHED_EVENT_MIGRATION:
406 break;
402 default: 407 default:
403 BUG_ON(1); 408 BUG_ON(1);
404 } 409 }
@@ -746,6 +751,22 @@ struct trace_fork_event {
746 u32 child_pid; 751 u32 child_pid;
747}; 752};
748 753
754struct trace_migrate_task_event {
755 u32 size;
756
757 u16 common_type;
758 u8 common_flags;
759 u8 common_preempt_count;
760 u32 common_pid;
761 u32 common_tgid;
762
763 char comm[16];
764 u32 pid;
765
766 u32 prio;
767 u32 cpu;
768};
769
749struct trace_sched_handler { 770struct trace_sched_handler {
750 void (*switch_event)(struct trace_switch_event *, 771 void (*switch_event)(struct trace_switch_event *,
751 struct event *, 772 struct event *,
@@ -770,6 +791,12 @@ struct trace_sched_handler {
770 int cpu, 791 int cpu,
771 u64 timestamp, 792 u64 timestamp,
772 struct thread *thread); 793 struct thread *thread);
794
795 void (*migrate_task_event)(struct trace_migrate_task_event *,
796 struct event *,
797 int cpu,
798 u64 timestamp,
799 struct thread *thread);
773}; 800};
774 801
775 802
@@ -1140,7 +1167,12 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
1140 1167
1141 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1168 atom = list_entry(atoms->work_list.prev, struct work_atom, list);
1142 1169
1143 if (atom->state != THREAD_SLEEPING) 1170 /*
1171 * You WILL be missing events if you've recorded only
1172 * one CPU, or are only looking at only one, so don't
1173 * make useless noise.
1174 */
1175 if (profile_cpu == -1 && atom->state != THREAD_SLEEPING)
1144 nr_state_machine_bugs++; 1176 nr_state_machine_bugs++;
1145 1177
1146 nr_timestamps++; 1178 nr_timestamps++;
@@ -1153,11 +1185,51 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
1153 atom->wake_up_time = timestamp; 1185 atom->wake_up_time = timestamp;
1154} 1186}
1155 1187
1188static void
1189latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event,
1190 struct event *__event __used,
1191 int cpu __used,
1192 u64 timestamp,
1193 struct thread *thread __used)
1194{
1195 struct work_atoms *atoms;
1196 struct work_atom *atom;
1197 struct thread *migrant;
1198
1199 /*
1200 * Only need to worry about migration when profiling one CPU.
1201 */
1202 if (profile_cpu == -1)
1203 return;
1204
1205 migrant = threads__findnew(migrate_task_event->pid, &threads, &last_match);
1206 atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
1207 if (!atoms) {
1208 thread_atoms_insert(migrant);
1209 register_pid(migrant->pid, migrant->comm);
1210 atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
1211 if (!atoms)
1212 die("migration-event: Internal tree error");
1213 add_sched_out_event(atoms, 'R', timestamp);
1214 }
1215
1216 BUG_ON(list_empty(&atoms->work_list));
1217
1218 atom = list_entry(atoms->work_list.prev, struct work_atom, list);
1219 atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp;
1220
1221 nr_timestamps++;
1222
1223 if (atom->sched_out_time > timestamp)
1224 nr_unordered_timestamps++;
1225}
1226
1156static struct trace_sched_handler lat_ops = { 1227static struct trace_sched_handler lat_ops = {
1157 .wakeup_event = latency_wakeup_event, 1228 .wakeup_event = latency_wakeup_event,
1158 .switch_event = latency_switch_event, 1229 .switch_event = latency_switch_event,
1159 .runtime_event = latency_runtime_event, 1230 .runtime_event = latency_runtime_event,
1160 .fork_event = latency_fork_event, 1231 .fork_event = latency_fork_event,
1232 .migrate_task_event = latency_migrate_task_event,
1161}; 1233};
1162 1234
1163static void output_lat_thread(struct work_atoms *work_list) 1235static void output_lat_thread(struct work_atoms *work_list)
@@ -1518,6 +1590,26 @@ process_sched_exit_event(struct event *event,
1518} 1590}
1519 1591
1520static void 1592static void
1593process_sched_migrate_task_event(struct raw_event_sample *raw,
1594 struct event *event,
1595 int cpu __used,
1596 u64 timestamp __used,
1597 struct thread *thread __used)
1598{
1599 struct trace_migrate_task_event migrate_task_event;
1600
1601 FILL_COMMON_FIELDS(migrate_task_event, event, raw->data);
1602
1603 FILL_ARRAY(migrate_task_event, comm, event, raw->data);
1604 FILL_FIELD(migrate_task_event, pid, event, raw->data);
1605 FILL_FIELD(migrate_task_event, prio, event, raw->data);
1606 FILL_FIELD(migrate_task_event, cpu, event, raw->data);
1607
1608 if (trace_handler->migrate_task_event)
1609 trace_handler->migrate_task_event(&migrate_task_event, event, cpu, timestamp, thread);
1610}
1611
1612static void
1521process_raw_event(event_t *raw_event __used, void *more_data, 1613process_raw_event(event_t *raw_event __used, void *more_data,
1522 int cpu, u64 timestamp, struct thread *thread) 1614 int cpu, u64 timestamp, struct thread *thread)
1523{ 1615{
@@ -1540,6 +1632,8 @@ process_raw_event(event_t *raw_event __used, void *more_data,
1540 process_sched_fork_event(raw, event, cpu, timestamp, thread); 1632 process_sched_fork_event(raw, event, cpu, timestamp, thread);
1541 if (!strcmp(event->name, "sched_process_exit")) 1633 if (!strcmp(event->name, "sched_process_exit"))
1542 process_sched_exit_event(event, cpu, timestamp, thread); 1634 process_sched_exit_event(event, cpu, timestamp, thread);
1635 if (!strcmp(event->name, "sched_migrate_task"))
1636 process_sched_migrate_task_event(raw, event, cpu, timestamp, thread);
1543} 1637}
1544 1638
1545static int 1639static int
@@ -1589,6 +1683,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
1589 return -1; 1683 return -1;
1590 } 1684 }
1591 1685
1686 if (profile_cpu != -1 && profile_cpu != (int) cpu)
1687 return 0;
1688
1592 process_raw_event(event, more_data, cpu, timestamp, thread); 1689 process_raw_event(event, more_data, cpu, timestamp, thread);
1593 1690
1594 return 0; 1691 return 0;
@@ -1771,6 +1868,8 @@ static const struct option latency_options[] = {
1771 "sort by key(s): runtime, switch, avg, max"), 1868 "sort by key(s): runtime, switch, avg, max"),
1772 OPT_BOOLEAN('v', "verbose", &verbose, 1869 OPT_BOOLEAN('v', "verbose", &verbose,
1773 "be more verbose (show symbol address, etc)"), 1870 "be more verbose (show symbol address, etc)"),
1871 OPT_INTEGER('C', "CPU", &profile_cpu,
1872 "CPU to profile on"),
1774 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 1873 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
1775 "dump raw trace in ASCII"), 1874 "dump raw trace in ASCII"),
1776 OPT_END() 1875 OPT_END()