author      Arnaldo Carvalho de Melo <acme@redhat.com>    2012-09-11 16:29:27 -0400
committer   Arnaldo Carvalho de Melo <acme@redhat.com>    2012-09-11 16:29:27 -0400
commit      0e9b07e574e544c1e840c59dabf39fef120620ae (patch)
tree        af0a102da556de851c1d99626591a14372fd538b /tools/perf
parent      4218e6734197f3842fc9b6362f12973918d913aa (diff)
perf sched: Use perf_tool as ancestor
So that we can remove all the globals.
Before:
text data bss dec hex filename
1586833 110368 1438600 3135801 2fd939 /tmp/oldperf
After:
text data bss dec hex filename
1629329 93568 848328 2571225 273bd9 /root/bin/perf
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-oph40vikij0crjz4eyapneov@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf')
-rw-r--r--    tools/perf/builtin-sched.c    1136
1 file changed, 562 insertions, 574 deletions
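The whole change is one pattern applied throughout builtin-sched.c: the generic struct perf_tool callback block becomes the first member of a new struct perf_sched, the old file-scope globals become fields of that struct, and every handler that used to reach for a global now takes (or recovers) a struct perf_sched pointer. The sketch below shows the embed-and-container_of idea in isolation; the reduced tool/sched_state types and handle_sample callback are illustrative stand-ins for this note, not the actual perf API.

#include <stddef.h>
#include <stdio.h>

/* Reduced stand-in for the kernel's container_of() helper. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct tool {					/* stand-in for struct perf_tool */
	int (*sample)(struct tool *tool, int value);
};

struct sched_state {				/* stand-in for struct perf_sched */
	struct tool tool;			/* generic part handed to the core */
	unsigned long nr_events;		/* former global, now per instance */
};

static int handle_sample(struct tool *tool, int value)
{
	/* Recover the enclosing state instead of reaching for a global. */
	struct sched_state *sched = container_of(tool, struct sched_state, tool);

	sched->nr_events++;
	return value;
}

int main(void)
{
	struct sched_state sched = {
		.tool      = { .sample = handle_sample },
		.nr_events = 0,
	};

	/* The processing core only ever sees &sched.tool ... */
	sched.tool.sample(&sched.tool, 42);
	/* ... yet the callback updated the per-instance counter. */
	printf("nr_events = %lu\n", sched.nr_events);
	return 0;
}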
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index af11b1aa1bd7..79f88fa3f7a3 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -23,26 +23,12 @@ | |||
23 | #include <pthread.h> | 23 | #include <pthread.h> |
24 | #include <math.h> | 24 | #include <math.h> |
25 | 25 | ||
26 | static const char *input_name; | ||
27 | |||
28 | static char default_sort_order[] = "avg, max, switch, runtime"; | ||
29 | static const char *sort_order = default_sort_order; | ||
30 | |||
31 | static int profile_cpu = -1; | ||
32 | |||
33 | #define PR_SET_NAME 15 /* Set process name */ | 26 | #define PR_SET_NAME 15 /* Set process name */ |
34 | #define MAX_CPUS 4096 | 27 | #define MAX_CPUS 4096 |
35 | |||
36 | static u64 run_measurement_overhead; | ||
37 | static u64 sleep_measurement_overhead; | ||
38 | |||
39 | #define COMM_LEN 20 | 28 | #define COMM_LEN 20 |
40 | #define SYM_LEN 129 | 29 | #define SYM_LEN 129 |
41 | |||
42 | #define MAX_PID 65536 | 30 | #define MAX_PID 65536 |
43 | 31 | ||
44 | static unsigned long nr_tasks; | ||
45 | |||
46 | struct sched_atom; | 32 | struct sched_atom; |
47 | 33 | ||
48 | struct task_desc { | 34 | struct task_desc { |
@@ -80,44 +66,6 @@ struct sched_atom { | |||
80 | struct task_desc *wakee; | 66 | struct task_desc *wakee; |
81 | }; | 67 | }; |
82 | 68 | ||
83 | static struct task_desc *pid_to_task[MAX_PID]; | ||
84 | |||
85 | static struct task_desc **tasks; | ||
86 | |||
87 | static pthread_mutex_t start_work_mutex = PTHREAD_MUTEX_INITIALIZER; | ||
88 | static u64 start_time; | ||
89 | |||
90 | static pthread_mutex_t work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER; | ||
91 | |||
92 | static unsigned long nr_run_events; | ||
93 | static unsigned long nr_sleep_events; | ||
94 | static unsigned long nr_wakeup_events; | ||
95 | |||
96 | static unsigned long nr_sleep_corrections; | ||
97 | static unsigned long nr_run_events_optimized; | ||
98 | |||
99 | static unsigned long targetless_wakeups; | ||
100 | static unsigned long multitarget_wakeups; | ||
101 | |||
102 | static u64 cpu_usage; | ||
103 | static u64 runavg_cpu_usage; | ||
104 | static u64 parent_cpu_usage; | ||
105 | static u64 runavg_parent_cpu_usage; | ||
106 | |||
107 | static unsigned long nr_runs; | ||
108 | static u64 sum_runtime; | ||
109 | static u64 sum_fluct; | ||
110 | static u64 run_avg; | ||
111 | |||
112 | static unsigned int replay_repeat = 10; | ||
113 | static unsigned long nr_timestamps; | ||
114 | static unsigned long nr_unordered_timestamps; | ||
115 | static unsigned long nr_state_machine_bugs; | ||
116 | static unsigned long nr_context_switch_bugs; | ||
117 | static unsigned long nr_events; | ||
118 | static unsigned long nr_lost_chunks; | ||
119 | static unsigned long nr_lost_events; | ||
120 | |||
121 | #define TASK_STATE_TO_CHAR_STR "RSDTtZX" | 69 | #define TASK_STATE_TO_CHAR_STR "RSDTtZX" |
122 | 70 | ||
123 | enum thread_state { | 71 | enum thread_state { |
@@ -149,11 +97,169 @@ struct work_atoms { | |||
149 | 97 | ||
150 | typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *); | 98 | typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *); |
151 | 99 | ||
152 | static struct rb_root atom_root, sorted_atom_root; | 100 | struct trace_switch_event { |
101 | u32 size; | ||
102 | |||
103 | u16 common_type; | ||
104 | u8 common_flags; | ||
105 | u8 common_preempt_count; | ||
106 | u32 common_pid; | ||
107 | u32 common_tgid; | ||
108 | |||
109 | char prev_comm[16]; | ||
110 | u32 prev_pid; | ||
111 | u32 prev_prio; | ||
112 | u64 prev_state; | ||
113 | char next_comm[16]; | ||
114 | u32 next_pid; | ||
115 | u32 next_prio; | ||
116 | }; | ||
117 | |||
118 | struct trace_runtime_event { | ||
119 | u32 size; | ||
120 | |||
121 | u16 common_type; | ||
122 | u8 common_flags; | ||
123 | u8 common_preempt_count; | ||
124 | u32 common_pid; | ||
125 | u32 common_tgid; | ||
126 | |||
127 | char comm[16]; | ||
128 | u32 pid; | ||
129 | u64 runtime; | ||
130 | u64 vruntime; | ||
131 | }; | ||
132 | |||
133 | struct trace_wakeup_event { | ||
134 | u32 size; | ||
153 | 135 | ||
154 | static u64 all_runtime; | 136 | u16 common_type; |
155 | static u64 all_count; | 137 | u8 common_flags; |
138 | u8 common_preempt_count; | ||
139 | u32 common_pid; | ||
140 | u32 common_tgid; | ||
156 | 141 | ||
142 | char comm[16]; | ||
143 | u32 pid; | ||
144 | |||
145 | u32 prio; | ||
146 | u32 success; | ||
147 | u32 cpu; | ||
148 | }; | ||
149 | |||
150 | struct trace_fork_event { | ||
151 | u32 size; | ||
152 | |||
153 | u16 common_type; | ||
154 | u8 common_flags; | ||
155 | u8 common_preempt_count; | ||
156 | u32 common_pid; | ||
157 | u32 common_tgid; | ||
158 | |||
159 | char parent_comm[16]; | ||
160 | u32 parent_pid; | ||
161 | char child_comm[16]; | ||
162 | u32 child_pid; | ||
163 | }; | ||
164 | |||
165 | struct trace_migrate_task_event { | ||
166 | u32 size; | ||
167 | |||
168 | u16 common_type; | ||
169 | u8 common_flags; | ||
170 | u8 common_preempt_count; | ||
171 | u32 common_pid; | ||
172 | u32 common_tgid; | ||
173 | |||
174 | char comm[16]; | ||
175 | u32 pid; | ||
176 | |||
177 | u32 prio; | ||
178 | u32 cpu; | ||
179 | }; | ||
180 | |||
181 | struct perf_sched; | ||
182 | |||
183 | struct trace_sched_handler { | ||
184 | int (*switch_event)(struct perf_sched *sched, | ||
185 | struct trace_switch_event *event, | ||
186 | struct machine *machine, | ||
187 | struct event_format *tp_format, | ||
188 | struct perf_sample *sample); | ||
189 | |||
190 | int (*runtime_event)(struct perf_sched *sched, | ||
191 | struct trace_runtime_event *event, | ||
192 | struct machine *machine, | ||
193 | struct perf_sample *sample); | ||
194 | |||
195 | int (*wakeup_event)(struct perf_sched *sched, | ||
196 | struct trace_wakeup_event *event, | ||
197 | struct machine *machine, | ||
198 | struct event_format *tp_format, | ||
199 | struct perf_sample *sample); | ||
200 | |||
201 | int (*fork_event)(struct perf_sched *sched, | ||
202 | struct trace_fork_event *event, | ||
203 | struct event_format *tp_format); | ||
204 | |||
205 | int (*migrate_task_event)(struct perf_sched *sched, | ||
206 | struct trace_migrate_task_event *event, | ||
207 | struct machine *machine, | ||
208 | struct perf_sample *sample); | ||
209 | }; | ||
210 | |||
211 | struct perf_sched { | ||
212 | struct perf_tool tool; | ||
213 | const char *input_name; | ||
214 | const char *sort_order; | ||
215 | unsigned long nr_tasks; | ||
216 | struct task_desc *pid_to_task[MAX_PID]; | ||
217 | struct task_desc **tasks; | ||
218 | const struct trace_sched_handler *tp_handler; | ||
219 | pthread_mutex_t start_work_mutex; | ||
220 | pthread_mutex_t work_done_wait_mutex; | ||
221 | int profile_cpu; | ||
222 | /* | ||
223 | * Track the current task - that way we can know whether there's any | ||
224 | * weird events, such as a task being switched away that is not current. | ||
225 | */ | ||
226 | int max_cpu; | ||
227 | u32 curr_pid[MAX_CPUS]; | ||
228 | struct thread *curr_thread[MAX_CPUS]; | ||
229 | char next_shortname1; | ||
230 | char next_shortname2; | ||
231 | unsigned int replay_repeat; | ||
232 | unsigned long nr_run_events; | ||
233 | unsigned long nr_sleep_events; | ||
234 | unsigned long nr_wakeup_events; | ||
235 | unsigned long nr_sleep_corrections; | ||
236 | unsigned long nr_run_events_optimized; | ||
237 | unsigned long targetless_wakeups; | ||
238 | unsigned long multitarget_wakeups; | ||
239 | unsigned long nr_runs; | ||
240 | unsigned long nr_timestamps; | ||
241 | unsigned long nr_unordered_timestamps; | ||
242 | unsigned long nr_state_machine_bugs; | ||
243 | unsigned long nr_context_switch_bugs; | ||
244 | unsigned long nr_events; | ||
245 | unsigned long nr_lost_chunks; | ||
246 | unsigned long nr_lost_events; | ||
247 | u64 run_measurement_overhead; | ||
248 | u64 sleep_measurement_overhead; | ||
249 | u64 start_time; | ||
250 | u64 cpu_usage; | ||
251 | u64 runavg_cpu_usage; | ||
252 | u64 parent_cpu_usage; | ||
253 | u64 runavg_parent_cpu_usage; | ||
254 | u64 sum_runtime; | ||
255 | u64 sum_fluct; | ||
256 | u64 run_avg; | ||
257 | u64 all_runtime; | ||
258 | u64 all_count; | ||
259 | u64 cpu_last_switched[MAX_CPUS]; | ||
260 | struct rb_root atom_root, sorted_atom_root; | ||
261 | struct list_head sort_list, cmp_pid; | ||
262 | }; | ||
157 | 263 | ||
158 | static u64 get_nsecs(void) | 264 | static u64 get_nsecs(void) |
159 | { | 265 | { |
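A side effect of folding the globals into struct perf_sched is that the defaults the old statics expressed in their initializers (PTHREAD_MUTEX_INITIALIZER for the two mutexes, profile_cpu = -1, replay_repeat = 10, the curr_pid range initializer) now have to be supplied wherever the struct is instantiated, presumably in the command entry point outside the hunks shown here. A hedged sketch of that idea with reduced stand-in types:

#include <pthread.h>

#define MAX_CPUS 4096

struct sched_instance {			/* reduced stand-in for struct perf_sched */
	pthread_mutex_t start_work_mutex;
	pthread_mutex_t work_done_wait_mutex;
	int profile_cpu;
	unsigned int replay_repeat;
	unsigned int curr_pid[MAX_CPUS];
};

/* What the old globals expressed implicitly now has to be spelled out. */
static struct sched_instance sched = {
	.start_work_mutex     = PTHREAD_MUTEX_INITIALIZER,
	.work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER,
	.profile_cpu          = -1,	/* was: static int profile_cpu = -1 */
	.replay_repeat        = 10,	/* was: static unsigned int replay_repeat = 10 */
	.curr_pid             = { [0 ... MAX_CPUS - 1] = -1 },	/* GCC range initializer, as the old global used */
};

int main(void)
{
	(void)sched;
	return 0;
}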
@@ -164,13 +270,13 @@ static u64 get_nsecs(void) | |||
164 | return ts.tv_sec * 1000000000ULL + ts.tv_nsec; | 270 | return ts.tv_sec * 1000000000ULL + ts.tv_nsec; |
165 | } | 271 | } |
166 | 272 | ||
167 | static void burn_nsecs(u64 nsecs) | 273 | static void burn_nsecs(struct perf_sched *sched, u64 nsecs) |
168 | { | 274 | { |
169 | u64 T0 = get_nsecs(), T1; | 275 | u64 T0 = get_nsecs(), T1; |
170 | 276 | ||
171 | do { | 277 | do { |
172 | T1 = get_nsecs(); | 278 | T1 = get_nsecs(); |
173 | } while (T1 + run_measurement_overhead < T0 + nsecs); | 279 | } while (T1 + sched->run_measurement_overhead < T0 + nsecs); |
174 | } | 280 | } |
175 | 281 | ||
176 | static void sleep_nsecs(u64 nsecs) | 282 | static void sleep_nsecs(u64 nsecs) |
@@ -183,24 +289,24 @@ static void sleep_nsecs(u64 nsecs) | |||
183 | nanosleep(&ts, NULL); | 289 | nanosleep(&ts, NULL); |
184 | } | 290 | } |
185 | 291 | ||
186 | static void calibrate_run_measurement_overhead(void) | 292 | static void calibrate_run_measurement_overhead(struct perf_sched *sched) |
187 | { | 293 | { |
188 | u64 T0, T1, delta, min_delta = 1000000000ULL; | 294 | u64 T0, T1, delta, min_delta = 1000000000ULL; |
189 | int i; | 295 | int i; |
190 | 296 | ||
191 | for (i = 0; i < 10; i++) { | 297 | for (i = 0; i < 10; i++) { |
192 | T0 = get_nsecs(); | 298 | T0 = get_nsecs(); |
193 | burn_nsecs(0); | 299 | burn_nsecs(sched, 0); |
194 | T1 = get_nsecs(); | 300 | T1 = get_nsecs(); |
195 | delta = T1-T0; | 301 | delta = T1-T0; |
196 | min_delta = min(min_delta, delta); | 302 | min_delta = min(min_delta, delta); |
197 | } | 303 | } |
198 | run_measurement_overhead = min_delta; | 304 | sched->run_measurement_overhead = min_delta; |
199 | 305 | ||
200 | printf("run measurement overhead: %" PRIu64 " nsecs\n", min_delta); | 306 | printf("run measurement overhead: %" PRIu64 " nsecs\n", min_delta); |
201 | } | 307 | } |
202 | 308 | ||
203 | static void calibrate_sleep_measurement_overhead(void) | 309 | static void calibrate_sleep_measurement_overhead(struct perf_sched *sched) |
204 | { | 310 | { |
205 | u64 T0, T1, delta, min_delta = 1000000000ULL; | 311 | u64 T0, T1, delta, min_delta = 1000000000ULL; |
206 | int i; | 312 | int i; |
@@ -213,7 +319,7 @@ static void calibrate_sleep_measurement_overhead(void) | |||
213 | min_delta = min(min_delta, delta); | 319 | min_delta = min(min_delta, delta); |
214 | } | 320 | } |
215 | min_delta -= 10000; | 321 | min_delta -= 10000; |
216 | sleep_measurement_overhead = min_delta; | 322 | sched->sleep_measurement_overhead = min_delta; |
217 | 323 | ||
218 | printf("sleep measurement overhead: %" PRIu64 " nsecs\n", min_delta); | 324 | printf("sleep measurement overhead: %" PRIu64 " nsecs\n", min_delta); |
219 | } | 325 | } |
@@ -246,8 +352,8 @@ static struct sched_atom *last_event(struct task_desc *task) | |||
246 | return task->atoms[task->nr_events - 1]; | 352 | return task->atoms[task->nr_events - 1]; |
247 | } | 353 | } |
248 | 354 | ||
249 | static void | 355 | static void add_sched_event_run(struct perf_sched *sched, struct task_desc *task, |
250 | add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration) | 356 | u64 timestamp, u64 duration) |
251 | { | 357 | { |
252 | struct sched_atom *event, *curr_event = last_event(task); | 358 | struct sched_atom *event, *curr_event = last_event(task); |
253 | 359 | ||
@@ -256,7 +362,7 @@ add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration) | |||
256 | * to it: | 362 | * to it: |
257 | */ | 363 | */ |
258 | if (curr_event && curr_event->type == SCHED_EVENT_RUN) { | 364 | if (curr_event && curr_event->type == SCHED_EVENT_RUN) { |
259 | nr_run_events_optimized++; | 365 | sched->nr_run_events_optimized++; |
260 | curr_event->duration += duration; | 366 | curr_event->duration += duration; |
261 | return; | 367 | return; |
262 | } | 368 | } |
@@ -266,12 +372,11 @@ add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration) | |||
266 | event->type = SCHED_EVENT_RUN; | 372 | event->type = SCHED_EVENT_RUN; |
267 | event->duration = duration; | 373 | event->duration = duration; |
268 | 374 | ||
269 | nr_run_events++; | 375 | sched->nr_run_events++; |
270 | } | 376 | } |
271 | 377 | ||
272 | static void | 378 | static void add_sched_event_wakeup(struct perf_sched *sched, struct task_desc *task, |
273 | add_sched_event_wakeup(struct task_desc *task, u64 timestamp, | 379 | u64 timestamp, struct task_desc *wakee) |
274 | struct task_desc *wakee) | ||
275 | { | 380 | { |
276 | struct sched_atom *event, *wakee_event; | 381 | struct sched_atom *event, *wakee_event; |
277 | 382 | ||
@@ -281,11 +386,11 @@ add_sched_event_wakeup(struct task_desc *task, u64 timestamp, | |||
281 | 386 | ||
282 | wakee_event = last_event(wakee); | 387 | wakee_event = last_event(wakee); |
283 | if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) { | 388 | if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) { |
284 | targetless_wakeups++; | 389 | sched->targetless_wakeups++; |
285 | return; | 390 | return; |
286 | } | 391 | } |
287 | if (wakee_event->wait_sem) { | 392 | if (wakee_event->wait_sem) { |
288 | multitarget_wakeups++; | 393 | sched->multitarget_wakeups++; |
289 | return; | 394 | return; |
290 | } | 395 | } |
291 | 396 | ||
@@ -294,89 +399,89 @@ add_sched_event_wakeup(struct task_desc *task, u64 timestamp, | |||
294 | wakee_event->specific_wait = 1; | 399 | wakee_event->specific_wait = 1; |
295 | event->wait_sem = wakee_event->wait_sem; | 400 | event->wait_sem = wakee_event->wait_sem; |
296 | 401 | ||
297 | nr_wakeup_events++; | 402 | sched->nr_wakeup_events++; |
298 | } | 403 | } |
299 | 404 | ||
300 | static void | 405 | static void add_sched_event_sleep(struct perf_sched *sched, struct task_desc *task, |
301 | add_sched_event_sleep(struct task_desc *task, u64 timestamp, | 406 | u64 timestamp, u64 task_state __maybe_unused) |
302 | u64 task_state __maybe_unused) | ||
303 | { | 407 | { |
304 | struct sched_atom *event = get_new_event(task, timestamp); | 408 | struct sched_atom *event = get_new_event(task, timestamp); |
305 | 409 | ||
306 | event->type = SCHED_EVENT_SLEEP; | 410 | event->type = SCHED_EVENT_SLEEP; |
307 | 411 | ||
308 | nr_sleep_events++; | 412 | sched->nr_sleep_events++; |
309 | } | 413 | } |
310 | 414 | ||
311 | static struct task_desc *register_pid(unsigned long pid, const char *comm) | 415 | static struct task_desc *register_pid(struct perf_sched *sched, |
416 | unsigned long pid, const char *comm) | ||
312 | { | 417 | { |
313 | struct task_desc *task; | 418 | struct task_desc *task; |
314 | 419 | ||
315 | BUG_ON(pid >= MAX_PID); | 420 | BUG_ON(pid >= MAX_PID); |
316 | 421 | ||
317 | task = pid_to_task[pid]; | 422 | task = sched->pid_to_task[pid]; |
318 | 423 | ||
319 | if (task) | 424 | if (task) |
320 | return task; | 425 | return task; |
321 | 426 | ||
322 | task = zalloc(sizeof(*task)); | 427 | task = zalloc(sizeof(*task)); |
323 | task->pid = pid; | 428 | task->pid = pid; |
324 | task->nr = nr_tasks; | 429 | task->nr = sched->nr_tasks; |
325 | strcpy(task->comm, comm); | 430 | strcpy(task->comm, comm); |
326 | /* | 431 | /* |
327 | * every task starts in sleeping state - this gets ignored | 432 | * every task starts in sleeping state - this gets ignored |
328 | * if there's no wakeup pointing to this sleep state: | 433 | * if there's no wakeup pointing to this sleep state: |
329 | */ | 434 | */ |
330 | add_sched_event_sleep(task, 0, 0); | 435 | add_sched_event_sleep(sched, task, 0, 0); |
331 | 436 | ||
332 | pid_to_task[pid] = task; | 437 | sched->pid_to_task[pid] = task; |
333 | nr_tasks++; | 438 | sched->nr_tasks++; |
334 | tasks = realloc(tasks, nr_tasks*sizeof(struct task_task *)); | 439 | sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_task *)); |
335 | BUG_ON(!tasks); | 440 | BUG_ON(!sched->tasks); |
336 | tasks[task->nr] = task; | 441 | sched->tasks[task->nr] = task; |
337 | 442 | ||
338 | if (verbose) | 443 | if (verbose) |
339 | printf("registered task #%ld, PID %ld (%s)\n", nr_tasks, pid, comm); | 444 | printf("registered task #%ld, PID %ld (%s)\n", sched->nr_tasks, pid, comm); |
340 | 445 | ||
341 | return task; | 446 | return task; |
342 | } | 447 | } |
343 | 448 | ||
344 | 449 | ||
345 | static void print_task_traces(void) | 450 | static void print_task_traces(struct perf_sched *sched) |
346 | { | 451 | { |
347 | struct task_desc *task; | 452 | struct task_desc *task; |
348 | unsigned long i; | 453 | unsigned long i; |
349 | 454 | ||
350 | for (i = 0; i < nr_tasks; i++) { | 455 | for (i = 0; i < sched->nr_tasks; i++) { |
351 | task = tasks[i]; | 456 | task = sched->tasks[i]; |
352 | printf("task %6ld (%20s:%10ld), nr_events: %ld\n", | 457 | printf("task %6ld (%20s:%10ld), nr_events: %ld\n", |
353 | task->nr, task->comm, task->pid, task->nr_events); | 458 | task->nr, task->comm, task->pid, task->nr_events); |
354 | } | 459 | } |
355 | } | 460 | } |
356 | 461 | ||
357 | static void add_cross_task_wakeups(void) | 462 | static void add_cross_task_wakeups(struct perf_sched *sched) |
358 | { | 463 | { |
359 | struct task_desc *task1, *task2; | 464 | struct task_desc *task1, *task2; |
360 | unsigned long i, j; | 465 | unsigned long i, j; |
361 | 466 | ||
362 | for (i = 0; i < nr_tasks; i++) { | 467 | for (i = 0; i < sched->nr_tasks; i++) { |
363 | task1 = tasks[i]; | 468 | task1 = sched->tasks[i]; |
364 | j = i + 1; | 469 | j = i + 1; |
365 | if (j == nr_tasks) | 470 | if (j == sched->nr_tasks) |
366 | j = 0; | 471 | j = 0; |
367 | task2 = tasks[j]; | 472 | task2 = sched->tasks[j]; |
368 | add_sched_event_wakeup(task1, 0, task2); | 473 | add_sched_event_wakeup(sched, task1, 0, task2); |
369 | } | 474 | } |
370 | } | 475 | } |
371 | 476 | ||
372 | static void process_sched_event(struct task_desc *this_task __maybe_unused, | 477 | static void perf_sched__process_event(struct perf_sched *sched, |
373 | struct sched_atom *atom) | 478 | struct sched_atom *atom) |
374 | { | 479 | { |
375 | int ret = 0; | 480 | int ret = 0; |
376 | 481 | ||
377 | switch (atom->type) { | 482 | switch (atom->type) { |
378 | case SCHED_EVENT_RUN: | 483 | case SCHED_EVENT_RUN: |
379 | burn_nsecs(atom->duration); | 484 | burn_nsecs(sched, atom->duration); |
380 | break; | 485 | break; |
381 | case SCHED_EVENT_SLEEP: | 486 | case SCHED_EVENT_SLEEP: |
382 | if (atom->wait_sem) | 487 | if (atom->wait_sem) |
@@ -439,14 +544,23 @@ static u64 get_cpu_usage_nsec_self(int fd) | |||
439 | return runtime; | 544 | return runtime; |
440 | } | 545 | } |
441 | 546 | ||
547 | struct sched_thread_parms { | ||
548 | struct task_desc *task; | ||
549 | struct perf_sched *sched; | ||
550 | }; | ||
551 | |||
442 | static void *thread_func(void *ctx) | 552 | static void *thread_func(void *ctx) |
443 | { | 553 | { |
444 | struct task_desc *this_task = ctx; | 554 | struct sched_thread_parms *parms = ctx; |
555 | struct task_desc *this_task = parms->task; | ||
556 | struct perf_sched *sched = parms->sched; | ||
445 | u64 cpu_usage_0, cpu_usage_1; | 557 | u64 cpu_usage_0, cpu_usage_1; |
446 | unsigned long i, ret; | 558 | unsigned long i, ret; |
447 | char comm2[22]; | 559 | char comm2[22]; |
448 | int fd; | 560 | int fd; |
449 | 561 | ||
562 | free(parms); | ||
563 | |||
450 | sprintf(comm2, ":%s", this_task->comm); | 564 | sprintf(comm2, ":%s", this_task->comm); |
451 | prctl(PR_SET_NAME, comm2); | 565 | prctl(PR_SET_NAME, comm2); |
452 | fd = self_open_counters(); | 566 | fd = self_open_counters(); |
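Because pthread_create() forwards a single void * to the start routine, the patch introduces sched_thread_parms to bundle the task pointer with the perf_sched pointer: create_tasks() allocates one per thread and thread_func() unpacks and frees it. A minimal standalone sketch of that hand-off, with generic names standing in for the perf types:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct thread_parms {		/* mirrors the role of sched_thread_parms */
	int task_id;		/* stand-in for the struct task_desc pointer */
	const char *label;	/* stand-in for the struct perf_sched pointer */
};

static void *worker(void *ctx)
{
	struct thread_parms *parms = ctx;
	int task_id = parms->task_id;
	const char *label = parms->label;

	free(parms);		/* the thread owns the parms block, freed exactly once */

	printf("%s: running task %d\n", label, task_id);
	return NULL;
}

int main(void)
{
	pthread_t tid;
	struct thread_parms *parms = malloc(sizeof(*parms));

	if (!parms)
		return 1;
	parms->task_id = 7;
	parms->label = "demo";

	if (pthread_create(&tid, NULL, worker, parms))
		return 1;
	pthread_join(tid, NULL);
	return 0;
}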
@@ -455,16 +569,16 @@ static void *thread_func(void *ctx) | |||
455 | again: | 569 | again: |
456 | ret = sem_post(&this_task->ready_for_work); | 570 | ret = sem_post(&this_task->ready_for_work); |
457 | BUG_ON(ret); | 571 | BUG_ON(ret); |
458 | ret = pthread_mutex_lock(&start_work_mutex); | 572 | ret = pthread_mutex_lock(&sched->start_work_mutex); |
459 | BUG_ON(ret); | 573 | BUG_ON(ret); |
460 | ret = pthread_mutex_unlock(&start_work_mutex); | 574 | ret = pthread_mutex_unlock(&sched->start_work_mutex); |
461 | BUG_ON(ret); | 575 | BUG_ON(ret); |
462 | 576 | ||
463 | cpu_usage_0 = get_cpu_usage_nsec_self(fd); | 577 | cpu_usage_0 = get_cpu_usage_nsec_self(fd); |
464 | 578 | ||
465 | for (i = 0; i < this_task->nr_events; i++) { | 579 | for (i = 0; i < this_task->nr_events; i++) { |
466 | this_task->curr_event = i; | 580 | this_task->curr_event = i; |
467 | process_sched_event(this_task, this_task->atoms[i]); | 581 | perf_sched__process_event(sched, this_task->atoms[i]); |
468 | } | 582 | } |
469 | 583 | ||
470 | cpu_usage_1 = get_cpu_usage_nsec_self(fd); | 584 | cpu_usage_1 = get_cpu_usage_nsec_self(fd); |
@@ -472,15 +586,15 @@ again: | |||
472 | ret = sem_post(&this_task->work_done_sem); | 586 | ret = sem_post(&this_task->work_done_sem); |
473 | BUG_ON(ret); | 587 | BUG_ON(ret); |
474 | 588 | ||
475 | ret = pthread_mutex_lock(&work_done_wait_mutex); | 589 | ret = pthread_mutex_lock(&sched->work_done_wait_mutex); |
476 | BUG_ON(ret); | 590 | BUG_ON(ret); |
477 | ret = pthread_mutex_unlock(&work_done_wait_mutex); | 591 | ret = pthread_mutex_unlock(&sched->work_done_wait_mutex); |
478 | BUG_ON(ret); | 592 | BUG_ON(ret); |
479 | 593 | ||
480 | goto again; | 594 | goto again; |
481 | } | 595 | } |
482 | 596 | ||
483 | static void create_tasks(void) | 597 | static void create_tasks(struct perf_sched *sched) |
484 | { | 598 | { |
485 | struct task_desc *task; | 599 | struct task_desc *task; |
486 | pthread_attr_t attr; | 600 | pthread_attr_t attr; |
@@ -492,128 +606,129 @@ static void create_tasks(void) | |||
492 | err = pthread_attr_setstacksize(&attr, | 606 | err = pthread_attr_setstacksize(&attr, |
493 | (size_t) max(16 * 1024, PTHREAD_STACK_MIN)); | 607 | (size_t) max(16 * 1024, PTHREAD_STACK_MIN)); |
494 | BUG_ON(err); | 608 | BUG_ON(err); |
495 | err = pthread_mutex_lock(&start_work_mutex); | 609 | err = pthread_mutex_lock(&sched->start_work_mutex); |
496 | BUG_ON(err); | 610 | BUG_ON(err); |
497 | err = pthread_mutex_lock(&work_done_wait_mutex); | 611 | err = pthread_mutex_lock(&sched->work_done_wait_mutex); |
498 | BUG_ON(err); | 612 | BUG_ON(err); |
499 | for (i = 0; i < nr_tasks; i++) { | 613 | for (i = 0; i < sched->nr_tasks; i++) { |
500 | task = tasks[i]; | 614 | struct sched_thread_parms *parms = malloc(sizeof(*parms)); |
615 | BUG_ON(parms == NULL); | ||
616 | parms->task = task = sched->tasks[i]; | ||
617 | parms->sched = sched; | ||
501 | sem_init(&task->sleep_sem, 0, 0); | 618 | sem_init(&task->sleep_sem, 0, 0); |
502 | sem_init(&task->ready_for_work, 0, 0); | 619 | sem_init(&task->ready_for_work, 0, 0); |
503 | sem_init(&task->work_done_sem, 0, 0); | 620 | sem_init(&task->work_done_sem, 0, 0); |
504 | task->curr_event = 0; | 621 | task->curr_event = 0; |
505 | err = pthread_create(&task->thread, &attr, thread_func, task); | 622 | err = pthread_create(&task->thread, &attr, thread_func, parms); |
506 | BUG_ON(err); | 623 | BUG_ON(err); |
507 | } | 624 | } |
508 | } | 625 | } |
509 | 626 | ||
510 | static void wait_for_tasks(void) | 627 | static void wait_for_tasks(struct perf_sched *sched) |
511 | { | 628 | { |
512 | u64 cpu_usage_0, cpu_usage_1; | 629 | u64 cpu_usage_0, cpu_usage_1; |
513 | struct task_desc *task; | 630 | struct task_desc *task; |
514 | unsigned long i, ret; | 631 | unsigned long i, ret; |
515 | 632 | ||
516 | start_time = get_nsecs(); | 633 | sched->start_time = get_nsecs(); |
517 | cpu_usage = 0; | 634 | sched->cpu_usage = 0; |
518 | pthread_mutex_unlock(&work_done_wait_mutex); | 635 | pthread_mutex_unlock(&sched->work_done_wait_mutex); |
519 | 636 | ||
520 | for (i = 0; i < nr_tasks; i++) { | 637 | for (i = 0; i < sched->nr_tasks; i++) { |
521 | task = tasks[i]; | 638 | task = sched->tasks[i]; |
522 | ret = sem_wait(&task->ready_for_work); | 639 | ret = sem_wait(&task->ready_for_work); |
523 | BUG_ON(ret); | 640 | BUG_ON(ret); |
524 | sem_init(&task->ready_for_work, 0, 0); | 641 | sem_init(&task->ready_for_work, 0, 0); |
525 | } | 642 | } |
526 | ret = pthread_mutex_lock(&work_done_wait_mutex); | 643 | ret = pthread_mutex_lock(&sched->work_done_wait_mutex); |
527 | BUG_ON(ret); | 644 | BUG_ON(ret); |
528 | 645 | ||
529 | cpu_usage_0 = get_cpu_usage_nsec_parent(); | 646 | cpu_usage_0 = get_cpu_usage_nsec_parent(); |
530 | 647 | ||
531 | pthread_mutex_unlock(&start_work_mutex); | 648 | pthread_mutex_unlock(&sched->start_work_mutex); |
532 | 649 | ||
533 | for (i = 0; i < nr_tasks; i++) { | 650 | for (i = 0; i < sched->nr_tasks; i++) { |
534 | task = tasks[i]; | 651 | task = sched->tasks[i]; |
535 | ret = sem_wait(&task->work_done_sem); | 652 | ret = sem_wait(&task->work_done_sem); |
536 | BUG_ON(ret); | 653 | BUG_ON(ret); |
537 | sem_init(&task->work_done_sem, 0, 0); | 654 | sem_init(&task->work_done_sem, 0, 0); |
538 | cpu_usage += task->cpu_usage; | 655 | sched->cpu_usage += task->cpu_usage; |
539 | task->cpu_usage = 0; | 656 | task->cpu_usage = 0; |
540 | } | 657 | } |
541 | 658 | ||
542 | cpu_usage_1 = get_cpu_usage_nsec_parent(); | 659 | cpu_usage_1 = get_cpu_usage_nsec_parent(); |
543 | if (!runavg_cpu_usage) | 660 | if (!sched->runavg_cpu_usage) |
544 | runavg_cpu_usage = cpu_usage; | 661 | sched->runavg_cpu_usage = sched->cpu_usage; |
545 | runavg_cpu_usage = (runavg_cpu_usage*9 + cpu_usage)/10; | 662 | sched->runavg_cpu_usage = (sched->runavg_cpu_usage * 9 + sched->cpu_usage) / 10; |
546 | 663 | ||
547 | parent_cpu_usage = cpu_usage_1 - cpu_usage_0; | 664 | sched->parent_cpu_usage = cpu_usage_1 - cpu_usage_0; |
548 | if (!runavg_parent_cpu_usage) | 665 | if (!sched->runavg_parent_cpu_usage) |
549 | runavg_parent_cpu_usage = parent_cpu_usage; | 666 | sched->runavg_parent_cpu_usage = sched->parent_cpu_usage; |
550 | runavg_parent_cpu_usage = (runavg_parent_cpu_usage*9 + | 667 | sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * 9 + |
551 | parent_cpu_usage)/10; | 668 | sched->parent_cpu_usage)/10; |
552 | 669 | ||
553 | ret = pthread_mutex_lock(&start_work_mutex); | 670 | ret = pthread_mutex_lock(&sched->start_work_mutex); |
554 | BUG_ON(ret); | 671 | BUG_ON(ret); |
555 | 672 | ||
556 | for (i = 0; i < nr_tasks; i++) { | 673 | for (i = 0; i < sched->nr_tasks; i++) { |
557 | task = tasks[i]; | 674 | task = sched->tasks[i]; |
558 | sem_init(&task->sleep_sem, 0, 0); | 675 | sem_init(&task->sleep_sem, 0, 0); |
559 | task->curr_event = 0; | 676 | task->curr_event = 0; |
560 | } | 677 | } |
561 | } | 678 | } |
562 | 679 | ||
563 | static void run_one_test(void) | 680 | static void run_one_test(struct perf_sched *sched) |
564 | { | 681 | { |
565 | u64 T0, T1, delta, avg_delta, fluct; | 682 | u64 T0, T1, delta, avg_delta, fluct; |
566 | 683 | ||
567 | T0 = get_nsecs(); | 684 | T0 = get_nsecs(); |
568 | wait_for_tasks(); | 685 | wait_for_tasks(sched); |
569 | T1 = get_nsecs(); | 686 | T1 = get_nsecs(); |
570 | 687 | ||
571 | delta = T1 - T0; | 688 | delta = T1 - T0; |
572 | sum_runtime += delta; | 689 | sched->sum_runtime += delta; |
573 | nr_runs++; | 690 | sched->nr_runs++; |
574 | 691 | ||
575 | avg_delta = sum_runtime / nr_runs; | 692 | avg_delta = sched->sum_runtime / sched->nr_runs; |
576 | if (delta < avg_delta) | 693 | if (delta < avg_delta) |
577 | fluct = avg_delta - delta; | 694 | fluct = avg_delta - delta; |
578 | else | 695 | else |
579 | fluct = delta - avg_delta; | 696 | fluct = delta - avg_delta; |
580 | sum_fluct += fluct; | 697 | sched->sum_fluct += fluct; |
581 | if (!run_avg) | 698 | if (!sched->run_avg) |
582 | run_avg = delta; | 699 | sched->run_avg = delta; |
583 | run_avg = (run_avg*9 + delta)/10; | 700 | sched->run_avg = (sched->run_avg * 9 + delta) / 10; |
584 | 701 | ||
585 | printf("#%-3ld: %0.3f, ", | 702 | printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / 1000000.0); |
586 | nr_runs, (double)delta/1000000.0); | ||
587 | 703 | ||
588 | printf("ravg: %0.2f, ", | 704 | printf("ravg: %0.2f, ", (double)sched->run_avg / 1e6); |
589 | (double)run_avg/1e6); | ||
590 | 705 | ||
591 | printf("cpu: %0.2f / %0.2f", | 706 | printf("cpu: %0.2f / %0.2f", |
592 | (double)cpu_usage/1e6, (double)runavg_cpu_usage/1e6); | 707 | (double)sched->cpu_usage / 1e6, (double)sched->runavg_cpu_usage / 1e6); |
593 | 708 | ||
594 | #if 0 | 709 | #if 0 |
595 | /* | 710 | /* |
596 | * rusage statistics done by the parent, these are less | 711 | * rusage statistics done by the parent, these are less |
597 | * accurate than the sum_exec_runtime based statistics: | 712 | * accurate than the sched->sum_exec_runtime based statistics: |
598 | */ | 713 | */ |
599 | printf(" [%0.2f / %0.2f]", | 714 | printf(" [%0.2f / %0.2f]", |
600 | (double)parent_cpu_usage/1e6, | 715 | (double)sched->parent_cpu_usage/1e6, |
601 | (double)runavg_parent_cpu_usage/1e6); | 716 | (double)sched->runavg_parent_cpu_usage/1e6); |
602 | #endif | 717 | #endif |
603 | 718 | ||
604 | printf("\n"); | 719 | printf("\n"); |
605 | 720 | ||
606 | if (nr_sleep_corrections) | 721 | if (sched->nr_sleep_corrections) |
607 | printf(" (%ld sleep corrections)\n", nr_sleep_corrections); | 722 | printf(" (%ld sleep corrections)\n", sched->nr_sleep_corrections); |
608 | nr_sleep_corrections = 0; | 723 | sched->nr_sleep_corrections = 0; |
609 | } | 724 | } |
610 | 725 | ||
611 | static void test_calibrations(void) | 726 | static void test_calibrations(struct perf_sched *sched) |
612 | { | 727 | { |
613 | u64 T0, T1; | 728 | u64 T0, T1; |
614 | 729 | ||
615 | T0 = get_nsecs(); | 730 | T0 = get_nsecs(); |
616 | burn_nsecs(1e6); | 731 | burn_nsecs(sched, 1e6); |
617 | T1 = get_nsecs(); | 732 | T1 = get_nsecs(); |
618 | 733 | ||
619 | printf("the run test took %" PRIu64 " nsecs\n", T1 - T0); | 734 | printf("the run test took %" PRIu64 " nsecs\n", T1 - T0); |
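run_one_test() and wait_for_tasks() smooth their timings with the same integer running average, avg = (avg * 9 + sample) / 10, i.e. an exponential moving average with a 0.9 decay computed entirely in integer arithmetic. A small worked example (illustrative helper, not from the patch) of how an outlier is absorbed:

#include <stdio.h>
#include <stdint.h>

/* Same smoothing used for run_avg and runavg_cpu_usage in the patch:
 * keep 90% of the old average, mix in 10% of the new sample. */
static uint64_t smooth(uint64_t avg, uint64_t sample)
{
	if (!avg)			/* first sample seeds the average */
		return sample;
	return (avg * 9 + sample) / 10;
}

int main(void)
{
	uint64_t avg = 0;
	uint64_t samples[] = { 1000, 1000, 4000, 1000, 1000 };

	for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		avg = smooth(avg, samples[i]);
		printf("sample %llu -> avg %llu\n",
		       (unsigned long long)samples[i], (unsigned long long)avg);
	}
	/* The 4000 outlier only nudges the average (1000 -> 1300),
	 * then it decays back toward 1000 over the following runs. */
	return 0;
}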
@@ -643,115 +758,9 @@ do { \ | |||
643 | FILL_FIELD(ptr, common_tgid, event, data); \ | 758 | FILL_FIELD(ptr, common_tgid, event, data); \ |
644 | } while (0) | 759 | } while (0) |
645 | 760 | ||
646 | |||
647 | |||
648 | struct trace_switch_event { | ||
649 | u32 size; | ||
650 | |||
651 | u16 common_type; | ||
652 | u8 common_flags; | ||
653 | u8 common_preempt_count; | ||
654 | u32 common_pid; | ||
655 | u32 common_tgid; | ||
656 | |||
657 | char prev_comm[16]; | ||
658 | u32 prev_pid; | ||
659 | u32 prev_prio; | ||
660 | u64 prev_state; | ||
661 | char next_comm[16]; | ||
662 | u32 next_pid; | ||
663 | u32 next_prio; | ||
664 | }; | ||
665 | |||
666 | struct trace_runtime_event { | ||
667 | u32 size; | ||
668 | |||
669 | u16 common_type; | ||
670 | u8 common_flags; | ||
671 | u8 common_preempt_count; | ||
672 | u32 common_pid; | ||
673 | u32 common_tgid; | ||
674 | |||
675 | char comm[16]; | ||
676 | u32 pid; | ||
677 | u64 runtime; | ||
678 | u64 vruntime; | ||
679 | }; | ||
680 | |||
681 | struct trace_wakeup_event { | ||
682 | u32 size; | ||
683 | |||
684 | u16 common_type; | ||
685 | u8 common_flags; | ||
686 | u8 common_preempt_count; | ||
687 | u32 common_pid; | ||
688 | u32 common_tgid; | ||
689 | |||
690 | char comm[16]; | ||
691 | u32 pid; | ||
692 | |||
693 | u32 prio; | ||
694 | u32 success; | ||
695 | u32 cpu; | ||
696 | }; | ||
697 | |||
698 | struct trace_fork_event { | ||
699 | u32 size; | ||
700 | |||
701 | u16 common_type; | ||
702 | u8 common_flags; | ||
703 | u8 common_preempt_count; | ||
704 | u32 common_pid; | ||
705 | u32 common_tgid; | ||
706 | |||
707 | char parent_comm[16]; | ||
708 | u32 parent_pid; | ||
709 | char child_comm[16]; | ||
710 | u32 child_pid; | ||
711 | }; | ||
712 | |||
713 | struct trace_migrate_task_event { | ||
714 | u32 size; | ||
715 | |||
716 | u16 common_type; | ||
717 | u8 common_flags; | ||
718 | u8 common_preempt_count; | ||
719 | u32 common_pid; | ||
720 | u32 common_tgid; | ||
721 | |||
722 | char comm[16]; | ||
723 | u32 pid; | ||
724 | |||
725 | u32 prio; | ||
726 | u32 cpu; | ||
727 | }; | ||
728 | |||
729 | struct trace_sched_handler { | ||
730 | int (*switch_event)(struct trace_switch_event *event, | ||
731 | struct machine *machine, | ||
732 | struct event_format *tp_format, | ||
733 | struct perf_sample *sample); | ||
734 | |||
735 | int (*runtime_event)(struct trace_runtime_event *event, | ||
736 | struct machine *machine, | ||
737 | struct perf_sample *sample); | ||
738 | |||
739 | int (*wakeup_event)(struct trace_wakeup_event *event, | ||
740 | struct machine *machine, | ||
741 | struct event_format *tp_format, | ||
742 | struct perf_sample *sample); | ||
743 | |||
744 | int (*fork_event)(struct trace_fork_event *event, | ||
745 | struct event_format *tp_format); | ||
746 | |||
747 | int (*migrate_task_event)(struct trace_migrate_task_event *event, | ||
748 | struct machine *machine, | ||
749 | struct perf_sample *sample); | ||
750 | }; | ||
751 | |||
752 | |||
753 | static int | 761 | static int |
754 | replay_wakeup_event(struct trace_wakeup_event *wakeup_event, | 762 | replay_wakeup_event(struct perf_sched *sched, |
763 | struct trace_wakeup_event *wakeup_event, | ||
755 | struct machine *machine __maybe_unused, | 764 | struct machine *machine __maybe_unused, |
756 | struct event_format *event, struct perf_sample *sample) | 765 | struct event_format *event, struct perf_sample *sample) |
757 | { | 766 | { |
@@ -761,22 +770,19 @@ replay_wakeup_event(struct trace_wakeup_event *wakeup_event, | |||
761 | printf("sched_wakeup event %p\n", event); | 770 | printf("sched_wakeup event %p\n", event); |
762 | 771 | ||
763 | printf(" ... pid %d woke up %s/%d\n", | 772 | printf(" ... pid %d woke up %s/%d\n", |
764 | wakeup_event->common_pid, | 773 | wakeup_event->common_pid, wakeup_event->comm, wakeup_event->pid); |
765 | wakeup_event->comm, | ||
766 | wakeup_event->pid); | ||
767 | } | 774 | } |
768 | 775 | ||
769 | waker = register_pid(wakeup_event->common_pid, "<unknown>"); | 776 | waker = register_pid(sched, wakeup_event->common_pid, "<unknown>"); |
770 | wakee = register_pid(wakeup_event->pid, wakeup_event->comm); | 777 | wakee = register_pid(sched, wakeup_event->pid, wakeup_event->comm); |
771 | 778 | ||
772 | add_sched_event_wakeup(waker, sample->time, wakee); | 779 | add_sched_event_wakeup(sched, waker, sample->time, wakee); |
773 | return 0; | 780 | return 0; |
774 | } | 781 | } |
775 | 782 | ||
776 | static u64 cpu_last_switched[MAX_CPUS]; | ||
777 | |||
778 | static int | 783 | static int |
779 | replay_switch_event(struct trace_switch_event *switch_event, | 784 | replay_switch_event(struct perf_sched *sched, |
785 | struct trace_switch_event *switch_event, | ||
780 | struct machine *machine __maybe_unused, | 786 | struct machine *machine __maybe_unused, |
781 | struct event_format *event, | 787 | struct event_format *event, |
782 | struct perf_sample *sample) | 788 | struct perf_sample *sample) |
@@ -792,7 +798,7 @@ replay_switch_event(struct trace_switch_event *switch_event, | |||
792 | if (cpu >= MAX_CPUS || cpu < 0) | 798 | if (cpu >= MAX_CPUS || cpu < 0) |
793 | return 0; | 799 | return 0; |
794 | 800 | ||
795 | timestamp0 = cpu_last_switched[cpu]; | 801 | timestamp0 = sched->cpu_last_switched[cpu]; |
796 | if (timestamp0) | 802 | if (timestamp0) |
797 | delta = timestamp - timestamp0; | 803 | delta = timestamp - timestamp0; |
798 | else | 804 | else |
@@ -810,20 +816,19 @@ replay_switch_event(struct trace_switch_event *switch_event, | |||
810 | delta); | 816 | delta); |
811 | } | 817 | } |
812 | 818 | ||
813 | prev = register_pid(switch_event->prev_pid, switch_event->prev_comm); | 819 | prev = register_pid(sched, switch_event->prev_pid, switch_event->prev_comm); |
814 | next = register_pid(switch_event->next_pid, switch_event->next_comm); | 820 | next = register_pid(sched, switch_event->next_pid, switch_event->next_comm); |
815 | 821 | ||
816 | cpu_last_switched[cpu] = timestamp; | 822 | sched->cpu_last_switched[cpu] = timestamp; |
817 | 823 | ||
818 | add_sched_event_run(prev, timestamp, delta); | 824 | add_sched_event_run(sched, prev, timestamp, delta); |
819 | add_sched_event_sleep(prev, timestamp, switch_event->prev_state); | 825 | add_sched_event_sleep(sched, prev, timestamp, switch_event->prev_state); |
820 | 826 | ||
821 | return 0; | 827 | return 0; |
822 | } | 828 | } |
823 | 829 | ||
824 | |||
825 | static int | 830 | static int |
826 | replay_fork_event(struct trace_fork_event *fork_event, | 831 | replay_fork_event(struct perf_sched *sched, struct trace_fork_event *fork_event, |
827 | struct event_format *event) | 832 | struct event_format *event) |
828 | { | 833 | { |
829 | if (verbose) { | 834 | if (verbose) { |
@@ -831,25 +836,17 @@ replay_fork_event(struct trace_fork_event *fork_event, | |||
831 | printf("... parent: %s/%d\n", fork_event->parent_comm, fork_event->parent_pid); | 836 | printf("... parent: %s/%d\n", fork_event->parent_comm, fork_event->parent_pid); |
832 | printf("... child: %s/%d\n", fork_event->child_comm, fork_event->child_pid); | 837 | printf("... child: %s/%d\n", fork_event->child_comm, fork_event->child_pid); |
833 | } | 838 | } |
834 | register_pid(fork_event->parent_pid, fork_event->parent_comm); | 839 | register_pid(sched, fork_event->parent_pid, fork_event->parent_comm); |
835 | register_pid(fork_event->child_pid, fork_event->child_comm); | 840 | register_pid(sched, fork_event->child_pid, fork_event->child_comm); |
836 | return 0; | 841 | return 0; |
837 | } | 842 | } |
838 | 843 | ||
839 | static struct trace_sched_handler replay_ops = { | ||
840 | .wakeup_event = replay_wakeup_event, | ||
841 | .switch_event = replay_switch_event, | ||
842 | .fork_event = replay_fork_event, | ||
843 | }; | ||
844 | |||
845 | struct sort_dimension { | 844 | struct sort_dimension { |
846 | const char *name; | 845 | const char *name; |
847 | sort_fn_t cmp; | 846 | sort_fn_t cmp; |
848 | struct list_head list; | 847 | struct list_head list; |
849 | }; | 848 | }; |
850 | 849 | ||
851 | static LIST_HEAD(cmp_pid); | ||
852 | |||
853 | static int | 850 | static int |
854 | thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r) | 851 | thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r) |
855 | { | 852 | { |
@@ -918,7 +915,7 @@ __thread_latency_insert(struct rb_root *root, struct work_atoms *data, | |||
918 | rb_insert_color(&data->node, root); | 915 | rb_insert_color(&data->node, root); |
919 | } | 916 | } |
920 | 917 | ||
921 | static int thread_atoms_insert(struct thread *thread) | 918 | static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread) |
922 | { | 919 | { |
923 | struct work_atoms *atoms = zalloc(sizeof(*atoms)); | 920 | struct work_atoms *atoms = zalloc(sizeof(*atoms)); |
924 | if (!atoms) { | 921 | if (!atoms) { |
@@ -928,11 +925,12 @@ static int thread_atoms_insert(struct thread *thread) | |||
928 | 925 | ||
929 | atoms->thread = thread; | 926 | atoms->thread = thread; |
930 | INIT_LIST_HEAD(&atoms->work_list); | 927 | INIT_LIST_HEAD(&atoms->work_list); |
931 | __thread_latency_insert(&atom_root, atoms, &cmp_pid); | 928 | __thread_latency_insert(&sched->atom_root, atoms, &sched->cmp_pid); |
932 | return 0; | 929 | return 0; |
933 | } | 930 | } |
934 | 931 | ||
935 | static int latency_fork_event(struct trace_fork_event *fork_event __maybe_unused, | 932 | static int latency_fork_event(struct perf_sched *sched __maybe_unused, |
933 | struct trace_fork_event *fork_event __maybe_unused, | ||
936 | struct event_format *event __maybe_unused) | 934 | struct event_format *event __maybe_unused) |
937 | { | 935 | { |
938 | /* should insert the newcomer */ | 936 | /* should insert the newcomer */ |
@@ -1014,7 +1012,8 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp) | |||
1014 | } | 1012 | } |
1015 | 1013 | ||
1016 | static int | 1014 | static int |
1017 | latency_switch_event(struct trace_switch_event *switch_event, | 1015 | latency_switch_event(struct perf_sched *sched, |
1016 | struct trace_switch_event *switch_event, | ||
1018 | struct machine *machine, | 1017 | struct machine *machine, |
1019 | struct event_format *event __maybe_unused, | 1018 | struct event_format *event __maybe_unused, |
1020 | struct perf_sample *sample) | 1019 | struct perf_sample *sample) |
@@ -1027,8 +1026,8 @@ latency_switch_event(struct trace_switch_event *switch_event, | |||
1027 | 1026 | ||
1028 | BUG_ON(cpu >= MAX_CPUS || cpu < 0); | 1027 | BUG_ON(cpu >= MAX_CPUS || cpu < 0); |
1029 | 1028 | ||
1030 | timestamp0 = cpu_last_switched[cpu]; | 1029 | timestamp0 = sched->cpu_last_switched[cpu]; |
1031 | cpu_last_switched[cpu] = timestamp; | 1030 | sched->cpu_last_switched[cpu] = timestamp; |
1032 | if (timestamp0) | 1031 | if (timestamp0) |
1033 | delta = timestamp - timestamp0; | 1032 | delta = timestamp - timestamp0; |
1034 | else | 1033 | else |
@@ -1042,11 +1041,11 @@ latency_switch_event(struct trace_switch_event *switch_event, | |||
1042 | sched_out = machine__findnew_thread(machine, switch_event->prev_pid); | 1041 | sched_out = machine__findnew_thread(machine, switch_event->prev_pid); |
1043 | sched_in = machine__findnew_thread(machine, switch_event->next_pid); | 1042 | sched_in = machine__findnew_thread(machine, switch_event->next_pid); |
1044 | 1043 | ||
1045 | out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); | 1044 | out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); |
1046 | if (!out_events) { | 1045 | if (!out_events) { |
1047 | if (thread_atoms_insert(sched_out)) | 1046 | if (thread_atoms_insert(sched, sched_out)) |
1048 | return -1; | 1047 | return -1; |
1049 | out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); | 1048 | out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); |
1050 | if (!out_events) { | 1049 | if (!out_events) { |
1051 | pr_err("out-event: Internal tree error"); | 1050 | pr_err("out-event: Internal tree error"); |
1052 | return -1; | 1051 | return -1; |
@@ -1055,11 +1054,11 @@ latency_switch_event(struct trace_switch_event *switch_event, | |||
1055 | if (add_sched_out_event(out_events, sched_out_state(switch_event), timestamp)) | 1054 | if (add_sched_out_event(out_events, sched_out_state(switch_event), timestamp)) |
1056 | return -1; | 1055 | return -1; |
1057 | 1056 | ||
1058 | in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid); | 1057 | in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); |
1059 | if (!in_events) { | 1058 | if (!in_events) { |
1060 | if (thread_atoms_insert(sched_in)) | 1059 | if (thread_atoms_insert(sched, sched_in)) |
1061 | return -1; | 1060 | return -1; |
1062 | in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid); | 1061 | in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); |
1063 | if (!in_events) { | 1062 | if (!in_events) { |
1064 | pr_err("in-event: Internal tree error"); | 1063 | pr_err("in-event: Internal tree error"); |
1065 | return -1; | 1064 | return -1; |
@@ -1077,19 +1076,20 @@ latency_switch_event(struct trace_switch_event *switch_event, | |||
1077 | } | 1076 | } |
1078 | 1077 | ||
1079 | static int | 1078 | static int |
1080 | latency_runtime_event(struct trace_runtime_event *runtime_event, | 1079 | latency_runtime_event(struct perf_sched *sched, |
1080 | struct trace_runtime_event *runtime_event, | ||
1081 | struct machine *machine, struct perf_sample *sample) | 1081 | struct machine *machine, struct perf_sample *sample) |
1082 | { | 1082 | { |
1083 | struct thread *thread = machine__findnew_thread(machine, runtime_event->pid); | 1083 | struct thread *thread = machine__findnew_thread(machine, runtime_event->pid); |
1084 | struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); | 1084 | struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); |
1085 | u64 timestamp = sample->time; | 1085 | u64 timestamp = sample->time; |
1086 | int cpu = sample->cpu; | 1086 | int cpu = sample->cpu; |
1087 | 1087 | ||
1088 | BUG_ON(cpu >= MAX_CPUS || cpu < 0); | 1088 | BUG_ON(cpu >= MAX_CPUS || cpu < 0); |
1089 | if (!atoms) { | 1089 | if (!atoms) { |
1090 | if (thread_atoms_insert(thread)) | 1090 | if (thread_atoms_insert(sched, thread)) |
1091 | return -1; | 1091 | return -1; |
1092 | atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); | 1092 | atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); |
1093 | if (!atoms) { | 1093 | if (!atoms) { |
1094 | pr_debug("in-event: Internal tree error"); | 1094 | pr_debug("in-event: Internal tree error"); |
1095 | return -1; | 1095 | return -1; |
@@ -1103,7 +1103,8 @@ latency_runtime_event(struct trace_runtime_event *runtime_event, | |||
1103 | } | 1103 | } |
1104 | 1104 | ||
1105 | static int | 1105 | static int |
1106 | latency_wakeup_event(struct trace_wakeup_event *wakeup_event, | 1106 | latency_wakeup_event(struct perf_sched *sched, |
1107 | struct trace_wakeup_event *wakeup_event, | ||
1107 | struct machine *machine, | 1108 | struct machine *machine, |
1108 | struct event_format *event __maybe_unused, | 1109 | struct event_format *event __maybe_unused, |
1109 | struct perf_sample *sample) | 1110 | struct perf_sample *sample) |
@@ -1118,11 +1119,11 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, | |||
1118 | return 0; | 1119 | return 0; |
1119 | 1120 | ||
1120 | wakee = machine__findnew_thread(machine, wakeup_event->pid); | 1121 | wakee = machine__findnew_thread(machine, wakeup_event->pid); |
1121 | atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); | 1122 | atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); |
1122 | if (!atoms) { | 1123 | if (!atoms) { |
1123 | if (thread_atoms_insert(wakee)) | 1124 | if (thread_atoms_insert(sched, wakee)) |
1124 | return -1; | 1125 | return -1; |
1125 | atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); | 1126 | atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); |
1126 | if (!atoms) { | 1127 | if (!atoms) { |
1127 | pr_debug("wakeup-event: Internal tree error"); | 1128 | pr_debug("wakeup-event: Internal tree error"); |
1128 | return -1; | 1129 | return -1; |
@@ -1140,12 +1141,12 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, | |||
1140 | * one CPU, or are only looking at only one, so don't | 1141 | * one CPU, or are only looking at only one, so don't |
1141 | * make useless noise. | 1142 | * make useless noise. |
1142 | */ | 1143 | */ |
1143 | if (profile_cpu == -1 && atom->state != THREAD_SLEEPING) | 1144 | if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING) |
1144 | nr_state_machine_bugs++; | 1145 | sched->nr_state_machine_bugs++; |
1145 | 1146 | ||
1146 | nr_timestamps++; | 1147 | sched->nr_timestamps++; |
1147 | if (atom->sched_out_time > timestamp) { | 1148 | if (atom->sched_out_time > timestamp) { |
1148 | nr_unordered_timestamps++; | 1149 | sched->nr_unordered_timestamps++; |
1149 | return 0; | 1150 | return 0; |
1150 | } | 1151 | } |
1151 | 1152 | ||
@@ -1155,7 +1156,8 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, | |||
1155 | } | 1156 | } |
1156 | 1157 | ||
1157 | static int | 1158 | static int |
1158 | latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event, | 1159 | latency_migrate_task_event(struct perf_sched *sched, |
1160 | struct trace_migrate_task_event *migrate_task_event, | ||
1159 | struct machine *machine, struct perf_sample *sample) | 1161 | struct machine *machine, struct perf_sample *sample) |
1160 | { | 1162 | { |
1161 | u64 timestamp = sample->time; | 1163 | u64 timestamp = sample->time; |
@@ -1166,16 +1168,16 @@ latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event, | |||
1166 | /* | 1168 | /* |
1167 | * Only need to worry about migration when profiling one CPU. | 1169 | * Only need to worry about migration when profiling one CPU. |
1168 | */ | 1170 | */ |
1169 | if (profile_cpu == -1) | 1171 | if (sched->profile_cpu == -1) |
1170 | return 0; | 1172 | return 0; |
1171 | 1173 | ||
1172 | migrant = machine__findnew_thread(machine, migrate_task_event->pid); | 1174 | migrant = machine__findnew_thread(machine, migrate_task_event->pid); |
1173 | atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid); | 1175 | atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); |
1174 | if (!atoms) { | 1176 | if (!atoms) { |
1175 | if (thread_atoms_insert(migrant)) | 1177 | if (thread_atoms_insert(sched, migrant)) |
1176 | return -1; | 1178 | return -1; |
1177 | register_pid(migrant->pid, migrant->comm); | 1179 | register_pid(sched, migrant->pid, migrant->comm); |
1178 | atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid); | 1180 | atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); |
1179 | if (!atoms) { | 1181 | if (!atoms) { |
1180 | pr_debug("migration-event: Internal tree error"); | 1182 | pr_debug("migration-event: Internal tree error"); |
1181 | return -1; | 1183 | return -1; |
@@ -1189,23 +1191,15 @@ latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event, | |||
1189 | atom = list_entry(atoms->work_list.prev, struct work_atom, list); | 1191 | atom = list_entry(atoms->work_list.prev, struct work_atom, list); |
1190 | atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp; | 1192 | atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp; |
1191 | 1193 | ||
1192 | nr_timestamps++; | 1194 | sched->nr_timestamps++; |
1193 | 1195 | ||
1194 | if (atom->sched_out_time > timestamp) | 1196 | if (atom->sched_out_time > timestamp) |
1195 | nr_unordered_timestamps++; | 1197 | sched->nr_unordered_timestamps++; |
1196 | 1198 | ||
1197 | return 0; | 1199 | return 0; |
1198 | } | 1200 | } |
1199 | 1201 | ||
1200 | static struct trace_sched_handler lat_ops = { | 1202 | static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_list) |
1201 | .wakeup_event = latency_wakeup_event, | ||
1202 | .switch_event = latency_switch_event, | ||
1203 | .runtime_event = latency_runtime_event, | ||
1204 | .fork_event = latency_fork_event, | ||
1205 | .migrate_task_event = latency_migrate_task_event, | ||
1206 | }; | ||
1207 | |||
1208 | static void output_lat_thread(struct work_atoms *work_list) | ||
1209 | { | 1203 | { |
1210 | int i; | 1204 | int i; |
1211 | int ret; | 1205 | int ret; |
@@ -1219,8 +1213,8 @@ static void output_lat_thread(struct work_atoms *work_list) | |||
1219 | if (!strcmp(work_list->thread->comm, "swapper")) | 1213 | if (!strcmp(work_list->thread->comm, "swapper")) |
1220 | return; | 1214 | return; |
1221 | 1215 | ||
1222 | all_runtime += work_list->total_runtime; | 1216 | sched->all_runtime += work_list->total_runtime; |
1223 | all_count += work_list->nb_atoms; | 1217 | sched->all_count += work_list->nb_atoms; |
1224 | 1218 | ||
1225 | ret = printf(" %s:%d ", work_list->thread->comm, work_list->thread->pid); | 1219 | ret = printf(" %s:%d ", work_list->thread->comm, work_list->thread->pid); |
1226 | 1220 | ||
@@ -1246,11 +1240,6 @@ static int pid_cmp(struct work_atoms *l, struct work_atoms *r) | |||
1246 | return 0; | 1240 | return 0; |
1247 | } | 1241 | } |
1248 | 1242 | ||
1249 | static struct sort_dimension pid_sort_dimension = { | ||
1250 | .name = "pid", | ||
1251 | .cmp = pid_cmp, | ||
1252 | }; | ||
1253 | |||
1254 | static int avg_cmp(struct work_atoms *l, struct work_atoms *r) | 1243 | static int avg_cmp(struct work_atoms *l, struct work_atoms *r) |
1255 | { | 1244 | { |
1256 | u64 avgl, avgr; | 1245 | u64 avgl, avgr; |
@@ -1272,11 +1261,6 @@ static int avg_cmp(struct work_atoms *l, struct work_atoms *r) | |||
1272 | return 0; | 1261 | return 0; |
1273 | } | 1262 | } |
1274 | 1263 | ||
1275 | static struct sort_dimension avg_sort_dimension = { | ||
1276 | .name = "avg", | ||
1277 | .cmp = avg_cmp, | ||
1278 | }; | ||
1279 | |||
1280 | static int max_cmp(struct work_atoms *l, struct work_atoms *r) | 1264 | static int max_cmp(struct work_atoms *l, struct work_atoms *r) |
1281 | { | 1265 | { |
1282 | if (l->max_lat < r->max_lat) | 1266 | if (l->max_lat < r->max_lat) |
@@ -1287,11 +1271,6 @@ static int max_cmp(struct work_atoms *l, struct work_atoms *r) | |||
1287 | return 0; | 1271 | return 0; |
1288 | } | 1272 | } |
1289 | 1273 | ||
1290 | static struct sort_dimension max_sort_dimension = { | ||
1291 | .name = "max", | ||
1292 | .cmp = max_cmp, | ||
1293 | }; | ||
1294 | |||
1295 | static int switch_cmp(struct work_atoms *l, struct work_atoms *r) | 1274 | static int switch_cmp(struct work_atoms *l, struct work_atoms *r) |
1296 | { | 1275 | { |
1297 | if (l->nb_atoms < r->nb_atoms) | 1276 | if (l->nb_atoms < r->nb_atoms) |
@@ -1302,11 +1281,6 @@ static int switch_cmp(struct work_atoms *l, struct work_atoms *r) | |||
1302 | return 0; | 1281 | return 0; |
1303 | } | 1282 | } |
1304 | 1283 | ||
1305 | static struct sort_dimension switch_sort_dimension = { | ||
1306 | .name = "switch", | ||
1307 | .cmp = switch_cmp, | ||
1308 | }; | ||
1309 | |||
1310 | static int runtime_cmp(struct work_atoms *l, struct work_atoms *r) | 1284 | static int runtime_cmp(struct work_atoms *l, struct work_atoms *r) |
1311 | { | 1285 | { |
1312 | if (l->total_runtime < r->total_runtime) | 1286 | if (l->total_runtime < r->total_runtime) |
@@ -1317,28 +1291,38 @@ static int runtime_cmp(struct work_atoms *l, struct work_atoms *r) | |||
1317 | return 0; | 1291 | return 0; |
1318 | } | 1292 | } |
1319 | 1293 | ||
1320 | static struct sort_dimension runtime_sort_dimension = { | ||
1321 | .name = "runtime", | ||
1322 | .cmp = runtime_cmp, | ||
1323 | }; | ||
1324 | |||
1325 | static struct sort_dimension *available_sorts[] = { | ||
1326 | &pid_sort_dimension, | ||
1327 | &avg_sort_dimension, | ||
1328 | &max_sort_dimension, | ||
1329 | &switch_sort_dimension, | ||
1330 | &runtime_sort_dimension, | ||
1331 | }; | ||
1332 | |||
1333 | #define NB_AVAILABLE_SORTS (int)(sizeof(available_sorts) / sizeof(struct sort_dimension *)) | ||
1334 | |||
1335 | static LIST_HEAD(sort_list); | ||
1336 | |||
1337 | static int sort_dimension__add(const char *tok, struct list_head *list) | 1294 | static int sort_dimension__add(const char *tok, struct list_head *list) |
1338 | { | 1295 | { |
1339 | int i; | 1296 | size_t i; |
1297 | static struct sort_dimension avg_sort_dimension = { | ||
1298 | .name = "avg", | ||
1299 | .cmp = avg_cmp, | ||
1300 | }; | ||
1301 | static struct sort_dimension max_sort_dimension = { | ||
1302 | .name = "max", | ||
1303 | .cmp = max_cmp, | ||
1304 | }; | ||
1305 | static struct sort_dimension pid_sort_dimension = { | ||
1306 | .name = "pid", | ||
1307 | .cmp = pid_cmp, | ||
1308 | }; | ||
1309 | static struct sort_dimension runtime_sort_dimension = { | ||
1310 | .name = "runtime", | ||
1311 | .cmp = runtime_cmp, | ||
1312 | }; | ||
1313 | static struct sort_dimension switch_sort_dimension = { | ||
1314 | .name = "switch", | ||
1315 | .cmp = switch_cmp, | ||
1316 | }; | ||
1317 | struct sort_dimension *available_sorts[] = { | ||
1318 | &pid_sort_dimension, | ||
1319 | &avg_sort_dimension, | ||
1320 | &max_sort_dimension, | ||
1321 | &switch_sort_dimension, | ||
1322 | &runtime_sort_dimension, | ||
1323 | }; | ||
1340 | 1324 | ||
1341 | for (i = 0; i < NB_AVAILABLE_SORTS; i++) { | 1325 | for (i = 0; i < ARRAY_SIZE(available_sorts); i++) { |
1342 | if (!strcmp(available_sorts[i]->name, tok)) { | 1326 | if (!strcmp(available_sorts[i]->name, tok)) { |
1343 | list_add_tail(&available_sorts[i]->list, list); | 1327 | list_add_tail(&available_sorts[i]->list, list); |
1344 | 1328 | ||
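Editor's note: the hunk above drops the file-scope sort-dimension globals and the hand-rolled NB_AVAILABLE_SORTS macro in favour of function-local static definitions iterated with ARRAY_SIZE. A minimal standalone sketch of that lookup pattern follows; the struct layout, the toy compare callback and the function name sort_dimension__lookup are illustrative stand-ins, not code from perf itself.

	#include <stdio.h>
	#include <string.h>

	#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

	struct sort_dimension {
		const char *name;
		int (*cmp)(int l, int r);	/* toy signature, just for the sketch */
	};

	static int cmp_noop(int l, int r) { (void)l; (void)r; return 0; }

	/* Look up a sort key by name, the way sort_dimension__add() does after the patch. */
	static struct sort_dimension *sort_dimension__lookup(const char *tok)
	{
		static struct sort_dimension avg = { .name = "avg", .cmp = cmp_noop };
		static struct sort_dimension max = { .name = "max", .cmp = cmp_noop };
		struct sort_dimension *available[] = { &avg, &max };
		size_t i;

		for (i = 0; i < ARRAY_SIZE(available); i++)
			if (!strcmp(available[i]->name, tok))
				return available[i];
		return NULL;
	}

	int main(void)
	{
		printf("%s\n", sort_dimension__lookup("max") ? "found" : "unknown");
		return 0;
	}

Keeping the table local to the one function that walks it means no other translation-unit state is needed, which is the point of the whole patch.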
@@ -1349,31 +1333,28 @@ static int sort_dimension__add(const char *tok, struct list_head *list) | |||
1349 | return -1; | 1333 | return -1; |
1350 | } | 1334 | } |
1351 | 1335 | ||
1352 | static void setup_sorting(void); | 1336 | static void perf_sched__sort_lat(struct perf_sched *sched) |
1353 | |||
1354 | static void sort_lat(void) | ||
1355 | { | 1337 | { |
1356 | struct rb_node *node; | 1338 | struct rb_node *node; |
1357 | 1339 | ||
1358 | for (;;) { | 1340 | for (;;) { |
1359 | struct work_atoms *data; | 1341 | struct work_atoms *data; |
1360 | node = rb_first(&atom_root); | 1342 | node = rb_first(&sched->atom_root); |
1361 | if (!node) | 1343 | if (!node) |
1362 | break; | 1344 | break; |
1363 | 1345 | ||
1364 | rb_erase(node, &atom_root); | 1346 | rb_erase(node, &sched->atom_root); |
1365 | data = rb_entry(node, struct work_atoms, node); | 1347 | data = rb_entry(node, struct work_atoms, node); |
1366 | __thread_latency_insert(&sorted_atom_root, data, &sort_list); | 1348 | __thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list); |
1367 | } | 1349 | } |
1368 | } | 1350 | } |
1369 | 1351 | ||
1370 | static struct trace_sched_handler *trace_handler; | 1352 | static int process_sched_wakeup_event(struct perf_tool *tool, |
1371 | |||
1372 | static int process_sched_wakeup_event(struct perf_tool *tool __maybe_unused, | ||
1373 | struct event_format *event, | 1353 | struct event_format *event, |
1374 | struct perf_sample *sample, | 1354 | struct perf_sample *sample, |
1375 | struct machine *machine) | 1355 | struct machine *machine) |
1376 | { | 1356 | { |
1357 | struct perf_sched *sched = container_of(tool, struct perf_sched, tool); | ||
1377 | void *data = sample->raw_data; | 1358 | void *data = sample->raw_data; |
1378 | struct trace_wakeup_event wakeup_event; | 1359 | struct trace_wakeup_event wakeup_event; |
1379 | int err = 0; | 1360 | int err = 0; |
@@ -1386,27 +1367,15 @@ static int process_sched_wakeup_event(struct perf_tool *tool __maybe_unused, | |||
1386 | FILL_FIELD(wakeup_event, success, event, data); | 1367 | FILL_FIELD(wakeup_event, success, event, data); |
1387 | FILL_FIELD(wakeup_event, cpu, event, data); | 1368 | FILL_FIELD(wakeup_event, cpu, event, data); |
1388 | 1369 | ||
1389 | if (trace_handler->wakeup_event) | 1370 | if (sched->tp_handler->wakeup_event) |
1390 | err = trace_handler->wakeup_event(&wakeup_event, machine, event, sample); | 1371 | err = sched->tp_handler->wakeup_event(sched, &wakeup_event, machine, event, sample); |
1391 | 1372 | ||
1392 | return err; | 1373 | return err; |
1393 | } | 1374 | } |
1394 | 1375 | ||
1395 | /* | ||
1396 | * Track the current task - that way we can know whether there's any | ||
1397 | * weird events, such as a task being switched away that is not current. | ||
1398 | */ | ||
1399 | static int max_cpu; | ||
1400 | |||
1401 | static u32 curr_pid[MAX_CPUS] = { [0 ... MAX_CPUS-1] = -1 }; | ||
1402 | |||
1403 | static struct thread *curr_thread[MAX_CPUS]; | ||
1404 | |||
1405 | static char next_shortname1 = 'A'; | ||
1406 | static char next_shortname2 = '0'; | ||
1407 | |||
1408 | static int | 1376 | static int |
1409 | map_switch_event(struct trace_switch_event *switch_event, | 1377 | map_switch_event(struct perf_sched *sched, |
1378 | struct trace_switch_event *switch_event, | ||
1410 | struct machine *machine, | 1379 | struct machine *machine, |
1411 | struct event_format *event __maybe_unused, | 1380 | struct event_format *event __maybe_unused, |
1412 | struct perf_sample *sample) | 1381 | struct perf_sample *sample) |
@@ -1419,11 +1388,11 @@ map_switch_event(struct trace_switch_event *switch_event, | |||
1419 | 1388 | ||
1420 | BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0); | 1389 | BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0); |
1421 | 1390 | ||
1422 | if (this_cpu > max_cpu) | 1391 | if (this_cpu > sched->max_cpu) |
1423 | max_cpu = this_cpu; | 1392 | sched->max_cpu = this_cpu; |
1424 | 1393 | ||
1425 | timestamp0 = cpu_last_switched[this_cpu]; | 1394 | timestamp0 = sched->cpu_last_switched[this_cpu]; |
1426 | cpu_last_switched[this_cpu] = timestamp; | 1395 | sched->cpu_last_switched[this_cpu] = timestamp; |
1427 | if (timestamp0) | 1396 | if (timestamp0) |
1428 | delta = timestamp - timestamp0; | 1397 | delta = timestamp - timestamp0; |
1429 | else | 1398 | else |
@@ -1437,37 +1406,37 @@ map_switch_event(struct trace_switch_event *switch_event, | |||
1437 | sched_out = machine__findnew_thread(machine, switch_event->prev_pid); | 1406 | sched_out = machine__findnew_thread(machine, switch_event->prev_pid); |
1438 | sched_in = machine__findnew_thread(machine, switch_event->next_pid); | 1407 | sched_in = machine__findnew_thread(machine, switch_event->next_pid); |
1439 | 1408 | ||
1440 | curr_thread[this_cpu] = sched_in; | 1409 | sched->curr_thread[this_cpu] = sched_in; |
1441 | 1410 | ||
1442 | printf(" "); | 1411 | printf(" "); |
1443 | 1412 | ||
1444 | new_shortname = 0; | 1413 | new_shortname = 0; |
1445 | if (!sched_in->shortname[0]) { | 1414 | if (!sched_in->shortname[0]) { |
1446 | sched_in->shortname[0] = next_shortname1; | 1415 | sched_in->shortname[0] = sched->next_shortname1; |
1447 | sched_in->shortname[1] = next_shortname2; | 1416 | sched_in->shortname[1] = sched->next_shortname2; |
1448 | 1417 | ||
1449 | if (next_shortname1 < 'Z') { | 1418 | if (sched->next_shortname1 < 'Z') { |
1450 | next_shortname1++; | 1419 | sched->next_shortname1++; |
1451 | } else { | 1420 | } else { |
1452 | next_shortname1='A'; | 1421 | sched->next_shortname1='A'; |
1453 | if (next_shortname2 < '9') { | 1422 | if (sched->next_shortname2 < '9') { |
1454 | next_shortname2++; | 1423 | sched->next_shortname2++; |
1455 | } else { | 1424 | } else { |
1456 | next_shortname2='0'; | 1425 | sched->next_shortname2='0'; |
1457 | } | 1426 | } |
1458 | } | 1427 | } |
1459 | new_shortname = 1; | 1428 | new_shortname = 1; |
1460 | } | 1429 | } |
1461 | 1430 | ||
1462 | for (cpu = 0; cpu <= max_cpu; cpu++) { | 1431 | for (cpu = 0; cpu <= sched->max_cpu; cpu++) { |
1463 | if (cpu != this_cpu) | 1432 | if (cpu != this_cpu) |
1464 | printf(" "); | 1433 | printf(" "); |
1465 | else | 1434 | else |
1466 | printf("*"); | 1435 | printf("*"); |
1467 | 1436 | ||
1468 | if (curr_thread[cpu]) { | 1437 | if (sched->curr_thread[cpu]) { |
1469 | if (curr_thread[cpu]->pid) | 1438 | if (sched->curr_thread[cpu]->pid) |
1470 | printf("%2s ", curr_thread[cpu]->shortname); | 1439 | printf("%2s ", sched->curr_thread[cpu]->shortname); |
1471 | else | 1440 | else |
1472 | printf(". "); | 1441 | printf(". "); |
1473 | } else | 1442 | } else |
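Editor's note: the 'perf sched map' output labels each task with a two-character shortname, and the rollover logic above simply counts through A0..Z9 using the two counters that now live in struct perf_sched. A toy version of just that counter, assuming (as the original does) that exhausting all 26*10 names wraps around to A0:

	#include <stdio.h>

	/* Produce successive two-character labels: A0, B0, ... Z0, A1, ... Z9. */
	static void next_shortname(char *c1, char *c2, char out[3])
	{
		out[0] = *c1;
		out[1] = *c2;
		out[2] = '\0';

		if (*c1 < 'Z') {
			(*c1)++;
		} else {
			*c1 = 'A';
			if (*c2 < '9')
				(*c2)++;
			else
				*c2 = '0';	/* wrap around, as the perf code does */
		}
	}

	int main(void)
	{
		char c1 = 'A', c2 = '0', name[3];
		int i;

		for (i = 0; i < 30; i++) {
			next_shortname(&c1, &c2, name);
			printf("%s ", name);
		}
		printf("\n");
		return 0;
	}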
@@ -1485,11 +1454,12 @@ map_switch_event(struct trace_switch_event *switch_event, | |||
1485 | return 0; | 1454 | return 0; |
1486 | } | 1455 | } |
1487 | 1456 | ||
1488 | static int process_sched_switch_event(struct perf_tool *tool __maybe_unused, | 1457 | static int process_sched_switch_event(struct perf_tool *tool, |
1489 | struct event_format *event, | 1458 | struct event_format *event, |
1490 | struct perf_sample *sample, | 1459 | struct perf_sample *sample, |
1491 | struct machine *machine) | 1460 | struct machine *machine) |
1492 | { | 1461 | { |
1462 | struct perf_sched *sched = container_of(tool, struct perf_sched, tool); | ||
1493 | int this_cpu = sample->cpu, err = 0; | 1463 | int this_cpu = sample->cpu, err = 0; |
1494 | void *data = sample->raw_data; | 1464 | void *data = sample->raw_data; |
1495 | struct trace_switch_event switch_event; | 1465 | struct trace_switch_event switch_event; |
@@ -1504,26 +1474,27 @@ static int process_sched_switch_event(struct perf_tool *tool __maybe_unused, | |||
1504 | FILL_FIELD(switch_event, next_pid, event, data); | 1474 | FILL_FIELD(switch_event, next_pid, event, data); |
1505 | FILL_FIELD(switch_event, next_prio, event, data); | 1475 | FILL_FIELD(switch_event, next_prio, event, data); |
1506 | 1476 | ||
1507 | if (curr_pid[this_cpu] != (u32)-1) { | 1477 | if (sched->curr_pid[this_cpu] != (u32)-1) { |
1508 | /* | 1478 | /* |
1509 | * Are we trying to switch away a PID that is | 1479 | * Are we trying to switch away a PID that is |
1510 | * not current? | 1480 | * not current? |
1511 | */ | 1481 | */ |
1512 | if (curr_pid[this_cpu] != switch_event.prev_pid) | 1482 | if (sched->curr_pid[this_cpu] != switch_event.prev_pid) |
1513 | nr_context_switch_bugs++; | 1483 | sched->nr_context_switch_bugs++; |
1514 | } | 1484 | } |
1515 | if (trace_handler->switch_event) | 1485 | if (sched->tp_handler->switch_event) |
1516 | err = trace_handler->switch_event(&switch_event, machine, event, sample); | 1486 | err = sched->tp_handler->switch_event(sched, &switch_event, machine, event, sample); |
1517 | 1487 | ||
1518 | curr_pid[this_cpu] = switch_event.next_pid; | 1488 | sched->curr_pid[this_cpu] = switch_event.next_pid; |
1519 | return err; | 1489 | return err; |
1520 | } | 1490 | } |
1521 | 1491 | ||
1522 | static int process_sched_runtime_event(struct perf_tool *tool __maybe_unused, | 1492 | static int process_sched_runtime_event(struct perf_tool *tool, |
1523 | struct event_format *event, | 1493 | struct event_format *event, |
1524 | struct perf_sample *sample, | 1494 | struct perf_sample *sample, |
1525 | struct machine *machine) | 1495 | struct machine *machine) |
1526 | { | 1496 | { |
1497 | struct perf_sched *sched = container_of(tool, struct perf_sched, tool); | ||
1527 | void *data = sample->raw_data; | 1498 | void *data = sample->raw_data; |
1528 | struct trace_runtime_event runtime_event; | 1499 | struct trace_runtime_event runtime_event; |
1529 | int err = 0; | 1500 | int err = 0; |
@@ -1533,17 +1504,18 @@ static int process_sched_runtime_event(struct perf_tool *tool __maybe_unused, | |||
1533 | FILL_FIELD(runtime_event, runtime, event, data); | 1504 | FILL_FIELD(runtime_event, runtime, event, data); |
1534 | FILL_FIELD(runtime_event, vruntime, event, data); | 1505 | FILL_FIELD(runtime_event, vruntime, event, data); |
1535 | 1506 | ||
1536 | if (trace_handler->runtime_event) | 1507 | if (sched->tp_handler->runtime_event) |
1537 | err = trace_handler->runtime_event(&runtime_event, machine, sample); | 1508 | err = sched->tp_handler->runtime_event(sched, &runtime_event, machine, sample); |
1538 | 1509 | ||
1539 | return err; | 1510 | return err; |
1540 | } | 1511 | } |
1541 | 1512 | ||
1542 | static int process_sched_fork_event(struct perf_tool *tool __maybe_unused, | 1513 | static int process_sched_fork_event(struct perf_tool *tool, |
1543 | struct event_format *event, | 1514 | struct event_format *event, |
1544 | struct perf_sample *sample, | 1515 | struct perf_sample *sample, |
1545 | struct machine *machine __maybe_unused) | 1516 | struct machine *machine __maybe_unused) |
1546 | { | 1517 | { |
1518 | struct perf_sched *sched = container_of(tool, struct perf_sched, tool); | ||
1547 | void *data = sample->raw_data; | 1519 | void *data = sample->raw_data; |
1548 | struct trace_fork_event fork_event; | 1520 | struct trace_fork_event fork_event; |
1549 | int err = 0; | 1521 | int err = 0; |
@@ -1555,8 +1527,8 @@ static int process_sched_fork_event(struct perf_tool *tool __maybe_unused, | |||
1555 | FILL_ARRAY(fork_event, child_comm, event, data); | 1527 | FILL_ARRAY(fork_event, child_comm, event, data); |
1556 | FILL_FIELD(fork_event, child_pid, event, data); | 1528 | FILL_FIELD(fork_event, child_pid, event, data); |
1557 | 1529 | ||
1558 | if (trace_handler->fork_event) | 1530 | if (sched->tp_handler->fork_event) |
1559 | err = trace_handler->fork_event(&fork_event, event); | 1531 | err = sched->tp_handler->fork_event(sched, &fork_event, event); |
1560 | 1532 | ||
1561 | return err; | 1533 | return err; |
1562 | } | 1534 | } |
@@ -1572,11 +1544,12 @@ static int process_sched_exit_event(struct perf_tool *tool __maybe_unused, | |||
1572 | return 0; | 1544 | return 0; |
1573 | } | 1545 | } |
1574 | 1546 | ||
1575 | static int process_sched_migrate_task_event(struct perf_tool *tool __maybe_unused, | 1547 | static int process_sched_migrate_task_event(struct perf_tool *tool, |
1576 | struct event_format *event, | 1548 | struct event_format *event, |
1577 | struct perf_sample *sample, | 1549 | struct perf_sample *sample, |
1578 | struct machine *machine) | 1550 | struct machine *machine) |
1579 | { | 1551 | { |
1552 | struct perf_sched *sched = container_of(tool, struct perf_sched, tool); | ||
1580 | void *data = sample->raw_data; | 1553 | void *data = sample->raw_data; |
1581 | struct trace_migrate_task_event migrate_task_event; | 1554 | struct trace_migrate_task_event migrate_task_event; |
1582 | int err = 0; | 1555 | int err = 0; |
@@ -1588,8 +1561,8 @@ static int process_sched_migrate_task_event(struct perf_tool *tool __maybe_unuse | |||
1588 | FILL_FIELD(migrate_task_event, prio, event, data); | 1561 | FILL_FIELD(migrate_task_event, prio, event, data); |
1589 | FILL_FIELD(migrate_task_event, cpu, event, data); | 1562 | FILL_FIELD(migrate_task_event, cpu, event, data); |
1590 | 1563 | ||
1591 | if (trace_handler->migrate_task_event) | 1564 | if (sched->tp_handler->migrate_task_event) |
1592 | err = trace_handler->migrate_task_event(&migrate_task_event, machine, sample); | 1565 | err = sched->tp_handler->migrate_task_event(sched, &migrate_task_event, machine, sample); |
1593 | 1566 | ||
1594 | return err; | 1567 | return err; |
1595 | } | 1568 | } |
@@ -1625,15 +1598,8 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_ | |||
1625 | return err; | 1598 | return err; |
1626 | } | 1599 | } |
1627 | 1600 | ||
1628 | static struct perf_tool perf_sched = { | 1601 | static int perf_sched__read_events(struct perf_sched *sched, bool destroy, |
1629 | .sample = perf_sched__process_tracepoint_sample, | 1602 | struct perf_session **psession) |
1630 | .comm = perf_event__process_comm, | ||
1631 | .lost = perf_event__process_lost, | ||
1632 | .fork = perf_event__process_task, | ||
1633 | .ordered_samples = true, | ||
1634 | }; | ||
1635 | |||
1636 | static int read_events(bool destroy, struct perf_session **psession) | ||
1637 | { | 1603 | { |
1638 | const struct perf_evsel_str_handler handlers[] = { | 1604 | const struct perf_evsel_str_handler handlers[] = { |
1639 | { "sched:sched_switch", process_sched_switch_event, }, | 1605 | { "sched:sched_switch", process_sched_switch_event, }, |
@@ -1646,7 +1612,7 @@ static int read_events(bool destroy, struct perf_session **psession) | |||
1646 | }; | 1612 | }; |
1647 | struct perf_session *session; | 1613 | struct perf_session *session; |
1648 | 1614 | ||
1649 | session = perf_session__new(input_name, O_RDONLY, 0, false, &perf_sched); | 1615 | session = perf_session__new(sched->input_name, O_RDONLY, 0, false, &sched->tool); |
1650 | if (session == NULL) { | 1616 | if (session == NULL) { |
1651 | pr_debug("No Memory for session\n"); | 1617 | pr_debug("No Memory for session\n"); |
1652 | return -1; | 1618 | return -1; |
@@ -1656,15 +1622,15 @@ static int read_events(bool destroy, struct perf_session **psession) | |||
1656 | goto out_delete; | 1622 | goto out_delete; |
1657 | 1623 | ||
1658 | if (perf_session__has_traces(session, "record -R")) { | 1624 | if (perf_session__has_traces(session, "record -R")) { |
1659 | int err = perf_session__process_events(session, &perf_sched); | 1625 | int err = perf_session__process_events(session, &sched->tool); |
1660 | if (err) { | 1626 | if (err) { |
1661 | pr_err("Failed to process events, error %d", err); | 1627 | pr_err("Failed to process events, error %d", err); |
1662 | goto out_delete; | 1628 | goto out_delete; |
1663 | } | 1629 | } |
1664 | 1630 | ||
1665 | nr_events = session->hists.stats.nr_events[0]; | 1631 | sched->nr_events = session->hists.stats.nr_events[0]; |
1666 | nr_lost_events = session->hists.stats.total_lost; | 1632 | sched->nr_lost_events = session->hists.stats.total_lost; |
1667 | nr_lost_chunks = session->hists.stats.nr_events[PERF_RECORD_LOST]; | 1633 | sched->nr_lost_chunks = session->hists.stats.nr_events[PERF_RECORD_LOST]; |
1668 | } | 1634 | } |
1669 | 1635 | ||
1670 | if (destroy) | 1636 | if (destroy) |
@@ -1680,213 +1646,158 @@ out_delete: | |||
1680 | return -1; | 1646 | return -1; |
1681 | } | 1647 | } |
1682 | 1648 | ||
1683 | static void print_bad_events(void) | 1649 | static void print_bad_events(struct perf_sched *sched) |
1684 | { | 1650 | { |
1685 | if (nr_unordered_timestamps && nr_timestamps) { | 1651 | if (sched->nr_unordered_timestamps && sched->nr_timestamps) { |
1686 | printf(" INFO: %.3f%% unordered timestamps (%ld out of %ld)\n", | 1652 | printf(" INFO: %.3f%% unordered timestamps (%ld out of %ld)\n", |
1687 | (double)nr_unordered_timestamps/(double)nr_timestamps*100.0, | 1653 | (double)sched->nr_unordered_timestamps/(double)sched->nr_timestamps*100.0, |
1688 | nr_unordered_timestamps, nr_timestamps); | 1654 | sched->nr_unordered_timestamps, sched->nr_timestamps); |
1689 | } | 1655 | } |
1690 | if (nr_lost_events && nr_events) { | 1656 | if (sched->nr_lost_events && sched->nr_events) { |
1691 | printf(" INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n", | 1657 | printf(" INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n", |
1692 | (double)nr_lost_events/(double)nr_events*100.0, | 1658 | (double)sched->nr_lost_events/(double)sched->nr_events * 100.0, |
1693 | nr_lost_events, nr_events, nr_lost_chunks); | 1659 | sched->nr_lost_events, sched->nr_events, sched->nr_lost_chunks); |
1694 | } | 1660 | } |
1695 | if (nr_state_machine_bugs && nr_timestamps) { | 1661 | if (sched->nr_state_machine_bugs && sched->nr_timestamps) { |
1696 | printf(" INFO: %.3f%% state machine bugs (%ld out of %ld)", | 1662 | printf(" INFO: %.3f%% state machine bugs (%ld out of %ld)", |
1697 | (double)nr_state_machine_bugs/(double)nr_timestamps*100.0, | 1663 | (double)sched->nr_state_machine_bugs/(double)sched->nr_timestamps*100.0, |
1698 | nr_state_machine_bugs, nr_timestamps); | 1664 | sched->nr_state_machine_bugs, sched->nr_timestamps); |
1699 | if (nr_lost_events) | 1665 | if (sched->nr_lost_events) |
1700 | printf(" (due to lost events?)"); | 1666 | printf(" (due to lost events?)"); |
1701 | printf("\n"); | 1667 | printf("\n"); |
1702 | } | 1668 | } |
1703 | if (nr_context_switch_bugs && nr_timestamps) { | 1669 | if (sched->nr_context_switch_bugs && sched->nr_timestamps) { |
1704 | printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)", | 1670 | printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)", |
1705 | (double)nr_context_switch_bugs/(double)nr_timestamps*100.0, | 1671 | (double)sched->nr_context_switch_bugs/(double)sched->nr_timestamps*100.0, |
1706 | nr_context_switch_bugs, nr_timestamps); | 1672 | sched->nr_context_switch_bugs, sched->nr_timestamps); |
1707 | if (nr_lost_events) | 1673 | if (sched->nr_lost_events) |
1708 | printf(" (due to lost events?)"); | 1674 | printf(" (due to lost events?)"); |
1709 | printf("\n"); | 1675 | printf("\n"); |
1710 | } | 1676 | } |
1711 | } | 1677 | } |
1712 | 1678 | ||
1713 | static int __cmd_lat(void) | 1679 | static int perf_sched__lat(struct perf_sched *sched) |
1714 | { | 1680 | { |
1715 | struct rb_node *next; | 1681 | struct rb_node *next; |
1716 | struct perf_session *session; | 1682 | struct perf_session *session; |
1717 | 1683 | ||
1718 | setup_pager(); | 1684 | setup_pager(); |
1719 | if (read_events(false, &session)) | 1685 | if (perf_sched__read_events(sched, false, &session)) |
1720 | return -1; | 1686 | return -1; |
1721 | sort_lat(); | 1687 | perf_sched__sort_lat(sched); |
1722 | 1688 | ||
1723 | printf("\n ---------------------------------------------------------------------------------------------------------------\n"); | 1689 | printf("\n ---------------------------------------------------------------------------------------------------------------\n"); |
1724 | printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n"); | 1690 | printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n"); |
1725 | printf(" ---------------------------------------------------------------------------------------------------------------\n"); | 1691 | printf(" ---------------------------------------------------------------------------------------------------------------\n"); |
1726 | 1692 | ||
1727 | next = rb_first(&sorted_atom_root); | 1693 | next = rb_first(&sched->sorted_atom_root); |
1728 | 1694 | ||
1729 | while (next) { | 1695 | while (next) { |
1730 | struct work_atoms *work_list; | 1696 | struct work_atoms *work_list; |
1731 | 1697 | ||
1732 | work_list = rb_entry(next, struct work_atoms, node); | 1698 | work_list = rb_entry(next, struct work_atoms, node); |
1733 | output_lat_thread(work_list); | 1699 | output_lat_thread(sched, work_list); |
1734 | next = rb_next(next); | 1700 | next = rb_next(next); |
1735 | } | 1701 | } |
1736 | 1702 | ||
1737 | printf(" -----------------------------------------------------------------------------------------\n"); | 1703 | printf(" -----------------------------------------------------------------------------------------\n"); |
1738 | printf(" TOTAL: |%11.3f ms |%9" PRIu64 " |\n", | 1704 | printf(" TOTAL: |%11.3f ms |%9" PRIu64 " |\n", |
1739 | (double)all_runtime/1e6, all_count); | 1705 | (double)sched->all_runtime / 1e6, sched->all_count); |
1740 | 1706 | ||
1741 | printf(" ---------------------------------------------------\n"); | 1707 | printf(" ---------------------------------------------------\n"); |
1742 | 1708 | ||
1743 | print_bad_events(); | 1709 | print_bad_events(sched); |
1744 | printf("\n"); | 1710 | printf("\n"); |
1745 | 1711 | ||
1746 | perf_session__delete(session); | 1712 | perf_session__delete(session); |
1747 | return 0; | 1713 | return 0; |
1748 | } | 1714 | } |
1749 | 1715 | ||
1750 | static struct trace_sched_handler map_ops = { | 1716 | static int perf_sched__map(struct perf_sched *sched) |
1751 | .wakeup_event = NULL, | ||
1752 | .switch_event = map_switch_event, | ||
1753 | .runtime_event = NULL, | ||
1754 | .fork_event = NULL, | ||
1755 | }; | ||
1756 | |||
1757 | static int __cmd_map(void) | ||
1758 | { | 1717 | { |
1759 | max_cpu = sysconf(_SC_NPROCESSORS_CONF); | 1718 | sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF); |
1760 | 1719 | ||
1761 | setup_pager(); | 1720 | setup_pager(); |
1762 | if (read_events(true, NULL)) | 1721 | if (perf_sched__read_events(sched, true, NULL)) |
1763 | return -1; | 1722 | return -1; |
1764 | print_bad_events(); | 1723 | print_bad_events(sched); |
1765 | return 0; | 1724 | return 0; |
1766 | } | 1725 | } |
1767 | 1726 | ||
1768 | static int __cmd_replay(void) | 1727 | static int perf_sched__replay(struct perf_sched *sched) |
1769 | { | 1728 | { |
1770 | unsigned long i; | 1729 | unsigned long i; |
1771 | 1730 | ||
1772 | calibrate_run_measurement_overhead(); | 1731 | calibrate_run_measurement_overhead(sched); |
1773 | calibrate_sleep_measurement_overhead(); | 1732 | calibrate_sleep_measurement_overhead(sched); |
1774 | 1733 | ||
1775 | test_calibrations(); | 1734 | test_calibrations(sched); |
1776 | 1735 | ||
1777 | if (read_events(true, NULL)) | 1736 | if (perf_sched__read_events(sched, true, NULL)) |
1778 | return -1; | 1737 | return -1; |
1779 | 1738 | ||
1780 | printf("nr_run_events: %ld\n", nr_run_events); | 1739 | printf("nr_run_events: %ld\n", sched->nr_run_events); |
1781 | printf("nr_sleep_events: %ld\n", nr_sleep_events); | 1740 | printf("nr_sleep_events: %ld\n", sched->nr_sleep_events); |
1782 | printf("nr_wakeup_events: %ld\n", nr_wakeup_events); | 1741 | printf("nr_wakeup_events: %ld\n", sched->nr_wakeup_events); |
1783 | 1742 | ||
1784 | if (targetless_wakeups) | 1743 | if (sched->targetless_wakeups) |
1785 | printf("target-less wakeups: %ld\n", targetless_wakeups); | 1744 | printf("target-less wakeups: %ld\n", sched->targetless_wakeups); |
1786 | if (multitarget_wakeups) | 1745 | if (sched->multitarget_wakeups) |
1787 | printf("multi-target wakeups: %ld\n", multitarget_wakeups); | 1746 | printf("multi-target wakeups: %ld\n", sched->multitarget_wakeups); |
1788 | if (nr_run_events_optimized) | 1747 | if (sched->nr_run_events_optimized) |
1789 | printf("run atoms optimized: %ld\n", | 1748 | printf("run atoms optimized: %ld\n", |
1790 | nr_run_events_optimized); | 1749 | sched->nr_run_events_optimized); |
1791 | 1750 | ||
1792 | print_task_traces(); | 1751 | print_task_traces(sched); |
1793 | add_cross_task_wakeups(); | 1752 | add_cross_task_wakeups(sched); |
1794 | 1753 | ||
1795 | create_tasks(); | 1754 | create_tasks(sched); |
1796 | printf("------------------------------------------------------------\n"); | 1755 | printf("------------------------------------------------------------\n"); |
1797 | for (i = 0; i < replay_repeat; i++) | 1756 | for (i = 0; i < sched->replay_repeat; i++) |
1798 | run_one_test(); | 1757 | run_one_test(sched); |
1799 | 1758 | ||
1800 | return 0; | 1759 | return 0; |
1801 | } | 1760 | } |
1802 | 1761 | ||
1803 | 1762 | static void setup_sorting(struct perf_sched *sched, const struct option *options, | |
1804 | static const char * const sched_usage[] = { | 1763 | const char * const usage_msg[]) |
1805 | "perf sched [<options>] {record|latency|map|replay|script}", | ||
1806 | NULL | ||
1807 | }; | ||
1808 | |||
1809 | static const struct option sched_options[] = { | ||
1810 | OPT_STRING('i', "input", &input_name, "file", | ||
1811 | "input file name"), | ||
1812 | OPT_INCR('v', "verbose", &verbose, | ||
1813 | "be more verbose (show symbol address, etc)"), | ||
1814 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, | ||
1815 | "dump raw trace in ASCII"), | ||
1816 | OPT_END() | ||
1817 | }; | ||
1818 | |||
1819 | static const char * const latency_usage[] = { | ||
1820 | "perf sched latency [<options>]", | ||
1821 | NULL | ||
1822 | }; | ||
1823 | |||
1824 | static const struct option latency_options[] = { | ||
1825 | OPT_STRING('s', "sort", &sort_order, "key[,key2...]", | ||
1826 | "sort by key(s): runtime, switch, avg, max"), | ||
1827 | OPT_INCR('v', "verbose", &verbose, | ||
1828 | "be more verbose (show symbol address, etc)"), | ||
1829 | OPT_INTEGER('C', "CPU", &profile_cpu, | ||
1830 | "CPU to profile on"), | ||
1831 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, | ||
1832 | "dump raw trace in ASCII"), | ||
1833 | OPT_END() | ||
1834 | }; | ||
1835 | |||
1836 | static const char * const replay_usage[] = { | ||
1837 | "perf sched replay [<options>]", | ||
1838 | NULL | ||
1839 | }; | ||
1840 | |||
1841 | static const struct option replay_options[] = { | ||
1842 | OPT_UINTEGER('r', "repeat", &replay_repeat, | ||
1843 | "repeat the workload replay N times (-1: infinite)"), | ||
1844 | OPT_INCR('v', "verbose", &verbose, | ||
1845 | "be more verbose (show symbol address, etc)"), | ||
1846 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, | ||
1847 | "dump raw trace in ASCII"), | ||
1848 | OPT_END() | ||
1849 | }; | ||
1850 | |||
1851 | static void setup_sorting(void) | ||
1852 | { | 1764 | { |
1853 | char *tmp, *tok, *str = strdup(sort_order); | 1765 | char *tmp, *tok, *str = strdup(sched->sort_order); |
1854 | 1766 | ||
1855 | for (tok = strtok_r(str, ", ", &tmp); | 1767 | for (tok = strtok_r(str, ", ", &tmp); |
1856 | tok; tok = strtok_r(NULL, ", ", &tmp)) { | 1768 | tok; tok = strtok_r(NULL, ", ", &tmp)) { |
1857 | if (sort_dimension__add(tok, &sort_list) < 0) { | 1769 | if (sort_dimension__add(tok, &sched->sort_list) < 0) { |
1858 | error("Unknown --sort key: `%s'", tok); | 1770 | error("Unknown --sort key: `%s'", tok); |
1859 | usage_with_options(latency_usage, latency_options); | 1771 | usage_with_options(usage_msg, options); |
1860 | } | 1772 | } |
1861 | } | 1773 | } |
1862 | 1774 | ||
1863 | free(str); | 1775 | free(str); |
1864 | 1776 | ||
1865 | sort_dimension__add("pid", &cmp_pid); | 1777 | sort_dimension__add("pid", &sched->cmp_pid); |
1866 | } | 1778 | } |
1867 | 1779 | ||
1868 | static const char *record_args[] = { | ||
1869 | "record", | ||
1870 | "-a", | ||
1871 | "-R", | ||
1872 | "-f", | ||
1873 | "-m", "1024", | ||
1874 | "-c", "1", | ||
1875 | "-e", "sched:sched_switch", | ||
1876 | "-e", "sched:sched_stat_wait", | ||
1877 | "-e", "sched:sched_stat_sleep", | ||
1878 | "-e", "sched:sched_stat_iowait", | ||
1879 | "-e", "sched:sched_stat_runtime", | ||
1880 | "-e", "sched:sched_process_exit", | ||
1881 | "-e", "sched:sched_process_fork", | ||
1882 | "-e", "sched:sched_wakeup", | ||
1883 | "-e", "sched:sched_migrate_task", | ||
1884 | }; | ||
1885 | |||
1886 | static int __cmd_record(int argc, const char **argv) | 1780 | static int __cmd_record(int argc, const char **argv) |
1887 | { | 1781 | { |
1888 | unsigned int rec_argc, i, j; | 1782 | unsigned int rec_argc, i, j; |
1889 | const char **rec_argv; | 1783 | const char **rec_argv; |
1784 | const char * const record_args[] = { | ||
1785 | "record", | ||
1786 | "-a", | ||
1787 | "-R", | ||
1788 | "-f", | ||
1789 | "-m", "1024", | ||
1790 | "-c", "1", | ||
1791 | "-e", "sched:sched_switch", | ||
1792 | "-e", "sched:sched_stat_wait", | ||
1793 | "-e", "sched:sched_stat_sleep", | ||
1794 | "-e", "sched:sched_stat_iowait", | ||
1795 | "-e", "sched:sched_stat_runtime", | ||
1796 | "-e", "sched:sched_process_exit", | ||
1797 | "-e", "sched:sched_process_fork", | ||
1798 | "-e", "sched:sched_wakeup", | ||
1799 | "-e", "sched:sched_migrate_task", | ||
1800 | }; | ||
1890 | 1801 | ||
1891 | rec_argc = ARRAY_SIZE(record_args) + argc - 1; | 1802 | rec_argc = ARRAY_SIZE(record_args) + argc - 1; |
1892 | rec_argv = calloc(rec_argc + 1, sizeof(char *)); | 1803 | rec_argv = calloc(rec_argc + 1, sizeof(char *)); |
@@ -1907,6 +1818,83 @@ static int __cmd_record(int argc, const char **argv) | |||
1907 | 1818 | ||
1908 | int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) | 1819 | int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) |
1909 | { | 1820 | { |
1821 | const char default_sort_order[] = "avg, max, switch, runtime"; | ||
1822 | struct perf_sched sched = { | ||
1823 | .tool = { | ||
1824 | .sample = perf_sched__process_tracepoint_sample, | ||
1825 | .comm = perf_event__process_comm, | ||
1826 | .lost = perf_event__process_lost, | ||
1827 | .fork = perf_event__process_task, | ||
1828 | .ordered_samples = true, | ||
1829 | }, | ||
1830 | .cmp_pid = LIST_HEAD_INIT(sched.cmp_pid), | ||
1831 | .sort_list = LIST_HEAD_INIT(sched.sort_list), | ||
1832 | .start_work_mutex = PTHREAD_MUTEX_INITIALIZER, | ||
1833 | .work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER, | ||
1834 | .curr_pid = { [0 ... MAX_CPUS - 1] = -1 }, | ||
1835 | .sort_order = default_sort_order, | ||
1836 | .replay_repeat = 10, | ||
1837 | .profile_cpu = -1, | ||
1838 | .next_shortname1 = 'A', | ||
1839 | .next_shortname2 = '0', | ||
1840 | }; | ||
1841 | const struct option latency_options[] = { | ||
1842 | OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]", | ||
1843 | "sort by key(s): runtime, switch, avg, max"), | ||
1844 | OPT_INCR('v', "verbose", &verbose, | ||
1845 | "be more verbose (show symbol address, etc)"), | ||
1846 | OPT_INTEGER('C', "CPU", &sched.profile_cpu, | ||
1847 | "CPU to profile on"), | ||
1848 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, | ||
1849 | "dump raw trace in ASCII"), | ||
1850 | OPT_END() | ||
1851 | }; | ||
1852 | const struct option replay_options[] = { | ||
1853 | OPT_UINTEGER('r', "repeat", &sched.replay_repeat, | ||
1854 | "repeat the workload replay N times (-1: infinite)"), | ||
1855 | OPT_INCR('v', "verbose", &verbose, | ||
1856 | "be more verbose (show symbol address, etc)"), | ||
1857 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, | ||
1858 | "dump raw trace in ASCII"), | ||
1859 | OPT_END() | ||
1860 | }; | ||
1861 | const struct option sched_options[] = { | ||
1862 | OPT_STRING('i', "input", &sched.input_name, "file", | ||
1863 | "input file name"), | ||
1864 | OPT_INCR('v', "verbose", &verbose, | ||
1865 | "be more verbose (show symbol address, etc)"), | ||
1866 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, | ||
1867 | "dump raw trace in ASCII"), | ||
1868 | OPT_END() | ||
1869 | }; | ||
1870 | const char * const latency_usage[] = { | ||
1871 | "perf sched latency [<options>]", | ||
1872 | NULL | ||
1873 | }; | ||
1874 | const char * const replay_usage[] = { | ||
1875 | "perf sched replay [<options>]", | ||
1876 | NULL | ||
1877 | }; | ||
1878 | const char * const sched_usage[] = { | ||
1879 | "perf sched [<options>] {record|latency|map|replay|script}", | ||
1880 | NULL | ||
1881 | }; | ||
1882 | struct trace_sched_handler lat_ops = { | ||
1883 | .wakeup_event = latency_wakeup_event, | ||
1884 | .switch_event = latency_switch_event, | ||
1885 | .runtime_event = latency_runtime_event, | ||
1886 | .fork_event = latency_fork_event, | ||
1887 | .migrate_task_event = latency_migrate_task_event, | ||
1888 | }; | ||
1889 | struct trace_sched_handler map_ops = { | ||
1890 | .switch_event = map_switch_event, | ||
1891 | }; | ||
1892 | struct trace_sched_handler replay_ops = { | ||
1893 | .wakeup_event = replay_wakeup_event, | ||
1894 | .switch_event = replay_switch_event, | ||
1895 | .fork_event = replay_fork_event, | ||
1896 | }; | ||
1897 | |||
1910 | argc = parse_options(argc, argv, sched_options, sched_usage, | 1898 | argc = parse_options(argc, argv, sched_options, sched_usage, |
1911 | PARSE_OPT_STOP_AT_NON_OPTION); | 1899 | PARSE_OPT_STOP_AT_NON_OPTION); |
1912 | if (!argc) | 1900 | if (!argc) |
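Editor's note: cmd_sched() now builds the entire former-global state on the stack with designated initializers, including list heads that refer to the object being defined and a GCC range designator for curr_pid. A compressed sketch of those two initializer tricks follows; struct perf_sched_state and its field set are trimmed stand-ins for the real struct perf_sched.

	#include <pthread.h>
	#include <stdio.h>

	#define MAX_CPUS 8		/* small value just for the sketch */

	struct list_head { struct list_head *next, *prev; };
	#define LIST_HEAD_INIT(name) { &(name), &(name) }

	struct perf_sched_state {
		struct list_head cmp_pid;
		pthread_mutex_t  start_work_mutex;
		unsigned int     curr_pid[MAX_CPUS];
		int              profile_cpu;
	};

	int main(void)
	{
		struct perf_sched_state sched = {
			/* the list head may point at itself: 'sched' is in scope here */
			.cmp_pid          = LIST_HEAD_INIT(sched.cmp_pid),
			.start_work_mutex = PTHREAD_MUTEX_INITIALIZER,
			/* GCC extension: set every array slot to -1 in one designator */
			.curr_pid         = { [0 ... MAX_CPUS - 1] = -1 },
			.profile_cpu      = -1,
		};

		printf("curr_pid[3] = %u, cmp_pid empty = %d\n",
		       sched.curr_pid[3], sched.cmp_pid.next == &sched.cmp_pid);
		return 0;
	}

This is what replaces the static initializers that previously sat at file scope (start_work_mutex, curr_pid[], profile_cpu, the sort lists), and it is where the bss shrink quoted in the commit message comes from.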
@@ -1922,26 +1910,26 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1922 | if (!strncmp(argv[0], "rec", 3)) { | 1910 | if (!strncmp(argv[0], "rec", 3)) { |
1923 | return __cmd_record(argc, argv); | 1911 | return __cmd_record(argc, argv); |
1924 | } else if (!strncmp(argv[0], "lat", 3)) { | 1912 | } else if (!strncmp(argv[0], "lat", 3)) { |
1925 | trace_handler = &lat_ops; | 1913 | sched.tp_handler = &lat_ops; |
1926 | if (argc > 1) { | 1914 | if (argc > 1) { |
1927 | argc = parse_options(argc, argv, latency_options, latency_usage, 0); | 1915 | argc = parse_options(argc, argv, latency_options, latency_usage, 0); |
1928 | if (argc) | 1916 | if (argc) |
1929 | usage_with_options(latency_usage, latency_options); | 1917 | usage_with_options(latency_usage, latency_options); |
1930 | } | 1918 | } |
1931 | setup_sorting(); | 1919 | setup_sorting(&sched, latency_options, latency_usage); |
1932 | return __cmd_lat(); | 1920 | return perf_sched__lat(&sched); |
1933 | } else if (!strcmp(argv[0], "map")) { | 1921 | } else if (!strcmp(argv[0], "map")) { |
1934 | trace_handler = &map_ops; | 1922 | sched.tp_handler = &map_ops; |
1935 | setup_sorting(); | 1923 | setup_sorting(&sched, latency_options, latency_usage); |
1936 | return __cmd_map(); | 1924 | return perf_sched__map(&sched); |
1937 | } else if (!strncmp(argv[0], "rep", 3)) { | 1925 | } else if (!strncmp(argv[0], "rep", 3)) { |
1938 | trace_handler = &replay_ops; | 1926 | sched.tp_handler = &replay_ops; |
1939 | if (argc) { | 1927 | if (argc) { |
1940 | argc = parse_options(argc, argv, replay_options, replay_usage, 0); | 1928 | argc = parse_options(argc, argv, replay_options, replay_usage, 0); |
1941 | if (argc) | 1929 | if (argc) |
1942 | usage_with_options(replay_usage, replay_options); | 1930 | usage_with_options(replay_usage, replay_options); |
1943 | } | 1931 | } |
1944 | return __cmd_replay(); | 1932 | return perf_sched__replay(&sched); |
1945 | } else { | 1933 | } else { |
1946 | usage_with_options(sched_usage, sched_options); | 1934 | usage_with_options(sched_usage, sched_options); |
1947 | } | 1935 | } |