summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-09-16 11:40:48 -0400
committerIngo Molnar <mingo@elte.hu>2009-09-16 10:41:30 -0400
commit0ec04e16d08b69d8da46abbcfa3e3f2cd9738852 (patch)
tree8a63209fd4f45ff1c15a89fd0e3aa5dbdbe58b33
parent80ed0987f363d7eb50193df3e6f6d71451f74bc3 (diff)
perf sched: Add 'perf sched map' scheduling event map printout
This prints a textual context-switching outline of workload captured via perf sched record. For example, on a 16 CPU box it outputs: N1 O1 . . . S1 . . . B0 . *I0 C1 . M1 . 23002.773423 secs N1 O1 . *Q0 . S1 . . . B0 . I0 C1 . M1 . 23002.773423 secs N1 O1 . Q0 . S1 . . . B0 . *R1 C1 . M1 . 23002.773485 secs N1 O1 . Q0 . S1 . *S0 . B0 . R1 C1 . M1 . 23002.773478 secs *L0 O1 . Q0 . S1 . S0 . B0 . R1 C1 . M1 . 23002.773523 secs L0 O1 . *. . S1 . S0 . B0 . R1 C1 . M1 . 23002.773531 secs L0 O1 . . . S1 . S0 . B0 . R1 C1 *T1 M1 . 23002.773547 secs T1 => irqbalance:2089 L0 O1 . . . S1 . S0 . *P0 . R1 C1 T1 M1 . 23002.773549 secs *N1 O1 . . . S1 . S0 . P0 . R1 C1 T1 M1 . 23002.773566 secs N1 O1 . . . *J0 . S0 . P0 . R1 C1 T1 M1 . 23002.773571 secs N1 O1 . . . J0 . S0 *B0 P0 . R1 C1 T1 M1 . 23002.773592 secs N1 O1 . . . J0 . *U0 B0 P0 . R1 C1 T1 M1 . 23002.773582 secs N1 O1 . . . *S1 . U0 B0 P0 . R1 C1 T1 M1 . 23002.773604 secs N1 O1 . . . S1 . U0 B0 *. . R1 C1 T1 M1 . 23002.773615 secs N1 O1 . . . S1 . U0 B0 . . *K0 C1 T1 M1 . 23002.773631 secs N1 O1 . *M0 . S1 . U0 B0 . . K0 C1 T1 M1 . 23002.773624 secs N1 O1 . M0 . S1 . U0 *. . . K0 C1 T1 M1 . 23002.773644 secs N1 O1 . M0 . S1 . U0 . . . *R1 C1 T1 M1 . 23002.773662 secs N1 O1 . M0 . S1 . *. . . . R1 C1 T1 M1 . 23002.773648 secs N1 O1 . *. . S1 . . . . . R1 C1 T1 M1 . 23002.773680 secs N1 O1 . . . *L0 . . . . . R1 C1 T1 M1 . 23002.773717 secs *N0 O1 . . . L0 . . . . . R1 C1 T1 M1 . 23002.773709 secs *N1 O1 . . . L0 . . . . . R1 C1 T1 M1 . 23002.773747 secs Columns stand for individual CPUs, from CPU0 to CPU15, and the two-letter shortcuts stand for tasks that are running on a CPU. '*' denotes the CPU that had the event. A dot signals an idle CPU. New tasks are assigned new two-letter shortcuts - when they occur first they are printed. In the above example 'T1' stood for irqbalance: T1 => irqbalance:2089 Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--tools/perf/builtin-sched.c312
-rw-r--r--tools/perf/util/thread.c2
-rw-r--r--tools/perf/util/thread.h1
3 files changed, 214 insertions, 101 deletions
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index da8f67483ae7..f67e351b050b 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -159,8 +159,6 @@ static struct rb_root atom_root, sorted_atom_root;
159static u64 all_runtime; 159static u64 all_runtime;
160static u64 all_count; 160static u64 all_count;
161 161
162static int read_events(void);
163
164 162
165static u64 get_nsecs(void) 163static u64 get_nsecs(void)
166{ 164{
@@ -634,38 +632,6 @@ static void test_calibrations(void)
634 printf("the sleep test took %Ld nsecs\n", T1-T0); 632 printf("the sleep test took %Ld nsecs\n", T1-T0);
635} 633}
636 634
637static void __cmd_replay(void)
638{
639 unsigned long i;
640
641 calibrate_run_measurement_overhead();
642 calibrate_sleep_measurement_overhead();
643
644 test_calibrations();
645
646 read_events();
647
648 printf("nr_run_events: %ld\n", nr_run_events);
649 printf("nr_sleep_events: %ld\n", nr_sleep_events);
650 printf("nr_wakeup_events: %ld\n", nr_wakeup_events);
651
652 if (targetless_wakeups)
653 printf("target-less wakeups: %ld\n", targetless_wakeups);
654 if (multitarget_wakeups)
655 printf("multi-target wakeups: %ld\n", multitarget_wakeups);
656 if (nr_run_events_optimized)
657 printf("run atoms optimized: %ld\n",
658 nr_run_events_optimized);
659
660 print_task_traces();
661 add_cross_task_wakeups();
662
663 create_tasks();
664 printf("------------------------------------------------------------\n");
665 for (i = 0; i < replay_repeat; i++)
666 run_one_test();
667}
668
669static int 635static int
670process_comm_event(event_t *event, unsigned long offset, unsigned long head) 636process_comm_event(event_t *event, unsigned long offset, unsigned long head)
671{ 637{
@@ -1354,64 +1320,6 @@ static void sort_lat(void)
1354 } 1320 }
1355} 1321}
1356 1322
1357static void __cmd_lat(void)
1358{
1359 struct rb_node *next;
1360
1361 setup_pager();
1362 read_events();
1363 sort_lat();
1364
1365 printf("\n -----------------------------------------------------------------------------------------\n");
1366 printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms |\n");
1367 printf(" -----------------------------------------------------------------------------------------\n");
1368
1369 next = rb_first(&sorted_atom_root);
1370
1371 while (next) {
1372 struct work_atoms *work_list;
1373
1374 work_list = rb_entry(next, struct work_atoms, node);
1375 output_lat_thread(work_list);
1376 next = rb_next(next);
1377 }
1378
1379 printf(" -----------------------------------------------------------------------------------------\n");
1380 printf(" TOTAL: |%11.3f ms |%9Ld |\n",
1381 (double)all_runtime/1e6, all_count);
1382
1383 printf(" ---------------------------------------------------\n");
1384 if (nr_unordered_timestamps && nr_timestamps) {
1385 printf(" INFO: %.3f%% unordered timestamps (%ld out of %ld)\n",
1386 (double)nr_unordered_timestamps/(double)nr_timestamps*100.0,
1387 nr_unordered_timestamps, nr_timestamps);
1388 } else {
1389 }
1390 if (nr_lost_events && nr_events) {
1391 printf(" INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n",
1392 (double)nr_lost_events/(double)nr_events*100.0,
1393 nr_lost_events, nr_events, nr_lost_chunks);
1394 }
1395 if (nr_state_machine_bugs && nr_timestamps) {
1396 printf(" INFO: %.3f%% state machine bugs (%ld out of %ld)",
1397 (double)nr_state_machine_bugs/(double)nr_timestamps*100.0,
1398 nr_state_machine_bugs, nr_timestamps);
1399 if (nr_lost_events)
1400 printf(" (due to lost events?)");
1401 printf("\n");
1402 }
1403 if (nr_context_switch_bugs && nr_timestamps) {
1404 printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)",
1405 (double)nr_context_switch_bugs/(double)nr_timestamps*100.0,
1406 nr_context_switch_bugs, nr_timestamps);
1407 if (nr_lost_events)
1408 printf(" (due to lost events?)");
1409 printf("\n");
1410 }
1411 printf("\n");
1412
1413}
1414
1415static struct trace_sched_handler *trace_handler; 1323static struct trace_sched_handler *trace_handler;
1416 1324
1417static void 1325static void
@@ -1431,19 +1339,106 @@ process_sched_wakeup_event(struct raw_event_sample *raw,
1431 FILL_FIELD(wakeup_event, success, event, raw->data); 1339 FILL_FIELD(wakeup_event, success, event, raw->data);
1432 FILL_FIELD(wakeup_event, cpu, event, raw->data); 1340 FILL_FIELD(wakeup_event, cpu, event, raw->data);
1433 1341
1434 trace_handler->wakeup_event(&wakeup_event, event, cpu, timestamp, thread); 1342 if (trace_handler->wakeup_event)
1343 trace_handler->wakeup_event(&wakeup_event, event, cpu, timestamp, thread);
1435} 1344}
1436 1345
1437/* 1346/*
1438 * Track the current task - that way we can know whether there's any 1347 * Track the current task - that way we can know whether there's any
1439 * weird events, such as a task being switched away that is not current. 1348 * weird events, such as a task being switched away that is not current.
1440 */ 1349 */
1350static int max_cpu = 15;
1351
1441static u32 curr_pid[MAX_CPUS] = { [0 ... MAX_CPUS-1] = -1 }; 1352static u32 curr_pid[MAX_CPUS] = { [0 ... MAX_CPUS-1] = -1 };
1442 1353
1354static struct thread *curr_thread[MAX_CPUS];
1355
1356static char next_shortname1 = 'A';
1357static char next_shortname2 = '0';
1358
1359static void
1360map_switch_event(struct trace_switch_event *switch_event,
1361 struct event *event __used,
1362 int this_cpu,
1363 u64 timestamp,
1364 struct thread *thread __used)
1365{
1366 struct thread *sched_out, *sched_in;
1367 int new_shortname;
1368 u64 timestamp0;
1369 s64 delta;
1370 int cpu;
1371
1372 BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);
1373
1374 if (this_cpu > max_cpu)
1375 max_cpu = this_cpu;
1376
1377 timestamp0 = cpu_last_switched[this_cpu];
1378 cpu_last_switched[this_cpu] = timestamp;
1379 if (timestamp0)
1380 delta = timestamp - timestamp0;
1381 else
1382 delta = 0;
1383
1384 if (delta < 0)
1385 die("hm, delta: %Ld < 0 ?\n", delta);
1386
1387
1388 sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
1389 sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);
1390
1391 curr_thread[this_cpu] = sched_in;
1392
1393 printf(" ");
1394
1395 new_shortname = 0;
1396 if (!sched_in->shortname[0]) {
1397 sched_in->shortname[0] = next_shortname1;
1398 sched_in->shortname[1] = next_shortname2;
1399
1400 if (next_shortname1 < 'Z') {
1401 next_shortname1++;
1402 } else {
1403 next_shortname1='A';
1404 if (next_shortname2 < '9') {
1405 next_shortname2++;
1406 } else {
1407 next_shortname2='0';
1408 }
1409 }
1410 new_shortname = 1;
1411 }
1412
1413 for (cpu = 0; cpu <= max_cpu; cpu++) {
1414 if (cpu != this_cpu)
1415 printf(" ");
1416 else
1417 printf("*");
1418
1419 if (curr_thread[cpu]) {
1420 if (curr_thread[cpu]->pid)
1421 printf("%2s ", curr_thread[cpu]->shortname);
1422 else
1423 printf(". ");
1424 } else
1425 printf(" ");
1426 }
1427
1428 printf(" %12.6f secs ", (double)timestamp/1e9);
1429 if (new_shortname) {
1430 printf("%s => %s:%d\n",
1431 sched_in->shortname, sched_in->comm, sched_in->pid);
1432 } else {
1433 printf("\n");
1434 }
1435}
1436
1437
1443static void 1438static void
1444process_sched_switch_event(struct raw_event_sample *raw, 1439process_sched_switch_event(struct raw_event_sample *raw,
1445 struct event *event, 1440 struct event *event,
1446 int cpu, 1441 int this_cpu,
1447 u64 timestamp __used, 1442 u64 timestamp __used,
1448 struct thread *thread __used) 1443 struct thread *thread __used)
1449{ 1444{
@@ -1459,17 +1454,18 @@ process_sched_switch_event(struct raw_event_sample *raw,
1459 FILL_FIELD(switch_event, next_pid, event, raw->data); 1454 FILL_FIELD(switch_event, next_pid, event, raw->data);
1460 FILL_FIELD(switch_event, next_prio, event, raw->data); 1455 FILL_FIELD(switch_event, next_prio, event, raw->data);
1461 1456
1462 if (curr_pid[cpu] != (u32)-1) { 1457 if (curr_pid[this_cpu] != (u32)-1) {
1463 /* 1458 /*
1464 * Are we trying to switch away a PID that is 1459 * Are we trying to switch away a PID that is
1465 * not current? 1460 * not current?
1466 */ 1461 */
1467 if (curr_pid[cpu] != switch_event.prev_pid) 1462 if (curr_pid[this_cpu] != switch_event.prev_pid)
1468 nr_context_switch_bugs++; 1463 nr_context_switch_bugs++;
1469 } 1464 }
1470 curr_pid[cpu] = switch_event.next_pid; 1465 if (trace_handler->switch_event)
1466 trace_handler->switch_event(&switch_event, event, this_cpu, timestamp, thread);
1471 1467
1472 trace_handler->switch_event(&switch_event, event, cpu, timestamp, thread); 1468 curr_pid[this_cpu] = switch_event.next_pid;
1473} 1469}
1474 1470
1475static void 1471static void
@@ -1486,7 +1482,8 @@ process_sched_runtime_event(struct raw_event_sample *raw,
1486 FILL_FIELD(runtime_event, runtime, event, raw->data); 1482 FILL_FIELD(runtime_event, runtime, event, raw->data);
1487 FILL_FIELD(runtime_event, vruntime, event, raw->data); 1483 FILL_FIELD(runtime_event, vruntime, event, raw->data);
1488 1484
1489 trace_handler->runtime_event(&runtime_event, event, cpu, timestamp, thread); 1485 if (trace_handler->runtime_event)
1486 trace_handler->runtime_event(&runtime_event, event, cpu, timestamp, thread);
1490} 1487}
1491 1488
1492static void 1489static void
@@ -1505,7 +1502,8 @@ process_sched_fork_event(struct raw_event_sample *raw,
1505 FILL_ARRAY(fork_event, child_comm, event, raw->data); 1502 FILL_ARRAY(fork_event, child_comm, event, raw->data);
1506 FILL_FIELD(fork_event, child_pid, event, raw->data); 1503 FILL_FIELD(fork_event, child_pid, event, raw->data);
1507 1504
1508 trace_handler->fork_event(&fork_event, event, cpu, timestamp, thread); 1505 if (trace_handler->fork_event)
1506 trace_handler->fork_event(&fork_event, event, cpu, timestamp, thread);
1509} 1507}
1510 1508
1511static void 1509static void
@@ -1748,6 +1746,116 @@ more:
1748 return rc; 1746 return rc;
1749} 1747}
1750 1748
1749static void print_bad_events(void)
1750{
1751 if (nr_unordered_timestamps && nr_timestamps) {
1752 printf(" INFO: %.3f%% unordered timestamps (%ld out of %ld)\n",
1753 (double)nr_unordered_timestamps/(double)nr_timestamps*100.0,
1754 nr_unordered_timestamps, nr_timestamps);
1755 }
1756 if (nr_lost_events && nr_events) {
1757 printf(" INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n",
1758 (double)nr_lost_events/(double)nr_events*100.0,
1759 nr_lost_events, nr_events, nr_lost_chunks);
1760 }
1761 if (nr_state_machine_bugs && nr_timestamps) {
1762 printf(" INFO: %.3f%% state machine bugs (%ld out of %ld)",
1763 (double)nr_state_machine_bugs/(double)nr_timestamps*100.0,
1764 nr_state_machine_bugs, nr_timestamps);
1765 if (nr_lost_events)
1766 printf(" (due to lost events?)");
1767 printf("\n");
1768 }
1769 if (nr_context_switch_bugs && nr_timestamps) {
1770 printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)",
1771 (double)nr_context_switch_bugs/(double)nr_timestamps*100.0,
1772 nr_context_switch_bugs, nr_timestamps);
1773 if (nr_lost_events)
1774 printf(" (due to lost events?)");
1775 printf("\n");
1776 }
1777}
1778
1779static void __cmd_lat(void)
1780{
1781 struct rb_node *next;
1782
1783 setup_pager();
1784 read_events();
1785 sort_lat();
1786
1787 printf("\n -----------------------------------------------------------------------------------------\n");
1788 printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms |\n");
1789 printf(" -----------------------------------------------------------------------------------------\n");
1790
1791 next = rb_first(&sorted_atom_root);
1792
1793 while (next) {
1794 struct work_atoms *work_list;
1795
1796 work_list = rb_entry(next, struct work_atoms, node);
1797 output_lat_thread(work_list);
1798 next = rb_next(next);
1799 }
1800
1801 printf(" -----------------------------------------------------------------------------------------\n");
1802 printf(" TOTAL: |%11.3f ms |%9Ld |\n",
1803 (double)all_runtime/1e6, all_count);
1804
1805 printf(" ---------------------------------------------------\n");
1806
1807 print_bad_events();
1808 printf("\n");
1809
1810}
1811
1812static struct trace_sched_handler map_ops = {
1813 .wakeup_event = NULL,
1814 .switch_event = map_switch_event,
1815 .runtime_event = NULL,
1816 .fork_event = NULL,
1817};
1818
1819static void __cmd_map(void)
1820{
1821 setup_pager();
1822 read_events();
1823 print_bad_events();
1824}
1825
1826static void __cmd_replay(void)
1827{
1828 unsigned long i;
1829
1830 calibrate_run_measurement_overhead();
1831 calibrate_sleep_measurement_overhead();
1832
1833 test_calibrations();
1834
1835 read_events();
1836
1837 printf("nr_run_events: %ld\n", nr_run_events);
1838 printf("nr_sleep_events: %ld\n", nr_sleep_events);
1839 printf("nr_wakeup_events: %ld\n", nr_wakeup_events);
1840
1841 if (targetless_wakeups)
1842 printf("target-less wakeups: %ld\n", targetless_wakeups);
1843 if (multitarget_wakeups)
1844 printf("multi-target wakeups: %ld\n", multitarget_wakeups);
1845 if (nr_run_events_optimized)
1846 printf("run atoms optimized: %ld\n",
1847 nr_run_events_optimized);
1848
1849 print_task_traces();
1850 add_cross_task_wakeups();
1851
1852 create_tasks();
1853 printf("------------------------------------------------------------\n");
1854 for (i = 0; i < replay_repeat; i++)
1855 run_one_test();
1856}
1857
1858
1751static const char * const sched_usage[] = { 1859static const char * const sched_usage[] = {
1752 "perf sched [<options>] {record|latency|replay|trace}", 1860 "perf sched [<options>] {record|latency|replay|trace}",
1753 NULL 1861 NULL
@@ -1867,6 +1975,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used)
1867 } 1975 }
1868 setup_sorting(); 1976 setup_sorting();
1869 __cmd_lat(); 1977 __cmd_lat();
1978 } else if (!strcmp(argv[0], "map")) {
1979 trace_handler = &map_ops;
1980 setup_sorting();
1981 __cmd_map();
1870 } else if (!strncmp(argv[0], "rep", 3)) { 1982 } else if (!strncmp(argv[0], "rep", 3)) {
1871 trace_handler = &replay_ops; 1983 trace_handler = &replay_ops;
1872 if (argc) { 1984 if (argc) {
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 12c4341078f9..45efb5db0d19 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -8,7 +8,7 @@
8 8
9static struct thread *thread__new(pid_t pid) 9static struct thread *thread__new(pid_t pid)
10{ 10{
11 struct thread *self = malloc(sizeof(*self)); 11 struct thread *self = calloc(1, sizeof(*self));
12 12
13 if (self != NULL) { 13 if (self != NULL) {
14 self->pid = pid; 14 self->pid = pid;
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 665d1f3dc977..32aea3c1c2ad 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -7,6 +7,7 @@ struct thread {
7 struct rb_node rb_node; 7 struct rb_node rb_node;
8 struct list_head maps; 8 struct list_head maps;
9 pid_t pid; 9 pid_t pid;
10 char shortname[3];
10 char *comm; 11 char *comm;
11}; 12};
12 13