about summary refs log tree commit diff stats
path: root/tools/perf/builtin-sched.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/builtin-sched.c')
-rw-r--r-- tools/perf/builtin-sched.c 306
1 files changed, 142 insertions, 164 deletions
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index ea9c15c0cdfe..c9c68563e964 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -11,6 +11,7 @@
11#include "util/trace-event.h" 11#include "util/trace-event.h"
12 12
13#include "util/debug.h" 13#include "util/debug.h"
14#include "util/data_map.h"
14 15
15#include <sys/types.h> 16#include <sys/types.h>
16#include <sys/prctl.h> 17#include <sys/prctl.h>
@@ -20,21 +21,20 @@
20#include <math.h> 21#include <math.h>
21 22
22static char const *input_name = "perf.data"; 23static char const *input_name = "perf.data";
23static int input;
24static unsigned long page_size;
25static unsigned long mmap_window = 32;
26 24
27static unsigned long total_comm = 0; 25static unsigned long total_comm = 0;
28 26
29static struct rb_root threads;
30static struct thread *last_match;
31
32static struct perf_header *header; 27static struct perf_header *header;
33static u64 sample_type; 28static u64 sample_type;
34 29
35static char default_sort_order[] = "avg, max, switch, runtime"; 30static char default_sort_order[] = "avg, max, switch, runtime";
36static char *sort_order = default_sort_order; 31static char *sort_order = default_sort_order;
37 32
33static int profile_cpu = -1;
34
35static char *cwd;
36static int cwdlen;
37
38#define PR_SET_NAME 15 /* Set process name */ 38#define PR_SET_NAME 15 /* Set process name */
39#define MAX_CPUS 4096 39#define MAX_CPUS 4096
40 40
@@ -74,6 +74,7 @@ enum sched_event_type {
74 SCHED_EVENT_RUN, 74 SCHED_EVENT_RUN,
75 SCHED_EVENT_SLEEP, 75 SCHED_EVENT_SLEEP,
76 SCHED_EVENT_WAKEUP, 76 SCHED_EVENT_WAKEUP,
77 SCHED_EVENT_MIGRATION,
77}; 78};
78 79
79struct sched_atom { 80struct sched_atom {
@@ -398,6 +399,8 @@ process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom)
398 ret = sem_post(atom->wait_sem); 399 ret = sem_post(atom->wait_sem);
399 BUG_ON(ret); 400 BUG_ON(ret);
400 break; 401 break;
402 case SCHED_EVENT_MIGRATION:
403 break;
401 default: 404 default:
402 BUG_ON(1); 405 BUG_ON(1);
403 } 406 }
@@ -635,9 +638,7 @@ static void test_calibrations(void)
635static int 638static int
636process_comm_event(event_t *event, unsigned long offset, unsigned long head) 639process_comm_event(event_t *event, unsigned long offset, unsigned long head)
637{ 640{
638 struct thread *thread; 641 struct thread *thread = threads__findnew(event->comm.tid);
639
640 thread = threads__findnew(event->comm.pid, &threads, &last_match);
641 642
642 dump_printf("%p [%p]: perf_event_comm: %s:%d\n", 643 dump_printf("%p [%p]: perf_event_comm: %s:%d\n",
643 (void *)(offset + head), 644 (void *)(offset + head),
@@ -745,6 +746,22 @@ struct trace_fork_event {
745 u32 child_pid; 746 u32 child_pid;
746}; 747};
747 748
749struct trace_migrate_task_event {
750 u32 size;
751
752 u16 common_type;
753 u8 common_flags;
754 u8 common_preempt_count;
755 u32 common_pid;
756 u32 common_tgid;
757
758 char comm[16];
759 u32 pid;
760
761 u32 prio;
762 u32 cpu;
763};
764
748struct trace_sched_handler { 765struct trace_sched_handler {
749 void (*switch_event)(struct trace_switch_event *, 766 void (*switch_event)(struct trace_switch_event *,
750 struct event *, 767 struct event *,
@@ -769,6 +786,12 @@ struct trace_sched_handler {
769 int cpu, 786 int cpu,
770 u64 timestamp, 787 u64 timestamp,
771 struct thread *thread); 788 struct thread *thread);
789
790 void (*migrate_task_event)(struct trace_migrate_task_event *,
791 struct event *,
792 int cpu,
793 u64 timestamp,
794 struct thread *thread);
772}; 795};
773 796
774 797
@@ -1058,8 +1081,8 @@ latency_switch_event(struct trace_switch_event *switch_event,
1058 die("hm, delta: %Ld < 0 ?\n", delta); 1081 die("hm, delta: %Ld < 0 ?\n", delta);
1059 1082
1060 1083
1061 sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match); 1084 sched_out = threads__findnew(switch_event->prev_pid);
1062 sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match); 1085 sched_in = threads__findnew(switch_event->next_pid);
1063 1086
1064 out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); 1087 out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
1065 if (!out_events) { 1088 if (!out_events) {
@@ -1092,13 +1115,10 @@ latency_runtime_event(struct trace_runtime_event *runtime_event,
1092 u64 timestamp, 1115 u64 timestamp,
1093 struct thread *this_thread __used) 1116 struct thread *this_thread __used)
1094{ 1117{
1095 struct work_atoms *atoms; 1118 struct thread *thread = threads__findnew(runtime_event->pid);
1096 struct thread *thread; 1119 struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
1097 1120
1098 BUG_ON(cpu >= MAX_CPUS || cpu < 0); 1121 BUG_ON(cpu >= MAX_CPUS || cpu < 0);
1099
1100 thread = threads__findnew(runtime_event->pid, &threads, &last_match);
1101 atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
1102 if (!atoms) { 1122 if (!atoms) {
1103 thread_atoms_insert(thread); 1123 thread_atoms_insert(thread);
1104 atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); 1124 atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
@@ -1125,7 +1145,7 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
1125 if (!wakeup_event->success) 1145 if (!wakeup_event->success)
1126 return; 1146 return;
1127 1147
1128 wakee = threads__findnew(wakeup_event->pid, &threads, &last_match); 1148 wakee = threads__findnew(wakeup_event->pid);
1129 atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); 1149 atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
1130 if (!atoms) { 1150 if (!atoms) {
1131 thread_atoms_insert(wakee); 1151 thread_atoms_insert(wakee);
@@ -1139,7 +1159,12 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
1139 1159
1140 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1160 atom = list_entry(atoms->work_list.prev, struct work_atom, list);
1141 1161
1142 if (atom->state != THREAD_SLEEPING) 1162 /*
1163 * You WILL be missing events if you've recorded only
1164 * one CPU, or are looking at only one, so don't
1165 * make useless noise.
1166 */
1167 if (profile_cpu == -1 && atom->state != THREAD_SLEEPING)
1143 nr_state_machine_bugs++; 1168 nr_state_machine_bugs++;
1144 1169
1145 nr_timestamps++; 1170 nr_timestamps++;
@@ -1152,11 +1177,51 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
1152 atom->wake_up_time = timestamp; 1177 atom->wake_up_time = timestamp;
1153} 1178}
1154 1179
1180static void
1181latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event,
1182 struct event *__event __used,
1183 int cpu __used,
1184 u64 timestamp,
1185 struct thread *thread __used)
1186{
1187 struct work_atoms *atoms;
1188 struct work_atom *atom;
1189 struct thread *migrant;
1190
1191 /*
1192 * Only need to worry about migration when profiling one CPU.
1193 */
1194 if (profile_cpu == -1)
1195 return;
1196
1197 migrant = threads__findnew(migrate_task_event->pid);
1198 atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
1199 if (!atoms) {
1200 thread_atoms_insert(migrant);
1201 register_pid(migrant->pid, migrant->comm);
1202 atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
1203 if (!atoms)
1204 die("migration-event: Internal tree error");
1205 add_sched_out_event(atoms, 'R', timestamp);
1206 }
1207
1208 BUG_ON(list_empty(&atoms->work_list));
1209
1210 atom = list_entry(atoms->work_list.prev, struct work_atom, list);
1211 atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp;
1212
1213 nr_timestamps++;
1214
1215 if (atom->sched_out_time > timestamp)
1216 nr_unordered_timestamps++;
1217}
1218
1155static struct trace_sched_handler lat_ops = { 1219static struct trace_sched_handler lat_ops = {
1156 .wakeup_event = latency_wakeup_event, 1220 .wakeup_event = latency_wakeup_event,
1157 .switch_event = latency_switch_event, 1221 .switch_event = latency_switch_event,
1158 .runtime_event = latency_runtime_event, 1222 .runtime_event = latency_runtime_event,
1159 .fork_event = latency_fork_event, 1223 .fork_event = latency_fork_event,
1224 .migrate_task_event = latency_migrate_task_event,
1160}; 1225};
1161 1226
1162static void output_lat_thread(struct work_atoms *work_list) 1227static void output_lat_thread(struct work_atoms *work_list)
@@ -1287,7 +1352,7 @@ static struct sort_dimension *available_sorts[] = {
1287 1352
1288static LIST_HEAD(sort_list); 1353static LIST_HEAD(sort_list);
1289 1354
1290static int sort_dimension__add(char *tok, struct list_head *list) 1355static int sort_dimension__add(const char *tok, struct list_head *list)
1291{ 1356{
1292 int i; 1357 int i;
1293 1358
@@ -1385,8 +1450,8 @@ map_switch_event(struct trace_switch_event *switch_event,
1385 die("hm, delta: %Ld < 0 ?\n", delta); 1450 die("hm, delta: %Ld < 0 ?\n", delta);
1386 1451
1387 1452
1388 sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match); 1453 sched_out = threads__findnew(switch_event->prev_pid);
1389 sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match); 1454 sched_in = threads__findnew(switch_event->next_pid);
1390 1455
1391 curr_thread[this_cpu] = sched_in; 1456 curr_thread[this_cpu] = sched_in;
1392 1457
@@ -1517,6 +1582,26 @@ process_sched_exit_event(struct event *event,
1517} 1582}
1518 1583
1519static void 1584static void
1585process_sched_migrate_task_event(struct raw_event_sample *raw,
1586 struct event *event,
1587 int cpu __used,
1588 u64 timestamp __used,
1589 struct thread *thread __used)
1590{
1591 struct trace_migrate_task_event migrate_task_event;
1592
1593 FILL_COMMON_FIELDS(migrate_task_event, event, raw->data);
1594
1595 FILL_ARRAY(migrate_task_event, comm, event, raw->data);
1596 FILL_FIELD(migrate_task_event, pid, event, raw->data);
1597 FILL_FIELD(migrate_task_event, prio, event, raw->data);
1598 FILL_FIELD(migrate_task_event, cpu, event, raw->data);
1599
1600 if (trace_handler->migrate_task_event)
1601 trace_handler->migrate_task_event(&migrate_task_event, event, cpu, timestamp, thread);
1602}
1603
1604static void
1520process_raw_event(event_t *raw_event __used, void *more_data, 1605process_raw_event(event_t *raw_event __used, void *more_data,
1521 int cpu, u64 timestamp, struct thread *thread) 1606 int cpu, u64 timestamp, struct thread *thread)
1522{ 1607{
@@ -1539,23 +1624,24 @@ process_raw_event(event_t *raw_event __used, void *more_data,
1539 process_sched_fork_event(raw, event, cpu, timestamp, thread); 1624 process_sched_fork_event(raw, event, cpu, timestamp, thread);
1540 if (!strcmp(event->name, "sched_process_exit")) 1625 if (!strcmp(event->name, "sched_process_exit"))
1541 process_sched_exit_event(event, cpu, timestamp, thread); 1626 process_sched_exit_event(event, cpu, timestamp, thread);
1627 if (!strcmp(event->name, "sched_migrate_task"))
1628 process_sched_migrate_task_event(raw, event, cpu, timestamp, thread);
1542} 1629}
1543 1630
1544static int 1631static int
1545process_sample_event(event_t *event, unsigned long offset, unsigned long head) 1632process_sample_event(event_t *event, unsigned long offset, unsigned long head)
1546{ 1633{
1547 char level;
1548 int show = 0;
1549 struct dso *dso = NULL;
1550 struct thread *thread; 1634 struct thread *thread;
1551 u64 ip = event->ip.ip; 1635 u64 ip = event->ip.ip;
1552 u64 timestamp = -1; 1636 u64 timestamp = -1;
1553 u32 cpu = -1; 1637 u32 cpu = -1;
1554 u64 period = 1; 1638 u64 period = 1;
1555 void *more_data = event->ip.__more_data; 1639 void *more_data = event->ip.__more_data;
1556 int cpumode;
1557 1640
1558 thread = threads__findnew(event->ip.pid, &threads, &last_match); 1641 if (!(sample_type & PERF_SAMPLE_RAW))
1642 return 0;
1643
1644 thread = threads__findnew(event->ip.pid);
1559 1645
1560 if (sample_type & PERF_SAMPLE_TIME) { 1646 if (sample_type & PERF_SAMPLE_TIME) {
1561 timestamp = *(u64 *)more_data; 1647 timestamp = *(u64 *)more_data;
@@ -1589,161 +1675,52 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
1589 return -1; 1675 return -1;
1590 } 1676 }
1591 1677
1592 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 1678 if (profile_cpu != -1 && profile_cpu != (int) cpu)
1593 1679 return 0;
1594 if (cpumode == PERF_RECORD_MISC_KERNEL) {
1595 show = SHOW_KERNEL;
1596 level = 'k';
1597
1598 dso = kernel_dso;
1599
1600 dump_printf(" ...... dso: %s\n", dso->name);
1601
1602 } else if (cpumode == PERF_RECORD_MISC_USER) {
1603
1604 show = SHOW_USER;
1605 level = '.';
1606
1607 } else {
1608 show = SHOW_HV;
1609 level = 'H';
1610
1611 dso = hypervisor_dso;
1612
1613 dump_printf(" ...... dso: [hypervisor]\n");
1614 }
1615 1680
1616 if (sample_type & PERF_SAMPLE_RAW) 1681 process_raw_event(event, more_data, cpu, timestamp, thread);
1617 process_raw_event(event, more_data, cpu, timestamp, thread);
1618 1682
1619 return 0; 1683 return 0;
1620} 1684}
1621 1685
1622static int 1686static int
1623process_event(event_t *event, unsigned long offset, unsigned long head) 1687process_lost_event(event_t *event __used,
1688 unsigned long offset __used,
1689 unsigned long head __used)
1624{ 1690{
1625 trace_event(event); 1691 nr_lost_chunks++;
1692 nr_lost_events += event->lost.lost;
1626 1693
1627 nr_events++; 1694 return 0;
1628 switch (event->header.type) { 1695}
1629 case PERF_RECORD_MMAP:
1630 return 0;
1631 case PERF_RECORD_LOST:
1632 nr_lost_chunks++;
1633 nr_lost_events += event->lost.lost;
1634 return 0;
1635
1636 case PERF_RECORD_COMM:
1637 return process_comm_event(event, offset, head);
1638
1639 case PERF_RECORD_EXIT ... PERF_RECORD_READ:
1640 return 0;
1641 1696
1642 case PERF_RECORD_SAMPLE: 1697static int sample_type_check(u64 type)
1643 return process_sample_event(event, offset, head); 1698{
1699 sample_type = type;
1644 1700
1645 case PERF_RECORD_MAX: 1701 if (!(sample_type & PERF_SAMPLE_RAW)) {
1646 default: 1702 fprintf(stderr,
1703 "No trace sample to read. Did you call perf record "
1704 "without -R?");
1647 return -1; 1705 return -1;
1648 } 1706 }
1649 1707
1650 return 0; 1708 return 0;
1651} 1709}
1652 1710
1711static struct perf_file_handler file_handler = {
1712 .process_sample_event = process_sample_event,
1713 .process_comm_event = process_comm_event,
1714 .process_lost_event = process_lost_event,
1715 .sample_type_check = sample_type_check,
1716};
1717
1653static int read_events(void) 1718static int read_events(void)
1654{ 1719{
1655 int ret, rc = EXIT_FAILURE; 1720 register_idle_thread();
1656 unsigned long offset = 0; 1721 register_perf_file_handler(&file_handler);
1657 unsigned long head = 0;
1658 struct stat perf_stat;
1659 event_t *event;
1660 uint32_t size;
1661 char *buf;
1662
1663 trace_report();
1664 register_idle_thread(&threads, &last_match);
1665
1666 input = open(input_name, O_RDONLY);
1667 if (input < 0) {
1668 perror("failed to open file");
1669 exit(-1);
1670 }
1671
1672 ret = fstat(input, &perf_stat);
1673 if (ret < 0) {
1674 perror("failed to stat file");
1675 exit(-1);
1676 }
1677
1678 if (!perf_stat.st_size) {
1679 fprintf(stderr, "zero-sized file, nothing to do!\n");
1680 exit(0);
1681 }
1682 header = perf_header__read(input);
1683 head = header->data_offset;
1684 sample_type = perf_header__sample_type(header);
1685
1686 if (!(sample_type & PERF_SAMPLE_RAW))
1687 die("No trace sample to read. Did you call perf record "
1688 "without -R?");
1689
1690 if (load_kernel() < 0) {
1691 perror("failed to load kernel symbols");
1692 return EXIT_FAILURE;
1693 }
1694
1695remap:
1696 buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
1697 MAP_SHARED, input, offset);
1698 if (buf == MAP_FAILED) {
1699 perror("failed to mmap file");
1700 exit(-1);
1701 }
1702
1703more:
1704 event = (event_t *)(buf + head);
1705
1706 size = event->header.size;
1707 if (!size)
1708 size = 8;
1709
1710 if (head + event->header.size >= page_size * mmap_window) {
1711 unsigned long shift = page_size * (head / page_size);
1712 int res;
1713
1714 res = munmap(buf, page_size * mmap_window);
1715 assert(res == 0);
1716
1717 offset += shift;
1718 head -= shift;
1719 goto remap;
1720 }
1721
1722 size = event->header.size;
1723
1724
1725 if (!size || process_event(event, offset, head) < 0) {
1726
1727 /*
1728 * assume we lost track of the stream, check alignment, and
1729 * increment a single u64 in the hope to catch on again 'soon'.
1730 */
1731
1732 if (unlikely(head & 7))
1733 head &= ~7ULL;
1734
1735 size = 8;
1736 }
1737
1738 head += size;
1739
1740 if (offset + head < (unsigned long)perf_stat.st_size)
1741 goto more;
1742
1743 rc = EXIT_SUCCESS;
1744 close(input);
1745 1722
1746 return rc; 1723 return mmap_dispatch_perf_file(&header, input_name, 0, 0, &cwdlen, &cwd);
1747} 1724}
1748 1725
1749static void print_bad_events(void) 1726static void print_bad_events(void)
@@ -1883,6 +1860,8 @@ static const struct option latency_options[] = {
1883 "sort by key(s): runtime, switch, avg, max"), 1860 "sort by key(s): runtime, switch, avg, max"),
1884 OPT_BOOLEAN('v', "verbose", &verbose, 1861 OPT_BOOLEAN('v', "verbose", &verbose,
1885 "be more verbose (show symbol address, etc)"), 1862 "be more verbose (show symbol address, etc)"),
1863 OPT_INTEGER('C', "CPU", &profile_cpu,
1864 "CPU to profile on"),
1886 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 1865 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
1887 "dump raw trace in ASCII"), 1866 "dump raw trace in ASCII"),
1888 OPT_END() 1867 OPT_END()
@@ -1917,7 +1896,7 @@ static void setup_sorting(void)
1917 1896
1918 free(str); 1897 free(str);
1919 1898
1920 sort_dimension__add((char *)"pid", &cmp_pid); 1899 sort_dimension__add("pid", &cmp_pid);
1921} 1900}
1922 1901
1923static const char *record_args[] = { 1902static const char *record_args[] = {
@@ -1961,7 +1940,6 @@ static int __cmd_record(int argc, const char **argv)
1961int cmd_sched(int argc, const char **argv, const char *prefix __used) 1940int cmd_sched(int argc, const char **argv, const char *prefix __used)
1962{ 1941{
1963 symbol__init(); 1942 symbol__init();
1964 page_size = getpagesize();
1965 1943
1966 argc = parse_options(argc, argv, sched_options, sched_usage, 1944 argc = parse_options(argc, argv, sched_options, sched_usage,
1967 PARSE_OPT_STOP_AT_NON_OPTION); 1945 PARSE_OPT_STOP_AT_NON_OPTION);