aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/builtin-sched.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/builtin-sched.c')
-rw-r--r--tools/perf/builtin-sched.c312
1 files changed, 144 insertions, 168 deletions
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index ce2d5be4f30e..df44b756cecc 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -11,6 +11,7 @@
11#include "util/trace-event.h" 11#include "util/trace-event.h"
12 12
13#include "util/debug.h" 13#include "util/debug.h"
14#include "util/data_map.h"
14 15
15#include <sys/types.h> 16#include <sys/types.h>
16#include <sys/prctl.h> 17#include <sys/prctl.h>
@@ -20,26 +21,23 @@
20#include <math.h> 21#include <math.h>
21 22
22static char const *input_name = "perf.data"; 23static char const *input_name = "perf.data";
23static int input;
24static unsigned long page_size;
25static unsigned long mmap_window = 32;
26 24
27static unsigned long total_comm = 0; 25static unsigned long total_comm = 0;
28 26
29static struct rb_root threads;
30static struct thread *last_match;
31
32static struct perf_header *header; 27static struct perf_header *header;
33static u64 sample_type; 28static u64 sample_type;
34 29
35static char default_sort_order[] = "avg, max, switch, runtime"; 30static char default_sort_order[] = "avg, max, switch, runtime";
36static char *sort_order = default_sort_order; 31static char *sort_order = default_sort_order;
37 32
33static int profile_cpu = -1;
34
35static char *cwd;
36static int cwdlen;
37
38#define PR_SET_NAME 15 /* Set process name */ 38#define PR_SET_NAME 15 /* Set process name */
39#define MAX_CPUS 4096 39#define MAX_CPUS 4096
40 40
41#define BUG_ON(x) assert(!(x))
42
43static u64 run_measurement_overhead; 41static u64 run_measurement_overhead;
44static u64 sleep_measurement_overhead; 42static u64 sleep_measurement_overhead;
45 43
@@ -74,6 +72,7 @@ enum sched_event_type {
74 SCHED_EVENT_RUN, 72 SCHED_EVENT_RUN,
75 SCHED_EVENT_SLEEP, 73 SCHED_EVENT_SLEEP,
76 SCHED_EVENT_WAKEUP, 74 SCHED_EVENT_WAKEUP,
75 SCHED_EVENT_MIGRATION,
77}; 76};
78 77
79struct sched_atom { 78struct sched_atom {
@@ -398,6 +397,8 @@ process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom)
398 ret = sem_post(atom->wait_sem); 397 ret = sem_post(atom->wait_sem);
399 BUG_ON(ret); 398 BUG_ON(ret);
400 break; 399 break;
400 case SCHED_EVENT_MIGRATION:
401 break;
401 default: 402 default:
402 BUG_ON(1); 403 BUG_ON(1);
403 } 404 }
@@ -635,9 +636,7 @@ static void test_calibrations(void)
635static int 636static int
636process_comm_event(event_t *event, unsigned long offset, unsigned long head) 637process_comm_event(event_t *event, unsigned long offset, unsigned long head)
637{ 638{
638 struct thread *thread; 639 struct thread *thread = threads__findnew(event->comm.tid);
639
640 thread = threads__findnew(event->comm.pid, &threads, &last_match);
641 640
642 dump_printf("%p [%p]: perf_event_comm: %s:%d\n", 641 dump_printf("%p [%p]: perf_event_comm: %s:%d\n",
643 (void *)(offset + head), 642 (void *)(offset + head),
@@ -745,6 +744,22 @@ struct trace_fork_event {
745 u32 child_pid; 744 u32 child_pid;
746}; 745};
747 746
747struct trace_migrate_task_event {
748 u32 size;
749
750 u16 common_type;
751 u8 common_flags;
752 u8 common_preempt_count;
753 u32 common_pid;
754 u32 common_tgid;
755
756 char comm[16];
757 u32 pid;
758
759 u32 prio;
760 u32 cpu;
761};
762
748struct trace_sched_handler { 763struct trace_sched_handler {
749 void (*switch_event)(struct trace_switch_event *, 764 void (*switch_event)(struct trace_switch_event *,
750 struct event *, 765 struct event *,
@@ -769,6 +784,12 @@ struct trace_sched_handler {
769 int cpu, 784 int cpu,
770 u64 timestamp, 785 u64 timestamp,
771 struct thread *thread); 786 struct thread *thread);
787
788 void (*migrate_task_event)(struct trace_migrate_task_event *,
789 struct event *,
790 int cpu,
791 u64 timestamp,
792 struct thread *thread);
772}; 793};
773 794
774 795
@@ -1058,8 +1079,8 @@ latency_switch_event(struct trace_switch_event *switch_event,
1058 die("hm, delta: %Ld < 0 ?\n", delta); 1079 die("hm, delta: %Ld < 0 ?\n", delta);
1059 1080
1060 1081
1061 sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match); 1082 sched_out = threads__findnew(switch_event->prev_pid);
1062 sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match); 1083 sched_in = threads__findnew(switch_event->next_pid);
1063 1084
1064 out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); 1085 out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
1065 if (!out_events) { 1086 if (!out_events) {
@@ -1092,13 +1113,10 @@ latency_runtime_event(struct trace_runtime_event *runtime_event,
1092 u64 timestamp, 1113 u64 timestamp,
1093 struct thread *this_thread __used) 1114 struct thread *this_thread __used)
1094{ 1115{
1095 struct work_atoms *atoms; 1116 struct thread *thread = threads__findnew(runtime_event->pid);
1096 struct thread *thread; 1117 struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
1097 1118
1098 BUG_ON(cpu >= MAX_CPUS || cpu < 0); 1119 BUG_ON(cpu >= MAX_CPUS || cpu < 0);
1099
1100 thread = threads__findnew(runtime_event->pid, &threads, &last_match);
1101 atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
1102 if (!atoms) { 1120 if (!atoms) {
1103 thread_atoms_insert(thread); 1121 thread_atoms_insert(thread);
1104 atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); 1122 atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
@@ -1125,7 +1143,7 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
1125 if (!wakeup_event->success) 1143 if (!wakeup_event->success)
1126 return; 1144 return;
1127 1145
1128 wakee = threads__findnew(wakeup_event->pid, &threads, &last_match); 1146 wakee = threads__findnew(wakeup_event->pid);
1129 atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); 1147 atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
1130 if (!atoms) { 1148 if (!atoms) {
1131 thread_atoms_insert(wakee); 1149 thread_atoms_insert(wakee);
@@ -1139,7 +1157,12 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
1139 1157
1140 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1158 atom = list_entry(atoms->work_list.prev, struct work_atom, list);
1141 1159
1142 if (atom->state != THREAD_SLEEPING) 1160 /*
1161 * You WILL be missing events if you've recorded only
1162 * one CPU, or are only looking at only one, so don't
1163 * make useless noise.
1164 */
1165 if (profile_cpu == -1 && atom->state != THREAD_SLEEPING)
1143 nr_state_machine_bugs++; 1166 nr_state_machine_bugs++;
1144 1167
1145 nr_timestamps++; 1168 nr_timestamps++;
@@ -1152,11 +1175,51 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
1152 atom->wake_up_time = timestamp; 1175 atom->wake_up_time = timestamp;
1153} 1176}
1154 1177
1178static void
1179latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event,
1180 struct event *__event __used,
1181 int cpu __used,
1182 u64 timestamp,
1183 struct thread *thread __used)
1184{
1185 struct work_atoms *atoms;
1186 struct work_atom *atom;
1187 struct thread *migrant;
1188
1189 /*
1190 * Only need to worry about migration when profiling one CPU.
1191 */
1192 if (profile_cpu == -1)
1193 return;
1194
1195 migrant = threads__findnew(migrate_task_event->pid);
1196 atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
1197 if (!atoms) {
1198 thread_atoms_insert(migrant);
1199 register_pid(migrant->pid, migrant->comm);
1200 atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
1201 if (!atoms)
1202 die("migration-event: Internal tree error");
1203 add_sched_out_event(atoms, 'R', timestamp);
1204 }
1205
1206 BUG_ON(list_empty(&atoms->work_list));
1207
1208 atom = list_entry(atoms->work_list.prev, struct work_atom, list);
1209 atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp;
1210
1211 nr_timestamps++;
1212
1213 if (atom->sched_out_time > timestamp)
1214 nr_unordered_timestamps++;
1215}
1216
1155static struct trace_sched_handler lat_ops = { 1217static struct trace_sched_handler lat_ops = {
1156 .wakeup_event = latency_wakeup_event, 1218 .wakeup_event = latency_wakeup_event,
1157 .switch_event = latency_switch_event, 1219 .switch_event = latency_switch_event,
1158 .runtime_event = latency_runtime_event, 1220 .runtime_event = latency_runtime_event,
1159 .fork_event = latency_fork_event, 1221 .fork_event = latency_fork_event,
1222 .migrate_task_event = latency_migrate_task_event,
1160}; 1223};
1161 1224
1162static void output_lat_thread(struct work_atoms *work_list) 1225static void output_lat_thread(struct work_atoms *work_list)
@@ -1385,8 +1448,8 @@ map_switch_event(struct trace_switch_event *switch_event,
1385 die("hm, delta: %Ld < 0 ?\n", delta); 1448 die("hm, delta: %Ld < 0 ?\n", delta);
1386 1449
1387 1450
1388 sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match); 1451 sched_out = threads__findnew(switch_event->prev_pid);
1389 sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match); 1452 sched_in = threads__findnew(switch_event->next_pid);
1390 1453
1391 curr_thread[this_cpu] = sched_in; 1454 curr_thread[this_cpu] = sched_in;
1392 1455
@@ -1517,6 +1580,26 @@ process_sched_exit_event(struct event *event,
1517} 1580}
1518 1581
1519static void 1582static void
1583process_sched_migrate_task_event(struct raw_event_sample *raw,
1584 struct event *event,
1585 int cpu __used,
1586 u64 timestamp __used,
1587 struct thread *thread __used)
1588{
1589 struct trace_migrate_task_event migrate_task_event;
1590
1591 FILL_COMMON_FIELDS(migrate_task_event, event, raw->data);
1592
1593 FILL_ARRAY(migrate_task_event, comm, event, raw->data);
1594 FILL_FIELD(migrate_task_event, pid, event, raw->data);
1595 FILL_FIELD(migrate_task_event, prio, event, raw->data);
1596 FILL_FIELD(migrate_task_event, cpu, event, raw->data);
1597
1598 if (trace_handler->migrate_task_event)
1599 trace_handler->migrate_task_event(&migrate_task_event, event, cpu, timestamp, thread);
1600}
1601
1602static void
1520process_raw_event(event_t *raw_event __used, void *more_data, 1603process_raw_event(event_t *raw_event __used, void *more_data,
1521 int cpu, u64 timestamp, struct thread *thread) 1604 int cpu, u64 timestamp, struct thread *thread)
1522{ 1605{
@@ -1539,23 +1622,24 @@ process_raw_event(event_t *raw_event __used, void *more_data,
1539 process_sched_fork_event(raw, event, cpu, timestamp, thread); 1622 process_sched_fork_event(raw, event, cpu, timestamp, thread);
1540 if (!strcmp(event->name, "sched_process_exit")) 1623 if (!strcmp(event->name, "sched_process_exit"))
1541 process_sched_exit_event(event, cpu, timestamp, thread); 1624 process_sched_exit_event(event, cpu, timestamp, thread);
1625 if (!strcmp(event->name, "sched_migrate_task"))
1626 process_sched_migrate_task_event(raw, event, cpu, timestamp, thread);
1542} 1627}
1543 1628
1544static int 1629static int
1545process_sample_event(event_t *event, unsigned long offset, unsigned long head) 1630process_sample_event(event_t *event, unsigned long offset, unsigned long head)
1546{ 1631{
1547 char level;
1548 int show = 0;
1549 struct dso *dso = NULL;
1550 struct thread *thread; 1632 struct thread *thread;
1551 u64 ip = event->ip.ip; 1633 u64 ip = event->ip.ip;
1552 u64 timestamp = -1; 1634 u64 timestamp = -1;
1553 u32 cpu = -1; 1635 u32 cpu = -1;
1554 u64 period = 1; 1636 u64 period = 1;
1555 void *more_data = event->ip.__more_data; 1637 void *more_data = event->ip.__more_data;
1556 int cpumode;
1557 1638
1558 thread = threads__findnew(event->ip.pid, &threads, &last_match); 1639 if (!(sample_type & PERF_SAMPLE_RAW))
1640 return 0;
1641
1642 thread = threads__findnew(event->ip.pid);
1559 1643
1560 if (sample_type & PERF_SAMPLE_TIME) { 1644 if (sample_type & PERF_SAMPLE_TIME) {
1561 timestamp = *(u64 *)more_data; 1645 timestamp = *(u64 *)more_data;
@@ -1581,169 +1665,60 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
1581 (void *)(long)ip, 1665 (void *)(long)ip,
1582 (long long)period); 1666 (long long)period);
1583 1667
1584 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
1585
1586 if (thread == NULL) { 1668 if (thread == NULL) {
1587 eprintf("problem processing %d event, skipping it.\n", 1669 pr_debug("problem processing %d event, skipping it.\n",
1588 event->header.type); 1670 event->header.type);
1589 return -1; 1671 return -1;
1590 } 1672 }
1591 1673
1592 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 1674 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
1593
1594 if (cpumode == PERF_RECORD_MISC_KERNEL) {
1595 show = SHOW_KERNEL;
1596 level = 'k';
1597
1598 dso = kernel_dso;
1599
1600 dump_printf(" ...... dso: %s\n", dso->name);
1601
1602 } else if (cpumode == PERF_RECORD_MISC_USER) {
1603
1604 show = SHOW_USER;
1605 level = '.';
1606
1607 } else {
1608 show = SHOW_HV;
1609 level = 'H';
1610
1611 dso = hypervisor_dso;
1612 1675
1613 dump_printf(" ...... dso: [hypervisor]\n"); 1676 if (profile_cpu != -1 && profile_cpu != (int) cpu)
1614 } 1677 return 0;
1615 1678
1616 if (sample_type & PERF_SAMPLE_RAW) 1679 process_raw_event(event, more_data, cpu, timestamp, thread);
1617 process_raw_event(event, more_data, cpu, timestamp, thread);
1618 1680
1619 return 0; 1681 return 0;
1620} 1682}
1621 1683
1622static int 1684static int
1623process_event(event_t *event, unsigned long offset, unsigned long head) 1685process_lost_event(event_t *event __used,
1686 unsigned long offset __used,
1687 unsigned long head __used)
1624{ 1688{
1625 trace_event(event); 1689 nr_lost_chunks++;
1690 nr_lost_events += event->lost.lost;
1626 1691
1627 nr_events++; 1692 return 0;
1628 switch (event->header.type) { 1693}
1629 case PERF_RECORD_MMAP:
1630 return 0;
1631 case PERF_RECORD_LOST:
1632 nr_lost_chunks++;
1633 nr_lost_events += event->lost.lost;
1634 return 0;
1635
1636 case PERF_RECORD_COMM:
1637 return process_comm_event(event, offset, head);
1638
1639 case PERF_RECORD_EXIT ... PERF_RECORD_READ:
1640 return 0;
1641 1694
1642 case PERF_RECORD_SAMPLE: 1695static int sample_type_check(u64 type)
1643 return process_sample_event(event, offset, head); 1696{
1697 sample_type = type;
1644 1698
1645 case PERF_RECORD_MAX: 1699 if (!(sample_type & PERF_SAMPLE_RAW)) {
1646 default: 1700 fprintf(stderr,
1701 "No trace sample to read. Did you call perf record "
1702 "without -R?");
1647 return -1; 1703 return -1;
1648 } 1704 }
1649 1705
1650 return 0; 1706 return 0;
1651} 1707}
1652 1708
1709static struct perf_file_handler file_handler = {
1710 .process_sample_event = process_sample_event,
1711 .process_comm_event = process_comm_event,
1712 .process_lost_event = process_lost_event,
1713 .sample_type_check = sample_type_check,
1714};
1715
1653static int read_events(void) 1716static int read_events(void)
1654{ 1717{
1655 int ret, rc = EXIT_FAILURE; 1718 register_idle_thread();
1656 unsigned long offset = 0; 1719 register_perf_file_handler(&file_handler);
1657 unsigned long head = 0;
1658 struct stat perf_stat;
1659 event_t *event;
1660 uint32_t size;
1661 char *buf;
1662
1663 trace_report();
1664 register_idle_thread(&threads, &last_match);
1665
1666 input = open(input_name, O_RDONLY);
1667 if (input < 0) {
1668 perror("failed to open file");
1669 exit(-1);
1670 }
1671
1672 ret = fstat(input, &perf_stat);
1673 if (ret < 0) {
1674 perror("failed to stat file");
1675 exit(-1);
1676 }
1677
1678 if (!perf_stat.st_size) {
1679 fprintf(stderr, "zero-sized file, nothing to do!\n");
1680 exit(0);
1681 }
1682 header = perf_header__read(input);
1683 head = header->data_offset;
1684 sample_type = perf_header__sample_type(header);
1685
1686 if (!(sample_type & PERF_SAMPLE_RAW))
1687 die("No trace sample to read. Did you call perf record "
1688 "without -R?");
1689
1690 if (load_kernel() < 0) {
1691 perror("failed to load kernel symbols");
1692 return EXIT_FAILURE;
1693 }
1694
1695remap:
1696 buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
1697 MAP_SHARED, input, offset);
1698 if (buf == MAP_FAILED) {
1699 perror("failed to mmap file");
1700 exit(-1);
1701 }
1702
1703more:
1704 event = (event_t *)(buf + head);
1705
1706 size = event->header.size;
1707 if (!size)
1708 size = 8;
1709
1710 if (head + event->header.size >= page_size * mmap_window) {
1711 unsigned long shift = page_size * (head / page_size);
1712 int res;
1713
1714 res = munmap(buf, page_size * mmap_window);
1715 assert(res == 0);
1716
1717 offset += shift;
1718 head -= shift;
1719 goto remap;
1720 }
1721
1722 size = event->header.size;
1723
1724
1725 if (!size || process_event(event, offset, head) < 0) {
1726
1727 /*
1728 * assume we lost track of the stream, check alignment, and
1729 * increment a single u64 in the hope to catch on again 'soon'.
1730 */
1731
1732 if (unlikely(head & 7))
1733 head &= ~7ULL;
1734
1735 size = 8;
1736 }
1737
1738 head += size;
1739
1740 if (offset + head < (unsigned long)perf_stat.st_size)
1741 goto more;
1742
1743 rc = EXIT_SUCCESS;
1744 close(input);
1745 1720
1746 return rc; 1721 return mmap_dispatch_perf_file(&header, input_name, 0, 0, &cwdlen, &cwd);
1747} 1722}
1748 1723
1749static void print_bad_events(void) 1724static void print_bad_events(void)
@@ -1883,6 +1858,8 @@ static const struct option latency_options[] = {
1883 "sort by key(s): runtime, switch, avg, max"), 1858 "sort by key(s): runtime, switch, avg, max"),
1884 OPT_BOOLEAN('v', "verbose", &verbose, 1859 OPT_BOOLEAN('v', "verbose", &verbose,
1885 "be more verbose (show symbol address, etc)"), 1860 "be more verbose (show symbol address, etc)"),
1861 OPT_INTEGER('C', "CPU", &profile_cpu,
1862 "CPU to profile on"),
1886 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 1863 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
1887 "dump raw trace in ASCII"), 1864 "dump raw trace in ASCII"),
1888 OPT_END() 1865 OPT_END()
@@ -1960,8 +1937,7 @@ static int __cmd_record(int argc, const char **argv)
1960 1937
1961int cmd_sched(int argc, const char **argv, const char *prefix __used) 1938int cmd_sched(int argc, const char **argv, const char *prefix __used)
1962{ 1939{
1963 symbol__init(); 1940 symbol__init(0);
1964 page_size = getpagesize();
1965 1941
1966 argc = parse_options(argc, argv, sched_options, sched_usage, 1942 argc = parse_options(argc, argv, sched_options, sched_usage,
1967 PARSE_OPT_STOP_AT_NON_OPTION); 1943 PARSE_OPT_STOP_AT_NON_OPTION);