author     Linus Torvalds <torvalds@linux-foundation.org>   2009-09-20 18:54:37 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2009-09-20 18:54:37 -0400
commit     467f9957d9283be40101d7255d06fae7e211ff4c (patch)
tree       71d155ab52b3a78bc88d0c8088b09b3c37f9357a
parent     78f28b7c555359c67c2a0d23f7436e915329421e (diff)
parent     cdf8073d6b2c6c5a3cd6ce0e6c1297157f7f99ba (diff)
Merge branch 'perfcounters-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perfcounters-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (58 commits)
  perf_counter: Fix perf_copy_attr() pointer arithmetic
  perf utils: Use a define for the maximum length of a trace event
  perf: Add timechart help text and add timechart to "perf help"
  tracing, x86, cpuidle: Move the end point of a C state in the power tracer
  perf utils: Be consistent about minimum text size in the svghelper
  perf timechart: Add "perf timechart record"
  perf: Add the timechart tool
  perf: Add a SVG helper library file
  tracing, perf: Convert the power tracer into an event tracer
  perf: Add a sample_event type to the event_union
  perf: Allow perf utilities to have "callback" options without arguments
  perf: Store trace event name/id pairs in perf.data
  perf: Add a timestamp to fork events
  sched_clock: Make it NMI safe
  perf_counter: Fix up swcounter throttling
  x86, perf_counter, bts: Optimize BTS overflow handling
  perf sched: Add --input=file option to builtin-sched.c
  perf trace: Sample timestamp and cpu when using record flag
  perf tools: Increase MAX_EVENT_LENGTH
  perf tools: Fix memory leak in read_ftrace_printk()
  ...
-rw-r--r--  Documentation/trace/power.txt | 17
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 7
-rw-r--r--  arch/x86/kernel/cpu/perf_counter.c | 60
-rw-r--r--  arch/x86/kernel/process.c | 25
-rw-r--r--  drivers/cpuidle/cpuidle.c | 2
-rw-r--r--  include/linux/perf_counter.h | 80
-rw-r--r--  include/trace/events/power.h | 81
-rw-r--r--  include/trace/events/sched.h | 33
-rw-r--r--  kernel/perf_counter.c | 394
-rw-r--r--  kernel/sched_clock.c | 122
-rw-r--r--  kernel/sched_fair.c | 1
-rw-r--r--  kernel/trace/Makefile | 2
-rw-r--r--  kernel/trace/power-traces.c | 20
-rw-r--r--  kernel/trace/trace.h | 3
-rw-r--r--  kernel/trace/trace_entries.h | 17
-rw-r--r--  kernel/trace/trace_power.c | 218
-rw-r--r--  scripts/tracing/power.pl | 108
-rw-r--r--  tools/perf/Documentation/perf-sched.txt | 41
-rw-r--r--  tools/perf/Documentation/perf-timechart.txt | 35
-rw-r--r--  tools/perf/Documentation/perf-trace.txt | 25
-rw-r--r--  tools/perf/Makefile | 9
-rw-r--r--  tools/perf/builtin-record.c | 56
-rw-r--r--  tools/perf/builtin-sched.c | 2004
-rw-r--r--  tools/perf/builtin-timechart.c | 1151
-rw-r--r--  tools/perf/builtin.h | 6
-rw-r--r--  tools/perf/command-list.txt | 3
-rw-r--r--  tools/perf/perf.c | 2
-rw-r--r--  tools/perf/util/event.h | 10
-rw-r--r--  tools/perf/util/header.c | 67
-rw-r--r--  tools/perf/util/header.h | 6
-rw-r--r--  tools/perf/util/parse-events.c | 237
-rw-r--r--  tools/perf/util/parse-options.h | 2
-rw-r--r--  tools/perf/util/svghelper.c | 384
-rw-r--r--  tools/perf/util/svghelper.h | 25
-rw-r--r--  tools/perf/util/thread.c | 4
-rw-r--r--  tools/perf/util/thread.h | 9
-rw-r--r--  tools/perf/util/trace-event-info.c | 7
-rw-r--r--  tools/perf/util/trace-event-parse.c | 45
-rw-r--r--  tools/perf/util/trace-event-read.c | 6
-rw-r--r--  tools/perf/util/trace-event.h | 5
40 files changed, 4570 insertions(+), 759 deletions(-)
diff --git a/Documentation/trace/power.txt b/Documentation/trace/power.txt
deleted file mode 100644
index cd805e16dc27..000000000000
--- a/Documentation/trace/power.txt
+++ /dev/null
@@ -1,17 +0,0 @@
1The power tracer collects detailed information about C-state and P-state
2transitions, instead of just looking at the high-level "average"
3information.
4
5There is a helper script found in scrips/tracing/power.pl in the kernel
6sources which can be used to parse this information and create a
7Scalable Vector Graphics (SVG) picture from the trace data.
8
9To use this tracer:
10
11 echo 0 > /sys/kernel/debug/tracing/tracing_enabled
12 echo power > /sys/kernel/debug/tracing/current_tracer
13 echo 1 > /sys/kernel/debug/tracing/tracing_enabled
14 sleep 1
15 echo 0 > /sys/kernel/debug/tracing/tracing_enabled
16 cat /sys/kernel/debug/tracing/trace | \
17 perl scripts/tracing/power.pl > out.sv
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 7bb676c533aa..7d5c3b0ea8da 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,7 +33,7 @@
33#include <linux/cpufreq.h> 33#include <linux/cpufreq.h>
34#include <linux/compiler.h> 34#include <linux/compiler.h>
35#include <linux/dmi.h> 35#include <linux/dmi.h>
36#include <trace/power.h> 36#include <trace/events/power.h>
37 37
38#include <linux/acpi.h> 38#include <linux/acpi.h>
39#include <linux/io.h> 39#include <linux/io.h>
@@ -72,8 +72,6 @@ static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
72 72
73static DEFINE_PER_CPU(struct aperfmperf, old_perf); 73static DEFINE_PER_CPU(struct aperfmperf, old_perf);
74 74
75DEFINE_TRACE(power_mark);
76
77/* acpi_perf_data is a pointer to percpu data. */ 75/* acpi_perf_data is a pointer to percpu data. */
78static struct acpi_processor_performance *acpi_perf_data; 76static struct acpi_processor_performance *acpi_perf_data;
79 77
@@ -332,7 +330,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
332 unsigned int next_perf_state = 0; /* Index into perf table */ 330 unsigned int next_perf_state = 0; /* Index into perf table */
333 unsigned int i; 331 unsigned int i;
334 int result = 0; 332 int result = 0;
335 struct power_trace it;
336 333
337 dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); 334 dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
338 335
@@ -364,7 +361,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
364 } 361 }
365 } 362 }
366 363
367 trace_power_mark(&it, POWER_PSTATE, next_perf_state); 364 trace_power_frequency(POWER_PSTATE, data->freq_table[next_state].frequency);
368 365
369 switch (data->cpu_feature) { 366 switch (data->cpu_feature) {
370 case SYSTEM_INTEL_MSR_CAPABLE: 367 case SYSTEM_INTEL_MSR_CAPABLE:
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 2732e2c1e4d3..dbdf712fae9e 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -36,10 +36,10 @@ static u64 perf_counter_mask __read_mostly;
36#define BTS_RECORD_SIZE 24 36#define BTS_RECORD_SIZE 24
37 37
38/* The size of a per-cpu BTS buffer in bytes: */ 38/* The size of a per-cpu BTS buffer in bytes: */
39#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 1024) 39#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048)
40 40
41/* The BTS overflow threshold in bytes from the end of the buffer: */ 41/* The BTS overflow threshold in bytes from the end of the buffer: */
42#define BTS_OVFL_TH (BTS_RECORD_SIZE * 64) 42#define BTS_OVFL_TH (BTS_RECORD_SIZE * 128)
43 43
44 44
45/* 45/*
@@ -1488,8 +1488,7 @@ void perf_counter_print_debug(void)
1488 local_irq_restore(flags); 1488 local_irq_restore(flags);
1489} 1489}
1490 1490
1491static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc, 1491static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc)
1492 struct perf_sample_data *data)
1493{ 1492{
1494 struct debug_store *ds = cpuc->ds; 1493 struct debug_store *ds = cpuc->ds;
1495 struct bts_record { 1494 struct bts_record {
@@ -1498,8 +1497,11 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
1498 u64 flags; 1497 u64 flags;
1499 }; 1498 };
1500 struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS]; 1499 struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS];
1501 unsigned long orig_ip = data->regs->ip;
1502 struct bts_record *at, *top; 1500 struct bts_record *at, *top;
1501 struct perf_output_handle handle;
1502 struct perf_event_header header;
1503 struct perf_sample_data data;
1504 struct pt_regs regs;
1503 1505
1504 if (!counter) 1506 if (!counter)
1505 return; 1507 return;
@@ -1510,19 +1512,38 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
1510 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; 1512 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
1511 top = (struct bts_record *)(unsigned long)ds->bts_index; 1513 top = (struct bts_record *)(unsigned long)ds->bts_index;
1512 1514
1515 if (top <= at)
1516 return;
1517
1513 ds->bts_index = ds->bts_buffer_base; 1518 ds->bts_index = ds->bts_buffer_base;
1514 1519
1520
1521 data.period = counter->hw.last_period;
1522 data.addr = 0;
1523 regs.ip = 0;
1524
1525 /*
1526 * Prepare a generic sample, i.e. fill in the invariant fields.
1527 * We will overwrite the from and to address before we output
1528 * the sample.
1529 */
1530 perf_prepare_sample(&header, &data, counter, &regs);
1531
1532 if (perf_output_begin(&handle, counter,
1533 header.size * (top - at), 1, 1))
1534 return;
1535
1515 for (; at < top; at++) { 1536 for (; at < top; at++) {
1516 data->regs->ip = at->from; 1537 data.ip = at->from;
1517 data->addr = at->to; 1538 data.addr = at->to;
1518 1539
1519 perf_counter_output(counter, 1, data); 1540 perf_output_sample(&handle, &header, &data, counter);
1520 } 1541 }
1521 1542
1522 data->regs->ip = orig_ip; 1543 perf_output_end(&handle);
1523 data->addr = 0;
1524 1544
1525 /* There's new data available. */ 1545 /* There's new data available. */
1546 counter->hw.interrupts++;
1526 counter->pending_kill = POLL_IN; 1547 counter->pending_kill = POLL_IN;
1527} 1548}
1528 1549
@@ -1552,13 +1573,9 @@ static void x86_pmu_disable(struct perf_counter *counter)
1552 x86_perf_counter_update(counter, hwc, idx); 1573 x86_perf_counter_update(counter, hwc, idx);
1553 1574
1554 /* Drain the remaining BTS records. */ 1575 /* Drain the remaining BTS records. */
1555 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { 1576 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS))
1556 struct perf_sample_data data; 1577 intel_pmu_drain_bts_buffer(cpuc);
1557 struct pt_regs regs;
1558 1578
1559 data.regs = &regs;
1560 intel_pmu_drain_bts_buffer(cpuc, &data);
1561 }
1562 cpuc->counters[idx] = NULL; 1579 cpuc->counters[idx] = NULL;
1563 clear_bit(idx, cpuc->used_mask); 1580 clear_bit(idx, cpuc->used_mask);
1564 1581
@@ -1619,7 +1636,6 @@ static int p6_pmu_handle_irq(struct pt_regs *regs)
1619 int idx, handled = 0; 1636 int idx, handled = 0;
1620 u64 val; 1637 u64 val;
1621 1638
1622 data.regs = regs;
1623 data.addr = 0; 1639 data.addr = 0;
1624 1640
1625 cpuc = &__get_cpu_var(cpu_hw_counters); 1641 cpuc = &__get_cpu_var(cpu_hw_counters);
@@ -1644,7 +1660,7 @@ static int p6_pmu_handle_irq(struct pt_regs *regs)
1644 if (!x86_perf_counter_set_period(counter, hwc, idx)) 1660 if (!x86_perf_counter_set_period(counter, hwc, idx))
1645 continue; 1661 continue;
1646 1662
1647 if (perf_counter_overflow(counter, 1, &data)) 1663 if (perf_counter_overflow(counter, 1, &data, regs))
1648 p6_pmu_disable_counter(hwc, idx); 1664 p6_pmu_disable_counter(hwc, idx);
1649 } 1665 }
1650 1666
@@ -1665,13 +1681,12 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
1665 int bit, loops; 1681 int bit, loops;
1666 u64 ack, status; 1682 u64 ack, status;
1667 1683
1668 data.regs = regs;
1669 data.addr = 0; 1684 data.addr = 0;
1670 1685
1671 cpuc = &__get_cpu_var(cpu_hw_counters); 1686 cpuc = &__get_cpu_var(cpu_hw_counters);
1672 1687
1673 perf_disable(); 1688 perf_disable();
1674 intel_pmu_drain_bts_buffer(cpuc, &data); 1689 intel_pmu_drain_bts_buffer(cpuc);
1675 status = intel_pmu_get_status(); 1690 status = intel_pmu_get_status();
1676 if (!status) { 1691 if (!status) {
1677 perf_enable(); 1692 perf_enable();
@@ -1702,7 +1717,7 @@ again:
1702 1717
1703 data.period = counter->hw.last_period; 1718 data.period = counter->hw.last_period;
1704 1719
1705 if (perf_counter_overflow(counter, 1, &data)) 1720 if (perf_counter_overflow(counter, 1, &data, regs))
1706 intel_pmu_disable_counter(&counter->hw, bit); 1721 intel_pmu_disable_counter(&counter->hw, bit);
1707 } 1722 }
1708 1723
@@ -1729,7 +1744,6 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
1729 int idx, handled = 0; 1744 int idx, handled = 0;
1730 u64 val; 1745 u64 val;
1731 1746
1732 data.regs = regs;
1733 data.addr = 0; 1747 data.addr = 0;
1734 1748
1735 cpuc = &__get_cpu_var(cpu_hw_counters); 1749 cpuc = &__get_cpu_var(cpu_hw_counters);
@@ -1754,7 +1768,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
1754 if (!x86_perf_counter_set_period(counter, hwc, idx)) 1768 if (!x86_perf_counter_set_period(counter, hwc, idx))
1755 continue; 1769 continue;
1756 1770
1757 if (perf_counter_overflow(counter, 1, &data)) 1771 if (perf_counter_overflow(counter, 1, &data, regs))
1758 amd_pmu_disable_counter(hwc, idx); 1772 amd_pmu_disable_counter(hwc, idx);
1759 } 1773 }
1760 1774
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 071166a4ba83..847ab4160315 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -9,7 +9,7 @@
9#include <linux/pm.h> 9#include <linux/pm.h>
10#include <linux/clockchips.h> 10#include <linux/clockchips.h>
11#include <linux/random.h> 11#include <linux/random.h>
12#include <trace/power.h> 12#include <trace/events/power.h>
13#include <asm/system.h> 13#include <asm/system.h>
14#include <asm/apic.h> 14#include <asm/apic.h>
15#include <asm/syscalls.h> 15#include <asm/syscalls.h>
@@ -25,9 +25,6 @@ EXPORT_SYMBOL(idle_nomwait);
25 25
26struct kmem_cache *task_xstate_cachep; 26struct kmem_cache *task_xstate_cachep;
27 27
28DEFINE_TRACE(power_start);
29DEFINE_TRACE(power_end);
30
31int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) 28int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
32{ 29{
33 *dst = *src; 30 *dst = *src;
@@ -299,9 +296,7 @@ static inline int hlt_use_halt(void)
299void default_idle(void) 296void default_idle(void)
300{ 297{
301 if (hlt_use_halt()) { 298 if (hlt_use_halt()) {
302 struct power_trace it; 299 trace_power_start(POWER_CSTATE, 1);
303
304 trace_power_start(&it, POWER_CSTATE, 1);
305 current_thread_info()->status &= ~TS_POLLING; 300 current_thread_info()->status &= ~TS_POLLING;
306 /* 301 /*
307 * TS_POLLING-cleared state must be visible before we 302 * TS_POLLING-cleared state must be visible before we
@@ -314,7 +309,6 @@ void default_idle(void)
314 else 309 else
315 local_irq_enable(); 310 local_irq_enable();
316 current_thread_info()->status |= TS_POLLING; 311 current_thread_info()->status |= TS_POLLING;
317 trace_power_end(&it);
318 } else { 312 } else {
319 local_irq_enable(); 313 local_irq_enable();
320 /* loop is done by the caller */ 314 /* loop is done by the caller */
@@ -372,9 +366,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
372 */ 366 */
373void mwait_idle_with_hints(unsigned long ax, unsigned long cx) 367void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
374{ 368{
375 struct power_trace it; 369 trace_power_start(POWER_CSTATE, (ax>>4)+1);
376
377 trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
378 if (!need_resched()) { 370 if (!need_resched()) {
379 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) 371 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
380 clflush((void *)&current_thread_info()->flags); 372 clflush((void *)&current_thread_info()->flags);
@@ -384,15 +376,13 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
384 if (!need_resched()) 376 if (!need_resched())
385 __mwait(ax, cx); 377 __mwait(ax, cx);
386 } 378 }
387 trace_power_end(&it);
388} 379}
389 380
390/* Default MONITOR/MWAIT with no hints, used for default C1 state */ 381/* Default MONITOR/MWAIT with no hints, used for default C1 state */
391static void mwait_idle(void) 382static void mwait_idle(void)
392{ 383{
393 struct power_trace it;
394 if (!need_resched()) { 384 if (!need_resched()) {
395 trace_power_start(&it, POWER_CSTATE, 1); 385 trace_power_start(POWER_CSTATE, 1);
396 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) 386 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
397 clflush((void *)&current_thread_info()->flags); 387 clflush((void *)&current_thread_info()->flags);
398 388
@@ -402,7 +392,6 @@ static void mwait_idle(void)
402 __sti_mwait(0, 0); 392 __sti_mwait(0, 0);
403 else 393 else
404 local_irq_enable(); 394 local_irq_enable();
405 trace_power_end(&it);
406 } else 395 } else
407 local_irq_enable(); 396 local_irq_enable();
408} 397}
@@ -414,13 +403,11 @@ static void mwait_idle(void)
414 */ 403 */
415static void poll_idle(void) 404static void poll_idle(void)
416{ 405{
417 struct power_trace it; 406 trace_power_start(POWER_CSTATE, 0);
418
419 trace_power_start(&it, POWER_CSTATE, 0);
420 local_irq_enable(); 407 local_irq_enable();
421 while (!need_resched()) 408 while (!need_resched())
422 cpu_relax(); 409 cpu_relax();
423 trace_power_end(&it); 410 trace_power_end(0);
424} 411}
425 412
426/* 413/*
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 8504a2108557..ad41f19b8e3f 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -17,6 +17,7 @@
17#include <linux/cpuidle.h> 17#include <linux/cpuidle.h>
18#include <linux/ktime.h> 18#include <linux/ktime.h>
19#include <linux/hrtimer.h> 19#include <linux/hrtimer.h>
20#include <trace/events/power.h>
20 21
21#include "cpuidle.h" 22#include "cpuidle.h"
22 23
@@ -91,6 +92,7 @@ static void cpuidle_idle_call(void)
91 /* give the governor an opportunity to reflect on the outcome */ 92 /* give the governor an opportunity to reflect on the outcome */
92 if (cpuidle_curr_governor->reflect) 93 if (cpuidle_curr_governor->reflect)
93 cpuidle_curr_governor->reflect(dev); 94 cpuidle_curr_governor->reflect(dev);
95 trace_power_end(0);
94} 96}
95 97
96/** 98/**
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 972f90d7a32f..bd341007c4fc 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -199,10 +199,14 @@ struct perf_counter_attr {
199 inherit_stat : 1, /* per task counts */ 199 inherit_stat : 1, /* per task counts */
200 enable_on_exec : 1, /* next exec enables */ 200 enable_on_exec : 1, /* next exec enables */
201 task : 1, /* trace fork/exit */ 201 task : 1, /* trace fork/exit */
202 watermark : 1, /* wakeup_watermark */
202 203
203 __reserved_1 : 50; 204 __reserved_1 : 49;
204 205
205 __u32 wakeup_events; /* wakeup every n events */ 206 union {
207 __u32 wakeup_events; /* wakeup every n events */
208 __u32 wakeup_watermark; /* bytes before wakeup */
209 };
206 __u32 __reserved_2; 210 __u32 __reserved_2;
207 211
208 __u64 __reserved_3; 212 __u64 __reserved_3;
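
The new watermark bit and wakeup_watermark member let userspace ask for a wakeup after a given number of buffered bytes rather than a given number of events. A minimal, hedged sketch of how a userspace reader of that era might request a byte-based wakeup; the choice of event, period and 8 KiB threshold are illustrative assumptions, and it presumes __NR_perf_counter_open is visible from the installed headers (tools/perf carried its own fallback define):

    #include <linux/perf_counter.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    static int open_counter_with_watermark(void)
    {
            struct perf_counter_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size             = sizeof(attr);
            attr.type             = PERF_TYPE_SOFTWARE;          /* illustrative event */
            attr.config           = PERF_COUNT_SW_TASK_CLOCK;
            attr.sample_period    = 100000;
            attr.sample_type      = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
            attr.watermark        = 1;      /* interpret the union as wakeup_watermark */
            attr.wakeup_watermark = 8192;   /* wake readers once ~8 KiB is buffered */

            /* pid 0 = current task, cpu -1 = any CPU, no group leader, no flags */
            return syscall(__NR_perf_counter_open, &attr, 0, -1, -1, 0);
    }
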
@@ -332,6 +336,7 @@ enum perf_event_type {
332 * struct perf_event_header header; 336 * struct perf_event_header header;
333 * u32 pid, ppid; 337 * u32 pid, ppid;
334 * u32 tid, ptid; 338 * u32 tid, ptid;
339 * u64 time;
335 * }; 340 * };
336 */ 341 */
337 PERF_EVENT_EXIT = 4, 342 PERF_EVENT_EXIT = 4,
@@ -352,6 +357,7 @@ enum perf_event_type {
352 * struct perf_event_header header; 357 * struct perf_event_header header;
353 * u32 pid, ppid; 358 * u32 pid, ppid;
354 * u32 tid, ptid; 359 * u32 tid, ptid;
360 * { u64 time; } && PERF_SAMPLE_TIME
355 * }; 361 * };
356 */ 362 */
357 PERF_EVENT_FORK = 7, 363 PERF_EVENT_FORK = 7,
@@ -521,6 +527,8 @@ struct perf_mmap_data {
521 atomic_t wakeup; /* needs a wakeup */ 527 atomic_t wakeup; /* needs a wakeup */
522 atomic_t lost; /* nr records lost */ 528 atomic_t lost; /* nr records lost */
523 529
530 long watermark; /* wakeup watermark */
531
524 struct perf_counter_mmap_page *user_page; 532 struct perf_counter_mmap_page *user_page;
525 void *data_pages[0]; 533 void *data_pages[0];
526}; 534};
@@ -685,6 +693,17 @@ struct perf_cpu_context {
685 int recursion[4]; 693 int recursion[4];
686}; 694};
687 695
696struct perf_output_handle {
697 struct perf_counter *counter;
698 struct perf_mmap_data *data;
699 unsigned long head;
700 unsigned long offset;
701 int nmi;
702 int sample;
703 int locked;
704 unsigned long flags;
705};
706
688#ifdef CONFIG_PERF_COUNTERS 707#ifdef CONFIG_PERF_COUNTERS
689 708
690/* 709/*
@@ -716,16 +735,38 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
716extern void perf_counter_update_userpage(struct perf_counter *counter); 735extern void perf_counter_update_userpage(struct perf_counter *counter);
717 736
718struct perf_sample_data { 737struct perf_sample_data {
719 struct pt_regs *regs; 738 u64 type;
739
740 u64 ip;
741 struct {
742 u32 pid;
743 u32 tid;
744 } tid_entry;
745 u64 time;
720 u64 addr; 746 u64 addr;
747 u64 id;
748 u64 stream_id;
749 struct {
750 u32 cpu;
751 u32 reserved;
752 } cpu_entry;
721 u64 period; 753 u64 period;
754 struct perf_callchain_entry *callchain;
722 struct perf_raw_record *raw; 755 struct perf_raw_record *raw;
723}; 756};
724 757
758extern void perf_output_sample(struct perf_output_handle *handle,
759 struct perf_event_header *header,
760 struct perf_sample_data *data,
761 struct perf_counter *counter);
762extern void perf_prepare_sample(struct perf_event_header *header,
763 struct perf_sample_data *data,
764 struct perf_counter *counter,
765 struct pt_regs *regs);
766
725extern int perf_counter_overflow(struct perf_counter *counter, int nmi, 767extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
726 struct perf_sample_data *data); 768 struct perf_sample_data *data,
727extern void perf_counter_output(struct perf_counter *counter, int nmi, 769 struct pt_regs *regs);
728 struct perf_sample_data *data);
729 770
730/* 771/*
731 * Return 1 for a software counter, 0 for a hardware counter 772 * Return 1 for a software counter, 0 for a hardware counter
@@ -775,6 +816,12 @@ extern void perf_tpcounter_event(int event_id, u64 addr, u64 count,
775#define perf_instruction_pointer(regs) instruction_pointer(regs) 816#define perf_instruction_pointer(regs) instruction_pointer(regs)
776#endif 817#endif
777 818
819extern int perf_output_begin(struct perf_output_handle *handle,
820 struct perf_counter *counter, unsigned int size,
821 int nmi, int sample);
822extern void perf_output_end(struct perf_output_handle *handle);
823extern void perf_output_copy(struct perf_output_handle *handle,
824 const void *buf, unsigned int len);
778#else 825#else
779static inline void 826static inline void
780perf_counter_task_sched_in(struct task_struct *task, int cpu) { } 827perf_counter_task_sched_in(struct task_struct *task, int cpu) { }
@@ -801,7 +848,28 @@ static inline void perf_counter_mmap(struct vm_area_struct *vma) { }
801static inline void perf_counter_comm(struct task_struct *tsk) { } 848static inline void perf_counter_comm(struct task_struct *tsk) { }
802static inline void perf_counter_fork(struct task_struct *tsk) { } 849static inline void perf_counter_fork(struct task_struct *tsk) { }
803static inline void perf_counter_init(void) { } 850static inline void perf_counter_init(void) { }
851
852static inline int
853perf_output_begin(struct perf_output_handle *handle, struct perf_counter *c,
854 unsigned int size, int nmi, int sample) { }
855static inline void perf_output_end(struct perf_output_handle *handle) { }
856static inline void
857perf_output_copy(struct perf_output_handle *handle,
858 const void *buf, unsigned int len) { }
859static inline void
860perf_output_sample(struct perf_output_handle *handle,
861 struct perf_event_header *header,
862 struct perf_sample_data *data,
863 struct perf_counter *counter) { }
864static inline void
865perf_prepare_sample(struct perf_event_header *header,
866 struct perf_sample_data *data,
867 struct perf_counter *counter,
868 struct pt_regs *regs) { }
804#endif 869#endif
805 870
871#define perf_output_put(handle, x) \
872 perf_output_copy((handle), &(x), sizeof(x))
873
806#endif /* __KERNEL__ */ 874#endif /* __KERNEL__ */
807#endif /* _LINUX_PERF_COUNTER_H */ 875#endif /* _LINUX_PERF_COUNTER_H */
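
Taken together, the declarations added above split sample output into three exported steps: perf_prepare_sample() fills the invariant fields and computes the record size, perf_output_begin() reserves that much space in the mmap buffer, and perf_output_sample() copies the record out. A condensed sketch of the calling pattern, mirroring perf_counter_output() and the BTS drain path further down in this merge (error handling trimmed):

    static void emit_sample(struct perf_counter *counter, int nmi,
                            struct perf_sample_data *data, struct pt_regs *regs)
    {
            struct perf_output_handle handle;
            struct perf_event_header header;

            /* Fill data->ip, data->tid_entry, ... and size the header. */
            perf_prepare_sample(&header, data, counter, regs);

            /* Reserve header.size bytes in the ring buffer; bail out if full. */
            if (perf_output_begin(&handle, counter, header.size, nmi, 1))
                    return;

            /* Write the prepared record into the reserved space. */
            perf_output_sample(&handle, &header, data, counter);

            perf_output_end(&handle);
    }
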
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
new file mode 100644
index 000000000000..ea6d579261ad
--- /dev/null
+++ b/include/trace/events/power.h
@@ -0,0 +1,81 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM power
3
4#if !defined(_TRACE_POWER_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_POWER_H
6
7#include <linux/ktime.h>
8#include <linux/tracepoint.h>
9
10#ifndef _TRACE_POWER_ENUM_
11#define _TRACE_POWER_ENUM_
12enum {
13 POWER_NONE = 0,
14 POWER_CSTATE = 1,
15 POWER_PSTATE = 2,
16};
17#endif
18
19
20
21TRACE_EVENT(power_start,
22
23 TP_PROTO(unsigned int type, unsigned int state),
24
25 TP_ARGS(type, state),
26
27 TP_STRUCT__entry(
28 __field( u64, type )
29 __field( u64, state )
30 ),
31
32 TP_fast_assign(
33 __entry->type = type;
34 __entry->state = state;
35 ),
36
37 TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long)__entry->state)
38);
39
40TRACE_EVENT(power_end,
41
42 TP_PROTO(int dummy),
43
44 TP_ARGS(dummy),
45
46 TP_STRUCT__entry(
47 __field( u64, dummy )
48 ),
49
50 TP_fast_assign(
51 __entry->dummy = 0xffff;
52 ),
53
54 TP_printk("dummy=%lu", (unsigned long)__entry->dummy)
55
56);
57
58
59TRACE_EVENT(power_frequency,
60
61 TP_PROTO(unsigned int type, unsigned int state),
62
63 TP_ARGS(type, state),
64
65 TP_STRUCT__entry(
66 __field( u64, type )
67 __field( u64, state )
68 ),
69
70 TP_fast_assign(
71 __entry->type = type;
72 __entry->state = state;
73 ),
74
75 TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long) __entry->state)
76);
77
78#endif /* _TRACE_POWER_H */
79
80/* This part must be outside protection */
81#include <trace/define_trace.h>
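
For context, a minimal sketch of how these three events are intended to be emitted from idle and cpufreq code, modeled on the arch/x86/kernel/process.c and acpi-cpufreq.c hunks in this merge (the function names here are placeholders, not part of the commit):

    #include <trace/events/power.h>

    static void example_enter_idle(void)
    {
            /* Entering C-state 1; the second argument is the target C-state. */
            trace_power_start(POWER_CSTATE, 1);

            /* ... architecture-specific halt/mwait ... */

            /* Leaving the idle state; the argument is an unused dummy. */
            trace_power_end(0);
    }

    static void example_set_pstate(unsigned int target_khz)
    {
            /* P-state changes report the target frequency instead of a state. */
            trace_power_frequency(POWER_PSTATE, target_khz);
    }
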
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index b48f1ad7c946..4069c43f4187 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -380,6 +380,39 @@ TRACE_EVENT(sched_stat_wait,
380); 380);
381 381
382/* 382/*
383 * Tracepoint for accounting runtime (time the task is executing
384 * on a CPU).
385 */
386TRACE_EVENT(sched_stat_runtime,
387
388 TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
389
390 TP_ARGS(tsk, runtime, vruntime),
391
392 TP_STRUCT__entry(
393 __array( char, comm, TASK_COMM_LEN )
394 __field( pid_t, pid )
395 __field( u64, runtime )
396 __field( u64, vruntime )
397 ),
398
399 TP_fast_assign(
400 memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
401 __entry->pid = tsk->pid;
402 __entry->runtime = runtime;
403 __entry->vruntime = vruntime;
404 )
405 TP_perf_assign(
406 __perf_count(runtime);
407 ),
408
409 TP_printk("task: %s:%d runtime: %Lu [ns], vruntime: %Lu [ns]",
410 __entry->comm, __entry->pid,
411 (unsigned long long)__entry->runtime,
412 (unsigned long long)__entry->vruntime)
413);
414
415/*
383 * Tracepoint for accounting sleep time (time the task is not runnable, 416 * Tracepoint for accounting sleep time (time the task is not runnable,
384 * including iowait, see below). 417 * including iowait, see below).
385 */ 418 */
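
The matching call site for sched_stat_runtime lands in kernel/sched_fair.c (the one-line change is visible only in the diffstat above). A hedged sketch of what that hook plausibly looks like inside the CFS runtime accounting; the exact placement in update_curr() is an assumption:

    /* Roughly where CFS charges delta_exec to the running entity: */
    static inline void report_runtime(struct sched_entity *curr, u64 delta_exec)
    {
            /* task_of() maps the CFS entity back to its task_struct. */
            trace_sched_stat_runtime(task_of(curr), delta_exec, curr->vruntime);
    }
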
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 8cb94a52d1bb..cc768ab81ac8 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2176,6 +2176,13 @@ static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages)
2176 data->nr_pages = nr_pages; 2176 data->nr_pages = nr_pages;
2177 atomic_set(&data->lock, -1); 2177 atomic_set(&data->lock, -1);
2178 2178
2179 if (counter->attr.watermark) {
2180 data->watermark = min_t(long, PAGE_SIZE * nr_pages,
2181 counter->attr.wakeup_watermark);
2182 }
2183 if (!data->watermark)
2184 data->watermark = max(PAGE_SIZE, PAGE_SIZE * nr_pages / 4);
2185
2179 rcu_assign_pointer(counter->data, data); 2186 rcu_assign_pointer(counter->data, data);
2180 2187
2181 return 0; 2188 return 0;
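
As a worked example of the default picked above (the buffer size is illustrative, not part of the commit): with attr.watermark unset and an 8-page ring buffer on a 4 KiB-page machine, data->watermark = max(PAGE_SIZE, PAGE_SIZE * 8 / 4) = max(4096, 8192) = 8192, so readers are woken once roughly a quarter of the buffer holds unread data; an explicit wakeup_watermark is clamped to the buffer size.
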
@@ -2315,7 +2322,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
2315 lock_limit >>= PAGE_SHIFT; 2322 lock_limit >>= PAGE_SHIFT;
2316 locked = vma->vm_mm->locked_vm + extra; 2323 locked = vma->vm_mm->locked_vm + extra;
2317 2324
2318 if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { 2325 if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
2326 !capable(CAP_IPC_LOCK)) {
2319 ret = -EPERM; 2327 ret = -EPERM;
2320 goto unlock; 2328 goto unlock;
2321 } 2329 }
@@ -2504,35 +2512,15 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2504/* 2512/*
2505 * Output 2513 * Output
2506 */ 2514 */
2507 2515static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
2508struct perf_output_handle { 2516 unsigned long offset, unsigned long head)
2509 struct perf_counter *counter;
2510 struct perf_mmap_data *data;
2511 unsigned long head;
2512 unsigned long offset;
2513 int nmi;
2514 int sample;
2515 int locked;
2516 unsigned long flags;
2517};
2518
2519static bool perf_output_space(struct perf_mmap_data *data,
2520 unsigned int offset, unsigned int head)
2521{ 2517{
2522 unsigned long tail;
2523 unsigned long mask; 2518 unsigned long mask;
2524 2519
2525 if (!data->writable) 2520 if (!data->writable)
2526 return true; 2521 return true;
2527 2522
2528 mask = (data->nr_pages << PAGE_SHIFT) - 1; 2523 mask = (data->nr_pages << PAGE_SHIFT) - 1;
2529 /*
2530 * Userspace could choose to issue a mb() before updating the tail
2531 * pointer. So that all reads will be completed before the write is
2532 * issued.
2533 */
2534 tail = ACCESS_ONCE(data->user_page->data_tail);
2535 smp_rmb();
2536 2524
2537 offset = (offset - tail) & mask; 2525 offset = (offset - tail) & mask;
2538 head = (head - tail) & mask; 2526 head = (head - tail) & mask;
@@ -2633,8 +2621,8 @@ out:
2633 local_irq_restore(handle->flags); 2621 local_irq_restore(handle->flags);
2634} 2622}
2635 2623
2636static void perf_output_copy(struct perf_output_handle *handle, 2624void perf_output_copy(struct perf_output_handle *handle,
2637 const void *buf, unsigned int len) 2625 const void *buf, unsigned int len)
2638{ 2626{
2639 unsigned int pages_mask; 2627 unsigned int pages_mask;
2640 unsigned int offset; 2628 unsigned int offset;
@@ -2669,16 +2657,13 @@ static void perf_output_copy(struct perf_output_handle *handle,
2669 WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0); 2657 WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0);
2670} 2658}
2671 2659
2672#define perf_output_put(handle, x) \ 2660int perf_output_begin(struct perf_output_handle *handle,
2673 perf_output_copy((handle), &(x), sizeof(x)) 2661 struct perf_counter *counter, unsigned int size,
2674 2662 int nmi, int sample)
2675static int perf_output_begin(struct perf_output_handle *handle,
2676 struct perf_counter *counter, unsigned int size,
2677 int nmi, int sample)
2678{ 2663{
2679 struct perf_counter *output_counter; 2664 struct perf_counter *output_counter;
2680 struct perf_mmap_data *data; 2665 struct perf_mmap_data *data;
2681 unsigned int offset, head; 2666 unsigned long tail, offset, head;
2682 int have_lost; 2667 int have_lost;
2683 struct { 2668 struct {
2684 struct perf_event_header header; 2669 struct perf_event_header header;
@@ -2716,16 +2701,23 @@ static int perf_output_begin(struct perf_output_handle *handle,
2716 perf_output_lock(handle); 2701 perf_output_lock(handle);
2717 2702
2718 do { 2703 do {
2704 /*
2705 * Userspace could choose to issue a mb() before updating the
2706 * tail pointer. So that all reads will be completed before the
2707 * write is issued.
2708 */
2709 tail = ACCESS_ONCE(data->user_page->data_tail);
2710 smp_rmb();
2719 offset = head = atomic_long_read(&data->head); 2711 offset = head = atomic_long_read(&data->head);
2720 head += size; 2712 head += size;
2721 if (unlikely(!perf_output_space(data, offset, head))) 2713 if (unlikely(!perf_output_space(data, tail, offset, head)))
2722 goto fail; 2714 goto fail;
2723 } while (atomic_long_cmpxchg(&data->head, offset, head) != offset); 2715 } while (atomic_long_cmpxchg(&data->head, offset, head) != offset);
2724 2716
2725 handle->offset = offset; 2717 handle->offset = offset;
2726 handle->head = head; 2718 handle->head = head;
2727 2719
2728 if ((offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT)) 2720 if (head - tail > data->watermark)
2729 atomic_set(&data->wakeup, 1); 2721 atomic_set(&data->wakeup, 1);
2730 2722
2731 if (have_lost) { 2723 if (have_lost) {
@@ -2749,7 +2741,7 @@ out:
2749 return -ENOSPC; 2741 return -ENOSPC;
2750} 2742}
2751 2743
2752static void perf_output_end(struct perf_output_handle *handle) 2744void perf_output_end(struct perf_output_handle *handle)
2753{ 2745{
2754 struct perf_counter *counter = handle->counter; 2746 struct perf_counter *counter = handle->counter;
2755 struct perf_mmap_data *data = handle->data; 2747 struct perf_mmap_data *data = handle->data;
@@ -2863,156 +2855,176 @@ static void perf_output_read(struct perf_output_handle *handle,
2863 perf_output_read_one(handle, counter); 2855 perf_output_read_one(handle, counter);
2864} 2856}
2865 2857
2866void perf_counter_output(struct perf_counter *counter, int nmi, 2858void perf_output_sample(struct perf_output_handle *handle,
2867 struct perf_sample_data *data) 2859 struct perf_event_header *header,
2860 struct perf_sample_data *data,
2861 struct perf_counter *counter)
2868{ 2862{
2869 int ret; 2863 u64 sample_type = data->type;
2870 u64 sample_type = counter->attr.sample_type;
2871 struct perf_output_handle handle;
2872 struct perf_event_header header;
2873 u64 ip;
2874 struct {
2875 u32 pid, tid;
2876 } tid_entry;
2877 struct perf_callchain_entry *callchain = NULL;
2878 int callchain_size = 0;
2879 u64 time;
2880 struct {
2881 u32 cpu, reserved;
2882 } cpu_entry;
2883 2864
2884 header.type = PERF_EVENT_SAMPLE; 2865 perf_output_put(handle, *header);
2885 header.size = sizeof(header);
2886 2866
2887 header.misc = 0; 2867 if (sample_type & PERF_SAMPLE_IP)
2888 header.misc |= perf_misc_flags(data->regs); 2868 perf_output_put(handle, data->ip);
2889
2890 if (sample_type & PERF_SAMPLE_IP) {
2891 ip = perf_instruction_pointer(data->regs);
2892 header.size += sizeof(ip);
2893 }
2894
2895 if (sample_type & PERF_SAMPLE_TID) {
2896 /* namespace issues */
2897 tid_entry.pid = perf_counter_pid(counter, current);
2898 tid_entry.tid = perf_counter_tid(counter, current);
2899
2900 header.size += sizeof(tid_entry);
2901 }
2902 2869
2903 if (sample_type & PERF_SAMPLE_TIME) { 2870 if (sample_type & PERF_SAMPLE_TID)
2904 /* 2871 perf_output_put(handle, data->tid_entry);
2905 * Maybe do better on x86 and provide cpu_clock_nmi()
2906 */
2907 time = sched_clock();
2908 2872
2909 header.size += sizeof(u64); 2873 if (sample_type & PERF_SAMPLE_TIME)
2910 } 2874 perf_output_put(handle, data->time);
2911 2875
2912 if (sample_type & PERF_SAMPLE_ADDR) 2876 if (sample_type & PERF_SAMPLE_ADDR)
2913 header.size += sizeof(u64); 2877 perf_output_put(handle, data->addr);
2914 2878
2915 if (sample_type & PERF_SAMPLE_ID) 2879 if (sample_type & PERF_SAMPLE_ID)
2916 header.size += sizeof(u64); 2880 perf_output_put(handle, data->id);
2917 2881
2918 if (sample_type & PERF_SAMPLE_STREAM_ID) 2882 if (sample_type & PERF_SAMPLE_STREAM_ID)
2919 header.size += sizeof(u64); 2883 perf_output_put(handle, data->stream_id);
2920
2921 if (sample_type & PERF_SAMPLE_CPU) {
2922 header.size += sizeof(cpu_entry);
2923 2884
2924 cpu_entry.cpu = raw_smp_processor_id(); 2885 if (sample_type & PERF_SAMPLE_CPU)
2925 cpu_entry.reserved = 0; 2886 perf_output_put(handle, data->cpu_entry);
2926 }
2927 2887
2928 if (sample_type & PERF_SAMPLE_PERIOD) 2888 if (sample_type & PERF_SAMPLE_PERIOD)
2929 header.size += sizeof(u64); 2889 perf_output_put(handle, data->period);
2930 2890
2931 if (sample_type & PERF_SAMPLE_READ) 2891 if (sample_type & PERF_SAMPLE_READ)
2932 header.size += perf_counter_read_size(counter); 2892 perf_output_read(handle, counter);
2933 2893
2934 if (sample_type & PERF_SAMPLE_CALLCHAIN) { 2894 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
2935 callchain = perf_callchain(data->regs); 2895 if (data->callchain) {
2896 int size = 1;
2936 2897
2937 if (callchain) { 2898 if (data->callchain)
2938 callchain_size = (1 + callchain->nr) * sizeof(u64); 2899 size += data->callchain->nr;
2939 header.size += callchain_size; 2900
2940 } else 2901 size *= sizeof(u64);
2941 header.size += sizeof(u64); 2902
2903 perf_output_copy(handle, data->callchain, size);
2904 } else {
2905 u64 nr = 0;
2906 perf_output_put(handle, nr);
2907 }
2942 } 2908 }
2943 2909
2944 if (sample_type & PERF_SAMPLE_RAW) { 2910 if (sample_type & PERF_SAMPLE_RAW) {
2945 int size = sizeof(u32); 2911 if (data->raw) {
2912 perf_output_put(handle, data->raw->size);
2913 perf_output_copy(handle, data->raw->data,
2914 data->raw->size);
2915 } else {
2916 struct {
2917 u32 size;
2918 u32 data;
2919 } raw = {
2920 .size = sizeof(u32),
2921 .data = 0,
2922 };
2923 perf_output_put(handle, raw);
2924 }
2925 }
2926}
2946 2927
2947 if (data->raw) 2928void perf_prepare_sample(struct perf_event_header *header,
2948 size += data->raw->size; 2929 struct perf_sample_data *data,
2949 else 2930 struct perf_counter *counter,
2950 size += sizeof(u32); 2931 struct pt_regs *regs)
2932{
2933 u64 sample_type = counter->attr.sample_type;
2951 2934
2952 WARN_ON_ONCE(size & (sizeof(u64)-1)); 2935 data->type = sample_type;
2953 header.size += size;
2954 }
2955 2936
2956 ret = perf_output_begin(&handle, counter, header.size, nmi, 1); 2937 header->type = PERF_EVENT_SAMPLE;
2957 if (ret) 2938 header->size = sizeof(*header);
2958 return;
2959 2939
2960 perf_output_put(&handle, header); 2940 header->misc = 0;
2941 header->misc |= perf_misc_flags(regs);
2961 2942
2962 if (sample_type & PERF_SAMPLE_IP) 2943 if (sample_type & PERF_SAMPLE_IP) {
2963 perf_output_put(&handle, ip); 2944 data->ip = perf_instruction_pointer(regs);
2964 2945
2965 if (sample_type & PERF_SAMPLE_TID) 2946 header->size += sizeof(data->ip);
2966 perf_output_put(&handle, tid_entry); 2947 }
2967 2948
2968 if (sample_type & PERF_SAMPLE_TIME) 2949 if (sample_type & PERF_SAMPLE_TID) {
2969 perf_output_put(&handle, time); 2950 /* namespace issues */
2951 data->tid_entry.pid = perf_counter_pid(counter, current);
2952 data->tid_entry.tid = perf_counter_tid(counter, current);
2953
2954 header->size += sizeof(data->tid_entry);
2955 }
2956
2957 if (sample_type & PERF_SAMPLE_TIME) {
2958 data->time = perf_clock();
2959
2960 header->size += sizeof(data->time);
2961 }
2970 2962
2971 if (sample_type & PERF_SAMPLE_ADDR) 2963 if (sample_type & PERF_SAMPLE_ADDR)
2972 perf_output_put(&handle, data->addr); 2964 header->size += sizeof(data->addr);
2973 2965
2974 if (sample_type & PERF_SAMPLE_ID) { 2966 if (sample_type & PERF_SAMPLE_ID) {
2975 u64 id = primary_counter_id(counter); 2967 data->id = primary_counter_id(counter);
2976 2968
2977 perf_output_put(&handle, id); 2969 header->size += sizeof(data->id);
2978 } 2970 }
2979 2971
2980 if (sample_type & PERF_SAMPLE_STREAM_ID) 2972 if (sample_type & PERF_SAMPLE_STREAM_ID) {
2981 perf_output_put(&handle, counter->id); 2973 data->stream_id = counter->id;
2982 2974
2983 if (sample_type & PERF_SAMPLE_CPU) 2975 header->size += sizeof(data->stream_id);
2984 perf_output_put(&handle, cpu_entry); 2976 }
2977
2978 if (sample_type & PERF_SAMPLE_CPU) {
2979 data->cpu_entry.cpu = raw_smp_processor_id();
2980 data->cpu_entry.reserved = 0;
2981
2982 header->size += sizeof(data->cpu_entry);
2983 }
2985 2984
2986 if (sample_type & PERF_SAMPLE_PERIOD) 2985 if (sample_type & PERF_SAMPLE_PERIOD)
2987 perf_output_put(&handle, data->period); 2986 header->size += sizeof(data->period);
2988 2987
2989 if (sample_type & PERF_SAMPLE_READ) 2988 if (sample_type & PERF_SAMPLE_READ)
2990 perf_output_read(&handle, counter); 2989 header->size += perf_counter_read_size(counter);
2991 2990
2992 if (sample_type & PERF_SAMPLE_CALLCHAIN) { 2991 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
2993 if (callchain) 2992 int size = 1;
2994 perf_output_copy(&handle, callchain, callchain_size); 2993
2995 else { 2994 data->callchain = perf_callchain(regs);
2996 u64 nr = 0; 2995
2997 perf_output_put(&handle, nr); 2996 if (data->callchain)
2998 } 2997 size += data->callchain->nr;
2998
2999 header->size += size * sizeof(u64);
2999 } 3000 }
3000 3001
3001 if (sample_type & PERF_SAMPLE_RAW) { 3002 if (sample_type & PERF_SAMPLE_RAW) {
3002 if (data->raw) { 3003 int size = sizeof(u32);
3003 perf_output_put(&handle, data->raw->size); 3004
3004 perf_output_copy(&handle, data->raw->data, data->raw->size); 3005 if (data->raw)
3005 } else { 3006 size += data->raw->size;
3006 struct { 3007 else
3007 u32 size; 3008 size += sizeof(u32);
3008 u32 data; 3009
3009 } raw = { 3010 WARN_ON_ONCE(size & (sizeof(u64)-1));
3010 .size = sizeof(u32), 3011 header->size += size;
3011 .data = 0,
3012 };
3013 perf_output_put(&handle, raw);
3014 }
3015 } 3012 }
3013}
3014
3015static void perf_counter_output(struct perf_counter *counter, int nmi,
3016 struct perf_sample_data *data,
3017 struct pt_regs *regs)
3018{
3019 struct perf_output_handle handle;
3020 struct perf_event_header header;
3021
3022 perf_prepare_sample(&header, data, counter, regs);
3023
3024 if (perf_output_begin(&handle, counter, header.size, nmi, 1))
3025 return;
3026
3027 perf_output_sample(&handle, &header, data, counter);
3016 3028
3017 perf_output_end(&handle); 3029 perf_output_end(&handle);
3018} 3030}
@@ -3071,6 +3083,7 @@ struct perf_task_event {
3071 u32 ppid; 3083 u32 ppid;
3072 u32 tid; 3084 u32 tid;
3073 u32 ptid; 3085 u32 ptid;
3086 u64 time;
3074 } event; 3087 } event;
3075}; 3088};
3076 3089
@@ -3078,9 +3091,12 @@ static void perf_counter_task_output(struct perf_counter *counter,
3078 struct perf_task_event *task_event) 3091 struct perf_task_event *task_event)
3079{ 3092{
3080 struct perf_output_handle handle; 3093 struct perf_output_handle handle;
3081 int size = task_event->event.header.size; 3094 int size;
3082 struct task_struct *task = task_event->task; 3095 struct task_struct *task = task_event->task;
3083 int ret = perf_output_begin(&handle, counter, size, 0, 0); 3096 int ret;
3097
3098 size = task_event->event.header.size;
3099 ret = perf_output_begin(&handle, counter, size, 0, 0);
3084 3100
3085 if (ret) 3101 if (ret)
3086 return; 3102 return;
@@ -3091,7 +3107,10 @@ static void perf_counter_task_output(struct perf_counter *counter,
3091 task_event->event.tid = perf_counter_tid(counter, task); 3107 task_event->event.tid = perf_counter_tid(counter, task);
3092 task_event->event.ptid = perf_counter_tid(counter, current); 3108 task_event->event.ptid = perf_counter_tid(counter, current);
3093 3109
3110 task_event->event.time = perf_clock();
3111
3094 perf_output_put(&handle, task_event->event); 3112 perf_output_put(&handle, task_event->event);
3113
3095 perf_output_end(&handle); 3114 perf_output_end(&handle);
3096} 3115}
3097 3116
@@ -3473,7 +3492,7 @@ static void perf_log_throttle(struct perf_counter *counter, int enable)
3473 .misc = 0, 3492 .misc = 0,
3474 .size = sizeof(throttle_event), 3493 .size = sizeof(throttle_event),
3475 }, 3494 },
3476 .time = sched_clock(), 3495 .time = perf_clock(),
3477 .id = primary_counter_id(counter), 3496 .id = primary_counter_id(counter),
3478 .stream_id = counter->id, 3497 .stream_id = counter->id,
3479 }; 3498 };
@@ -3493,14 +3512,16 @@ static void perf_log_throttle(struct perf_counter *counter, int enable)
3493 * Generic counter overflow handling, sampling. 3512 * Generic counter overflow handling, sampling.
3494 */ 3513 */
3495 3514
3496int perf_counter_overflow(struct perf_counter *counter, int nmi, 3515static int __perf_counter_overflow(struct perf_counter *counter, int nmi,
3497 struct perf_sample_data *data) 3516 int throttle, struct perf_sample_data *data,
3517 struct pt_regs *regs)
3498{ 3518{
3499 int events = atomic_read(&counter->event_limit); 3519 int events = atomic_read(&counter->event_limit);
3500 int throttle = counter->pmu->unthrottle != NULL;
3501 struct hw_perf_counter *hwc = &counter->hw; 3520 struct hw_perf_counter *hwc = &counter->hw;
3502 int ret = 0; 3521 int ret = 0;
3503 3522
3523 throttle = (throttle && counter->pmu->unthrottle != NULL);
3524
3504 if (!throttle) { 3525 if (!throttle) {
3505 hwc->interrupts++; 3526 hwc->interrupts++;
3506 } else { 3527 } else {
@@ -3523,7 +3544,7 @@ int perf_counter_overflow(struct perf_counter *counter, int nmi,
3523 } 3544 }
3524 3545
3525 if (counter->attr.freq) { 3546 if (counter->attr.freq) {
3526 u64 now = sched_clock(); 3547 u64 now = perf_clock();
3527 s64 delta = now - hwc->freq_stamp; 3548 s64 delta = now - hwc->freq_stamp;
3528 3549
3529 hwc->freq_stamp = now; 3550 hwc->freq_stamp = now;
@@ -3549,10 +3570,17 @@ int perf_counter_overflow(struct perf_counter *counter, int nmi,
3549 perf_counter_disable(counter); 3570 perf_counter_disable(counter);
3550 } 3571 }
3551 3572
3552 perf_counter_output(counter, nmi, data); 3573 perf_counter_output(counter, nmi, data, regs);
3553 return ret; 3574 return ret;
3554} 3575}
3555 3576
3577int perf_counter_overflow(struct perf_counter *counter, int nmi,
3578 struct perf_sample_data *data,
3579 struct pt_regs *regs)
3580{
3581 return __perf_counter_overflow(counter, nmi, 1, data, regs);
3582}
3583
3556/* 3584/*
3557 * Generic software counter infrastructure 3585 * Generic software counter infrastructure
3558 */ 3586 */
@@ -3588,9 +3616,11 @@ again:
3588} 3616}
3589 3617
3590static void perf_swcounter_overflow(struct perf_counter *counter, 3618static void perf_swcounter_overflow(struct perf_counter *counter,
3591 int nmi, struct perf_sample_data *data) 3619 int nmi, struct perf_sample_data *data,
3620 struct pt_regs *regs)
3592{ 3621{
3593 struct hw_perf_counter *hwc = &counter->hw; 3622 struct hw_perf_counter *hwc = &counter->hw;
3623 int throttle = 0;
3594 u64 overflow; 3624 u64 overflow;
3595 3625
3596 data->period = counter->hw.last_period; 3626 data->period = counter->hw.last_period;
@@ -3600,13 +3630,15 @@ static void perf_swcounter_overflow(struct perf_counter *counter,
3600 return; 3630 return;
3601 3631
3602 for (; overflow; overflow--) { 3632 for (; overflow; overflow--) {
3603 if (perf_counter_overflow(counter, nmi, data)) { 3633 if (__perf_counter_overflow(counter, nmi, throttle,
3634 data, regs)) {
3604 /* 3635 /*
3605 * We inhibit the overflow from happening when 3636 * We inhibit the overflow from happening when
3606 * hwc->interrupts == MAX_INTERRUPTS. 3637 * hwc->interrupts == MAX_INTERRUPTS.
3607 */ 3638 */
3608 break; 3639 break;
3609 } 3640 }
3641 throttle = 1;
3610 } 3642 }
3611} 3643}
3612 3644
@@ -3618,7 +3650,8 @@ static void perf_swcounter_unthrottle(struct perf_counter *counter)
3618} 3650}
3619 3651
3620static void perf_swcounter_add(struct perf_counter *counter, u64 nr, 3652static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
3621 int nmi, struct perf_sample_data *data) 3653 int nmi, struct perf_sample_data *data,
3654 struct pt_regs *regs)
3622{ 3655{
3623 struct hw_perf_counter *hwc = &counter->hw; 3656 struct hw_perf_counter *hwc = &counter->hw;
3624 3657
@@ -3627,11 +3660,11 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
3627 if (!hwc->sample_period) 3660 if (!hwc->sample_period)
3628 return; 3661 return;
3629 3662
3630 if (!data->regs) 3663 if (!regs)
3631 return; 3664 return;
3632 3665
3633 if (!atomic64_add_negative(nr, &hwc->period_left)) 3666 if (!atomic64_add_negative(nr, &hwc->period_left))
3634 perf_swcounter_overflow(counter, nmi, data); 3667 perf_swcounter_overflow(counter, nmi, data, regs);
3635} 3668}
3636 3669
3637static int perf_swcounter_is_counting(struct perf_counter *counter) 3670static int perf_swcounter_is_counting(struct perf_counter *counter)
@@ -3690,7 +3723,8 @@ static int perf_swcounter_match(struct perf_counter *counter,
3690static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, 3723static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
3691 enum perf_type_id type, 3724 enum perf_type_id type,
3692 u32 event, u64 nr, int nmi, 3725 u32 event, u64 nr, int nmi,
3693 struct perf_sample_data *data) 3726 struct perf_sample_data *data,
3727 struct pt_regs *regs)
3694{ 3728{
3695 struct perf_counter *counter; 3729 struct perf_counter *counter;
3696 3730
@@ -3699,8 +3733,8 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
3699 3733
3700 rcu_read_lock(); 3734 rcu_read_lock();
3701 list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { 3735 list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
3702 if (perf_swcounter_match(counter, type, event, data->regs)) 3736 if (perf_swcounter_match(counter, type, event, regs))
3703 perf_swcounter_add(counter, nr, nmi, data); 3737 perf_swcounter_add(counter, nr, nmi, data, regs);
3704 } 3738 }
3705 rcu_read_unlock(); 3739 rcu_read_unlock();
3706} 3740}
@@ -3721,7 +3755,8 @@ static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx)
3721 3755
3722static void do_perf_swcounter_event(enum perf_type_id type, u32 event, 3756static void do_perf_swcounter_event(enum perf_type_id type, u32 event,
3723 u64 nr, int nmi, 3757 u64 nr, int nmi,
3724 struct perf_sample_data *data) 3758 struct perf_sample_data *data,
3759 struct pt_regs *regs)
3725{ 3760{
3726 struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); 3761 struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
3727 int *recursion = perf_swcounter_recursion_context(cpuctx); 3762 int *recursion = perf_swcounter_recursion_context(cpuctx);
@@ -3734,7 +3769,7 @@ static void do_perf_swcounter_event(enum perf_type_id type, u32 event,
3734 barrier(); 3769 barrier();
3735 3770
3736 perf_swcounter_ctx_event(&cpuctx->ctx, type, event, 3771 perf_swcounter_ctx_event(&cpuctx->ctx, type, event,
3737 nr, nmi, data); 3772 nr, nmi, data, regs);
3738 rcu_read_lock(); 3773 rcu_read_lock();
3739 /* 3774 /*
3740 * doesn't really matter which of the child contexts the 3775 * doesn't really matter which of the child contexts the
@@ -3742,7 +3777,7 @@ static void do_perf_swcounter_event(enum perf_type_id type, u32 event,
3742 */ 3777 */
3743 ctx = rcu_dereference(current->perf_counter_ctxp); 3778 ctx = rcu_dereference(current->perf_counter_ctxp);
3744 if (ctx) 3779 if (ctx)
3745 perf_swcounter_ctx_event(ctx, type, event, nr, nmi, data); 3780 perf_swcounter_ctx_event(ctx, type, event, nr, nmi, data, regs);
3746 rcu_read_unlock(); 3781 rcu_read_unlock();
3747 3782
3748 barrier(); 3783 barrier();
@@ -3756,11 +3791,11 @@ void __perf_swcounter_event(u32 event, u64 nr, int nmi,
3756 struct pt_regs *regs, u64 addr) 3791 struct pt_regs *regs, u64 addr)
3757{ 3792{
3758 struct perf_sample_data data = { 3793 struct perf_sample_data data = {
3759 .regs = regs,
3760 .addr = addr, 3794 .addr = addr,
3761 }; 3795 };
3762 3796
3763 do_perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, &data); 3797 do_perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi,
3798 &data, regs);
3764} 3799}
3765 3800
3766static void perf_swcounter_read(struct perf_counter *counter) 3801static void perf_swcounter_read(struct perf_counter *counter)
@@ -3797,6 +3832,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
3797{ 3832{
3798 enum hrtimer_restart ret = HRTIMER_RESTART; 3833 enum hrtimer_restart ret = HRTIMER_RESTART;
3799 struct perf_sample_data data; 3834 struct perf_sample_data data;
3835 struct pt_regs *regs;
3800 struct perf_counter *counter; 3836 struct perf_counter *counter;
3801 u64 period; 3837 u64 period;
3802 3838
@@ -3804,17 +3840,17 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
3804 counter->pmu->read(counter); 3840 counter->pmu->read(counter);
3805 3841
3806 data.addr = 0; 3842 data.addr = 0;
3807 data.regs = get_irq_regs(); 3843 regs = get_irq_regs();
3808 /* 3844 /*
3809 * In case we exclude kernel IPs or are somehow not in interrupt 3845 * In case we exclude kernel IPs or are somehow not in interrupt
3810 * context, provide the next best thing, the user IP. 3846 * context, provide the next best thing, the user IP.
3811 */ 3847 */
3812 if ((counter->attr.exclude_kernel || !data.regs) && 3848 if ((counter->attr.exclude_kernel || !regs) &&
3813 !counter->attr.exclude_user) 3849 !counter->attr.exclude_user)
3814 data.regs = task_pt_regs(current); 3850 regs = task_pt_regs(current);
3815 3851
3816 if (data.regs) { 3852 if (regs) {
3817 if (perf_counter_overflow(counter, 0, &data)) 3853 if (perf_counter_overflow(counter, 0, &data, regs))
3818 ret = HRTIMER_NORESTART; 3854 ret = HRTIMER_NORESTART;
3819 } 3855 }
3820 3856
@@ -3950,15 +3986,17 @@ void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record,
3950 }; 3986 };
3951 3987
3952 struct perf_sample_data data = { 3988 struct perf_sample_data data = {
3953 .regs = get_irq_regs(),
3954 .addr = addr, 3989 .addr = addr,
3955 .raw = &raw, 3990 .raw = &raw,
3956 }; 3991 };
3957 3992
3958 if (!data.regs) 3993 struct pt_regs *regs = get_irq_regs();
3959 data.regs = task_pt_regs(current); 3994
3995 if (!regs)
3996 regs = task_pt_regs(current);
3960 3997
3961 do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data); 3998 do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
3999 &data, regs);
3962} 4000}
3963EXPORT_SYMBOL_GPL(perf_tpcounter_event); 4001EXPORT_SYMBOL_GPL(perf_tpcounter_event);
3964 4002
@@ -4170,8 +4208,8 @@ done:
4170static int perf_copy_attr(struct perf_counter_attr __user *uattr, 4208static int perf_copy_attr(struct perf_counter_attr __user *uattr,
4171 struct perf_counter_attr *attr) 4209 struct perf_counter_attr *attr)
4172{ 4210{
4173 int ret;
4174 u32 size; 4211 u32 size;
4212 int ret;
4175 4213
4176 if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0)) 4214 if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0))
4177 return -EFAULT; 4215 return -EFAULT;
@@ -4196,19 +4234,19 @@ static int perf_copy_attr(struct perf_counter_attr __user *uattr,
4196 4234
4197 /* 4235 /*
4198 * If we're handed a bigger struct than we know of, 4236 * If we're handed a bigger struct than we know of,
4199 * ensure all the unknown bits are 0. 4237 * ensure all the unknown bits are 0 - i.e. new
4238 * user-space does not rely on any kernel feature
4239 * extensions we dont know about yet.
4200 */ 4240 */
4201 if (size > sizeof(*attr)) { 4241 if (size > sizeof(*attr)) {
4202 unsigned long val; 4242 unsigned char __user *addr;
4203 unsigned long __user *addr; 4243 unsigned char __user *end;
4204 unsigned long __user *end; 4244 unsigned char val;
4205 4245
4206 addr = PTR_ALIGN((void __user *)uattr + sizeof(*attr), 4246 addr = (void __user *)uattr + sizeof(*attr);
4207 sizeof(unsigned long)); 4247 end = (void __user *)uattr + size;
4208 end = PTR_ALIGN((void __user *)uattr + size,
4209 sizeof(unsigned long));
4210 4248
4211 for (; addr < end; addr += sizeof(unsigned long)) { 4249 for (; addr < end; addr++) {
4212 ret = get_user(val, addr); 4250 ret = get_user(val, addr);
4213 if (ret) 4251 if (ret)
4214 return ret; 4252 return ret;
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index e1d16c9a7680..ac2e1dc708bd 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -48,13 +48,6 @@ static __read_mostly int sched_clock_running;
48__read_mostly int sched_clock_stable; 48__read_mostly int sched_clock_stable;
49 49
50struct sched_clock_data { 50struct sched_clock_data {
51 /*
52 * Raw spinlock - this is a special case: this might be called
53 * from within instrumentation code so we dont want to do any
54 * instrumentation ourselves.
55 */
56 raw_spinlock_t lock;
57
58 u64 tick_raw; 51 u64 tick_raw;
59 u64 tick_gtod; 52 u64 tick_gtod;
60 u64 clock; 53 u64 clock;
@@ -80,7 +73,6 @@ void sched_clock_init(void)
80 for_each_possible_cpu(cpu) { 73 for_each_possible_cpu(cpu) {
81 struct sched_clock_data *scd = cpu_sdc(cpu); 74 struct sched_clock_data *scd = cpu_sdc(cpu);
82 75
83 scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
84 scd->tick_raw = 0; 76 scd->tick_raw = 0;
85 scd->tick_gtod = ktime_now; 77 scd->tick_gtod = ktime_now;
86 scd->clock = ktime_now; 78 scd->clock = ktime_now;
@@ -109,14 +101,19 @@ static inline u64 wrap_max(u64 x, u64 y)
109 * - filter out backward motion 101 * - filter out backward motion
110 * - use the GTOD tick value to create a window to filter crazy TSC values 102 * - use the GTOD tick value to create a window to filter crazy TSC values
111 */ 103 */
112static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now) 104static u64 sched_clock_local(struct sched_clock_data *scd)
113{ 105{
114 s64 delta = now - scd->tick_raw; 106 u64 now, clock, old_clock, min_clock, max_clock;
115 u64 clock, min_clock, max_clock; 107 s64 delta;
116 108
109again:
110 now = sched_clock();
111 delta = now - scd->tick_raw;
117 if (unlikely(delta < 0)) 112 if (unlikely(delta < 0))
118 delta = 0; 113 delta = 0;
119 114
115 old_clock = scd->clock;
116
120 /* 117 /*
121 * scd->clock = clamp(scd->tick_gtod + delta, 118 * scd->clock = clamp(scd->tick_gtod + delta,
122 * max(scd->tick_gtod, scd->clock), 119 * max(scd->tick_gtod, scd->clock),
@@ -124,84 +121,73 @@ static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
124 */ 121 */
125 122
126 clock = scd->tick_gtod + delta; 123 clock = scd->tick_gtod + delta;
127 min_clock = wrap_max(scd->tick_gtod, scd->clock); 124 min_clock = wrap_max(scd->tick_gtod, old_clock);
128 max_clock = wrap_max(scd->clock, scd->tick_gtod + TICK_NSEC); 125 max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC);
129 126
130 clock = wrap_max(clock, min_clock); 127 clock = wrap_max(clock, min_clock);
131 clock = wrap_min(clock, max_clock); 128 clock = wrap_min(clock, max_clock);
132 129
133 scd->clock = clock; 130 if (cmpxchg(&scd->clock, old_clock, clock) != old_clock)
131 goto again;
134 132
135 return scd->clock; 133 return clock;
136} 134}
137 135
138static void lock_double_clock(struct sched_clock_data *data1, 136static u64 sched_clock_remote(struct sched_clock_data *scd)
139 struct sched_clock_data *data2)
140{ 137{
141 if (data1 < data2) { 138 struct sched_clock_data *my_scd = this_scd();
142 __raw_spin_lock(&data1->lock); 139 u64 this_clock, remote_clock;
143 __raw_spin_lock(&data2->lock); 140 u64 *ptr, old_val, val;
141
142 sched_clock_local(my_scd);
143again:
144 this_clock = my_scd->clock;
145 remote_clock = scd->clock;
146
147 /*
148 * Use the opportunity that we have both locks
149 * taken to couple the two clocks: we take the
150 * larger time as the latest time for both
151 * runqueues. (this creates monotonic movement)
152 */
153 if (likely((s64)(remote_clock - this_clock) < 0)) {
154 ptr = &scd->clock;
155 old_val = remote_clock;
156 val = this_clock;
144 } else { 157 } else {
145 __raw_spin_lock(&data2->lock); 158 /*
146 __raw_spin_lock(&data1->lock); 159 * Should be rare, but possible:
160 */
161 ptr = &my_scd->clock;
162 old_val = this_clock;
163 val = remote_clock;
147 } 164 }
165
166 if (cmpxchg(ptr, old_val, val) != old_val)
167 goto again;
168
169 return val;
148} 170}
149 171
150u64 sched_clock_cpu(int cpu) 172u64 sched_clock_cpu(int cpu)
151{ 173{
152 u64 now, clock, this_clock, remote_clock;
153 struct sched_clock_data *scd; 174 struct sched_clock_data *scd;
175 u64 clock;
176
177 WARN_ON_ONCE(!irqs_disabled());
154 178
155 if (sched_clock_stable) 179 if (sched_clock_stable)
156 return sched_clock(); 180 return sched_clock();
157 181
158 scd = cpu_sdc(cpu);
159
160 /*
161 * Normally this is not called in NMI context - but if it is,
162 * trying to do any locking here is totally lethal.
163 */
164 if (unlikely(in_nmi()))
165 return scd->clock;
166
167 if (unlikely(!sched_clock_running)) 182 if (unlikely(!sched_clock_running))
168 return 0ull; 183 return 0ull;
169 184
170 WARN_ON_ONCE(!irqs_disabled()); 185 scd = cpu_sdc(cpu);
171 now = sched_clock();
172
173 if (cpu != raw_smp_processor_id()) {
174 struct sched_clock_data *my_scd = this_scd();
175
176 lock_double_clock(scd, my_scd);
177
178 this_clock = __update_sched_clock(my_scd, now);
179 remote_clock = scd->clock;
180
181 /*
182 * Use the opportunity that we have both locks
183 * taken to couple the two clocks: we take the
184 * larger time as the latest time for both
185 * runqueues. (this creates monotonic movement)
186 */
187 if (likely((s64)(remote_clock - this_clock) < 0)) {
188 clock = this_clock;
189 scd->clock = clock;
190 } else {
191 /*
192 * Should be rare, but possible:
193 */
194 clock = remote_clock;
195 my_scd->clock = remote_clock;
196 }
197
198 __raw_spin_unlock(&my_scd->lock);
199 } else {
200 __raw_spin_lock(&scd->lock);
201 clock = __update_sched_clock(scd, now);
202 }
203 186
204 __raw_spin_unlock(&scd->lock); 187 if (cpu != smp_processor_id())
188 clock = sched_clock_remote(scd);
189 else
190 clock = sched_clock_local(scd);
205 191
206 return clock; 192 return clock;
207} 193}
@@ -223,11 +209,9 @@ void sched_clock_tick(void)
223 now_gtod = ktime_to_ns(ktime_get()); 209 now_gtod = ktime_to_ns(ktime_get());
224 now = sched_clock(); 210 now = sched_clock();
225 211
226 __raw_spin_lock(&scd->lock);
227 scd->tick_raw = now; 212 scd->tick_raw = now;
228 scd->tick_gtod = now_gtod; 213 scd->tick_gtod = now_gtod;
229 __update_sched_clock(scd, now); 214 sched_clock_local(scd);
230 __raw_spin_unlock(&scd->lock);
231} 215}
232 216
233/* 217/*
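
[editor's note] The sched_clock.c rewrite above drops the per-CPU raw spinlock entirely: sched_clock_local() computes a clamped candidate from a snapshot of scd->clock and publishes it with cmpxchg(), retrying if another CPU (or an NMI) updated the clock in the meantime, and sched_clock_remote() uses the same compare-and-swap to couple the local and remote clocks monotonically. A user-space sketch of that publish-or-retry pattern, with GCC's __sync_val_compare_and_swap standing in for the kernel's cmpxchg() (an assumption made only to keep the example self-contained):

	#include <stdint.h>

	/*
	 * Publish 'candidate' into *clock without a lock: never move the
	 * clock backwards, and retry if another thread raced us between the
	 * snapshot read and the compare-and-swap.
	 */
	static uint64_t publish_monotonic(volatile uint64_t *clock, uint64_t candidate)
	{
		uint64_t old;

		for (;;) {
			old = *clock;
			if (candidate < old)		/* keep monotonic movement */
				candidate = old;
			/* Publish only if nobody updated the clock meanwhile. */
			if (__sync_val_compare_and_swap(clock, old, candidate) == old)
				return candidate;
		}
	}
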
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 10d218ab69f2..990b188803ce 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -513,6 +513,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
513 if (entity_is_task(curr)) { 513 if (entity_is_task(curr)) {
514 struct task_struct *curtask = task_of(curr); 514 struct task_struct *curtask = task_of(curr);
515 515
516 trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
516 cpuacct_charge(curtask, delta_exec); 517 cpuacct_charge(curtask, delta_exec);
517 account_group_exec_runtime(curtask, delta_exec); 518 account_group_exec_runtime(curtask, delta_exec);
518 } 519 }
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 844164dca90a..26f03ac07c2b 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -42,7 +42,6 @@ obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
42obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o 42obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
43obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o 43obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
44obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o 44obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
45obj-$(CONFIG_POWER_TRACER) += trace_power.o
46obj-$(CONFIG_KMEMTRACE) += kmemtrace.o 45obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
47obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o 46obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
48obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o 47obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
@@ -54,5 +53,6 @@ obj-$(CONFIG_EVENT_TRACING) += trace_export.o
54obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o 53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
55obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o 54obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
56obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 55obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
56obj-$(CONFIG_EVENT_TRACING) += power-traces.o
57 57
58libftrace-y := ftrace.o 58libftrace-y := ftrace.o
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
new file mode 100644
index 000000000000..e06c6e3d56a3
--- /dev/null
+++ b/kernel/trace/power-traces.c
@@ -0,0 +1,20 @@
1/*
2 * Power trace points
3 *
4 * Copyright (C) 2009 Arjan van de Ven <arjan@linux.intel.com>
5 */
6
7#include <linux/string.h>
8#include <linux/types.h>
9#include <linux/workqueue.h>
10#include <linux/sched.h>
11#include <linux/module.h>
12#include <linux/slab.h>
13
14#define CREATE_TRACE_POINTS
15#include <trace/events/power.h>
16
17EXPORT_TRACEPOINT_SYMBOL_GPL(power_start);
18EXPORT_TRACEPOINT_SYMBOL_GPL(power_end);
19EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency);
20
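
[editor's note] power-traces.c follows the standard TRACE_EVENT instantiation pattern: the events themselves are declared in include/trace/events/power.h (added elsewhere in this series), defining CREATE_TRACE_POINTS before including that header emits the actual tracepoint instances exactly once, and EXPORT_TRACEPOINT_SYMBOL_GPL() lets modules attach probes. A generic sketch of the same two-file pattern for a hypothetical 'foo_bar' event (illustrative only, not part of this patch):

	/* include/trace/events/foo.h -- hypothetical example header */
	#undef TRACE_SYSTEM
	#define TRACE_SYSTEM foo

	#if !defined(_TRACE_FOO_H) || defined(TRACE_HEADER_MULTI_READ)
	#define _TRACE_FOO_H

	#include <linux/tracepoint.h>

	TRACE_EVENT(foo_bar,

		TP_PROTO(unsigned int value),

		TP_ARGS(value),

		TP_STRUCT__entry(
			__field(	u32,	value	)
		),

		TP_fast_assign(
			__entry->value = value;
		),

		TP_printk("value=%u", __entry->value)
	);

	#endif /* _TRACE_FOO_H */

	/* This part must be outside protection */
	#include <trace/define_trace.h>

	/* kernel/trace/foo-traces.c -- instantiate and export the tracepoint */
	#define CREATE_TRACE_POINTS
	#include <trace/events/foo.h>

	EXPORT_TRACEPOINT_SYMBOL_GPL(foo_bar);
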
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 86bcff94791a..405cb850b75d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -11,7 +11,6 @@
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <trace/boot.h> 12#include <trace/boot.h>
13#include <linux/kmemtrace.h> 13#include <linux/kmemtrace.h>
14#include <trace/power.h>
15 14
16#include <linux/trace_seq.h> 15#include <linux/trace_seq.h>
17#include <linux/ftrace_event.h> 16#include <linux/ftrace_event.h>
@@ -37,7 +36,6 @@ enum trace_type {
37 TRACE_HW_BRANCHES, 36 TRACE_HW_BRANCHES,
38 TRACE_KMEM_ALLOC, 37 TRACE_KMEM_ALLOC,
39 TRACE_KMEM_FREE, 38 TRACE_KMEM_FREE,
40 TRACE_POWER,
41 TRACE_BLK, 39 TRACE_BLK,
42 40
43 __TRACE_LAST_TYPE, 41 __TRACE_LAST_TYPE,
@@ -207,7 +205,6 @@ extern void __ftrace_bad_type(void);
207 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ 205 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \
208 TRACE_GRAPH_RET); \ 206 TRACE_GRAPH_RET); \
209 IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\ 207 IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
210 IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
211 IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \ 208 IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \
212 TRACE_KMEM_ALLOC); \ 209 TRACE_KMEM_ALLOC); \
213 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ 210 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index a431748ddd6e..ead3d724599d 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -330,23 +330,6 @@ FTRACE_ENTRY(hw_branch, hw_branch_entry,
330 F_printk("from: %llx to: %llx", __entry->from, __entry->to) 330 F_printk("from: %llx to: %llx", __entry->from, __entry->to)
331); 331);
332 332
333FTRACE_ENTRY(power, trace_power,
334
335 TRACE_POWER,
336
337 F_STRUCT(
338 __field_struct( struct power_trace, state_data )
339 __field_desc( s64, state_data, stamp )
340 __field_desc( s64, state_data, end )
341 __field_desc( int, state_data, type )
342 __field_desc( int, state_data, state )
343 ),
344
345 F_printk("%llx->%llx type:%u state:%u",
346 __entry->stamp, __entry->end,
347 __entry->type, __entry->state)
348);
349
350FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry, 333FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry,
351 334
352 TRACE_KMEM_ALLOC, 335 TRACE_KMEM_ALLOC,
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
deleted file mode 100644
index fe1a00f1445a..000000000000
--- a/kernel/trace/trace_power.c
+++ /dev/null
@@ -1,218 +0,0 @@
1/*
2 * ring buffer based C-state tracer
3 *
4 * Arjan van de Ven <arjan@linux.intel.com>
5 * Copyright (C) 2008 Intel Corporation
6 *
7 * Much is borrowed from trace_boot.c which is
8 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
9 *
10 */
11
12#include <linux/init.h>
13#include <linux/debugfs.h>
14#include <trace/power.h>
15#include <linux/kallsyms.h>
16#include <linux/module.h>
17
18#include "trace.h"
19#include "trace_output.h"
20
21static struct trace_array *power_trace;
22static int __read_mostly trace_power_enabled;
23
24static void probe_power_start(struct power_trace *it, unsigned int type,
25 unsigned int level)
26{
27 if (!trace_power_enabled)
28 return;
29
30 memset(it, 0, sizeof(struct power_trace));
31 it->state = level;
32 it->type = type;
33 it->stamp = ktime_get();
34}
35
36
37static void probe_power_end(struct power_trace *it)
38{
39 struct ftrace_event_call *call = &event_power;
40 struct ring_buffer_event *event;
41 struct ring_buffer *buffer;
42 struct trace_power *entry;
43 struct trace_array_cpu *data;
44 struct trace_array *tr = power_trace;
45
46 if (!trace_power_enabled)
47 return;
48
49 buffer = tr->buffer;
50
51 preempt_disable();
52 it->end = ktime_get();
53 data = tr->data[smp_processor_id()];
54
55 event = trace_buffer_lock_reserve(buffer, TRACE_POWER,
56 sizeof(*entry), 0, 0);
57 if (!event)
58 goto out;
59 entry = ring_buffer_event_data(event);
60 entry->state_data = *it;
61 if (!filter_check_discard(call, entry, buffer, event))
62 trace_buffer_unlock_commit(buffer, event, 0, 0);
63 out:
64 preempt_enable();
65}
66
67static void probe_power_mark(struct power_trace *it, unsigned int type,
68 unsigned int level)
69{
70 struct ftrace_event_call *call = &event_power;
71 struct ring_buffer_event *event;
72 struct ring_buffer *buffer;
73 struct trace_power *entry;
74 struct trace_array_cpu *data;
75 struct trace_array *tr = power_trace;
76
77 if (!trace_power_enabled)
78 return;
79
80 buffer = tr->buffer;
81
82 memset(it, 0, sizeof(struct power_trace));
83 it->state = level;
84 it->type = type;
85 it->stamp = ktime_get();
86 preempt_disable();
87 it->end = it->stamp;
88 data = tr->data[smp_processor_id()];
89
90 event = trace_buffer_lock_reserve(buffer, TRACE_POWER,
91 sizeof(*entry), 0, 0);
92 if (!event)
93 goto out;
94 entry = ring_buffer_event_data(event);
95 entry->state_data = *it;
96 if (!filter_check_discard(call, entry, buffer, event))
97 trace_buffer_unlock_commit(buffer, event, 0, 0);
98 out:
99 preempt_enable();
100}
101
102static int tracing_power_register(void)
103{
104 int ret;
105
106 ret = register_trace_power_start(probe_power_start);
107 if (ret) {
108 pr_info("power trace: Couldn't activate tracepoint"
109 " probe to trace_power_start\n");
110 return ret;
111 }
112 ret = register_trace_power_end(probe_power_end);
113 if (ret) {
114 pr_info("power trace: Couldn't activate tracepoint"
115 " probe to trace_power_end\n");
116 goto fail_start;
117 }
118 ret = register_trace_power_mark(probe_power_mark);
119 if (ret) {
120 pr_info("power trace: Couldn't activate tracepoint"
121 " probe to trace_power_mark\n");
122 goto fail_end;
123 }
124 return ret;
125fail_end:
126 unregister_trace_power_end(probe_power_end);
127fail_start:
128 unregister_trace_power_start(probe_power_start);
129 return ret;
130}
131
132static void start_power_trace(struct trace_array *tr)
133{
134 trace_power_enabled = 1;
135}
136
137static void stop_power_trace(struct trace_array *tr)
138{
139 trace_power_enabled = 0;
140}
141
142static void power_trace_reset(struct trace_array *tr)
143{
144 trace_power_enabled = 0;
145 unregister_trace_power_start(probe_power_start);
146 unregister_trace_power_end(probe_power_end);
147 unregister_trace_power_mark(probe_power_mark);
148}
149
150
151static int power_trace_init(struct trace_array *tr)
152{
153 power_trace = tr;
154
155 trace_power_enabled = 1;
156 tracing_power_register();
157
158 tracing_reset_online_cpus(tr);
159 return 0;
160}
161
162static enum print_line_t power_print_line(struct trace_iterator *iter)
163{
164 int ret = 0;
165 struct trace_entry *entry = iter->ent;
166 struct trace_power *field ;
167 struct power_trace *it;
168 struct trace_seq *s = &iter->seq;
169 struct timespec stamp;
170 struct timespec duration;
171
172 trace_assign_type(field, entry);
173 it = &field->state_data;
174 stamp = ktime_to_timespec(it->stamp);
175 duration = ktime_to_timespec(ktime_sub(it->end, it->stamp));
176
177 if (entry->type == TRACE_POWER) {
178 if (it->type == POWER_CSTATE)
179 ret = trace_seq_printf(s, "[%5ld.%09ld] CSTATE: Going to C%i on cpu %i for %ld.%09ld\n",
180 stamp.tv_sec,
181 stamp.tv_nsec,
182 it->state, iter->cpu,
183 duration.tv_sec,
184 duration.tv_nsec);
185 if (it->type == POWER_PSTATE)
186 ret = trace_seq_printf(s, "[%5ld.%09ld] PSTATE: Going to P%i on cpu %i\n",
187 stamp.tv_sec,
188 stamp.tv_nsec,
189 it->state, iter->cpu);
190 if (!ret)
191 return TRACE_TYPE_PARTIAL_LINE;
192 return TRACE_TYPE_HANDLED;
193 }
194 return TRACE_TYPE_UNHANDLED;
195}
196
197static void power_print_header(struct seq_file *s)
198{
199 seq_puts(s, "# TIMESTAMP STATE EVENT\n");
200 seq_puts(s, "# | | |\n");
201}
202
203static struct tracer power_tracer __read_mostly =
204{
205 .name = "power",
206 .init = power_trace_init,
207 .start = start_power_trace,
208 .stop = stop_power_trace,
209 .reset = power_trace_reset,
210 .print_line = power_print_line,
211 .print_header = power_print_header,
212};
213
214static int init_power_trace(void)
215{
216 return register_tracer(&power_tracer);
217}
218device_initcall(init_power_trace);
diff --git a/scripts/tracing/power.pl b/scripts/tracing/power.pl
deleted file mode 100644
index 4f729b3501e0..000000000000
--- a/scripts/tracing/power.pl
+++ /dev/null
@@ -1,108 +0,0 @@
1#!/usr/bin/perl
2
3# Copyright 2008, Intel Corporation
4#
5# This file is part of the Linux kernel
6#
7# This program file is free software; you can redistribute it and/or modify it
8# under the terms of the GNU General Public License as published by the
9# Free Software Foundation; version 2 of the License.
10#
11# This program is distributed in the hope that it will be useful, but WITHOUT
12# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14# for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program in a file named COPYING; if not, write to the
18# Free Software Foundation, Inc.,
19# 51 Franklin Street, Fifth Floor,
20# Boston, MA 02110-1301 USA
21#
22# Authors:
23# Arjan van de Ven <arjan@linux.intel.com>
24
25
26#
27# This script turns a cstate ftrace output into a SVG graphic that shows
28# historic C-state information
29#
30#
31# cat /sys/kernel/debug/tracing/trace | perl power.pl > out.svg
32#
33
34my @styles;
35my $base = 0;
36
37my @pstate_last;
38my @pstate_level;
39
40$styles[0] = "fill:rgb(0,0,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
41$styles[1] = "fill:rgb(0,255,0);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
42$styles[2] = "fill:rgb(255,0,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
43$styles[3] = "fill:rgb(255,255,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
44$styles[4] = "fill:rgb(255,0,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
45$styles[5] = "fill:rgb(0,255,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
46$styles[6] = "fill:rgb(0,128,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
47$styles[7] = "fill:rgb(0,255,128);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
48$styles[8] = "fill:rgb(0,25,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
49
50
51print "<?xml version=\"1.0\" standalone=\"no\"?> \n";
52print "<svg width=\"10000\" height=\"100%\" version=\"1.1\" xmlns=\"http://www.w3.org/2000/svg\">\n";
53
54my $scale = 30000.0;
55while (<>) {
56 my $line = $_;
57 if ($line =~ /([0-9\.]+)\] CSTATE: Going to C([0-9]) on cpu ([0-9]+) for ([0-9\.]+)/) {
58 if ($base == 0) {
59 $base = $1;
60 }
61 my $time = $1 - $base;
62 $time = $time * $scale;
63 my $C = $2;
64 my $cpu = $3;
65 my $y = 400 * $cpu;
66 my $duration = $4 * $scale;
67 my $msec = int($4 * 100000)/100.0;
68 my $height = $C * 20;
69 $style = $styles[$C];
70
71 $y = $y + 140 - $height;
72
73 $x2 = $time + 4;
74 $y2 = $y + 4;
75
76
77 print "<rect x=\"$time\" width=\"$duration\" y=\"$y\" height=\"$height\" style=\"$style\"/>\n";
78 print "<text transform=\"translate($x2,$y2) rotate(90)\">C$C $msec</text>\n";
79 }
80 if ($line =~ /([0-9\.]+)\] PSTATE: Going to P([0-9]) on cpu ([0-9]+)/) {
81 my $time = $1 - $base;
82 my $state = $2;
83 my $cpu = $3;
84
85 if (defined($pstate_last[$cpu])) {
86 my $from = $pstate_last[$cpu];
87 my $oldstate = $pstate_state[$cpu];
88 my $duration = ($time-$from) * $scale;
89
90 $from = $from * $scale;
91 my $to = $from + $duration;
92 my $height = 140 - ($oldstate * (140/8));
93
94 my $y = 400 * $cpu + 200 + $height;
95 my $y2 = $y+4;
96 my $style = $styles[8];
97
98 print "<rect x=\"$from\" y=\"$y\" width=\"$duration\" height=\"5\" style=\"$style\"/>\n";
99 print "<text transform=\"translate($from,$y2)\">P$oldstate (cpu $cpu)</text>\n";
100 };
101
102 $pstate_last[$cpu] = $time;
103 $pstate_state[$cpu] = $state;
104 }
105}
106
107
108print "</svg>\n";
diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt
new file mode 100644
index 000000000000..1ce79198997b
--- /dev/null
+++ b/tools/perf/Documentation/perf-sched.txt
@@ -0,0 +1,41 @@
1perf-sched(1)
2==============
3
4NAME
5----
6perf-sched - Tool to trace/measure scheduler properties (latencies)
7
8SYNOPSIS
9--------
10[verse]
11'perf sched' {record|latency|replay|trace}
12
13DESCRIPTION
14-----------
 15There are four variants of perf sched:
16
17 'perf sched record <command>' to record the scheduling events
18 of an arbitrary workload.
19
20 'perf sched latency' to report the per task scheduling latencies
21 and other scheduling properties of the workload.
22
23 'perf sched trace' to see a detailed trace of the workload that
24 was recorded.
25
26 'perf sched replay' to simulate the workload that was recorded
27 via perf sched record. (this is done by starting up mockup threads
28 that mimic the workload based on the events in the trace. These
29 threads can then replay the timings (CPU runtime and sleep patterns)
 30 of the workload as it occurred when it was recorded - and can repeat
31 it a number of times, measuring its performance.)
32
33OPTIONS
34-------
35-D::
36--dump-raw-trace=::
37 Display verbose dump of the sched data.
38
39SEE ALSO
40--------
41linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt
new file mode 100644
index 000000000000..61e0104c6270
--- /dev/null
+++ b/tools/perf/Documentation/perf-timechart.txt
@@ -0,0 +1,35 @@
1perf-timechart(1)
2=================
3
4NAME
5----
6perf-timechart - Tool to visualize total system behavior during a workload
7
8SYNOPSIS
9--------
10[verse]
11'perf timechart' {record}
12
13DESCRIPTION
14-----------
15There are two variants of perf timechart:
16
17 'perf timechart record <command>' to record the system level events
18 of an arbitrary workload.
19
 20 'perf timechart' to turn a trace into a Scalable Vector Graphics file
21 that can be viewed with popular SVG viewers such as 'Inkscape'.
22
23OPTIONS
24-------
25-o::
26--output=::
27 Select the output file (default: output.svg)
28-i::
29--input=::
30 Select the input file (default: perf.data)
31
32
33SEE ALSO
34--------
35linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
new file mode 100644
index 000000000000..41ed75398ca9
--- /dev/null
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -0,0 +1,25 @@
1perf-trace(1)
2==============
3
4NAME
5----
6perf-trace - Read perf.data (created by perf record) and display trace output
7
8SYNOPSIS
9--------
10[verse]
11'perf trace' [-i <file> | --input=file] symbol_name
12
13DESCRIPTION
14-----------
15This command reads the input file and displays the trace recorded.
16
17OPTIONS
18-------
19-D::
20--dump-raw-trace=::
21 Display verbose dump of the trace data.
22
23SEE ALSO
24--------
25linkperf:perf-record[1]
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 9f8d207a91bf..0aba8b6e9c54 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -373,13 +373,16 @@ LIB_OBJS += util/thread.o
373LIB_OBJS += util/trace-event-parse.o 373LIB_OBJS += util/trace-event-parse.o
374LIB_OBJS += util/trace-event-read.o 374LIB_OBJS += util/trace-event-read.o
375LIB_OBJS += util/trace-event-info.o 375LIB_OBJS += util/trace-event-info.o
376LIB_OBJS += util/svghelper.o
376 377
377BUILTIN_OBJS += builtin-annotate.o 378BUILTIN_OBJS += builtin-annotate.o
378BUILTIN_OBJS += builtin-help.o 379BUILTIN_OBJS += builtin-help.o
380BUILTIN_OBJS += builtin-sched.o
379BUILTIN_OBJS += builtin-list.o 381BUILTIN_OBJS += builtin-list.o
380BUILTIN_OBJS += builtin-record.o 382BUILTIN_OBJS += builtin-record.o
381BUILTIN_OBJS += builtin-report.o 383BUILTIN_OBJS += builtin-report.o
382BUILTIN_OBJS += builtin-stat.o 384BUILTIN_OBJS += builtin-stat.o
385BUILTIN_OBJS += builtin-timechart.o
383BUILTIN_OBJS += builtin-top.o 386BUILTIN_OBJS += builtin-top.o
384BUILTIN_OBJS += builtin-trace.o 387BUILTIN_OBJS += builtin-trace.o
385 388
@@ -710,6 +713,12 @@ builtin-help.o: builtin-help.c common-cmds.h PERF-CFLAGS
710 '-DPERF_MAN_PATH="$(mandir_SQ)"' \ 713 '-DPERF_MAN_PATH="$(mandir_SQ)"' \
711 '-DPERF_INFO_PATH="$(infodir_SQ)"' $< 714 '-DPERF_INFO_PATH="$(infodir_SQ)"' $<
712 715
716builtin-timechart.o: builtin-timechart.c common-cmds.h PERF-CFLAGS
717 $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \
718 '-DPERF_HTML_PATH="$(htmldir_SQ)"' \
719 '-DPERF_MAN_PATH="$(mandir_SQ)"' \
720 '-DPERF_INFO_PATH="$(infodir_SQ)"' $<
721
713$(BUILT_INS): perf$X 722$(BUILT_INS): perf$X
714 $(QUIET_BUILT_IN)$(RM) $@ && \ 723 $(QUIET_BUILT_IN)$(RM) $@ && \
715 ln perf$X $@ 2>/dev/null || \ 724 ln perf$X $@ 2>/dev/null || \
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 99a12fe86e9f..2459e5a22ed8 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -48,6 +48,8 @@ static int call_graph = 0;
48static int inherit_stat = 0; 48static int inherit_stat = 0;
49static int no_samples = 0; 49static int no_samples = 0;
50static int sample_address = 0; 50static int sample_address = 0;
51static int multiplex = 0;
52static int multiplex_fd = -1;
51 53
52static long samples; 54static long samples;
53static struct timeval last_read; 55static struct timeval last_read;
@@ -470,19 +472,28 @@ try_again:
470 */ 472 */
471 if (group && group_fd == -1) 473 if (group && group_fd == -1)
472 group_fd = fd[nr_cpu][counter]; 474 group_fd = fd[nr_cpu][counter];
475 if (multiplex && multiplex_fd == -1)
476 multiplex_fd = fd[nr_cpu][counter];
473 477
474 event_array[nr_poll].fd = fd[nr_cpu][counter]; 478 if (multiplex && fd[nr_cpu][counter] != multiplex_fd) {
475 event_array[nr_poll].events = POLLIN; 479 int ret;
476 nr_poll++; 480
477 481 ret = ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_SET_OUTPUT, multiplex_fd);
478 mmap_array[nr_cpu][counter].counter = counter; 482 assert(ret != -1);
479 mmap_array[nr_cpu][counter].prev = 0; 483 } else {
480 mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1; 484 event_array[nr_poll].fd = fd[nr_cpu][counter];
481 mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size, 485 event_array[nr_poll].events = POLLIN;
482 PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter], 0); 486 nr_poll++;
483 if (mmap_array[nr_cpu][counter].base == MAP_FAILED) { 487
484 error("failed to mmap with %d (%s)\n", errno, strerror(errno)); 488 mmap_array[nr_cpu][counter].counter = counter;
485 exit(-1); 489 mmap_array[nr_cpu][counter].prev = 0;
490 mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
491 mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
492 PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter], 0);
493 if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
494 error("failed to mmap with %d (%s)\n", errno, strerror(errno));
495 exit(-1);
496 }
486 } 497 }
487 498
488 ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_ENABLE); 499 ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_ENABLE);
@@ -513,6 +524,7 @@ static int __cmd_record(int argc, const char **argv)
513 pid_t pid = 0; 524 pid_t pid = 0;
514 int flags; 525 int flags;
515 int ret; 526 int ret;
527 unsigned long waking = 0;
516 528
517 page_size = sysconf(_SC_PAGE_SIZE); 529 page_size = sysconf(_SC_PAGE_SIZE);
518 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); 530 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
@@ -614,17 +626,29 @@ static int __cmd_record(int argc, const char **argv)
614 int hits = samples; 626 int hits = samples;
615 627
616 for (i = 0; i < nr_cpu; i++) { 628 for (i = 0; i < nr_cpu; i++) {
617 for (counter = 0; counter < nr_counters; counter++) 629 for (counter = 0; counter < nr_counters; counter++) {
618 mmap_read(&mmap_array[i][counter]); 630 if (mmap_array[i][counter].base)
631 mmap_read(&mmap_array[i][counter]);
632 }
619 } 633 }
620 634
621 if (hits == samples) { 635 if (hits == samples) {
622 if (done) 636 if (done)
623 break; 637 break;
624 ret = poll(event_array, nr_poll, 100); 638 ret = poll(event_array, nr_poll, -1);
639 waking++;
640 }
641
642 if (done) {
643 for (i = 0; i < nr_cpu; i++) {
644 for (counter = 0; counter < nr_counters; counter++)
645 ioctl(fd[i][counter], PERF_COUNTER_IOC_DISABLE);
646 }
625 } 647 }
626 } 648 }
627 649
650 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
651
628 /* 652 /*
629 * Approximate RIP event size: 24 bytes. 653 * Approximate RIP event size: 24 bytes.
630 */ 654 */
@@ -681,6 +705,8 @@ static const struct option options[] = {
681 "Sample addresses"), 705 "Sample addresses"),
682 OPT_BOOLEAN('n', "no-samples", &no_samples, 706 OPT_BOOLEAN('n', "no-samples", &no_samples,
683 "don't sample"), 707 "don't sample"),
708 OPT_BOOLEAN('M', "multiplex", &multiplex,
709 "multiplex counter output in a single channel"),
684 OPT_END() 710 OPT_END()
685}; 711};
686 712
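
[editor's note] With the new -M/--multiplex option, builtin-record.c keeps a single output channel: the first counter's fd becomes multiplex_fd, and every later counter's event stream is redirected into it with the PERF_COUNTER_IOC_SET_OUTPUT ioctl, so only that one fd needs an mmap ring and a poll() slot. A reduced sketch of that routing decision; setup_counter_mmap() is a hypothetical stand-in for the mmap/poll bookkeeping above, and the ioctl constant comes from the kernel's perf_counter.h of this era:

	#include <assert.h>
	#include <sys/ioctl.h>
	#include <linux/perf_counter.h>	/* PERF_COUNTER_IOC_SET_OUTPUT (kernel headers of this era) */

	/* Hypothetical stand-in for the mmap()/poll() setup done in builtin-record.c. */
	static void setup_counter_mmap(int fd)
	{
		(void)fd;
	}

	/*
	 * First counter owns the shared ring; later counters are redirected
	 * into it and need no ring or poll slot of their own.
	 */
	static void route_counter(int counter_fd, int *multiplex_fd, int multiplex)
	{
		if (multiplex && *multiplex_fd == -1)
			*multiplex_fd = counter_fd;

		if (multiplex && counter_fd != *multiplex_fd) {
			int ret = ioctl(counter_fd, PERF_COUNTER_IOC_SET_OUTPUT, *multiplex_fd);
			assert(ret != -1);	/* the stream now lands in the shared ring */
		} else {
			setup_counter_mmap(counter_fd);
		}
	}
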
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
new file mode 100644
index 000000000000..275d79c6627a
--- /dev/null
+++ b/tools/perf/builtin-sched.c
@@ -0,0 +1,2004 @@
1#include "builtin.h"
2#include "perf.h"
3
4#include "util/util.h"
5#include "util/cache.h"
6#include "util/symbol.h"
7#include "util/thread.h"
8#include "util/header.h"
9
10#include "util/parse-options.h"
11#include "util/trace-event.h"
12
13#include "util/debug.h"
14
15#include <sys/types.h>
16#include <sys/prctl.h>
17
18#include <semaphore.h>
19#include <pthread.h>
20#include <math.h>
21
22static char const *input_name = "perf.data";
23static int input;
24static unsigned long page_size;
25static unsigned long mmap_window = 32;
26
27static unsigned long total_comm = 0;
28
29static struct rb_root threads;
30static struct thread *last_match;
31
32static struct perf_header *header;
33static u64 sample_type;
34
35static char default_sort_order[] = "avg, max, switch, runtime";
36static char *sort_order = default_sort_order;
37
38#define PR_SET_NAME 15 /* Set process name */
39#define MAX_CPUS 4096
40
41#define BUG_ON(x) assert(!(x))
42
43static u64 run_measurement_overhead;
44static u64 sleep_measurement_overhead;
45
46#define COMM_LEN 20
47#define SYM_LEN 129
48
49#define MAX_PID 65536
50
51static unsigned long nr_tasks;
52
53struct sched_atom;
54
55struct task_desc {
56 unsigned long nr;
57 unsigned long pid;
58 char comm[COMM_LEN];
59
60 unsigned long nr_events;
61 unsigned long curr_event;
62 struct sched_atom **atoms;
63
64 pthread_t thread;
65 sem_t sleep_sem;
66
67 sem_t ready_for_work;
68 sem_t work_done_sem;
69
70 u64 cpu_usage;
71};
72
73enum sched_event_type {
74 SCHED_EVENT_RUN,
75 SCHED_EVENT_SLEEP,
76 SCHED_EVENT_WAKEUP,
77};
78
79struct sched_atom {
80 enum sched_event_type type;
81 u64 timestamp;
82 u64 duration;
83 unsigned long nr;
84 int specific_wait;
85 sem_t *wait_sem;
86 struct task_desc *wakee;
87};
88
89static struct task_desc *pid_to_task[MAX_PID];
90
91static struct task_desc **tasks;
92
93static pthread_mutex_t start_work_mutex = PTHREAD_MUTEX_INITIALIZER;
94static u64 start_time;
95
96static pthread_mutex_t work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER;
97
98static unsigned long nr_run_events;
99static unsigned long nr_sleep_events;
100static unsigned long nr_wakeup_events;
101
102static unsigned long nr_sleep_corrections;
103static unsigned long nr_run_events_optimized;
104
105static unsigned long targetless_wakeups;
106static unsigned long multitarget_wakeups;
107
108static u64 cpu_usage;
109static u64 runavg_cpu_usage;
110static u64 parent_cpu_usage;
111static u64 runavg_parent_cpu_usage;
112
113static unsigned long nr_runs;
114static u64 sum_runtime;
115static u64 sum_fluct;
116static u64 run_avg;
117
118static unsigned long replay_repeat = 10;
119static unsigned long nr_timestamps;
120static unsigned long nr_unordered_timestamps;
121static unsigned long nr_state_machine_bugs;
122static unsigned long nr_context_switch_bugs;
123static unsigned long nr_events;
124static unsigned long nr_lost_chunks;
125static unsigned long nr_lost_events;
126
127#define TASK_STATE_TO_CHAR_STR "RSDTtZX"
128
129enum thread_state {
130 THREAD_SLEEPING = 0,
131 THREAD_WAIT_CPU,
132 THREAD_SCHED_IN,
133 THREAD_IGNORE
134};
135
136struct work_atom {
137 struct list_head list;
138 enum thread_state state;
139 u64 sched_out_time;
140 u64 wake_up_time;
141 u64 sched_in_time;
142 u64 runtime;
143};
144
145struct work_atoms {
146 struct list_head work_list;
147 struct thread *thread;
148 struct rb_node node;
149 u64 max_lat;
150 u64 total_lat;
151 u64 nb_atoms;
152 u64 total_runtime;
153};
154
155typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);
156
157static struct rb_root atom_root, sorted_atom_root;
158
159static u64 all_runtime;
160static u64 all_count;
161
162
163static u64 get_nsecs(void)
164{
165 struct timespec ts;
166
167 clock_gettime(CLOCK_MONOTONIC, &ts);
168
169 return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
170}
171
172static void burn_nsecs(u64 nsecs)
173{
174 u64 T0 = get_nsecs(), T1;
175
176 do {
177 T1 = get_nsecs();
178 } while (T1 + run_measurement_overhead < T0 + nsecs);
179}
180
181static void sleep_nsecs(u64 nsecs)
182{
183 struct timespec ts;
184
185 ts.tv_nsec = nsecs % 999999999;
186 ts.tv_sec = nsecs / 999999999;
187
188 nanosleep(&ts, NULL);
189}
190
191static void calibrate_run_measurement_overhead(void)
192{
193 u64 T0, T1, delta, min_delta = 1000000000ULL;
194 int i;
195
196 for (i = 0; i < 10; i++) {
197 T0 = get_nsecs();
198 burn_nsecs(0);
199 T1 = get_nsecs();
200 delta = T1-T0;
201 min_delta = min(min_delta, delta);
202 }
203 run_measurement_overhead = min_delta;
204
205 printf("run measurement overhead: %Ld nsecs\n", min_delta);
206}
207
208static void calibrate_sleep_measurement_overhead(void)
209{
210 u64 T0, T1, delta, min_delta = 1000000000ULL;
211 int i;
212
213 for (i = 0; i < 10; i++) {
214 T0 = get_nsecs();
215 sleep_nsecs(10000);
216 T1 = get_nsecs();
217 delta = T1-T0;
218 min_delta = min(min_delta, delta);
219 }
220 min_delta -= 10000;
221 sleep_measurement_overhead = min_delta;
222
223 printf("sleep measurement overhead: %Ld nsecs\n", min_delta);
224}
225
226static struct sched_atom *
227get_new_event(struct task_desc *task, u64 timestamp)
228{
229 struct sched_atom *event = calloc(1, sizeof(*event));
230 unsigned long idx = task->nr_events;
231 size_t size;
232
233 event->timestamp = timestamp;
234 event->nr = idx;
235
236 task->nr_events++;
237 size = sizeof(struct sched_atom *) * task->nr_events;
238 task->atoms = realloc(task->atoms, size);
239 BUG_ON(!task->atoms);
240
241 task->atoms[idx] = event;
242
243 return event;
244}
245
246static struct sched_atom *last_event(struct task_desc *task)
247{
248 if (!task->nr_events)
249 return NULL;
250
251 return task->atoms[task->nr_events - 1];
252}
253
254static void
255add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration)
256{
257 struct sched_atom *event, *curr_event = last_event(task);
258
259 /*
260 * optimize an existing RUN event by merging this one
261 * to it:
262 */
263 if (curr_event && curr_event->type == SCHED_EVENT_RUN) {
264 nr_run_events_optimized++;
265 curr_event->duration += duration;
266 return;
267 }
268
269 event = get_new_event(task, timestamp);
270
271 event->type = SCHED_EVENT_RUN;
272 event->duration = duration;
273
274 nr_run_events++;
275}
276
277static void
278add_sched_event_wakeup(struct task_desc *task, u64 timestamp,
279 struct task_desc *wakee)
280{
281 struct sched_atom *event, *wakee_event;
282
283 event = get_new_event(task, timestamp);
284 event->type = SCHED_EVENT_WAKEUP;
285 event->wakee = wakee;
286
287 wakee_event = last_event(wakee);
288 if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) {
289 targetless_wakeups++;
290 return;
291 }
292 if (wakee_event->wait_sem) {
293 multitarget_wakeups++;
294 return;
295 }
296
297 wakee_event->wait_sem = calloc(1, sizeof(*wakee_event->wait_sem));
298 sem_init(wakee_event->wait_sem, 0, 0);
299 wakee_event->specific_wait = 1;
300 event->wait_sem = wakee_event->wait_sem;
301
302 nr_wakeup_events++;
303}
304
305static void
306add_sched_event_sleep(struct task_desc *task, u64 timestamp,
307 u64 task_state __used)
308{
309 struct sched_atom *event = get_new_event(task, timestamp);
310
311 event->type = SCHED_EVENT_SLEEP;
312
313 nr_sleep_events++;
314}
315
316static struct task_desc *register_pid(unsigned long pid, const char *comm)
317{
318 struct task_desc *task;
319
320 BUG_ON(pid >= MAX_PID);
321
322 task = pid_to_task[pid];
323
324 if (task)
325 return task;
326
327 task = calloc(1, sizeof(*task));
328 task->pid = pid;
329 task->nr = nr_tasks;
330 strcpy(task->comm, comm);
331 /*
332 * every task starts in sleeping state - this gets ignored
333 * if there's no wakeup pointing to this sleep state:
334 */
335 add_sched_event_sleep(task, 0, 0);
336
337 pid_to_task[pid] = task;
338 nr_tasks++;
339 tasks = realloc(tasks, nr_tasks*sizeof(struct task_task *));
340 BUG_ON(!tasks);
341 tasks[task->nr] = task;
342
343 if (verbose)
344 printf("registered task #%ld, PID %ld (%s)\n", nr_tasks, pid, comm);
345
346 return task;
347}
348
349
350static void print_task_traces(void)
351{
352 struct task_desc *task;
353 unsigned long i;
354
355 for (i = 0; i < nr_tasks; i++) {
356 task = tasks[i];
357 printf("task %6ld (%20s:%10ld), nr_events: %ld\n",
358 task->nr, task->comm, task->pid, task->nr_events);
359 }
360}
361
362static void add_cross_task_wakeups(void)
363{
364 struct task_desc *task1, *task2;
365 unsigned long i, j;
366
367 for (i = 0; i < nr_tasks; i++) {
368 task1 = tasks[i];
369 j = i + 1;
370 if (j == nr_tasks)
371 j = 0;
372 task2 = tasks[j];
373 add_sched_event_wakeup(task1, 0, task2);
374 }
375}
376
377static void
378process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom)
379{
380 int ret = 0;
381 u64 now;
382 long long delta;
383
384 now = get_nsecs();
385 delta = start_time + atom->timestamp - now;
386
387 switch (atom->type) {
388 case SCHED_EVENT_RUN:
389 burn_nsecs(atom->duration);
390 break;
391 case SCHED_EVENT_SLEEP:
392 if (atom->wait_sem)
393 ret = sem_wait(atom->wait_sem);
394 BUG_ON(ret);
395 break;
396 case SCHED_EVENT_WAKEUP:
397 if (atom->wait_sem)
398 ret = sem_post(atom->wait_sem);
399 BUG_ON(ret);
400 break;
401 default:
402 BUG_ON(1);
403 }
404}
405
406static u64 get_cpu_usage_nsec_parent(void)
407{
408 struct rusage ru;
409 u64 sum;
410 int err;
411
412 err = getrusage(RUSAGE_SELF, &ru);
413 BUG_ON(err);
414
415 sum = ru.ru_utime.tv_sec*1e9 + ru.ru_utime.tv_usec*1e3;
416 sum += ru.ru_stime.tv_sec*1e9 + ru.ru_stime.tv_usec*1e3;
417
418 return sum;
419}
420
421static u64 get_cpu_usage_nsec_self(void)
422{
423 char filename [] = "/proc/1234567890/sched";
424 unsigned long msecs, nsecs;
425 char *line = NULL;
426 u64 total = 0;
427 size_t len = 0;
428 ssize_t chars;
429 FILE *file;
430 int ret;
431
432 sprintf(filename, "/proc/%d/sched", getpid());
433 file = fopen(filename, "r");
434 BUG_ON(!file);
435
436 while ((chars = getline(&line, &len, file)) != -1) {
437 ret = sscanf(line, "se.sum_exec_runtime : %ld.%06ld\n",
438 &msecs, &nsecs);
439 if (ret == 2) {
440 total = msecs*1e6 + nsecs;
441 break;
442 }
443 }
444 if (line)
445 free(line);
446 fclose(file);
447
448 return total;
449}
450
451static void *thread_func(void *ctx)
452{
453 struct task_desc *this_task = ctx;
454 u64 cpu_usage_0, cpu_usage_1;
455 unsigned long i, ret;
456 char comm2[22];
457
458 sprintf(comm2, ":%s", this_task->comm);
459 prctl(PR_SET_NAME, comm2);
460
461again:
462 ret = sem_post(&this_task->ready_for_work);
463 BUG_ON(ret);
464 ret = pthread_mutex_lock(&start_work_mutex);
465 BUG_ON(ret);
466 ret = pthread_mutex_unlock(&start_work_mutex);
467 BUG_ON(ret);
468
469 cpu_usage_0 = get_cpu_usage_nsec_self();
470
471 for (i = 0; i < this_task->nr_events; i++) {
472 this_task->curr_event = i;
473 process_sched_event(this_task, this_task->atoms[i]);
474 }
475
476 cpu_usage_1 = get_cpu_usage_nsec_self();
477 this_task->cpu_usage = cpu_usage_1 - cpu_usage_0;
478
479 ret = sem_post(&this_task->work_done_sem);
480 BUG_ON(ret);
481
482 ret = pthread_mutex_lock(&work_done_wait_mutex);
483 BUG_ON(ret);
484 ret = pthread_mutex_unlock(&work_done_wait_mutex);
485 BUG_ON(ret);
486
487 goto again;
488}
489
490static void create_tasks(void)
491{
492 struct task_desc *task;
493 pthread_attr_t attr;
494 unsigned long i;
495 int err;
496
497 err = pthread_attr_init(&attr);
498 BUG_ON(err);
499 err = pthread_attr_setstacksize(&attr, (size_t)(16*1024));
500 BUG_ON(err);
501 err = pthread_mutex_lock(&start_work_mutex);
502 BUG_ON(err);
503 err = pthread_mutex_lock(&work_done_wait_mutex);
504 BUG_ON(err);
505 for (i = 0; i < nr_tasks; i++) {
506 task = tasks[i];
507 sem_init(&task->sleep_sem, 0, 0);
508 sem_init(&task->ready_for_work, 0, 0);
509 sem_init(&task->work_done_sem, 0, 0);
510 task->curr_event = 0;
511 err = pthread_create(&task->thread, &attr, thread_func, task);
512 BUG_ON(err);
513 }
514}
515
516static void wait_for_tasks(void)
517{
518 u64 cpu_usage_0, cpu_usage_1;
519 struct task_desc *task;
520 unsigned long i, ret;
521
522 start_time = get_nsecs();
523 cpu_usage = 0;
524 pthread_mutex_unlock(&work_done_wait_mutex);
525
526 for (i = 0; i < nr_tasks; i++) {
527 task = tasks[i];
528 ret = sem_wait(&task->ready_for_work);
529 BUG_ON(ret);
530 sem_init(&task->ready_for_work, 0, 0);
531 }
532 ret = pthread_mutex_lock(&work_done_wait_mutex);
533 BUG_ON(ret);
534
535 cpu_usage_0 = get_cpu_usage_nsec_parent();
536
537 pthread_mutex_unlock(&start_work_mutex);
538
539 for (i = 0; i < nr_tasks; i++) {
540 task = tasks[i];
541 ret = sem_wait(&task->work_done_sem);
542 BUG_ON(ret);
543 sem_init(&task->work_done_sem, 0, 0);
544 cpu_usage += task->cpu_usage;
545 task->cpu_usage = 0;
546 }
547
548 cpu_usage_1 = get_cpu_usage_nsec_parent();
549 if (!runavg_cpu_usage)
550 runavg_cpu_usage = cpu_usage;
551 runavg_cpu_usage = (runavg_cpu_usage*9 + cpu_usage)/10;
552
553 parent_cpu_usage = cpu_usage_1 - cpu_usage_0;
554 if (!runavg_parent_cpu_usage)
555 runavg_parent_cpu_usage = parent_cpu_usage;
556 runavg_parent_cpu_usage = (runavg_parent_cpu_usage*9 +
557 parent_cpu_usage)/10;
558
559 ret = pthread_mutex_lock(&start_work_mutex);
560 BUG_ON(ret);
561
562 for (i = 0; i < nr_tasks; i++) {
563 task = tasks[i];
564 sem_init(&task->sleep_sem, 0, 0);
565 task->curr_event = 0;
566 }
567}
568
569static void run_one_test(void)
570{
571 u64 T0, T1, delta, avg_delta, fluct, std_dev;
572
573 T0 = get_nsecs();
574 wait_for_tasks();
575 T1 = get_nsecs();
576
577 delta = T1 - T0;
578 sum_runtime += delta;
579 nr_runs++;
580
581 avg_delta = sum_runtime / nr_runs;
582 if (delta < avg_delta)
583 fluct = avg_delta - delta;
584 else
585 fluct = delta - avg_delta;
586 sum_fluct += fluct;
587 std_dev = sum_fluct / nr_runs / sqrt(nr_runs);
588 if (!run_avg)
589 run_avg = delta;
590 run_avg = (run_avg*9 + delta)/10;
591
592 printf("#%-3ld: %0.3f, ",
593 nr_runs, (double)delta/1000000.0);
594
595 printf("ravg: %0.2f, ",
596 (double)run_avg/1e6);
597
598 printf("cpu: %0.2f / %0.2f",
599 (double)cpu_usage/1e6, (double)runavg_cpu_usage/1e6);
600
601#if 0
602 /*
603 * rusage statistics done by the parent, these are less
604 * accurate than the sum_exec_runtime based statistics:
605 */
606 printf(" [%0.2f / %0.2f]",
607 (double)parent_cpu_usage/1e6,
608 (double)runavg_parent_cpu_usage/1e6);
609#endif
610
611 printf("\n");
612
613 if (nr_sleep_corrections)
614 printf(" (%ld sleep corrections)\n", nr_sleep_corrections);
615 nr_sleep_corrections = 0;
616}
617
618static void test_calibrations(void)
619{
620 u64 T0, T1;
621
622 T0 = get_nsecs();
623 burn_nsecs(1e6);
624 T1 = get_nsecs();
625
626 printf("the run test took %Ld nsecs\n", T1-T0);
627
628 T0 = get_nsecs();
629 sleep_nsecs(1e6);
630 T1 = get_nsecs();
631
632 printf("the sleep test took %Ld nsecs\n", T1-T0);
633}
634
635static int
636process_comm_event(event_t *event, unsigned long offset, unsigned long head)
637{
638 struct thread *thread;
639
640 thread = threads__findnew(event->comm.pid, &threads, &last_match);
641
642 dump_printf("%p [%p]: perf_event_comm: %s:%d\n",
643 (void *)(offset + head),
644 (void *)(long)(event->header.size),
645 event->comm.comm, event->comm.pid);
646
647 if (thread == NULL ||
648 thread__set_comm(thread, event->comm.comm)) {
649 dump_printf("problem processing perf_event_comm, skipping event.\n");
650 return -1;
651 }
652 total_comm++;
653
654 return 0;
655}
656
657
658struct raw_event_sample {
659 u32 size;
660 char data[0];
661};
662
663#define FILL_FIELD(ptr, field, event, data) \
664 ptr.field = (typeof(ptr.field)) raw_field_value(event, #field, data)
665
666#define FILL_ARRAY(ptr, array, event, data) \
667do { \
668 void *__array = raw_field_ptr(event, #array, data); \
669 memcpy(ptr.array, __array, sizeof(ptr.array)); \
670} while(0)
671
672#define FILL_COMMON_FIELDS(ptr, event, data) \
673do { \
674 FILL_FIELD(ptr, common_type, event, data); \
675 FILL_FIELD(ptr, common_flags, event, data); \
676 FILL_FIELD(ptr, common_preempt_count, event, data); \
677 FILL_FIELD(ptr, common_pid, event, data); \
678 FILL_FIELD(ptr, common_tgid, event, data); \
679} while (0)
680
681
682
683struct trace_switch_event {
684 u32 size;
685
686 u16 common_type;
687 u8 common_flags;
688 u8 common_preempt_count;
689 u32 common_pid;
690 u32 common_tgid;
691
692 char prev_comm[16];
693 u32 prev_pid;
694 u32 prev_prio;
695 u64 prev_state;
696 char next_comm[16];
697 u32 next_pid;
698 u32 next_prio;
699};
700
701struct trace_runtime_event {
702 u32 size;
703
704 u16 common_type;
705 u8 common_flags;
706 u8 common_preempt_count;
707 u32 common_pid;
708 u32 common_tgid;
709
710 char comm[16];
711 u32 pid;
712 u64 runtime;
713 u64 vruntime;
714};
715
716struct trace_wakeup_event {
717 u32 size;
718
719 u16 common_type;
720 u8 common_flags;
721 u8 common_preempt_count;
722 u32 common_pid;
723 u32 common_tgid;
724
725 char comm[16];
726 u32 pid;
727
728 u32 prio;
729 u32 success;
730 u32 cpu;
731};
732
733struct trace_fork_event {
734 u32 size;
735
736 u16 common_type;
737 u8 common_flags;
738 u8 common_preempt_count;
739 u32 common_pid;
740 u32 common_tgid;
741
742 char parent_comm[16];
743 u32 parent_pid;
744 char child_comm[16];
745 u32 child_pid;
746};
747
748struct trace_sched_handler {
749 void (*switch_event)(struct trace_switch_event *,
750 struct event *,
751 int cpu,
752 u64 timestamp,
753 struct thread *thread);
754
755 void (*runtime_event)(struct trace_runtime_event *,
756 struct event *,
757 int cpu,
758 u64 timestamp,
759 struct thread *thread);
760
761 void (*wakeup_event)(struct trace_wakeup_event *,
762 struct event *,
763 int cpu,
764 u64 timestamp,
765 struct thread *thread);
766
767 void (*fork_event)(struct trace_fork_event *,
768 struct event *,
769 int cpu,
770 u64 timestamp,
771 struct thread *thread);
772};
773
774
775static void
776replay_wakeup_event(struct trace_wakeup_event *wakeup_event,
777 struct event *event,
778 int cpu __used,
779 u64 timestamp __used,
780 struct thread *thread __used)
781{
782 struct task_desc *waker, *wakee;
783
784 if (verbose) {
785 printf("sched_wakeup event %p\n", event);
786
787 printf(" ... pid %d woke up %s/%d\n",
788 wakeup_event->common_pid,
789 wakeup_event->comm,
790 wakeup_event->pid);
791 }
792
793 waker = register_pid(wakeup_event->common_pid, "<unknown>");
794 wakee = register_pid(wakeup_event->pid, wakeup_event->comm);
795
796 add_sched_event_wakeup(waker, timestamp, wakee);
797}
798
799static u64 cpu_last_switched[MAX_CPUS];
800
801static void
802replay_switch_event(struct trace_switch_event *switch_event,
803 struct event *event,
804 int cpu,
805 u64 timestamp,
806 struct thread *thread __used)
807{
808 struct task_desc *prev, *next;
809 u64 timestamp0;
810 s64 delta;
811
812 if (verbose)
813 printf("sched_switch event %p\n", event);
814
815 if (cpu >= MAX_CPUS || cpu < 0)
816 return;
817
818 timestamp0 = cpu_last_switched[cpu];
819 if (timestamp0)
820 delta = timestamp - timestamp0;
821 else
822 delta = 0;
823
824 if (delta < 0)
825 die("hm, delta: %Ld < 0 ?\n", delta);
826
827 if (verbose) {
828 printf(" ... switch from %s/%d to %s/%d [ran %Ld nsecs]\n",
829 switch_event->prev_comm, switch_event->prev_pid,
830 switch_event->next_comm, switch_event->next_pid,
831 delta);
832 }
833
834 prev = register_pid(switch_event->prev_pid, switch_event->prev_comm);
835 next = register_pid(switch_event->next_pid, switch_event->next_comm);
836
837 cpu_last_switched[cpu] = timestamp;
838
839 add_sched_event_run(prev, timestamp, delta);
840 add_sched_event_sleep(prev, timestamp, switch_event->prev_state);
841}
842
843
844static void
845replay_fork_event(struct trace_fork_event *fork_event,
846 struct event *event,
847 int cpu __used,
848 u64 timestamp __used,
849 struct thread *thread __used)
850{
851 if (verbose) {
852 printf("sched_fork event %p\n", event);
853 printf("... parent: %s/%d\n", fork_event->parent_comm, fork_event->parent_pid);
854 printf("... child: %s/%d\n", fork_event->child_comm, fork_event->child_pid);
855 }
856 register_pid(fork_event->parent_pid, fork_event->parent_comm);
857 register_pid(fork_event->child_pid, fork_event->child_comm);
858}
859
860static struct trace_sched_handler replay_ops = {
861 .wakeup_event = replay_wakeup_event,
862 .switch_event = replay_switch_event,
863 .fork_event = replay_fork_event,
864};
865
866struct sort_dimension {
867 const char *name;
868 sort_fn_t cmp;
869 struct list_head list;
870};
871
872static LIST_HEAD(cmp_pid);
873
874static int
875thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r)
876{
877 struct sort_dimension *sort;
878 int ret = 0;
879
880 BUG_ON(list_empty(list));
881
882 list_for_each_entry(sort, list, list) {
883 ret = sort->cmp(l, r);
884 if (ret)
885 return ret;
886 }
887
888 return ret;
889}
890
891static struct work_atoms *
892thread_atoms_search(struct rb_root *root, struct thread *thread,
893 struct list_head *sort_list)
894{
895 struct rb_node *node = root->rb_node;
896 struct work_atoms key = { .thread = thread };
897
898 while (node) {
899 struct work_atoms *atoms;
900 int cmp;
901
902 atoms = container_of(node, struct work_atoms, node);
903
904 cmp = thread_lat_cmp(sort_list, &key, atoms);
905 if (cmp > 0)
906 node = node->rb_left;
907 else if (cmp < 0)
908 node = node->rb_right;
909 else {
910 BUG_ON(thread != atoms->thread);
911 return atoms;
912 }
913 }
914 return NULL;
915}
916
917static void
918__thread_latency_insert(struct rb_root *root, struct work_atoms *data,
919 struct list_head *sort_list)
920{
921 struct rb_node **new = &(root->rb_node), *parent = NULL;
922
923 while (*new) {
924 struct work_atoms *this;
925 int cmp;
926
927 this = container_of(*new, struct work_atoms, node);
928 parent = *new;
929
930 cmp = thread_lat_cmp(sort_list, data, this);
931
932 if (cmp > 0)
933 new = &((*new)->rb_left);
934 else
935 new = &((*new)->rb_right);
936 }
937
938 rb_link_node(&data->node, parent, new);
939 rb_insert_color(&data->node, root);
940}
941
942static void thread_atoms_insert(struct thread *thread)
943{
944 struct work_atoms *atoms;
945
946 atoms = calloc(sizeof(*atoms), 1);
947 if (!atoms)
948 die("No memory");
949
950 atoms->thread = thread;
951 INIT_LIST_HEAD(&atoms->work_list);
952 __thread_latency_insert(&atom_root, atoms, &cmp_pid);
953}
954
955static void
956latency_fork_event(struct trace_fork_event *fork_event __used,
957 struct event *event __used,
958 int cpu __used,
959 u64 timestamp __used,
960 struct thread *thread __used)
961{
962 /* should insert the newcomer */
963}
964
965__used
966static char sched_out_state(struct trace_switch_event *switch_event)
967{
968 const char *str = TASK_STATE_TO_CHAR_STR;
969
970 return str[switch_event->prev_state];
971}
972
973static void
974add_sched_out_event(struct work_atoms *atoms,
975 char run_state,
976 u64 timestamp)
977{
978 struct work_atom *atom;
979
980 atom = calloc(sizeof(*atom), 1);
981 if (!atom)
 982		die("No memory");
983
984 atom->sched_out_time = timestamp;
985
986 if (run_state == 'R') {
987 atom->state = THREAD_WAIT_CPU;
988 atom->wake_up_time = atom->sched_out_time;
989 }
990
991 list_add_tail(&atom->list, &atoms->work_list);
992}
993
994static void
995add_runtime_event(struct work_atoms *atoms, u64 delta, u64 timestamp __used)
996{
997 struct work_atom *atom;
998
999 BUG_ON(list_empty(&atoms->work_list));
1000
1001 atom = list_entry(atoms->work_list.prev, struct work_atom, list);
1002
1003 atom->runtime += delta;
1004 atoms->total_runtime += delta;
1005}
1006
1007static void
1008add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
1009{
1010 struct work_atom *atom;
1011 u64 delta;
1012
1013 if (list_empty(&atoms->work_list))
1014 return;
1015
1016 atom = list_entry(atoms->work_list.prev, struct work_atom, list);
1017
1018 if (atom->state != THREAD_WAIT_CPU)
1019 return;
1020
1021 if (timestamp < atom->wake_up_time) {
1022 atom->state = THREAD_IGNORE;
1023 return;
1024 }
1025
1026 atom->state = THREAD_SCHED_IN;
1027 atom->sched_in_time = timestamp;
1028
1029 delta = atom->sched_in_time - atom->wake_up_time;
1030 atoms->total_lat += delta;
1031 if (delta > atoms->max_lat)
1032 atoms->max_lat = delta;
1033 atoms->nb_atoms++;
1034}
1035
1036static void
1037latency_switch_event(struct trace_switch_event *switch_event,
1038 struct event *event __used,
1039 int cpu,
1040 u64 timestamp,
1041 struct thread *thread __used)
1042{
1043 struct work_atoms *out_events, *in_events;
1044 struct thread *sched_out, *sched_in;
1045 u64 timestamp0;
1046 s64 delta;
1047
1048 BUG_ON(cpu >= MAX_CPUS || cpu < 0);
1049
1050 timestamp0 = cpu_last_switched[cpu];
1051 cpu_last_switched[cpu] = timestamp;
1052 if (timestamp0)
1053 delta = timestamp - timestamp0;
1054 else
1055 delta = 0;
1056
1057 if (delta < 0)
1058 die("hm, delta: %Ld < 0 ?\n", delta);
1059
1060
1061 sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
1062 sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);
1063
1064 out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
1065 if (!out_events) {
1066 thread_atoms_insert(sched_out);
1067 out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
1068 if (!out_events)
1069 die("out-event: Internal tree error");
1070 }
1071 add_sched_out_event(out_events, sched_out_state(switch_event), timestamp);
1072
1073 in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid);
1074 if (!in_events) {
1075 thread_atoms_insert(sched_in);
1076 in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid);
1077 if (!in_events)
1078 die("in-event: Internal tree error");
1079 /*
1080		 * Task came in that we have not heard about yet,
1081 * add in an initial atom in runnable state:
1082 */
1083 add_sched_out_event(in_events, 'R', timestamp);
1084 }
1085 add_sched_in_event(in_events, timestamp);
1086}
1087
1088static void
1089latency_runtime_event(struct trace_runtime_event *runtime_event,
1090 struct event *event __used,
1091 int cpu,
1092 u64 timestamp,
1093 struct thread *this_thread __used)
1094{
1095 struct work_atoms *atoms;
1096 struct thread *thread;
1097
1098 BUG_ON(cpu >= MAX_CPUS || cpu < 0);
1099
1100 thread = threads__findnew(runtime_event->pid, &threads, &last_match);
1101 atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
1102 if (!atoms) {
1103 thread_atoms_insert(thread);
1104 atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
1105 if (!atoms)
1106 die("in-event: Internal tree error");
1107 add_sched_out_event(atoms, 'R', timestamp);
1108 }
1109
1110 add_runtime_event(atoms, runtime_event->runtime, timestamp);
1111}
1112
1113static void
1114latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
1115 struct event *__event __used,
1116 int cpu __used,
1117 u64 timestamp,
1118 struct thread *thread __used)
1119{
1120 struct work_atoms *atoms;
1121 struct work_atom *atom;
1122 struct thread *wakee;
1123
1124 /* Note for later, it may be interesting to observe the failing cases */
1125 if (!wakeup_event->success)
1126 return;
1127
1128 wakee = threads__findnew(wakeup_event->pid, &threads, &last_match);
1129 atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
1130 if (!atoms) {
1131 thread_atoms_insert(wakee);
1132 atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
1133 if (!atoms)
1134 die("wakeup-event: Internal tree error");
1135 add_sched_out_event(atoms, 'S', timestamp);
1136 }
1137
1138 BUG_ON(list_empty(&atoms->work_list));
1139
1140 atom = list_entry(atoms->work_list.prev, struct work_atom, list);
1141
1142 if (atom->state != THREAD_SLEEPING)
1143 nr_state_machine_bugs++;
1144
1145 nr_timestamps++;
1146 if (atom->sched_out_time > timestamp) {
1147 nr_unordered_timestamps++;
1148 return;
1149 }
1150
1151 atom->state = THREAD_WAIT_CPU;
1152 atom->wake_up_time = timestamp;
1153}
1154
1155static struct trace_sched_handler lat_ops = {
1156 .wakeup_event = latency_wakeup_event,
1157 .switch_event = latency_switch_event,
1158 .runtime_event = latency_runtime_event,
1159 .fork_event = latency_fork_event,
1160};
1161
1162static void output_lat_thread(struct work_atoms *work_list)
1163{
1164 int i;
1165 int ret;
1166 u64 avg;
1167
1168 if (!work_list->nb_atoms)
1169 return;
1170 /*
1171 * Ignore idle threads:
1172 */
1173 if (!strcmp(work_list->thread->comm, "swapper"))
1174 return;
1175
1176 all_runtime += work_list->total_runtime;
1177 all_count += work_list->nb_atoms;
1178
1179 ret = printf(" %s:%d ", work_list->thread->comm, work_list->thread->pid);
1180
1181 for (i = 0; i < 24 - ret; i++)
1182 printf(" ");
1183
1184 avg = work_list->total_lat / work_list->nb_atoms;
1185
1186 printf("|%11.3f ms |%9llu | avg:%9.3f ms | max:%9.3f ms |\n",
1187 (double)work_list->total_runtime / 1e6,
1188 work_list->nb_atoms, (double)avg / 1e6,
1189 (double)work_list->max_lat / 1e6);
1190}
1191
1192static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
1193{
1194 if (l->thread->pid < r->thread->pid)
1195 return -1;
1196 if (l->thread->pid > r->thread->pid)
1197 return 1;
1198
1199 return 0;
1200}
1201
1202static struct sort_dimension pid_sort_dimension = {
1203 .name = "pid",
1204 .cmp = pid_cmp,
1205};
1206
1207static int avg_cmp(struct work_atoms *l, struct work_atoms *r)
1208{
1209 u64 avgl, avgr;
1210
1211 if (!l->nb_atoms)
1212 return -1;
1213
1214 if (!r->nb_atoms)
1215 return 1;
1216
1217 avgl = l->total_lat / l->nb_atoms;
1218 avgr = r->total_lat / r->nb_atoms;
1219
1220 if (avgl < avgr)
1221 return -1;
1222 if (avgl > avgr)
1223 return 1;
1224
1225 return 0;
1226}
1227
1228static struct sort_dimension avg_sort_dimension = {
1229 .name = "avg",
1230 .cmp = avg_cmp,
1231};
1232
1233static int max_cmp(struct work_atoms *l, struct work_atoms *r)
1234{
1235 if (l->max_lat < r->max_lat)
1236 return -1;
1237 if (l->max_lat > r->max_lat)
1238 return 1;
1239
1240 return 0;
1241}
1242
1243static struct sort_dimension max_sort_dimension = {
1244 .name = "max",
1245 .cmp = max_cmp,
1246};
1247
1248static int switch_cmp(struct work_atoms *l, struct work_atoms *r)
1249{
1250 if (l->nb_atoms < r->nb_atoms)
1251 return -1;
1252 if (l->nb_atoms > r->nb_atoms)
1253 return 1;
1254
1255 return 0;
1256}
1257
1258static struct sort_dimension switch_sort_dimension = {
1259 .name = "switch",
1260 .cmp = switch_cmp,
1261};
1262
1263static int runtime_cmp(struct work_atoms *l, struct work_atoms *r)
1264{
1265 if (l->total_runtime < r->total_runtime)
1266 return -1;
1267 if (l->total_runtime > r->total_runtime)
1268 return 1;
1269
1270 return 0;
1271}
1272
1273static struct sort_dimension runtime_sort_dimension = {
1274 .name = "runtime",
1275 .cmp = runtime_cmp,
1276};
1277
1278static struct sort_dimension *available_sorts[] = {
1279 &pid_sort_dimension,
1280 &avg_sort_dimension,
1281 &max_sort_dimension,
1282 &switch_sort_dimension,
1283 &runtime_sort_dimension,
1284};
1285
1286#define NB_AVAILABLE_SORTS (int)(sizeof(available_sorts) / sizeof(struct sort_dimension *))
1287
1288static LIST_HEAD(sort_list);
1289
1290static int sort_dimension__add(char *tok, struct list_head *list)
1291{
1292 int i;
1293
1294 for (i = 0; i < NB_AVAILABLE_SORTS; i++) {
1295 if (!strcmp(available_sorts[i]->name, tok)) {
1296 list_add_tail(&available_sorts[i]->list, list);
1297
1298 return 0;
1299 }
1300 }
1301
1302 return -1;
1303}
1304
1305static void setup_sorting(void);
1306
1307static void sort_lat(void)
1308{
1309 struct rb_node *node;
1310
1311 for (;;) {
1312 struct work_atoms *data;
1313 node = rb_first(&atom_root);
1314 if (!node)
1315 break;
1316
1317 rb_erase(node, &atom_root);
1318 data = rb_entry(node, struct work_atoms, node);
1319 __thread_latency_insert(&sorted_atom_root, data, &sort_list);
1320 }
1321}
1322
1323static struct trace_sched_handler *trace_handler;
1324
1325static void
1326process_sched_wakeup_event(struct raw_event_sample *raw,
1327 struct event *event,
1328 int cpu __used,
1329 u64 timestamp __used,
1330 struct thread *thread __used)
1331{
1332 struct trace_wakeup_event wakeup_event;
1333
1334 FILL_COMMON_FIELDS(wakeup_event, event, raw->data);
1335
1336 FILL_ARRAY(wakeup_event, comm, event, raw->data);
1337 FILL_FIELD(wakeup_event, pid, event, raw->data);
1338 FILL_FIELD(wakeup_event, prio, event, raw->data);
1339 FILL_FIELD(wakeup_event, success, event, raw->data);
1340 FILL_FIELD(wakeup_event, cpu, event, raw->data);
1341
1342 if (trace_handler->wakeup_event)
1343 trace_handler->wakeup_event(&wakeup_event, event, cpu, timestamp, thread);
1344}
1345
1346/*
1347 * Track the current task - that way we can know whether there are any
1348 * weird events, such as a task being switched away that is not current.
1349 */
1350static int max_cpu;
1351
1352static u32 curr_pid[MAX_CPUS] = { [0 ... MAX_CPUS-1] = -1 };
1353
1354static struct thread *curr_thread[MAX_CPUS];
1355
1356static char next_shortname1 = 'A';
1357static char next_shortname2 = '0';
1358
1359static void
1360map_switch_event(struct trace_switch_event *switch_event,
1361 struct event *event __used,
1362 int this_cpu,
1363 u64 timestamp,
1364 struct thread *thread __used)
1365{
1366 struct thread *sched_out, *sched_in;
1367 int new_shortname;
1368 u64 timestamp0;
1369 s64 delta;
1370 int cpu;
1371
1372 BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);
1373
1374 if (this_cpu > max_cpu)
1375 max_cpu = this_cpu;
1376
1377 timestamp0 = cpu_last_switched[this_cpu];
1378 cpu_last_switched[this_cpu] = timestamp;
1379 if (timestamp0)
1380 delta = timestamp - timestamp0;
1381 else
1382 delta = 0;
1383
1384 if (delta < 0)
1385 die("hm, delta: %Ld < 0 ?\n", delta);
1386
1387
1388 sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
1389 sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);
1390
1391 curr_thread[this_cpu] = sched_in;
1392
1393 printf(" ");
1394
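	/*
	 * Give each new task a two-character shortname ("A0", "B0" ... "Z0",
	 * "A1", ...) the first time we see it, so the per-CPU columns of the
	 * map stay narrow:
	 */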
1395 new_shortname = 0;
1396 if (!sched_in->shortname[0]) {
1397 sched_in->shortname[0] = next_shortname1;
1398 sched_in->shortname[1] = next_shortname2;
1399
1400 if (next_shortname1 < 'Z') {
1401 next_shortname1++;
1402 } else {
1403 next_shortname1='A';
1404 if (next_shortname2 < '9') {
1405 next_shortname2++;
1406 } else {
1407 next_shortname2='0';
1408 }
1409 }
1410 new_shortname = 1;
1411 }
1412
1413 for (cpu = 0; cpu <= max_cpu; cpu++) {
1414 if (cpu != this_cpu)
1415 printf(" ");
1416 else
1417 printf("*");
1418
1419 if (curr_thread[cpu]) {
1420 if (curr_thread[cpu]->pid)
1421 printf("%2s ", curr_thread[cpu]->shortname);
1422 else
1423 printf(". ");
1424 } else
1425 printf(" ");
1426 }
1427
1428 printf(" %12.6f secs ", (double)timestamp/1e9);
1429 if (new_shortname) {
1430 printf("%s => %s:%d\n",
1431 sched_in->shortname, sched_in->comm, sched_in->pid);
1432 } else {
1433 printf("\n");
1434 }
1435}
1436
1437
1438static void
1439process_sched_switch_event(struct raw_event_sample *raw,
1440 struct event *event,
1441 int this_cpu,
1442 u64 timestamp __used,
1443 struct thread *thread __used)
1444{
1445 struct trace_switch_event switch_event;
1446
1447 FILL_COMMON_FIELDS(switch_event, event, raw->data);
1448
1449 FILL_ARRAY(switch_event, prev_comm, event, raw->data);
1450 FILL_FIELD(switch_event, prev_pid, event, raw->data);
1451 FILL_FIELD(switch_event, prev_prio, event, raw->data);
1452 FILL_FIELD(switch_event, prev_state, event, raw->data);
1453 FILL_ARRAY(switch_event, next_comm, event, raw->data);
1454 FILL_FIELD(switch_event, next_pid, event, raw->data);
1455 FILL_FIELD(switch_event, next_prio, event, raw->data);
1456
1457 if (curr_pid[this_cpu] != (u32)-1) {
1458 /*
1459 * Are we trying to switch away a PID that is
1460 * not current?
1461 */
1462 if (curr_pid[this_cpu] != switch_event.prev_pid)
1463 nr_context_switch_bugs++;
1464 }
1465 if (trace_handler->switch_event)
1466 trace_handler->switch_event(&switch_event, event, this_cpu, timestamp, thread);
1467
1468 curr_pid[this_cpu] = switch_event.next_pid;
1469}
1470
1471static void
1472process_sched_runtime_event(struct raw_event_sample *raw,
1473 struct event *event,
1474 int cpu __used,
1475 u64 timestamp __used,
1476 struct thread *thread __used)
1477{
1478 struct trace_runtime_event runtime_event;
1479
1480 FILL_ARRAY(runtime_event, comm, event, raw->data);
1481 FILL_FIELD(runtime_event, pid, event, raw->data);
1482 FILL_FIELD(runtime_event, runtime, event, raw->data);
1483 FILL_FIELD(runtime_event, vruntime, event, raw->data);
1484
1485 if (trace_handler->runtime_event)
1486 trace_handler->runtime_event(&runtime_event, event, cpu, timestamp, thread);
1487}
1488
1489static void
1490process_sched_fork_event(struct raw_event_sample *raw,
1491 struct event *event,
1492 int cpu __used,
1493 u64 timestamp __used,
1494 struct thread *thread __used)
1495{
1496 struct trace_fork_event fork_event;
1497
1498 FILL_COMMON_FIELDS(fork_event, event, raw->data);
1499
1500 FILL_ARRAY(fork_event, parent_comm, event, raw->data);
1501 FILL_FIELD(fork_event, parent_pid, event, raw->data);
1502 FILL_ARRAY(fork_event, child_comm, event, raw->data);
1503 FILL_FIELD(fork_event, child_pid, event, raw->data);
1504
1505 if (trace_handler->fork_event)
1506 trace_handler->fork_event(&fork_event, event, cpu, timestamp, thread);
1507}
1508
1509static void
1510process_sched_exit_event(struct event *event,
1511 int cpu __used,
1512 u64 timestamp __used,
1513 struct thread *thread __used)
1514{
1515 if (verbose)
1516 printf("sched_exit event %p\n", event);
1517}
1518
1519static void
1520process_raw_event(event_t *raw_event __used, void *more_data,
1521 int cpu, u64 timestamp, struct thread *thread)
1522{
1523 struct raw_event_sample *raw = more_data;
1524 struct event *event;
1525 int type;
1526
1527 type = trace_parse_common_type(raw->data);
1528 event = trace_find_event(type);
1529
1530 if (!strcmp(event->name, "sched_switch"))
1531 process_sched_switch_event(raw, event, cpu, timestamp, thread);
1532 if (!strcmp(event->name, "sched_stat_runtime"))
1533 process_sched_runtime_event(raw, event, cpu, timestamp, thread);
1534 if (!strcmp(event->name, "sched_wakeup"))
1535 process_sched_wakeup_event(raw, event, cpu, timestamp, thread);
1536 if (!strcmp(event->name, "sched_wakeup_new"))
1537 process_sched_wakeup_event(raw, event, cpu, timestamp, thread);
1538 if (!strcmp(event->name, "sched_process_fork"))
1539 process_sched_fork_event(raw, event, cpu, timestamp, thread);
1540 if (!strcmp(event->name, "sched_process_exit"))
1541 process_sched_exit_event(event, cpu, timestamp, thread);
1542}
1543
1544static int
1545process_sample_event(event_t *event, unsigned long offset, unsigned long head)
1546{
1547 char level;
1548 int show = 0;
1549 struct dso *dso = NULL;
1550 struct thread *thread;
1551 u64 ip = event->ip.ip;
1552 u64 timestamp = -1;
1553 u32 cpu = -1;
1554 u64 period = 1;
1555 void *more_data = event->ip.__more_data;
1556 int cpumode;
1557
1558 thread = threads__findnew(event->ip.pid, &threads, &last_match);
1559
1560 if (sample_type & PERF_SAMPLE_TIME) {
1561 timestamp = *(u64 *)more_data;
1562 more_data += sizeof(u64);
1563 }
1564
1565 if (sample_type & PERF_SAMPLE_CPU) {
1566 cpu = *(u32 *)more_data;
1567 more_data += sizeof(u32);
1568 more_data += sizeof(u32); /* reserved */
1569 }
1570
1571 if (sample_type & PERF_SAMPLE_PERIOD) {
1572 period = *(u64 *)more_data;
1573 more_data += sizeof(u64);
1574 }
1575
1576 dump_printf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
1577 (void *)(offset + head),
1578 (void *)(long)(event->header.size),
1579 event->header.misc,
1580 event->ip.pid, event->ip.tid,
1581 (void *)(long)ip,
1582 (long long)period);
1583
1584	if (thread == NULL) {
1585		eprintf("problem processing %d event, skipping it.\n",
1586			event->header.type);
1587		return -1;
1588	}
1589
1590	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
1591
1592 cpumode = event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK;
1593
1594 if (cpumode == PERF_EVENT_MISC_KERNEL) {
1595 show = SHOW_KERNEL;
1596 level = 'k';
1597
1598 dso = kernel_dso;
1599
1600 dump_printf(" ...... dso: %s\n", dso->name);
1601
1602 } else if (cpumode == PERF_EVENT_MISC_USER) {
1603
1604 show = SHOW_USER;
1605 level = '.';
1606
1607 } else {
1608 show = SHOW_HV;
1609 level = 'H';
1610
1611 dso = hypervisor_dso;
1612
1613 dump_printf(" ...... dso: [hypervisor]\n");
1614 }
1615
1616 if (sample_type & PERF_SAMPLE_RAW)
1617 process_raw_event(event, more_data, cpu, timestamp, thread);
1618
1619 return 0;
1620}
1621
1622static int
1623process_event(event_t *event, unsigned long offset, unsigned long head)
1624{
1625 trace_event(event);
1626
1627 nr_events++;
1628 switch (event->header.type) {
1629 case PERF_EVENT_MMAP:
1630 return 0;
1631 case PERF_EVENT_LOST:
1632 nr_lost_chunks++;
1633 nr_lost_events += event->lost.lost;
1634 return 0;
1635
1636 case PERF_EVENT_COMM:
1637 return process_comm_event(event, offset, head);
1638
1639 case PERF_EVENT_EXIT ... PERF_EVENT_READ:
1640 return 0;
1641
1642 case PERF_EVENT_SAMPLE:
1643 return process_sample_event(event, offset, head);
1644
1645 case PERF_EVENT_MAX:
1646 default:
1647 return -1;
1648 }
1649
1650 return 0;
1651}
1652
1653static int read_events(void)
1654{
1655 int ret, rc = EXIT_FAILURE;
1656 unsigned long offset = 0;
1657 unsigned long head = 0;
1658 struct stat perf_stat;
1659 event_t *event;
1660 uint32_t size;
1661 char *buf;
1662
1663 trace_report();
1664 register_idle_thread(&threads, &last_match);
1665
1666 input = open(input_name, O_RDONLY);
1667 if (input < 0) {
1668 perror("failed to open file");
1669 exit(-1);
1670 }
1671
1672 ret = fstat(input, &perf_stat);
1673 if (ret < 0) {
1674 perror("failed to stat file");
1675 exit(-1);
1676 }
1677
1678 if (!perf_stat.st_size) {
1679 fprintf(stderr, "zero-sized file, nothing to do!\n");
1680 exit(0);
1681 }
1682 header = perf_header__read(input);
1683 head = header->data_offset;
1684 sample_type = perf_header__sample_type(header);
1685
1686 if (!(sample_type & PERF_SAMPLE_RAW))
1687 die("No trace sample to read. Did you call perf record "
1688 "without -R?");
1689
1690 if (load_kernel() < 0) {
1691 perror("failed to load kernel symbols");
1692 return EXIT_FAILURE;
1693 }
1694
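/*
 * Walk the data section through a sliding mmap window: when an event would
 * cross the end of the window, unmap it, move the file offset forward to a
 * page boundary and map the next chunk (the "remap" path below).
 */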
1695remap:
1696 buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
1697 MAP_SHARED, input, offset);
1698 if (buf == MAP_FAILED) {
1699 perror("failed to mmap file");
1700 exit(-1);
1701 }
1702
1703more:
1704 event = (event_t *)(buf + head);
1705
1706 size = event->header.size;
1707 if (!size)
1708 size = 8;
1709
1710 if (head + event->header.size >= page_size * mmap_window) {
1711 unsigned long shift = page_size * (head / page_size);
1712 int res;
1713
1714 res = munmap(buf, page_size * mmap_window);
1715 assert(res == 0);
1716
1717 offset += shift;
1718 head -= shift;
1719 goto remap;
1720 }
1721
1722 size = event->header.size;
1723
1724
1725 if (!size || process_event(event, offset, head) < 0) {
1726
1727 /*
1728 * assume we lost track of the stream, check alignment, and
1729		 * increment a single u64 in the hope of catching on again 'soon'.
1730 */
1731
1732 if (unlikely(head & 7))
1733 head &= ~7ULL;
1734
1735 size = 8;
1736 }
1737
1738 head += size;
1739
1740 if (offset + head < (unsigned long)perf_stat.st_size)
1741 goto more;
1742
1743 rc = EXIT_SUCCESS;
1744 close(input);
1745
1746 return rc;
1747}
1748
1749static void print_bad_events(void)
1750{
1751 if (nr_unordered_timestamps && nr_timestamps) {
1752 printf(" INFO: %.3f%% unordered timestamps (%ld out of %ld)\n",
1753 (double)nr_unordered_timestamps/(double)nr_timestamps*100.0,
1754 nr_unordered_timestamps, nr_timestamps);
1755 }
1756 if (nr_lost_events && nr_events) {
1757 printf(" INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n",
1758 (double)nr_lost_events/(double)nr_events*100.0,
1759 nr_lost_events, nr_events, nr_lost_chunks);
1760 }
1761 if (nr_state_machine_bugs && nr_timestamps) {
1762 printf(" INFO: %.3f%% state machine bugs (%ld out of %ld)",
1763 (double)nr_state_machine_bugs/(double)nr_timestamps*100.0,
1764 nr_state_machine_bugs, nr_timestamps);
1765 if (nr_lost_events)
1766 printf(" (due to lost events?)");
1767 printf("\n");
1768 }
1769 if (nr_context_switch_bugs && nr_timestamps) {
1770 printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)",
1771 (double)nr_context_switch_bugs/(double)nr_timestamps*100.0,
1772 nr_context_switch_bugs, nr_timestamps);
1773 if (nr_lost_events)
1774 printf(" (due to lost events?)");
1775 printf("\n");
1776 }
1777}
1778
1779static void __cmd_lat(void)
1780{
1781 struct rb_node *next;
1782
1783 setup_pager();
1784 read_events();
1785 sort_lat();
1786
1787 printf("\n -----------------------------------------------------------------------------------------\n");
1788 printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms |\n");
1789 printf(" -----------------------------------------------------------------------------------------\n");
1790
1791 next = rb_first(&sorted_atom_root);
1792
1793 while (next) {
1794 struct work_atoms *work_list;
1795
1796 work_list = rb_entry(next, struct work_atoms, node);
1797 output_lat_thread(work_list);
1798 next = rb_next(next);
1799 }
1800
1801 printf(" -----------------------------------------------------------------------------------------\n");
1802 printf(" TOTAL: |%11.3f ms |%9Ld |\n",
1803 (double)all_runtime/1e6, all_count);
1804
1805 printf(" ---------------------------------------------------\n");
1806
1807 print_bad_events();
1808 printf("\n");
1809
1810}
1811
1812static struct trace_sched_handler map_ops = {
1813 .wakeup_event = NULL,
1814 .switch_event = map_switch_event,
1815 .runtime_event = NULL,
1816 .fork_event = NULL,
1817};
1818
1819static void __cmd_map(void)
1820{
1821 max_cpu = sysconf(_SC_NPROCESSORS_CONF);
1822
1823 setup_pager();
1824 read_events();
1825 print_bad_events();
1826}
1827
1828static void __cmd_replay(void)
1829{
1830 unsigned long i;
1831
1832 calibrate_run_measurement_overhead();
1833 calibrate_sleep_measurement_overhead();
1834
1835 test_calibrations();
1836
1837 read_events();
1838
1839 printf("nr_run_events: %ld\n", nr_run_events);
1840 printf("nr_sleep_events: %ld\n", nr_sleep_events);
1841 printf("nr_wakeup_events: %ld\n", nr_wakeup_events);
1842
1843 if (targetless_wakeups)
1844 printf("target-less wakeups: %ld\n", targetless_wakeups);
1845 if (multitarget_wakeups)
1846 printf("multi-target wakeups: %ld\n", multitarget_wakeups);
1847 if (nr_run_events_optimized)
1848 printf("run atoms optimized: %ld\n",
1849 nr_run_events_optimized);
1850
1851 print_task_traces();
1852 add_cross_task_wakeups();
1853
1854 create_tasks();
1855 printf("------------------------------------------------------------\n");
1856 for (i = 0; i < replay_repeat; i++)
1857 run_one_test();
1858}
1859
1860
1861static const char * const sched_usage[] = {
1862 "perf sched [<options>] {record|latency|map|replay|trace}",
1863 NULL
1864};
1865
1866static const struct option sched_options[] = {
1867 OPT_STRING('i', "input", &input_name, "file",
1868 "input file name"),
1869 OPT_BOOLEAN('v', "verbose", &verbose,
1870 "be more verbose (show symbol address, etc)"),
1871 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
1872 "dump raw trace in ASCII"),
1873 OPT_END()
1874};
1875
1876static const char * const latency_usage[] = {
1877 "perf sched latency [<options>]",
1878 NULL
1879};
1880
1881static const struct option latency_options[] = {
1882 OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
1883 "sort by key(s): runtime, switch, avg, max"),
1884 OPT_BOOLEAN('v', "verbose", &verbose,
1885 "be more verbose (show symbol address, etc)"),
1886 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
1887 "dump raw trace in ASCII"),
1888 OPT_END()
1889};
1890
1891static const char * const replay_usage[] = {
1892 "perf sched replay [<options>]",
1893 NULL
1894};
1895
1896static const struct option replay_options[] = {
1897 OPT_INTEGER('r', "repeat", &replay_repeat,
1898 "repeat the workload replay N times (-1: infinite)"),
1899 OPT_BOOLEAN('v', "verbose", &verbose,
1900 "be more verbose (show symbol address, etc)"),
1901 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
1902 "dump raw trace in ASCII"),
1903 OPT_END()
1904};
1905
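/*
 * setup_sorting() turns the --sort string into an ordered list of sort
 * dimensions; for example "perf sched latency -s max,runtime" orders the
 * latency table by maximum delay first and total runtime second.
 */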
1906static void setup_sorting(void)
1907{
1908 char *tmp, *tok, *str = strdup(sort_order);
1909
1910 for (tok = strtok_r(str, ", ", &tmp);
1911 tok; tok = strtok_r(NULL, ", ", &tmp)) {
1912 if (sort_dimension__add(tok, &sort_list) < 0) {
1913 error("Unknown --sort key: `%s'", tok);
1914 usage_with_options(latency_usage, latency_options);
1915 }
1916 }
1917
1918 free(str);
1919
1920 sort_dimension__add((char *)"pid", &cmp_pid);
1921}
1922
1923static const char *record_args[] = {
1924 "record",
1925 "-a",
1926 "-R",
1927 "-M",
1928 "-f",
1929 "-m", "1024",
1930 "-c", "1",
1931 "-e", "sched:sched_switch:r",
1932 "-e", "sched:sched_stat_wait:r",
1933 "-e", "sched:sched_stat_sleep:r",
1934 "-e", "sched:sched_stat_iowait:r",
1935 "-e", "sched:sched_stat_runtime:r",
1936 "-e", "sched:sched_process_exit:r",
1937 "-e", "sched:sched_process_fork:r",
1938 "-e", "sched:sched_wakeup:r",
1939 "-e", "sched:sched_migrate_task:r",
1940};
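/*
 * "perf sched record" is a thin wrapper around "perf record": the options
 * above enable system-wide (-a), raw (-R) sampling of the scheduler
 * tracepoints, and any extra command line arguments are passed through
 * unchanged.
 */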
1941
1942static int __cmd_record(int argc, const char **argv)
1943{
1944 unsigned int rec_argc, i, j;
1945 const char **rec_argv;
1946
1947 rec_argc = ARRAY_SIZE(record_args) + argc - 1;
1948 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1949
1950 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1951 rec_argv[i] = strdup(record_args[i]);
1952
1953 for (j = 1; j < (unsigned int)argc; j++, i++)
1954 rec_argv[i] = argv[j];
1955
1956 BUG_ON(i != rec_argc);
1957
1958 return cmd_record(i, rec_argv, NULL);
1959}
1960
1961int cmd_sched(int argc, const char **argv, const char *prefix __used)
1962{
1963 symbol__init();
1964 page_size = getpagesize();
1965
1966 argc = parse_options(argc, argv, sched_options, sched_usage,
1967 PARSE_OPT_STOP_AT_NON_OPTION);
1968 if (!argc)
1969 usage_with_options(sched_usage, sched_options);
1970
1971 if (!strncmp(argv[0], "rec", 3)) {
1972 return __cmd_record(argc, argv);
1973 } else if (!strncmp(argv[0], "lat", 3)) {
1974 trace_handler = &lat_ops;
1975 if (argc > 1) {
1976 argc = parse_options(argc, argv, latency_options, latency_usage, 0);
1977 if (argc)
1978 usage_with_options(latency_usage, latency_options);
1979 }
1980 setup_sorting();
1981 __cmd_lat();
1982 } else if (!strcmp(argv[0], "map")) {
1983 trace_handler = &map_ops;
1984 setup_sorting();
1985 __cmd_map();
1986 } else if (!strncmp(argv[0], "rep", 3)) {
1987 trace_handler = &replay_ops;
1988 if (argc) {
1989 argc = parse_options(argc, argv, replay_options, replay_usage, 0);
1990 if (argc)
1991 usage_with_options(replay_usage, replay_options);
1992 }
1993 __cmd_replay();
1994 } else if (!strcmp(argv[0], "trace")) {
1995 /*
1996 * Aliased to 'perf trace' for now:
1997 */
1998 return cmd_trace(argc, argv, prefix);
1999 } else {
2000 usage_with_options(sched_usage, sched_options);
2001 }
2002
2003 return 0;
2004}
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
new file mode 100644
index 000000000000..58d737ec8f5e
--- /dev/null
+++ b/tools/perf/builtin-timechart.c
@@ -0,0 +1,1151 @@
1/*
2 * builtin-timechart.c - make an svg timechart of system activity
3 *
4 * (C) Copyright 2009 Intel Corporation
5 *
6 * Authors:
7 * Arjan van de Ven <arjan@linux.intel.com>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; version 2
12 * of the License.
13 */
14
15#include "builtin.h"
16
17#include "util/util.h"
18
19#include "util/color.h"
20#include <linux/list.h>
21#include "util/cache.h"
22#include <linux/rbtree.h>
23#include "util/symbol.h"
24#include "util/string.h"
25#include "util/callchain.h"
26#include "util/strlist.h"
27
28#include "perf.h"
29#include "util/header.h"
30#include "util/parse-options.h"
31#include "util/parse-events.h"
32#include "util/svghelper.h"
33
34static char const *input_name = "perf.data";
35static char const *output_name = "output.svg";
36
37
38static unsigned long page_size;
39static unsigned long mmap_window = 32;
40static u64 sample_type;
41
42static unsigned int numcpus;
43static u64 min_freq; /* Lowest CPU frequency seen */
44static u64 max_freq; /* Highest CPU frequency seen */
45static u64 turbo_frequency;
46
47static u64 first_time, last_time;
48
49
50static struct perf_header *header;
51
52struct per_pid;
53struct per_pidcomm;
54
55struct cpu_sample;
56struct power_event;
57struct wake_event;
58
59struct sample_wrapper;
60
61/*
62 * Data structure layout:
63 * We keep a list of "pid"s, matching the kernel's notion of a task struct.
64 * Each "pid" entry has a list of "comm"s.
65 * This is because we want to track different programs differently, while
66 * exec will reuse the original pid (by design).
67 * Each comm has a list of samples that will be used to draw the
68 * final graph.
69 */
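/*
 * For example, after parsing a trace the structures form a small tree:
 *
 *   all_data -> per_pid (pid) -> per_pidcomm ("comm") -> cpu_sample list
 *                            \-> per_pidcomm ("comm") -> cpu_sample list
 */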
70
71struct per_pid {
72 struct per_pid *next;
73
74 int pid;
75 int ppid;
76
77 u64 start_time;
78 u64 end_time;
79 u64 total_time;
80 int display;
81
82 struct per_pidcomm *all;
83 struct per_pidcomm *current;
84
85 int painted;
86};
87
88
89struct per_pidcomm {
90 struct per_pidcomm *next;
91
92 u64 start_time;
93 u64 end_time;
94 u64 total_time;
95
96 int Y;
97 int display;
98
99 long state;
100 u64 state_since;
101
102 char *comm;
103
104 struct cpu_sample *samples;
105};
106
107struct sample_wrapper {
108 struct sample_wrapper *next;
109
110 u64 timestamp;
111 unsigned char data[0];
112};
113
114#define TYPE_NONE 0
115#define TYPE_RUNNING 1
116#define TYPE_WAITING 2
117#define TYPE_BLOCKED 3
118
119struct cpu_sample {
120 struct cpu_sample *next;
121
122 u64 start_time;
123 u64 end_time;
124 int type;
125 int cpu;
126};
127
128static struct per_pid *all_data;
129
130#define CSTATE 1
131#define PSTATE 2
132
133struct power_event {
134 struct power_event *next;
135 int type;
136 int state;
137 u64 start_time;
138 u64 end_time;
139 int cpu;
140};
141
142struct wake_event {
143 struct wake_event *next;
144 int waker;
145 int wakee;
146 u64 time;
147};
148
149static struct power_event *power_events;
150static struct wake_event *wake_events;
151
152struct sample_wrapper *all_samples;
153
154static struct per_pid *find_create_pid(int pid)
155{
156 struct per_pid *cursor = all_data;
157
158 while (cursor) {
159 if (cursor->pid == pid)
160 return cursor;
161 cursor = cursor->next;
162 }
163 cursor = malloc(sizeof(struct per_pid));
164 assert(cursor != NULL);
165 memset(cursor, 0, sizeof(struct per_pid));
166 cursor->pid = pid;
167 cursor->next = all_data;
168 all_data = cursor;
169 return cursor;
170}
171
172static void pid_set_comm(int pid, char *comm)
173{
174 struct per_pid *p;
175 struct per_pidcomm *c;
176 p = find_create_pid(pid);
177 c = p->all;
178 while (c) {
179 if (c->comm && strcmp(c->comm, comm) == 0) {
180 p->current = c;
181 return;
182 }
183 if (!c->comm) {
184 c->comm = strdup(comm);
185 p->current = c;
186 return;
187 }
188 c = c->next;
189 }
190 c = malloc(sizeof(struct per_pidcomm));
191 assert(c != NULL);
192 memset(c, 0, sizeof(struct per_pidcomm));
193 c->comm = strdup(comm);
194 p->current = c;
195 c->next = p->all;
196 p->all = c;
197}
198
199static void pid_fork(int pid, int ppid, u64 timestamp)
200{
201 struct per_pid *p, *pp;
202 p = find_create_pid(pid);
203 pp = find_create_pid(ppid);
204 p->ppid = ppid;
205 if (pp->current && pp->current->comm && !p->current)
206 pid_set_comm(pid, pp->current->comm);
207
208 p->start_time = timestamp;
209 if (p->current) {
210 p->current->start_time = timestamp;
211 p->current->state_since = timestamp;
212 }
213}
214
215static void pid_exit(int pid, u64 timestamp)
216{
217 struct per_pid *p;
218 p = find_create_pid(pid);
219 p->end_time = timestamp;
220 if (p->current)
221 p->current->end_time = timestamp;
222}
223
224static void
225pid_put_sample(int pid, int type, unsigned int cpu, u64 start, u64 end)
226{
227 struct per_pid *p;
228 struct per_pidcomm *c;
229 struct cpu_sample *sample;
230
231 p = find_create_pid(pid);
232 c = p->current;
233 if (!c) {
234 c = malloc(sizeof(struct per_pidcomm));
235 assert(c != NULL);
236 memset(c, 0, sizeof(struct per_pidcomm));
237 p->current = c;
238 c->next = p->all;
239 p->all = c;
240 }
241
242 sample = malloc(sizeof(struct cpu_sample));
243 assert(sample != NULL);
244 memset(sample, 0, sizeof(struct cpu_sample));
245 sample->start_time = start;
246 sample->end_time = end;
247 sample->type = type;
248 sample->next = c->samples;
249 sample->cpu = cpu;
250 c->samples = sample;
251
252 if (sample->type == TYPE_RUNNING && end > start && start > 0) {
253 c->total_time += (end-start);
254 p->total_time += (end-start);
255 }
256
257 if (c->start_time == 0 || c->start_time > start)
258 c->start_time = start;
259 if (p->start_time == 0 || p->start_time > start)
260 p->start_time = start;
261
262 if (cpu > numcpus)
263 numcpus = cpu;
264}
265
266#define MAX_CPUS 4096
267
268static u64 cpus_cstate_start_times[MAX_CPUS];
269static int cpus_cstate_state[MAX_CPUS];
270static u64 cpus_pstate_start_times[MAX_CPUS];
271static u64 cpus_pstate_state[MAX_CPUS];
272
273static int
274process_comm_event(event_t *event)
275{
276 pid_set_comm(event->comm.pid, event->comm.comm);
277 return 0;
278}
279static int
280process_fork_event(event_t *event)
281{
282 pid_fork(event->fork.pid, event->fork.ppid, event->fork.time);
283 return 0;
284}
285
286static int
287process_exit_event(event_t *event)
288{
289 pid_exit(event->fork.pid, event->fork.time);
290 return 0;
291}
292
293struct trace_entry {
294 u32 size;
295 unsigned short type;
296 unsigned char flags;
297 unsigned char preempt_count;
298 int pid;
299 int tgid;
300};
301
302struct power_entry {
303 struct trace_entry te;
304 s64 type;
305 s64 value;
306};
307
308#define TASK_COMM_LEN 16
309struct wakeup_entry {
310 struct trace_entry te;
311 char comm[TASK_COMM_LEN];
312 int pid;
313 int prio;
314 int success;
315};
316
317/*
318 * trace_flag_type is an enumeration that holds different
319 * states when a trace occurs. These are:
320 * IRQS_OFF - interrupts were disabled
321 * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags
322 *  NEED_RESCHED	- reschedule is requested
323 * HARDIRQ - inside an interrupt handler
324 * SOFTIRQ - inside a softirq handler
325 */
326enum trace_flag_type {
327 TRACE_FLAG_IRQS_OFF = 0x01,
328 TRACE_FLAG_IRQS_NOSUPPORT = 0x02,
329 TRACE_FLAG_NEED_RESCHED = 0x04,
330 TRACE_FLAG_HARDIRQ = 0x08,
331 TRACE_FLAG_SOFTIRQ = 0x10,
332};
333
334
335
336struct sched_switch {
337 struct trace_entry te;
338 char prev_comm[TASK_COMM_LEN];
339 int prev_pid;
340 int prev_prio;
341 long prev_state; /* Arjan weeps. */
342 char next_comm[TASK_COMM_LEN];
343 int next_pid;
344 int next_prio;
345};
346
347static void c_state_start(int cpu, u64 timestamp, int state)
348{
349 cpus_cstate_start_times[cpu] = timestamp;
350 cpus_cstate_state[cpu] = state;
351}
352
353static void c_state_end(int cpu, u64 timestamp)
354{
355 struct power_event *pwr;
356 pwr = malloc(sizeof(struct power_event));
357 if (!pwr)
358 return;
359 memset(pwr, 0, sizeof(struct power_event));
360
361 pwr->state = cpus_cstate_state[cpu];
362 pwr->start_time = cpus_cstate_start_times[cpu];
363 pwr->end_time = timestamp;
364 pwr->cpu = cpu;
365 pwr->type = CSTATE;
366 pwr->next = power_events;
367
368 power_events = pwr;
369}
370
371static void p_state_change(int cpu, u64 timestamp, u64 new_freq)
372{
373	struct power_event *pwr;
374
375	if (new_freq > 8000000) /* detect invalid data */
376		return;
377
378	pwr = malloc(sizeof(struct power_event));
379	if (!pwr)
380		return;
381	memset(pwr, 0, sizeof(struct power_event));
382
383 pwr->state = cpus_pstate_state[cpu];
384 pwr->start_time = cpus_pstate_start_times[cpu];
385 pwr->end_time = timestamp;
386 pwr->cpu = cpu;
387 pwr->type = PSTATE;
388 pwr->next = power_events;
389
390 if (!pwr->start_time)
391 pwr->start_time = first_time;
392
393 power_events = pwr;
394
395 cpus_pstate_state[cpu] = new_freq;
396 cpus_pstate_start_times[cpu] = timestamp;
397
398 if ((u64)new_freq > max_freq)
399 max_freq = new_freq;
400
401 if (new_freq < min_freq || min_freq == 0)
402 min_freq = new_freq;
403
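	/*
	 * cpufreq commonly advertises turbo mode as the highest non-turbo
	 * frequency plus 1000 kHz, so a new frequency exactly 1000 below the
	 * maximum seen so far suggests that maximum is the turbo frequency:
	 */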
404 if (new_freq == max_freq - 1000)
405 turbo_frequency = max_freq;
406}
407
408static void
409sched_wakeup(int cpu, u64 timestamp, int pid, struct trace_entry *te)
410{
411 struct wake_event *we;
412 struct per_pid *p;
413 struct wakeup_entry *wake = (void *)te;
414
415 we = malloc(sizeof(struct wake_event));
416 if (!we)
417 return;
418
419 memset(we, 0, sizeof(struct wake_event));
420 we->time = timestamp;
421 we->waker = pid;
422
423 if ((te->flags & TRACE_FLAG_HARDIRQ) || (te->flags & TRACE_FLAG_SOFTIRQ))
424 we->waker = -1;
425
426 we->wakee = wake->pid;
427 we->next = wake_events;
428 wake_events = we;
429 p = find_create_pid(we->wakee);
430
431 if (p && p->current && p->current->state == TYPE_NONE) {
432 p->current->state_since = timestamp;
433 p->current->state = TYPE_WAITING;
434 }
435 if (p && p->current && p->current->state == TYPE_BLOCKED) {
436 pid_put_sample(p->pid, p->current->state, cpu, p->current->state_since, timestamp);
437 p->current->state_since = timestamp;
438 p->current->state = TYPE_WAITING;
439 }
440}
441
442static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te)
443{
444 struct per_pid *p = NULL, *prev_p;
445 struct sched_switch *sw = (void *)te;
446
447
448 prev_p = find_create_pid(sw->prev_pid);
449
450 p = find_create_pid(sw->next_pid);
451
452 if (prev_p->current && prev_p->current->state != TYPE_NONE)
453 pid_put_sample(sw->prev_pid, TYPE_RUNNING, cpu, prev_p->current->state_since, timestamp);
454 if (p && p->current) {
455 if (p->current->state != TYPE_NONE)
456 pid_put_sample(sw->next_pid, p->current->state, cpu, p->current->state_since, timestamp);
457
458 p->current->state_since = timestamp;
459 p->current->state = TYPE_RUNNING;
460 }
461
462 if (prev_p->current) {
463 prev_p->current->state = TYPE_NONE;
464 prev_p->current->state_since = timestamp;
465 if (sw->prev_state & 2)
466 prev_p->current->state = TYPE_BLOCKED;
467 if (sw->prev_state == 0)
468 prev_p->current->state = TYPE_WAITING;
469 }
470}
471
472
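/*
 * Samples are decoded by walking the u64 array in the order the
 * PERF_SAMPLE_* bits are defined; "cursor" skips over the fields we do not
 * use until it reaches the raw tracepoint payload.
 */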
473static int
474process_sample_event(event_t *event)
475{
476 int cursor = 0;
477 u64 addr = 0;
478 u64 stamp = 0;
479 u32 cpu = 0;
480 u32 pid = 0;
481 struct trace_entry *te;
482
483 if (sample_type & PERF_SAMPLE_IP)
484 cursor++;
485
486 if (sample_type & PERF_SAMPLE_TID) {
487 pid = event->sample.array[cursor]>>32;
488 cursor++;
489 }
490 if (sample_type & PERF_SAMPLE_TIME) {
491 stamp = event->sample.array[cursor++];
492
493 if (!first_time || first_time > stamp)
494 first_time = stamp;
495 if (last_time < stamp)
496 last_time = stamp;
497
498 }
499 if (sample_type & PERF_SAMPLE_ADDR)
500 addr = event->sample.array[cursor++];
501 if (sample_type & PERF_SAMPLE_ID)
502 cursor++;
503 if (sample_type & PERF_SAMPLE_STREAM_ID)
504 cursor++;
505 if (sample_type & PERF_SAMPLE_CPU)
506 cpu = event->sample.array[cursor++] & 0xFFFFFFFF;
507 if (sample_type & PERF_SAMPLE_PERIOD)
508 cursor++;
509
510 te = (void *)&event->sample.array[cursor];
511
512 if (sample_type & PERF_SAMPLE_RAW && te->size > 0) {
513 char *event_str;
514 struct power_entry *pe;
515
516 pe = (void *)te;
517
518 event_str = perf_header__find_event(te->type);
519
520 if (!event_str)
521 return 0;
522
523 if (strcmp(event_str, "power:power_start") == 0)
524 c_state_start(cpu, stamp, pe->value);
525
526 if (strcmp(event_str, "power:power_end") == 0)
527 c_state_end(cpu, stamp);
528
529 if (strcmp(event_str, "power:power_frequency") == 0)
530 p_state_change(cpu, stamp, pe->value);
531
532 if (strcmp(event_str, "sched:sched_wakeup") == 0)
533 sched_wakeup(cpu, stamp, pid, te);
534
535 if (strcmp(event_str, "sched:sched_switch") == 0)
536 sched_switch(cpu, stamp, te);
537 }
538 return 0;
539}
540
541/*
542 * After the last sample we need to wrap up the current C/P state
543 * and close out each CPU for these.
544 */
545static void end_sample_processing(void)
546{
547 u64 cpu;
548 struct power_event *pwr;
549
550 for (cpu = 0; cpu < numcpus; cpu++) {
551 pwr = malloc(sizeof(struct power_event));
552 if (!pwr)
553 return;
554 memset(pwr, 0, sizeof(struct power_event));
555
556 /* C state */
557#if 0
558 pwr->state = cpus_cstate_state[cpu];
559 pwr->start_time = cpus_cstate_start_times[cpu];
560 pwr->end_time = last_time;
561 pwr->cpu = cpu;
562 pwr->type = CSTATE;
563 pwr->next = power_events;
564
565 power_events = pwr;
566#endif
567 /* P state */
568
569 pwr = malloc(sizeof(struct power_event));
570 if (!pwr)
571 return;
572 memset(pwr, 0, sizeof(struct power_event));
573
574 pwr->state = cpus_pstate_state[cpu];
575 pwr->start_time = cpus_pstate_start_times[cpu];
576 pwr->end_time = last_time;
577 pwr->cpu = cpu;
578 pwr->type = PSTATE;
579 pwr->next = power_events;
580
581 if (!pwr->start_time)
582 pwr->start_time = first_time;
583 if (!pwr->state)
584 pwr->state = min_freq;
585 power_events = pwr;
586 }
587}
588
589static u64 sample_time(event_t *event)
590{
591 int cursor;
592
593 cursor = 0;
594 if (sample_type & PERF_SAMPLE_IP)
595 cursor++;
596 if (sample_type & PERF_SAMPLE_TID)
597 cursor++;
598 if (sample_type & PERF_SAMPLE_TIME)
599 return event->sample.array[cursor];
600 return 0;
601}
602
603
604/*
605 * We first queue all events, kept sorted by timestamp in descending
606 * order as they are inserted. The order will get flipped later.
607 */
608static int
609queue_sample_event(event_t *event)
610{
611 struct sample_wrapper *copy, *prev;
612 int size;
613
614 size = event->sample.header.size + sizeof(struct sample_wrapper) + 8;
615
616 copy = malloc(size);
617 if (!copy)
618 return 1;
619
620 memset(copy, 0, size);
621
622 copy->next = NULL;
623 copy->timestamp = sample_time(event);
624
625 memcpy(&copy->data, event, event->sample.header.size);
626
627 /* insert in the right place in the list */
628
629 if (!all_samples) {
630 /* first sample ever */
631 all_samples = copy;
632 return 0;
633 }
634
635 if (all_samples->timestamp < copy->timestamp) {
636 /* insert at the head of the list */
637 copy->next = all_samples;
638 all_samples = copy;
639 return 0;
640 }
641
642 prev = all_samples;
643 while (prev->next) {
644 if (prev->next->timestamp < copy->timestamp) {
645 copy->next = prev->next;
646 prev->next = copy;
647 return 0;
648 }
649 prev = prev->next;
650 }
651 /* insert at the end of the list */
652 prev->next = copy;
653
654 return 0;
655}
656
657static void sort_queued_samples(void)
658{
659 struct sample_wrapper *cursor, *next;
660
661 cursor = all_samples;
662 all_samples = NULL;
663
664 while (cursor) {
665 next = cursor->next;
666 cursor->next = all_samples;
667 all_samples = cursor;
668 cursor = next;
669 }
670}
671
672/*
673 * Sort the pid data structure
674 */
675static void sort_pids(void)
676{
677 struct per_pid *new_list, *p, *cursor, *prev;
678 /* sort by ppid first, then by pid, lowest to highest */
679
680 new_list = NULL;
681
682 while (all_data) {
683 p = all_data;
684 all_data = p->next;
685 p->next = NULL;
686
687 if (new_list == NULL) {
688 new_list = p;
689 p->next = NULL;
690 continue;
691 }
692 prev = NULL;
693 cursor = new_list;
694 while (cursor) {
695 if (cursor->ppid > p->ppid ||
696 (cursor->ppid == p->ppid && cursor->pid > p->pid)) {
697 /* must insert before */
698 if (prev) {
699 p->next = prev->next;
700 prev->next = p;
701 cursor = NULL;
702 continue;
703 } else {
704 p->next = new_list;
705 new_list = p;
706 cursor = NULL;
707 continue;
708 }
709 }
710
711 prev = cursor;
712 cursor = cursor->next;
713 if (!cursor)
714 prev->next = p;
715 }
716 }
717 all_data = new_list;
718}
719
720
721static void draw_c_p_states(void)
722{
723 struct power_event *pwr;
724 pwr = power_events;
725
726 /*
727	 * Two-pass drawing, so that the P state bars end up on top of the C state blocks.
728 */
729 while (pwr) {
730 if (pwr->type == CSTATE)
731 svg_cstate(pwr->cpu, pwr->start_time, pwr->end_time, pwr->state);
732 pwr = pwr->next;
733 }
734
735 pwr = power_events;
736 while (pwr) {
737 if (pwr->type == PSTATE) {
738 if (!pwr->state)
739 pwr->state = min_freq;
740 svg_pstate(pwr->cpu, pwr->start_time, pwr->end_time, pwr->state);
741 }
742 pwr = pwr->next;
743 }
744}
745
746static void draw_wakeups(void)
747{
748 struct wake_event *we;
749 struct per_pid *p;
750 struct per_pidcomm *c;
751
752 we = wake_events;
753 while (we) {
754 int from = 0, to = 0;
755
756 /* locate the column of the waker and wakee */
757 p = all_data;
758 while (p) {
759 if (p->pid == we->waker || p->pid == we->wakee) {
760 c = p->all;
761 while (c) {
762 if (c->Y && c->start_time <= we->time && c->end_time >= we->time) {
763 if (p->pid == we->waker)
764 from = c->Y;
765 if (p->pid == we->wakee)
766 to = c->Y;
767 }
768 c = c->next;
769 }
770 }
771 p = p->next;
772 }
773
774 if (we->waker == -1)
775 svg_interrupt(we->time, to);
776 else if (from && to && abs(from - to) == 1)
777 svg_wakeline(we->time, from, to);
778 else
779 svg_partial_wakeline(we->time, from, to);
780 we = we->next;
781 }
782}
783
784static void draw_cpu_usage(void)
785{
786 struct per_pid *p;
787 struct per_pidcomm *c;
788 struct cpu_sample *sample;
789 p = all_data;
790 while (p) {
791 c = p->all;
792 while (c) {
793 sample = c->samples;
794 while (sample) {
795 if (sample->type == TYPE_RUNNING)
796 svg_process(sample->cpu, sample->start_time, sample->end_time, "sample", c->comm);
797
798 sample = sample->next;
799 }
800 c = c->next;
801 }
802 p = p->next;
803 }
804}
805
806static void draw_process_bars(void)
807{
808 struct per_pid *p;
809 struct per_pidcomm *c;
810 struct cpu_sample *sample;
811 int Y = 0;
812
813 Y = 2 * numcpus + 2;
814
815 p = all_data;
816 while (p) {
817 c = p->all;
818 while (c) {
819 if (!c->display) {
820 c->Y = 0;
821 c = c->next;
822 continue;
823 }
824
825 svg_box(Y, p->start_time, p->end_time, "process");
826 sample = c->samples;
827 while (sample) {
828 if (sample->type == TYPE_RUNNING)
829 svg_sample(Y, sample->cpu, sample->start_time, sample->end_time, "sample");
830 if (sample->type == TYPE_BLOCKED)
831 svg_box(Y, sample->start_time, sample->end_time, "blocked");
832 if (sample->type == TYPE_WAITING)
833 svg_box(Y, sample->start_time, sample->end_time, "waiting");
834 sample = sample->next;
835 }
836
837 if (c->comm) {
838 char comm[256];
839 if (c->total_time > 5000000000) /* 5 seconds */
840 sprintf(comm, "%s:%i (%2.2fs)", c->comm, p->pid, c->total_time / 1000000000.0);
841 else
842 sprintf(comm, "%s:%i (%3.1fms)", c->comm, p->pid, c->total_time / 1000000.0);
843
844 svg_text(Y, c->start_time, comm);
845 }
846 c->Y = Y;
847 Y++;
848 c = c->next;
849 }
850 p = p->next;
851 }
852}
853
854static int determine_display_tasks(u64 threshold)
855{
856 struct per_pid *p;
857 struct per_pidcomm *c;
858 int count = 0;
859
860 p = all_data;
861 while (p) {
862 p->display = 0;
863 if (p->start_time == 1)
864 p->start_time = first_time;
865
866 /* no exit marker, task kept running to the end */
867 if (p->end_time == 0)
868 p->end_time = last_time;
869 if (p->total_time >= threshold)
870 p->display = 1;
871
872 c = p->all;
873
874 while (c) {
875 c->display = 0;
876
877 if (c->start_time == 1)
878 c->start_time = first_time;
879
880 if (c->total_time >= threshold) {
881 c->display = 1;
882 count++;
883 }
884
885 if (c->end_time == 0)
886 c->end_time = last_time;
887
888 c = c->next;
889 }
890 p = p->next;
891 }
892 return count;
893}
894
895
896
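/* Default display threshold: 10 ms of accumulated run time, in nanoseconds */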
897#define TIME_THRESH 10000000
898
899static void write_svg_file(const char *filename)
900{
901 u64 i;
902 int count;
903
904 numcpus++;
905
906
907 count = determine_display_tasks(TIME_THRESH);
908
909 /* We'd like to show at least 15 tasks; be less picky if we have fewer */
910 if (count < 15)
911 count = determine_display_tasks(TIME_THRESH / 10);
912
913 open_svg(filename, numcpus, count);
914
915 svg_time_grid(first_time, last_time);
916 svg_legenda();
917
918 for (i = 0; i < numcpus; i++)
919 svg_cpu_box(i, max_freq, turbo_frequency);
920
921 draw_cpu_usage();
922 draw_process_bars();
923 draw_c_p_states();
924 draw_wakeups();
925
926 svg_close();
927}
928
929static int
930process_event(event_t *event)
931{
932
933 switch (event->header.type) {
934
935 case PERF_EVENT_COMM:
936 return process_comm_event(event);
937 case PERF_EVENT_FORK:
938 return process_fork_event(event);
939 case PERF_EVENT_EXIT:
940 return process_exit_event(event);
941 case PERF_EVENT_SAMPLE:
942 return queue_sample_event(event);
943
944 /*
945	 * We don't process them right now but they are fine:
946 */
947 case PERF_EVENT_MMAP:
948 case PERF_EVENT_THROTTLE:
949 case PERF_EVENT_UNTHROTTLE:
950 return 0;
951
952 default:
953 return -1;
954 }
955
956 return 0;
957}
958
959static void process_samples(void)
960{
961 struct sample_wrapper *cursor;
962 event_t *event;
963
964 sort_queued_samples();
965
966 cursor = all_samples;
967 while (cursor) {
968 event = (void *)&cursor->data;
969 cursor = cursor->next;
970 process_sample_event(event);
971 }
972}
973
974
975static int __cmd_timechart(void)
976{
977 int ret, rc = EXIT_FAILURE;
978 unsigned long offset = 0;
979 unsigned long head, shift;
980 struct stat statbuf;
981 event_t *event;
982 uint32_t size;
983 char *buf;
984 int input;
985
986 input = open(input_name, O_RDONLY);
987 if (input < 0) {
988 fprintf(stderr, " failed to open file: %s", input_name);
989 if (!strcmp(input_name, "perf.data"))
990 fprintf(stderr, " (try 'perf record' first)");
991 fprintf(stderr, "\n");
992 exit(-1);
993 }
994
995 ret = fstat(input, &statbuf);
996 if (ret < 0) {
997 perror("failed to stat file");
998 exit(-1);
999 }
1000
1001 if (!statbuf.st_size) {
1002 fprintf(stderr, "zero-sized file, nothing to do!\n");
1003 exit(0);
1004 }
1005
1006 header = perf_header__read(input);
1007 head = header->data_offset;
1008
1009 sample_type = perf_header__sample_type(header);
1010
1011 shift = page_size * (head / page_size);
1012 offset += shift;
1013 head -= shift;
1014
1015remap:
1016 buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
1017 MAP_SHARED, input, offset);
1018 if (buf == MAP_FAILED) {
1019 perror("failed to mmap file");
1020 exit(-1);
1021 }
1022
1023more:
1024 event = (event_t *)(buf + head);
1025
1026 size = event->header.size;
1027 if (!size)
1028 size = 8;
1029
1030 if (head + event->header.size >= page_size * mmap_window) {
1031 int ret2;
1032
1033 shift = page_size * (head / page_size);
1034
1035 ret2 = munmap(buf, page_size * mmap_window);
1036 assert(ret2 == 0);
1037
1038 offset += shift;
1039 head -= shift;
1040 goto remap;
1041 }
1042
1043 size = event->header.size;
1044
1045 if (!size || process_event(event) < 0) {
1046
1047 printf("%p [%p]: skipping unknown header type: %d\n",
1048 (void *)(offset + head),
1049 (void *)(long)(event->header.size),
1050 event->header.type);
1051
1052 /*
1053 * assume we lost track of the stream, check alignment, and
1054		 * increment a single u64 in the hope of catching on again 'soon'.
1055 */
1056
1057 if (unlikely(head & 7))
1058 head &= ~7ULL;
1059
1060 size = 8;
1061 }
1062
1063 head += size;
1064
1065 if (offset + head >= header->data_offset + header->data_size)
1066 goto done;
1067
1068 if (offset + head < (unsigned long)statbuf.st_size)
1069 goto more;
1070
1071done:
1072 rc = EXIT_SUCCESS;
1073 close(input);
1074
1075
1076 process_samples();
1077
1078 end_sample_processing();
1079
1080 sort_pids();
1081
1082 write_svg_file(output_name);
1083
1084 printf("Written %2.1f seconds of trace to %s.\n", (last_time - first_time) / 1000000000.0, output_name);
1085
1086 return rc;
1087}
1088
1089static const char * const timechart_usage[] = {
1090 "perf timechart [<options>] {record}",
1091 NULL
1092};
1093
1094static const char *record_args[] = {
1095 "record",
1096 "-a",
1097 "-R",
1098 "-M",
1099 "-f",
1100 "-c", "1",
1101 "-e", "power:power_start",
1102 "-e", "power:power_end",
1103 "-e", "power:power_frequency",
1104 "-e", "sched:sched_wakeup",
1105 "-e", "sched:sched_switch",
1106};
1107
1108static int __cmd_record(int argc, const char **argv)
1109{
1110 unsigned int rec_argc, i, j;
1111 const char **rec_argv;
1112
1113 rec_argc = ARRAY_SIZE(record_args) + argc - 1;
1114 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1115
1116 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1117 rec_argv[i] = strdup(record_args[i]);
1118
1119 for (j = 1; j < (unsigned int)argc; j++, i++)
1120 rec_argv[i] = argv[j];
1121
1122 return cmd_record(i, rec_argv, NULL);
1123}
1124
1125static const struct option options[] = {
1126 OPT_STRING('i', "input", &input_name, "file",
1127 "input file name"),
1128 OPT_STRING('o', "output", &output_name, "file",
1129 "output file name"),
1130 OPT_END()
1131};
1132
1133
1134int cmd_timechart(int argc, const char **argv, const char *prefix __used)
1135{
1136 symbol__init();
1137
1138 page_size = getpagesize();
1139
1140 argc = parse_options(argc, argv, options, timechart_usage,
1141 PARSE_OPT_STOP_AT_NON_OPTION);
1142
1143 if (argc && !strncmp(argv[0], "rec", 3))
1144 return __cmd_record(argc, argv);
1145 else if (argc)
1146 usage_with_options(timechart_usage, options);
1147
1148 setup_pager();
1149
1150 return __cmd_timechart();
1151}
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 3a63e41fb44e..e11d8d231c3b 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -16,12 +16,14 @@ extern int check_pager_config(const char *cmd);
16 16
17extern int cmd_annotate(int argc, const char **argv, const char *prefix); 17extern int cmd_annotate(int argc, const char **argv, const char *prefix);
18extern int cmd_help(int argc, const char **argv, const char *prefix); 18extern int cmd_help(int argc, const char **argv, const char *prefix);
19extern int cmd_sched(int argc, const char **argv, const char *prefix);
20extern int cmd_list(int argc, const char **argv, const char *prefix);
19extern int cmd_record(int argc, const char **argv, const char *prefix); 21extern int cmd_record(int argc, const char **argv, const char *prefix);
20extern int cmd_report(int argc, const char **argv, const char *prefix); 22extern int cmd_report(int argc, const char **argv, const char *prefix);
21extern int cmd_stat(int argc, const char **argv, const char *prefix); 23extern int cmd_stat(int argc, const char **argv, const char *prefix);
24extern int cmd_timechart(int argc, const char **argv, const char *prefix);
22extern int cmd_top(int argc, const char **argv, const char *prefix); 25extern int cmd_top(int argc, const char **argv, const char *prefix);
23extern int cmd_version(int argc, const char **argv, const char *prefix);
24extern int cmd_list(int argc, const char **argv, const char *prefix);
25extern int cmd_trace(int argc, const char **argv, const char *prefix); 26extern int cmd_trace(int argc, const char **argv, const char *prefix);
27extern int cmd_version(int argc, const char **argv, const char *prefix);
26 28
27#endif 29#endif
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index eebce30afbc0..00326e230d87 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -4,7 +4,10 @@
4# 4#
5perf-annotate mainporcelain common 5perf-annotate mainporcelain common
6perf-list mainporcelain common 6perf-list mainporcelain common
7perf-sched mainporcelain common
7perf-record mainporcelain common 8perf-record mainporcelain common
8perf-report mainporcelain common 9perf-report mainporcelain common
9perf-stat mainporcelain common 10perf-stat mainporcelain common
11perf-timechart mainporcelain common
10perf-top mainporcelain common 12perf-top mainporcelain common
13perf-trace mainporcelain common
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index fe4589dde950..19fc7feb9d59 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -289,10 +289,12 @@ static void handle_internal_command(int argc, const char **argv)
289 { "record", cmd_record, 0 }, 289 { "record", cmd_record, 0 },
290 { "report", cmd_report, 0 }, 290 { "report", cmd_report, 0 },
291 { "stat", cmd_stat, 0 }, 291 { "stat", cmd_stat, 0 },
292 { "timechart", cmd_timechart, 0 },
292 { "top", cmd_top, 0 }, 293 { "top", cmd_top, 0 },
293 { "annotate", cmd_annotate, 0 }, 294 { "annotate", cmd_annotate, 0 },
294 { "version", cmd_version, 0 }, 295 { "version", cmd_version, 0 },
295 { "trace", cmd_trace, 0 }, 296 { "trace", cmd_trace, 0 },
297 { "sched", cmd_sched, 0 },
296 }; 298 };
297 unsigned int i; 299 unsigned int i;
298 static const char ext[] = STRIP_EXTENSION; 300 static const char ext[] = STRIP_EXTENSION;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index fa2d4e91d329..018d414a09d1 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -39,6 +39,7 @@ struct fork_event {
39 struct perf_event_header header; 39 struct perf_event_header header;
40 u32 pid, ppid; 40 u32 pid, ppid;
41 u32 tid, ptid; 41 u32 tid, ptid;
42 u64 time;
42}; 43};
43 44
44struct lost_event { 45struct lost_event {
@@ -52,13 +53,19 @@ struct lost_event {
52 */ 53 */
53struct read_event { 54struct read_event {
54 struct perf_event_header header; 55 struct perf_event_header header;
55 u32 pid,tid; 56 u32 pid, tid;
56 u64 value; 57 u64 value;
57 u64 time_enabled; 58 u64 time_enabled;
58 u64 time_running; 59 u64 time_running;
59 u64 id; 60 u64 id;
60}; 61};
61 62
63struct sample_event{
64 struct perf_event_header header;
65 u64 array[];
66};
67
68
62typedef union event_union { 69typedef union event_union {
63 struct perf_event_header header; 70 struct perf_event_header header;
64 struct ip_event ip; 71 struct ip_event ip;
@@ -67,6 +74,7 @@ typedef union event_union {
67 struct fork_event fork; 74 struct fork_event fork;
68 struct lost_event lost; 75 struct lost_event lost;
69 struct read_event read; 76 struct read_event read;
77 struct sample_event sample;
70} event_t; 78} event_t;
71 79
72struct map { 80struct map {
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index ec4d4c2f9522..bb4fca3efcc3 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -7,9 +7,8 @@
7#include "header.h" 7#include "header.h"
8 8
9/* 9/*
10 * 10 * Create new perf.data header attribute:
11 */ 11 */
12
13struct perf_header_attr *perf_header_attr__new(struct perf_counter_attr *attr) 12struct perf_header_attr *perf_header_attr__new(struct perf_counter_attr *attr)
14{ 13{
15 struct perf_header_attr *self = malloc(sizeof(*self)); 14 struct perf_header_attr *self = malloc(sizeof(*self));
@@ -43,9 +42,8 @@ void perf_header_attr__add_id(struct perf_header_attr *self, u64 id)
43} 42}
44 43
45/* 44/*
46 * 45 * Create new perf.data header:
47 */ 46 */
48
49struct perf_header *perf_header__new(void) 47struct perf_header *perf_header__new(void)
50{ 48{
51 struct perf_header *self = malloc(sizeof(*self)); 49 struct perf_header *self = malloc(sizeof(*self));
@@ -86,6 +84,46 @@ void perf_header__add_attr(struct perf_header *self,
86 self->attr[pos] = attr; 84 self->attr[pos] = attr;
87} 85}
88 86
87#define MAX_EVENT_NAME 64
88
89struct perf_trace_event_type {
90 u64 event_id;
91 char name[MAX_EVENT_NAME];
92};
93
94static int event_count;
95static struct perf_trace_event_type *events;
96
97void perf_header__push_event(u64 id, const char *name)
98{
99 if (strlen(name) > MAX_EVENT_NAME)
100 printf("Event %s will be truncated\n", name);
101
102 if (!events) {
103 events = malloc(sizeof(struct perf_trace_event_type));
104 if (!events)
105 die("nomem");
106 } else {
107 events = realloc(events, (event_count + 1) * sizeof(struct perf_trace_event_type));
108 if (!events)
109 die("nomem");
110 }
111 memset(&events[event_count], 0, sizeof(struct perf_trace_event_type));
112 events[event_count].event_id = id;
113 strncpy(events[event_count].name, name, MAX_EVENT_NAME - 1);
114 event_count++;
115}
116
117char *perf_header__find_event(u64 id)
118{
119 int i;
120 for (i = 0 ; i < event_count; i++) {
121 if (events[i].event_id == id)
122 return events[i].name;
123 }
124 return NULL;
125}
126
89static const char *__perf_magic = "PERFFILE"; 127static const char *__perf_magic = "PERFFILE";
90 128
91#define PERF_MAGIC (*(u64 *)__perf_magic) 129#define PERF_MAGIC (*(u64 *)__perf_magic)
@@ -106,6 +144,7 @@ struct perf_file_header {
106 u64 attr_size; 144 u64 attr_size;
107 struct perf_file_section attrs; 145 struct perf_file_section attrs;
108 struct perf_file_section data; 146 struct perf_file_section data;
147 struct perf_file_section event_types;
109}; 148};
110 149
111static void do_write(int fd, void *buf, size_t size) 150static void do_write(int fd, void *buf, size_t size)
@@ -154,6 +193,11 @@ void perf_header__write(struct perf_header *self, int fd)
154 do_write(fd, &f_attr, sizeof(f_attr)); 193 do_write(fd, &f_attr, sizeof(f_attr));
155 } 194 }
156 195
196 self->event_offset = lseek(fd, 0, SEEK_CUR);
197 self->event_size = event_count * sizeof(struct perf_trace_event_type);
198 if (events)
199 do_write(fd, events, self->event_size);
200
157 201
158 self->data_offset = lseek(fd, 0, SEEK_CUR); 202 self->data_offset = lseek(fd, 0, SEEK_CUR);
159 203
@@ -169,6 +213,10 @@ void perf_header__write(struct perf_header *self, int fd)
169 .offset = self->data_offset, 213 .offset = self->data_offset,
170 .size = self->data_size, 214 .size = self->data_size,
171 }, 215 },
216 .event_types = {
217 .offset = self->event_offset,
218 .size = self->event_size,
219 },
172 }; 220 };
173 221
174 lseek(fd, 0, SEEK_SET); 222 lseek(fd, 0, SEEK_SET);
@@ -234,6 +282,17 @@ struct perf_header *perf_header__read(int fd)
234 lseek(fd, tmp, SEEK_SET); 282 lseek(fd, tmp, SEEK_SET);
235 } 283 }
236 284
285 if (f_header.event_types.size) {
286 lseek(fd, f_header.event_types.offset, SEEK_SET);
287 events = malloc(f_header.event_types.size);
288 if (!events)
289 die("nomem");
290 do_read(fd, events, f_header.event_types.size);
291 event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type);
292 }
293 self->event_offset = f_header.event_types.offset;
294 self->event_size = f_header.event_types.size;
295
237 self->data_offset = f_header.data.offset; 296 self->data_offset = f_header.data.offset;
238 self->data_size = f_header.data.size; 297 self->data_size = f_header.data.size;
239 298
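
The header.c hunks above add an "event_types" section to the perf.data file: perf_header__push_event() collects (id, name) pairs while event options are parsed, perf_header__write() appends the array and records its offset and size in perf_file_header.event_types, and perf_header__read() loads it back so perf_header__find_event() can map a counter's config value to a human-readable tracepoint name. A minimal report-side sketch, assuming the usual tools/perf include layout (the function name and printf wording are made up for illustration):

#include "util/types.h"
#include "util/header.h"
#include <stdio.h>

static void print_counter_name(int perf_data_fd, u64 config)
{
	char *name;

	perf_header__read(perf_data_fd);	/* also loads the event_types table */
	name = perf_header__find_event(config);
	if (name)
		printf("counter %llu is %s\n", (unsigned long long)config, name);
	else
		printf("counter %llu has no stored event name\n",
		       (unsigned long long)config);
}
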
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 5d0a72ecc919..7b0e84a87179 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -19,6 +19,8 @@ struct perf_header {
19 s64 attr_offset; 19 s64 attr_offset;
20 u64 data_offset; 20 u64 data_offset;
21 u64 data_size; 21 u64 data_size;
22 u64 event_offset;
23 u64 event_size;
22}; 24};
23 25
24struct perf_header *perf_header__read(int fd); 26struct perf_header *perf_header__read(int fd);
@@ -27,6 +29,10 @@ void perf_header__write(struct perf_header *self, int fd);
27void perf_header__add_attr(struct perf_header *self, 29void perf_header__add_attr(struct perf_header *self,
28 struct perf_header_attr *attr); 30 struct perf_header_attr *attr);
29 31
32void perf_header__push_event(u64 id, const char *name);
33char *perf_header__find_event(u64 id);
34
35
30struct perf_header_attr * 36struct perf_header_attr *
31perf_header_attr__new(struct perf_counter_attr *attr); 37perf_header_attr__new(struct perf_counter_attr *attr);
32void perf_header_attr__add_id(struct perf_header_attr *self, u64 id); 38void perf_header_attr__add_id(struct perf_header_attr *self, u64 id);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index a587d41ae3c9..89172fd0038b 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -6,6 +6,7 @@
6#include "exec_cmd.h" 6#include "exec_cmd.h"
7#include "string.h" 7#include "string.h"
8#include "cache.h" 8#include "cache.h"
9#include "header.h"
9 10
10int nr_counters; 11int nr_counters;
11 12
@@ -18,6 +19,12 @@ struct event_symbol {
18 const char *alias; 19 const char *alias;
19}; 20};
20 21
22enum event_result {
23 EVT_FAILED,
24 EVT_HANDLED,
25 EVT_HANDLED_ALL
26};
27
21char debugfs_path[MAXPATHLEN]; 28char debugfs_path[MAXPATHLEN];
22 29
23#define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x 30#define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x
@@ -139,7 +146,7 @@ static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir)
139 (strcmp(evt_dirent.d_name, "..")) && \ 146 (strcmp(evt_dirent.d_name, "..")) && \
140 (!tp_event_has_id(&sys_dirent, &evt_dirent))) 147 (!tp_event_has_id(&sys_dirent, &evt_dirent)))
141 148
142#define MAX_EVENT_LENGTH 30 149#define MAX_EVENT_LENGTH 512
143 150
144int valid_debugfs_mount(const char *debugfs) 151int valid_debugfs_mount(const char *debugfs)
145{ 152{
@@ -344,7 +351,7 @@ static int parse_aliases(const char **str, const char *names[][MAX_ALIASES], int
344 return -1; 351 return -1;
345} 352}
346 353
347static int 354static enum event_result
348parse_generic_hw_event(const char **str, struct perf_counter_attr *attr) 355parse_generic_hw_event(const char **str, struct perf_counter_attr *attr)
349{ 356{
350 const char *s = *str; 357 const char *s = *str;
@@ -356,7 +363,7 @@ parse_generic_hw_event(const char **str, struct perf_counter_attr *attr)
356 * then bail out: 363 * then bail out:
357 */ 364 */
358 if (cache_type == -1) 365 if (cache_type == -1)
359 return 0; 366 return EVT_FAILED;
360 367
361 while ((cache_op == -1 || cache_result == -1) && *s == '-') { 368 while ((cache_op == -1 || cache_result == -1) && *s == '-') {
362 ++s; 369 ++s;
@@ -402,27 +409,115 @@ parse_generic_hw_event(const char **str, struct perf_counter_attr *attr)
402 attr->type = PERF_TYPE_HW_CACHE; 409 attr->type = PERF_TYPE_HW_CACHE;
403 410
404 *str = s; 411 *str = s;
405 return 1; 412 return EVT_HANDLED;
413}
414
415static enum event_result
416parse_single_tracepoint_event(char *sys_name,
417 const char *evt_name,
418 unsigned int evt_length,
419 char *flags,
420 struct perf_counter_attr *attr,
421 const char **strp)
422{
423 char evt_path[MAXPATHLEN];
424 char id_buf[4];
425 u64 id;
426 int fd;
427
428 if (flags) {
429 if (!strncmp(flags, "record", strlen(flags))) {
430 attr->sample_type |= PERF_SAMPLE_RAW;
431 attr->sample_type |= PERF_SAMPLE_TIME;
432 attr->sample_type |= PERF_SAMPLE_CPU;
433 }
434 }
435
436 snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path,
437 sys_name, evt_name);
438
439 fd = open(evt_path, O_RDONLY);
440 if (fd < 0)
441 return EVT_FAILED;
442
443 if (read(fd, id_buf, sizeof(id_buf)) < 0) {
444 close(fd);
445 return EVT_FAILED;
446 }
447
448 close(fd);
449 id = atoll(id_buf);
450 attr->config = id;
451 attr->type = PERF_TYPE_TRACEPOINT;
452 *strp = evt_name + evt_length;
453
454 return EVT_HANDLED;
455}
456
457/* sys + ':' + event + ':' + flags*/
458#define MAX_EVOPT_LEN (MAX_EVENT_LENGTH * 2 + 2 + 128)
459static enum event_result
460parse_subsystem_tracepoint_event(char *sys_name, char *flags)
461{
462 char evt_path[MAXPATHLEN];
463 struct dirent *evt_ent;
464 DIR *evt_dir;
465
466 snprintf(evt_path, MAXPATHLEN, "%s/%s", debugfs_path, sys_name);
467 evt_dir = opendir(evt_path);
468
469 if (!evt_dir) {
470 perror("Can't open event dir");
471 return EVT_FAILED;
472 }
473
474 while ((evt_ent = readdir(evt_dir))) {
475 char event_opt[MAX_EVOPT_LEN + 1];
476 int len;
477 unsigned int rem = MAX_EVOPT_LEN;
478
479 if (!strcmp(evt_ent->d_name, ".")
480 || !strcmp(evt_ent->d_name, "..")
481 || !strcmp(evt_ent->d_name, "enable")
482 || !strcmp(evt_ent->d_name, "filter"))
483 continue;
484
485 len = snprintf(event_opt, MAX_EVOPT_LEN, "%s:%s", sys_name,
486 evt_ent->d_name);
487 if (len < 0)
488 return EVT_FAILED;
489
490 rem -= len;
491 if (flags) {
492 if (rem < strlen(flags) + 1)
493 return EVT_FAILED;
494
495 strcat(event_opt, ":");
496 strcat(event_opt, flags);
497 }
498
499 if (parse_events(NULL, event_opt, 0))
500 return EVT_FAILED;
501 }
502
503 return EVT_HANDLED_ALL;
406} 504}
407 505
408static int parse_tracepoint_event(const char **strp, 506
507static enum event_result parse_tracepoint_event(const char **strp,
409 struct perf_counter_attr *attr) 508 struct perf_counter_attr *attr)
410{ 509{
411 const char *evt_name; 510 const char *evt_name;
412 char *flags; 511 char *flags;
413 char sys_name[MAX_EVENT_LENGTH]; 512 char sys_name[MAX_EVENT_LENGTH];
414 char id_buf[4];
415 int fd;
416 unsigned int sys_length, evt_length; 513 unsigned int sys_length, evt_length;
417 u64 id;
418 char evt_path[MAXPATHLEN];
419 514
420 if (valid_debugfs_mount(debugfs_path)) 515 if (valid_debugfs_mount(debugfs_path))
421 return 0; 516 return 0;
422 517
423 evt_name = strchr(*strp, ':'); 518 evt_name = strchr(*strp, ':');
424 if (!evt_name) 519 if (!evt_name)
425 return 0; 520 return EVT_FAILED;
426 521
427 sys_length = evt_name - *strp; 522 sys_length = evt_name - *strp;
428 if (sys_length >= MAX_EVENT_LENGTH) 523 if (sys_length >= MAX_EVENT_LENGTH)
@@ -434,32 +529,22 @@ static int parse_tracepoint_event(const char **strp,
434 529
435 flags = strchr(evt_name, ':'); 530 flags = strchr(evt_name, ':');
436 if (flags) { 531 if (flags) {
437 *flags = '\0'; 532 /* split it out: */
533 evt_name = strndup(evt_name, flags - evt_name);
438 flags++; 534 flags++;
439 if (!strncmp(flags, "record", strlen(flags)))
440 attr->sample_type |= PERF_SAMPLE_RAW;
441 } 535 }
442 536
443 evt_length = strlen(evt_name); 537 evt_length = strlen(evt_name);
444 if (evt_length >= MAX_EVENT_LENGTH) 538 if (evt_length >= MAX_EVENT_LENGTH)
445 return 0; 539 return EVT_FAILED;
446
447 snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path,
448 sys_name, evt_name);
449 fd = open(evt_path, O_RDONLY);
450 if (fd < 0)
451 return 0;
452 540
453 if (read(fd, id_buf, sizeof(id_buf)) < 0) { 541 if (!strcmp(evt_name, "*")) {
454 close(fd); 542 *strp = evt_name + evt_length;
455 return 0; 543 return parse_subsystem_tracepoint_event(sys_name, flags);
456 } 544 } else
457 close(fd); 545 return parse_single_tracepoint_event(sys_name, evt_name,
458 id = atoll(id_buf); 546 evt_length, flags,
459 attr->config = id; 547 attr, strp);
460 attr->type = PERF_TYPE_TRACEPOINT;
461 *strp = evt_name + evt_length;
462 return 1;
463} 548}
464 549
465static int check_events(const char *str, unsigned int i) 550static int check_events(const char *str, unsigned int i)
@@ -477,7 +562,7 @@ static int check_events(const char *str, unsigned int i)
477 return 0; 562 return 0;
478} 563}
479 564
480static int 565static enum event_result
481parse_symbolic_event(const char **strp, struct perf_counter_attr *attr) 566parse_symbolic_event(const char **strp, struct perf_counter_attr *attr)
482{ 567{
483 const char *str = *strp; 568 const char *str = *strp;
@@ -490,31 +575,32 @@ parse_symbolic_event(const char **strp, struct perf_counter_attr *attr)
490 attr->type = event_symbols[i].type; 575 attr->type = event_symbols[i].type;
491 attr->config = event_symbols[i].config; 576 attr->config = event_symbols[i].config;
492 *strp = str + n; 577 *strp = str + n;
493 return 1; 578 return EVT_HANDLED;
494 } 579 }
495 } 580 }
496 return 0; 581 return EVT_FAILED;
497} 582}
498 583
499static int parse_raw_event(const char **strp, struct perf_counter_attr *attr) 584static enum event_result
585parse_raw_event(const char **strp, struct perf_counter_attr *attr)
500{ 586{
501 const char *str = *strp; 587 const char *str = *strp;
502 u64 config; 588 u64 config;
503 int n; 589 int n;
504 590
505 if (*str != 'r') 591 if (*str != 'r')
506 return 0; 592 return EVT_FAILED;
507 n = hex2u64(str + 1, &config); 593 n = hex2u64(str + 1, &config);
508 if (n > 0) { 594 if (n > 0) {
509 *strp = str + n + 1; 595 *strp = str + n + 1;
510 attr->type = PERF_TYPE_RAW; 596 attr->type = PERF_TYPE_RAW;
511 attr->config = config; 597 attr->config = config;
512 return 1; 598 return EVT_HANDLED;
513 } 599 }
514 return 0; 600 return EVT_FAILED;
515} 601}
516 602
517static int 603static enum event_result
518parse_numeric_event(const char **strp, struct perf_counter_attr *attr) 604parse_numeric_event(const char **strp, struct perf_counter_attr *attr)
519{ 605{
520 const char *str = *strp; 606 const char *str = *strp;
@@ -530,13 +616,13 @@ parse_numeric_event(const char **strp, struct perf_counter_attr *attr)
530 attr->type = type; 616 attr->type = type;
531 attr->config = config; 617 attr->config = config;
532 *strp = endp; 618 *strp = endp;
533 return 1; 619 return EVT_HANDLED;
534 } 620 }
535 } 621 }
536 return 0; 622 return EVT_FAILED;
537} 623}
538 624
539static int 625static enum event_result
540parse_event_modifier(const char **strp, struct perf_counter_attr *attr) 626parse_event_modifier(const char **strp, struct perf_counter_attr *attr)
541{ 627{
542 const char *str = *strp; 628 const char *str = *strp;
@@ -569,37 +655,84 @@ parse_event_modifier(const char **strp, struct perf_counter_attr *attr)
569 * Each event can have multiple symbolic names. 655 * Each event can have multiple symbolic names.
570 * Symbolic names are (almost) exactly matched. 656 * Symbolic names are (almost) exactly matched.
571 */ 657 */
572static int parse_event_symbols(const char **str, struct perf_counter_attr *attr) 658static enum event_result
659parse_event_symbols(const char **str, struct perf_counter_attr *attr)
573{ 660{
574 if (!(parse_tracepoint_event(str, attr) || 661 enum event_result ret;
575 parse_raw_event(str, attr) || 662
576 parse_numeric_event(str, attr) || 663 ret = parse_tracepoint_event(str, attr);
577 parse_symbolic_event(str, attr) || 664 if (ret != EVT_FAILED)
578 parse_generic_hw_event(str, attr))) 665 goto modifier;
579 return 0; 666
667 ret = parse_raw_event(str, attr);
668 if (ret != EVT_FAILED)
669 goto modifier;
670
671 ret = parse_numeric_event(str, attr);
672 if (ret != EVT_FAILED)
673 goto modifier;
674
675 ret = parse_symbolic_event(str, attr);
676 if (ret != EVT_FAILED)
677 goto modifier;
580 678
679 ret = parse_generic_hw_event(str, attr);
680 if (ret != EVT_FAILED)
681 goto modifier;
682
683 return EVT_FAILED;
684
685modifier:
581 parse_event_modifier(str, attr); 686 parse_event_modifier(str, attr);
582 687
583 return 1; 688 return ret;
584} 689}
585 690
691static void store_event_type(const char *orgname)
692{
693 char filename[PATH_MAX], *c;
694 FILE *file;
695 int id;
696
697 sprintf(filename, "/sys/kernel/debug/tracing/events/%s/id", orgname);
698 c = strchr(filename, ':');
699 if (c)
700 *c = '/';
701
702 file = fopen(filename, "r");
703 if (!file)
704 return;
705 if (fscanf(file, "%i", &id) < 1)
706 die("cannot store event ID");
707 fclose(file);
708 perf_header__push_event(id, orgname);
709}
710
711
586int parse_events(const struct option *opt __used, const char *str, int unset __used) 712int parse_events(const struct option *opt __used, const char *str, int unset __used)
587{ 713{
588 struct perf_counter_attr attr; 714 struct perf_counter_attr attr;
715 enum event_result ret;
716
717 if (strchr(str, ':'))
718 store_event_type(str);
589 719
590 for (;;) { 720 for (;;) {
591 if (nr_counters == MAX_COUNTERS) 721 if (nr_counters == MAX_COUNTERS)
592 return -1; 722 return -1;
593 723
594 memset(&attr, 0, sizeof(attr)); 724 memset(&attr, 0, sizeof(attr));
595 if (!parse_event_symbols(&str, &attr)) 725 ret = parse_event_symbols(&str, &attr);
726 if (ret == EVT_FAILED)
596 return -1; 727 return -1;
597 728
598 if (!(*str == 0 || *str == ',' || isspace(*str))) 729 if (!(*str == 0 || *str == ',' || isspace(*str)))
599 return -1; 730 return -1;
600 731
601 attrs[nr_counters] = attr; 732 if (ret != EVT_HANDLED_ALL) {
602 nr_counters++; 733 attrs[nr_counters] = attr;
734 nr_counters++;
735 }
603 736
604 if (*str == 0) 737 if (*str == 0)
605 break; 738 break;
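
With the parse-events.c changes, each sub-parser returns an enum event_result instead of 0/1: EVT_HANDLED means one counter attribute was filled in, while EVT_HANDLED_ALL means a subsystem wildcard such as "sched:*" was expanded by parse_subsystem_tracepoint_event(), which re-enters parse_events() once per event found in debugfs, so the outer loop must not push an attribute for the wildcard string itself. The ":record" flag now sets PERF_SAMPLE_TIME and PERF_SAMPLE_CPU in addition to PERF_SAMPLE_RAW, and store_event_type() saves the id/name pair into the header for any "sys:event" string. A small sketch of the accepted syntaxes, calling parse_events() directly the same way the wildcard expansion does (the event strings are examples only):

#include "util/parse-options.h"
#include "util/parse-events.h"

static void add_demo_counters(void)
{
	/* raw hardware event: fills in one attrs[] slot */
	parse_events(NULL, "r003c", 0);

	/* single tracepoint; ":record" now samples RAW, TIME and CPU */
	parse_events(NULL, "sched:sched_switch:record", 0);

	/* subsystem wildcard: one counter per sched tracepoint,
	 * signalled internally by EVT_HANDLED_ALL */
	parse_events(NULL, "sched:*", 0);
}
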
diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h
index 8aa3464c7090..2ee248ff27e5 100644
--- a/tools/perf/util/parse-options.h
+++ b/tools/perf/util/parse-options.h
@@ -104,6 +104,8 @@ struct option {
104 { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb } 104 { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb }
105#define OPT_CALLBACK(s, l, v, a, h, f) \ 105#define OPT_CALLBACK(s, l, v, a, h, f) \
106 { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f) } 106 { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f) }
107#define OPT_CALLBACK_NOOPT(s, l, v, a, h, f) \
108 { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG }
107#define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \ 109#define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \
108 { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d, .flags = PARSE_OPT_LASTARG_DEFAULT } 110 { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d, .flags = PARSE_OPT_LASTARG_DEFAULT }
109 111
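
OPT_CALLBACK_NOOPT complements OPT_CALLBACK for callback options that take no argument: the macro sets PARSE_OPT_NOARG, so the callback runs purely as a flag with side effects. A minimal sketch with a hypothetical option name and callback:

#include "util/parse-options.h"

static int dump_raw;

static int dump_cb(const struct option *opt, const char *arg, int unset)
{
	(void)opt; (void)arg; (void)unset;	/* flag only, no argument */
	dump_raw = 1;
	return 0;
}

static const struct option demo_options[] = {
	OPT_CALLBACK_NOOPT('D', "dump-raw", NULL, NULL,
			   "dump raw sample contents", dump_cb),
	OPT_END()
};
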
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
new file mode 100644
index 000000000000..b0fcecdf378d
--- /dev/null
+++ b/tools/perf/util/svghelper.c
@@ -0,0 +1,384 @@
1/*
2 * svghelper.c - helper functions for outputting svg
3 *
4 * (C) Copyright 2009 Intel Corporation
5 *
6 * Authors:
7 * Arjan van de Ven <arjan@linux.intel.com>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; version 2
12 * of the License.
13 */
14
15#include <stdio.h>
16#include <stdlib.h>
17#include <unistd.h>
18#include <string.h>
19
20#include "svghelper.h"
21
22static u64 first_time, last_time;
23static u64 turbo_frequency, max_freq;
24
25
26#define SLOT_MULT 30.0
27#define SLOT_HEIGHT 25.0
28#define WIDTH 1000.0
29
30#define MIN_TEXT_SIZE 0.001
31
32static u64 total_height;
33static FILE *svgfile;
34
35static double cpu2slot(int cpu)
36{
37 return 2 * cpu + 1;
38}
39
40static double cpu2y(int cpu)
41{
42 return cpu2slot(cpu) * SLOT_MULT;
43}
44
45static double time2pixels(u64 time)
46{
47 double X;
48
49 X = WIDTH * (time - first_time) / (last_time - first_time);
50 return X;
51}
52
53void open_svg(const char *filename, int cpus, int rows)
54{
55
56 svgfile = fopen(filename, "w");
57 if (!svgfile) {
58 fprintf(stderr, "Cannot open %s for output\n", filename);
59 return;
60 }
61 total_height = (1 + rows + cpu2slot(cpus)) * SLOT_MULT;
62 fprintf(svgfile, "<?xml version=\"1.0\" standalone=\"no\"?> \n");
63 fprintf(svgfile, "<svg width=\"%4.1f\" height=\"%llu\" version=\"1.1\" xmlns=\"http://www.w3.org/2000/svg\">\n", WIDTH, total_height);
64
65 fprintf(svgfile, "<defs>\n <style type=\"text/css\">\n <![CDATA[\n");
66
67 fprintf(svgfile, " rect { stroke-width: 1; }\n");
68 fprintf(svgfile, " rect.process { fill:rgb(180,180,180); fill-opacity:0.9; stroke-width:1; stroke:rgb( 0, 0, 0); } \n");
69 fprintf(svgfile, " rect.process2 { fill:rgb(180,180,180); fill-opacity:0.9; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
70 fprintf(svgfile, " rect.sample { fill:rgb( 0, 0,255); fill-opacity:0.8; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
71 fprintf(svgfile, " rect.blocked { fill:rgb(255, 0, 0); fill-opacity:0.5; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
72 fprintf(svgfile, " rect.waiting { fill:rgb(255,255, 0); fill-opacity:0.3; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
73 fprintf(svgfile, " rect.cpu { fill:rgb(192,192,192); fill-opacity:0.2; stroke-width:0.5; stroke:rgb(128,128,128); } \n");
74 fprintf(svgfile, " rect.pstate { fill:rgb(128,128,128); fill-opacity:0.8; stroke-width:0; } \n");
75 fprintf(svgfile, " rect.c1 { fill:rgb(255,214,214); fill-opacity:0.5; stroke-width:0; } \n");
76 fprintf(svgfile, " rect.c2 { fill:rgb(255,172,172); fill-opacity:0.5; stroke-width:0; } \n");
77 fprintf(svgfile, " rect.c3 { fill:rgb(255,130,130); fill-opacity:0.5; stroke-width:0; } \n");
78 fprintf(svgfile, " rect.c4 { fill:rgb(255, 88, 88); fill-opacity:0.5; stroke-width:0; } \n");
79 fprintf(svgfile, " rect.c5 { fill:rgb(255, 44, 44); fill-opacity:0.5; stroke-width:0; } \n");
80 fprintf(svgfile, " rect.c6 { fill:rgb(255, 0, 0); fill-opacity:0.5; stroke-width:0; } \n");
81 fprintf(svgfile, " line.pstate { stroke:rgb(255,255, 0); stroke-opacity:0.8; stroke-width:2; } \n");
82
83 fprintf(svgfile, " ]]>\n </style>\n</defs>\n");
84}
85
86void svg_box(int Yslot, u64 start, u64 end, const char *type)
87{
88 if (!svgfile)
89 return;
90
91 fprintf(svgfile, "<rect x=\"%4.8f\" width=\"%4.8f\" y=\"%4.1f\" height=\"%4.1f\" class=\"%s\"/>\n",
92 time2pixels(start), time2pixels(end)-time2pixels(start), Yslot * SLOT_MULT, SLOT_HEIGHT, type);
93}
94
95void svg_sample(int Yslot, int cpu, u64 start, u64 end, const char *type)
96{
97 double text_size;
98 if (!svgfile)
99 return;
100
101 fprintf(svgfile, "<rect x=\"%4.8f\" width=\"%4.8f\" y=\"%4.1f\" height=\"%4.1f\" class=\"%s\"/>\n",
102 time2pixels(start), time2pixels(end)-time2pixels(start), Yslot * SLOT_MULT, SLOT_HEIGHT, type);
103
104 text_size = (time2pixels(end)-time2pixels(start));
105 if (cpu > 9)
106 text_size = text_size/2;
107 if (text_size > 1.25)
108 text_size = 1.25;
109 if (text_size > MIN_TEXT_SIZE)
110 fprintf(svgfile, "<text transform=\"translate(%1.8f,%1.8f)\" font-size=\"%1.6fpt\">%i</text>\n",
111 time2pixels(start), Yslot * SLOT_MULT + SLOT_HEIGHT - 1, text_size, cpu + 1);
112
113}
114
115static char *cpu_model(void)
116{
117 static char cpu_m[255];
118 char buf[256];
119 FILE *file;
120
121 cpu_m[0] = 0;
122 /* CPU type */
123 file = fopen("/proc/cpuinfo", "r");
124 if (file) {
125 while (fgets(buf, 255, file)) {
126 if (strstr(buf, "model name")) {
127 strncpy(cpu_m, &buf[13], 255);
128 break;
129 }
130 }
131 fclose(file);
132 }
133 return cpu_m;
134}
135
136void svg_cpu_box(int cpu, u64 __max_freq, u64 __turbo_freq)
137{
138 char cpu_string[80];
139 if (!svgfile)
140 return;
141
142 max_freq = __max_freq;
143 turbo_frequency = __turbo_freq;
144
145 fprintf(svgfile, "<rect x=\"%4.8f\" width=\"%4.8f\" y=\"%4.1f\" height=\"%4.1f\" class=\"cpu\"/>\n",
146 time2pixels(first_time),
147 time2pixels(last_time)-time2pixels(first_time),
148 cpu2y(cpu), SLOT_MULT+SLOT_HEIGHT);
149
150 sprintf(cpu_string, "CPU %i", (int)cpu+1);
151 fprintf(svgfile, "<text transform=\"translate(%4.8f,%4.8f)\">%s</text>\n",
152 10+time2pixels(first_time), cpu2y(cpu) + SLOT_HEIGHT/2, cpu_string);
153
154 fprintf(svgfile, "<text transform=\"translate(%4.8f,%4.8f)\" font-size=\"1.25pt\">%s</text>\n",
155 10+time2pixels(first_time), cpu2y(cpu) + SLOT_MULT + SLOT_HEIGHT - 4, cpu_model());
156}
157
158void svg_process(int cpu, u64 start, u64 end, const char *type, const char *name)
159{
160 double width;
161
162 if (!svgfile)
163 return;
164
165 fprintf(svgfile, "<rect x=\"%4.8f\" width=\"%4.8f\" y=\"%4.1f\" height=\"%4.1f\" class=\"%s\"/>\n",
166 time2pixels(start), time2pixels(end)-time2pixels(start), cpu2y(cpu), SLOT_MULT+SLOT_HEIGHT, type);
167 width = time2pixels(end)-time2pixels(start);
168 if (width > 6)
169 width = 6;
170
171 if (width > MIN_TEXT_SIZE)
172 fprintf(svgfile, "<text transform=\"translate(%4.8f,%4.8f) rotate(90)\" font-size=\"%3.4fpt\">%s</text>\n",
173 time2pixels(start), cpu2y(cpu), width, name);
174}
175
176void svg_cstate(int cpu, u64 start, u64 end, int type)
177{
178 double width;
179 char style[128];
180
181 if (!svgfile)
182 return;
183
184
185 if (type > 6)
186 type = 6;
187 sprintf(style, "c%i", type);
188
189 fprintf(svgfile, "<rect class=\"%s\" x=\"%4.8f\" width=\"%4.8f\" y=\"%4.1f\" height=\"%4.1f\"/>\n",
190 style,
191 time2pixels(start), time2pixels(end)-time2pixels(start),
192 cpu2y(cpu), SLOT_MULT+SLOT_HEIGHT);
193
194 width = time2pixels(end)-time2pixels(start);
195 if (width > 6)
196 width = 6;
197
198 if (width > MIN_TEXT_SIZE)
199 fprintf(svgfile, "<text transform=\"translate(%4.8f,%4.8f) rotate(90)\" font-size=\"%3.4fpt\">C%i</text>\n",
200 time2pixels(start), cpu2y(cpu), width, type);
201}
202
203static char *HzToHuman(unsigned long hz)
204{
205 static char buffer[1024];
206 unsigned long long Hz;
207
208 memset(buffer, 0, 1024);
209
210 Hz = hz;
211
212 /* default: just put the Number in */
213 sprintf(buffer, "%9lli", Hz);
214
215 if (Hz > 1000)
216 sprintf(buffer, " %6lli Mhz", (Hz+500)/1000);
217
218 if (Hz > 1500000)
219 sprintf(buffer, " %6.2f Ghz", (Hz+5000.0)/1000000);
220
221 if (Hz == turbo_frequency)
222 sprintf(buffer, "Turbo");
223
224 return buffer;
225}
226
227void svg_pstate(int cpu, u64 start, u64 end, u64 freq)
228{
229 double height = 0;
230
231 if (!svgfile)
232 return;
233
234 if (max_freq)
235 height = freq * 1.0 / max_freq * (SLOT_HEIGHT + SLOT_MULT);
236 height = 1 + cpu2y(cpu) + SLOT_MULT + SLOT_HEIGHT - height;
237 fprintf(svgfile, "<line x1=\"%4.8f\" x2=\"%4.8f\" y1=\"%4.1f\" y2=\"%4.1f\" class=\"pstate\"/>\n",
238 time2pixels(start), time2pixels(end), height, height);
239 fprintf(svgfile, "<text transform=\"translate(%4.8f,%4.8f)\" font-size=\"0.25pt\">%s</text>\n",
240 time2pixels(start), height+0.9, HzToHuman(freq));
241
242}
243
244
245void svg_partial_wakeline(u64 start, int row1, int row2)
246{
247 double height;
248
249 if (!svgfile)
250 return;
251
252
253 if (row1 < row2) {
254 if (row1)
255 fprintf(svgfile, "<line x1=\"%4.8f\" y1=\"%4.2f\" x2=\"%4.8f\" y2=\"%4.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
256 time2pixels(start), row1 * SLOT_MULT + SLOT_HEIGHT, time2pixels(start), row1 * SLOT_MULT + SLOT_HEIGHT + SLOT_MULT/32);
257
258 if (row2)
259 fprintf(svgfile, "<line x1=\"%4.8f\" y1=\"%4.2f\" x2=\"%4.8f\" y2=\"%4.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
260 time2pixels(start), row2 * SLOT_MULT - SLOT_MULT/32, time2pixels(start), row2 * SLOT_MULT);
261 } else {
262 if (row2)
263 fprintf(svgfile, "<line x1=\"%4.8f\" y1=\"%4.2f\" x2=\"%4.8f\" y2=\"%4.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
264 time2pixels(start), row2 * SLOT_MULT + SLOT_HEIGHT, time2pixels(start), row2 * SLOT_MULT + SLOT_HEIGHT + SLOT_MULT/32);
265
266 if (row1)
267 fprintf(svgfile, "<line x1=\"%4.8f\" y1=\"%4.2f\" x2=\"%4.8f\" y2=\"%4.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
268 time2pixels(start), row1 * SLOT_MULT - SLOT_MULT/32, time2pixels(start), row1 * SLOT_MULT);
269 }
270 height = row1 * SLOT_MULT;
271 if (row2 > row1)
272 height += SLOT_HEIGHT;
273 if (row1)
274 fprintf(svgfile, "<circle cx=\"%4.8f\" cy=\"%4.2f\" r = \"0.01\" style=\"fill:rgb(32,255,32)\"/>\n",
275 time2pixels(start), height);
276}
277
278void svg_wakeline(u64 start, int row1, int row2)
279{
280 double height;
281
282 if (!svgfile)
283 return;
284
285
286 if (row1 < row2)
287 fprintf(svgfile, "<line x1=\"%4.8f\" y1=\"%4.2f\" x2=\"%4.8f\" y2=\"%4.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
288 time2pixels(start), row1 * SLOT_MULT + SLOT_HEIGHT, time2pixels(start), row2 * SLOT_MULT);
289 else
290 fprintf(svgfile, "<line x1=\"%4.8f\" y1=\"%4.2f\" x2=\"%4.8f\" y2=\"%4.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
291 time2pixels(start), row2 * SLOT_MULT + SLOT_HEIGHT, time2pixels(start), row1 * SLOT_MULT);
292
293 height = row1 * SLOT_MULT;
294 if (row2 > row1)
295 height += SLOT_HEIGHT;
296 fprintf(svgfile, "<circle cx=\"%4.8f\" cy=\"%4.2f\" r = \"0.01\" style=\"fill:rgb(32,255,32)\"/>\n",
297 time2pixels(start), height);
298}
299
300void svg_interrupt(u64 start, int row)
301{
302 if (!svgfile)
303 return;
304
305 fprintf(svgfile, "<circle cx=\"%4.8f\" cy=\"%4.2f\" r = \"0.01\" style=\"fill:rgb(255,128,128)\"/>\n",
306 time2pixels(start), row * SLOT_MULT);
307 fprintf(svgfile, "<circle cx=\"%4.8f\" cy=\"%4.2f\" r = \"0.01\" style=\"fill:rgb(255,128,128)\"/>\n",
308 time2pixels(start), row * SLOT_MULT + SLOT_HEIGHT);
309}
310
311void svg_text(int Yslot, u64 start, const char *text)
312{
313 if (!svgfile)
314 return;
315
316 fprintf(svgfile, "<text transform=\"translate(%4.8f,%4.8f)\">%s</text>\n",
317 time2pixels(start), Yslot * SLOT_MULT+SLOT_HEIGHT/2, text);
318}
319
320static void svg_legenda_box(int X, const char *text, const char *style)
321{
322 double boxsize;
323 boxsize = SLOT_HEIGHT / 2;
324
325 fprintf(svgfile, "<rect x=\"%i\" width=\"%4.8f\" y=\"0\" height=\"%4.1f\" class=\"%s\"/>\n",
326 X, boxsize, boxsize, style);
327 fprintf(svgfile, "<text transform=\"translate(%4.8f, %4.8f)\" font-size=\"%4.4fpt\">%s</text>\n",
328 X + boxsize + 5, boxsize, 0.8 * boxsize, text);
329}
330
331void svg_legenda(void)
332{
333 if (!svgfile)
334 return;
335
336 svg_legenda_box(0, "Running", "sample");
337 svg_legenda_box(100, "Idle","rect.c1");
338 svg_legenda_box(200, "Deeper Idle", "rect.c3");
339 svg_legenda_box(350, "Deepest Idle", "rect.c6");
340 svg_legenda_box(550, "Sleeping", "process2");
341 svg_legenda_box(650, "Waiting for cpu", "waiting");
342 svg_legenda_box(800, "Blocked on IO", "blocked");
343}
344
345void svg_time_grid(u64 start, u64 end)
346{
347 u64 i;
348
349 first_time = start;
350 last_time = end;
351
352 first_time = first_time / 100000000 * 100000000;
353
354 if (!svgfile)
355 return;
356
357 i = first_time;
358 while (i < last_time) {
359 int color = 220;
360 double thickness = 0.075;
361 if ((i % 100000000) == 0) {
362 thickness = 0.5;
363 color = 192;
364 }
365 if ((i % 1000000000) == 0) {
366 thickness = 2.0;
367 color = 128;
368 }
369
370 fprintf(svgfile, "<line x1=\"%4.8f\" y1=\"%4.2f\" x2=\"%4.8f\" y2=\"%llu\" style=\"stroke:rgb(%i,%i,%i);stroke-width:%1.3f\"/>\n",
371 time2pixels(i), SLOT_MULT/2, time2pixels(i), total_height, color, color, color, thickness);
372
373 i += 10000000;
374 }
375}
376
377void svg_close(void)
378{
379 if (svgfile) {
380 fprintf(svgfile, "</svg>\n");
381 fclose(svgfile);
382 svgfile = NULL;
383 }
384}
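
svghelper.c draws onto a fixed 1000-pixel-wide canvas: time2pixels() maps timestamps into that width using the first_time/last_time range, and CPUs and processes are stacked in SLOT_MULT-high rows. The range is only established by svg_time_grid(), so it has to run before any of the box or line helpers. A sketch of a plausible call sequence (the consumer in this series is builtin-timechart.c; all timestamps and frequencies below are made up):

#include "util/svghelper.h"

static void write_demo_svg(void)
{
	open_svg("demo.svg", 2 /* cpus */, 4 /* process rows */);
	svg_time_grid(0, 1000000000ULL);	/* fixes first_time/last_time */
	svg_legenda();

	svg_cpu_box(0, 3000000, 3400000);	/* max and turbo frequency */
	svg_process(0, 100000000ULL, 400000000ULL, "sample", "demo-task");
	svg_cstate(0, 400000000ULL, 600000000ULL, 3);
	svg_pstate(0, 600000000ULL, 900000000ULL, 2000000);

	svg_close();
}
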
diff --git a/tools/perf/util/svghelper.h b/tools/perf/util/svghelper.h
new file mode 100644
index 000000000000..ad79b5dc53de
--- /dev/null
+++ b/tools/perf/util/svghelper.h
@@ -0,0 +1,25 @@
1#ifndef _INCLUDE_GUARD_SVG_HELPER_
2#define _INCLUDE_GUARD_SVG_HELPER_
3
4#include "types.h"
5
6extern void open_svg(const char *filename, int cpus, int rows);
7extern void svg_box(int Yslot, u64 start, u64 end, const char *type);
8extern void svg_sample(int Yslot, int cpu, u64 start, u64 end, const char *type);
9extern void svg_cpu_box(int cpu, u64 max_frequency, u64 turbo_frequency);
10
11
12extern void svg_process(int cpu, u64 start, u64 end, const char *type, const char *name);
13extern void svg_cstate(int cpu, u64 start, u64 end, int type);
14extern void svg_pstate(int cpu, u64 start, u64 end, u64 freq);
15
16
17extern void svg_time_grid(u64 start, u64 end);
18extern void svg_legenda(void);
19extern void svg_wakeline(u64 start, int row1, int row2);
20extern void svg_partial_wakeline(u64 start, int row1, int row2);
21extern void svg_interrupt(u64 start, int row);
22extern void svg_text(int Yslot, u64 start, const char *text);
23extern void svg_close(void);
24
25#endif
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 7635928ca278..45efb5db0d19 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -8,7 +8,7 @@
8 8
9static struct thread *thread__new(pid_t pid) 9static struct thread *thread__new(pid_t pid)
10{ 10{
11 struct thread *self = malloc(sizeof(*self)); 11 struct thread *self = calloc(1, sizeof(*self));
12 12
13 if (self != NULL) { 13 if (self != NULL) {
14 self->pid = pid; 14 self->pid = pid;
@@ -85,7 +85,7 @@ register_idle_thread(struct rb_root *threads, struct thread **last_match)
85{ 85{
86 struct thread *thread = threads__findnew(0, threads, last_match); 86 struct thread *thread = threads__findnew(0, threads, last_match);
87 87
88 if (!thread || thread__set_comm(thread, "[init]")) { 88 if (!thread || thread__set_comm(thread, "swapper")) {
89 fprintf(stderr, "problem inserting idle task.\n"); 89 fprintf(stderr, "problem inserting idle task.\n");
90 exit(-1); 90 exit(-1);
91 } 91 }
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 634f2809a342..32aea3c1c2ad 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -4,10 +4,11 @@
4#include "symbol.h" 4#include "symbol.h"
5 5
6struct thread { 6struct thread {
7 struct rb_node rb_node; 7 struct rb_node rb_node;
8 struct list_head maps; 8 struct list_head maps;
9 pid_t pid; 9 pid_t pid;
10 char *comm; 10 char shortname[3];
11 char *comm;
11}; 12};
12 13
13int thread__set_comm(struct thread *self, const char *comm); 14int thread__set_comm(struct thread *self, const char *comm);
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index 6c9302a7274c..1fd824c1f1c4 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -458,7 +458,7 @@ static void read_proc_kallsyms(void)
458static void read_ftrace_printk(void) 458static void read_ftrace_printk(void)
459{ 459{
460 unsigned int size, check_size; 460 unsigned int size, check_size;
461 const char *path; 461 char *path;
462 struct stat st; 462 struct stat st;
463 int ret; 463 int ret;
464 464
@@ -468,14 +468,15 @@ static void read_ftrace_printk(void)
468 /* not found */ 468 /* not found */
469 size = 0; 469 size = 0;
470 write_or_die(&size, 4); 470 write_or_die(&size, 4);
471 return; 471 goto out;
472 } 472 }
473 size = get_size(path); 473 size = get_size(path);
474 write_or_die(&size, 4); 474 write_or_die(&size, 4);
475 check_size = copy_file(path); 475 check_size = copy_file(path);
476 if (size != check_size) 476 if (size != check_size)
477 die("error in size of file '%s'", path); 477 die("error in size of file '%s'", path);
478 478out:
479 put_tracing_file(path);
479} 480}
480 481
481static struct tracepoint_path * 482static struct tracepoint_path *
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 629e602d9405..f6a8437141c8 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -1776,6 +1776,29 @@ static unsigned long long read_size(void *ptr, int size)
1776 } 1776 }
1777} 1777}
1778 1778
1779unsigned long long
1780raw_field_value(struct event *event, const char *name, void *data)
1781{
1782 struct format_field *field;
1783
1784 field = find_any_field(event, name);
1785 if (!field)
1786 return 0ULL;
1787
1788 return read_size(data + field->offset, field->size);
1789}
1790
1791void *raw_field_ptr(struct event *event, const char *name, void *data)
1792{
1793 struct format_field *field;
1794
1795 field = find_any_field(event, name);
1796 if (!field)
1797 return NULL;
1798
1799 return data + field->offset;
1800}
1801
1779static int get_common_info(const char *type, int *offset, int *size) 1802static int get_common_info(const char *type, int *offset, int *size)
1780{ 1803{
1781 struct event *event; 1804 struct event *event;
@@ -1799,7 +1822,7 @@ static int get_common_info(const char *type, int *offset, int *size)
1799 return 0; 1822 return 0;
1800} 1823}
1801 1824
1802static int parse_common_type(void *data) 1825int trace_parse_common_type(void *data)
1803{ 1826{
1804 static int type_offset; 1827 static int type_offset;
1805 static int type_size; 1828 static int type_size;
@@ -1832,7 +1855,7 @@ static int parse_common_pid(void *data)
1832 return read_size(data + pid_offset, pid_size); 1855 return read_size(data + pid_offset, pid_size);
1833} 1856}
1834 1857
1835static struct event *find_event(int id) 1858struct event *trace_find_event(int id)
1836{ 1859{
1837 struct event *event; 1860 struct event *event;
1838 1861
@@ -2420,8 +2443,8 @@ get_return_for_leaf(int cpu, int cur_pid, unsigned long long cur_func,
2420 int type; 2443 int type;
2421 int pid; 2444 int pid;
2422 2445
2423 type = parse_common_type(next->data); 2446 type = trace_parse_common_type(next->data);
2424 event = find_event(type); 2447 event = trace_find_event(type);
2425 if (!event) 2448 if (!event)
2426 return NULL; 2449 return NULL;
2427 2450
@@ -2502,8 +2525,8 @@ print_graph_entry_leaf(struct event *event, void *data, struct record *ret_rec)
2502 int type; 2525 int type;
2503 int i; 2526 int i;
2504 2527
2505 type = parse_common_type(ret_rec->data); 2528 type = trace_parse_common_type(ret_rec->data);
2506 ret_event = find_event(type); 2529 ret_event = trace_find_event(type);
2507 2530
2508 field = find_field(ret_event, "rettime"); 2531 field = find_field(ret_event, "rettime");
2509 if (!field) 2532 if (!field)
@@ -2696,11 +2719,13 @@ void print_event(int cpu, void *data, int size, unsigned long long nsecs,
2696 nsecs -= secs * NSECS_PER_SEC; 2719 nsecs -= secs * NSECS_PER_SEC;
2697 usecs = nsecs / NSECS_PER_USEC; 2720 usecs = nsecs / NSECS_PER_USEC;
2698 2721
2699 type = parse_common_type(data); 2722 type = trace_parse_common_type(data);
2700 2723
2701 event = find_event(type); 2724 event = trace_find_event(type);
2702 if (!event) 2725 if (!event) {
2703 die("ug! no event found for type %d", type); 2726 printf("ug! no event found for type %d\n", type);
2727 return;
2728 }
2704 2729
2705 pid = parse_common_pid(data); 2730 pid = parse_common_pid(data);
2706 2731
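
trace-event-parse.c now exports its type lookup (trace_parse_common_type() and trace_find_event()) and adds raw_field_value()/raw_field_ptr(), so other perf commands can pull named fields straight out of a PERF_SAMPLE_RAW payload. A minimal sketch of that usage, assuming "data" points at one raw tracepoint record and using field names from the sched_wakeup tracepoint:

#include "util/trace-event.h"
#include <stdio.h>

static void show_wakeup(void *data)
{
	int type = trace_parse_common_type(data);
	struct event *event = trace_find_event(type);

	if (!event)
		return;

	/* "comm" and "pid" are fields of the sched_wakeup tracepoint */
	printf("%s woke pid %llu\n",
	       (char *)raw_field_ptr(event, "comm", data),
	       raw_field_value(event, "pid", data));
}
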
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index a1217a10632f..1b5c847d2c22 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -458,12 +458,13 @@ struct record *trace_read_data(int cpu)
458 return data; 458 return data;
459} 459}
460 460
461void trace_report (void) 461void trace_report(void)
462{ 462{
463 const char *input_file = "trace.info"; 463 const char *input_file = "trace.info";
464 char buf[BUFSIZ]; 464 char buf[BUFSIZ];
465 char test[] = { 23, 8, 68 }; 465 char test[] = { 23, 8, 68 };
466 char *version; 466 char *version;
467 int show_version = 0;
467 int show_funcs = 0; 468 int show_funcs = 0;
468 int show_printk = 0; 469 int show_printk = 0;
469 470
@@ -480,7 +481,8 @@ void trace_report (void)
480 die("not a trace file (missing tracing)"); 481 die("not a trace file (missing tracing)");
481 482
482 version = read_string(); 483 version = read_string();
483 printf("version = %s\n", version); 484 if (show_version)
485 printf("version = %s\n", version);
484 free(version); 486 free(version);
485 487
486 read_or_die(buf, 1); 488 read_or_die(buf, 1);
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index 420294a5773e..d35ebf1e29ff 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -234,6 +234,11 @@ extern int header_page_data_offset;
234extern int header_page_data_size; 234extern int header_page_data_size;
235 235
236int parse_header_page(char *buf, unsigned long size); 236int parse_header_page(char *buf, unsigned long size);
237int trace_parse_common_type(void *data);
238struct event *trace_find_event(int id);
239unsigned long long
240raw_field_value(struct event *event, const char *name, void *data);
241void *raw_field_ptr(struct event *event, const char *name, void *data);
237 242
238void read_tracing_data(struct perf_counter_attr *pattrs, int nb_counters); 243void read_tracing_data(struct perf_counter_attr *pattrs, int nb_counters);
239 244