-rw-r--r--  CREDITS | 2
-rw-r--r--  Documentation/kernel-parameters.txt | 10
-rw-r--r--  MAINTAINERS | 2
-rw-r--r--  arch/alpha/include/asm/perf_event.h | 6
-rw-r--r--  arch/alpha/kernel/irq_alpha.c | 2
-rw-r--r--  arch/alpha/kernel/perf_event.c | 11
-rw-r--r--  arch/arm/kernel/perf_event.c | 4
-rw-r--r--  arch/mips/kernel/perf_event_mipsxx.c | 2
-rw-r--r--  arch/powerpc/kernel/e500-pmu.c | 2
-rw-r--r--  arch/powerpc/kernel/mpc7450-pmu.c | 2
-rw-r--r--  arch/powerpc/kernel/perf_event.c | 2
-rw-r--r--  arch/powerpc/kernel/perf_event_fsl_emb.c | 2
-rw-r--r--  arch/powerpc/kernel/power4-pmu.c | 2
-rw-r--r--  arch/powerpc/kernel/power5+-pmu.c | 2
-rw-r--r--  arch/powerpc/kernel/power5-pmu.c | 2
-rw-r--r--  arch/powerpc/kernel/power6-pmu.c | 2
-rw-r--r--  arch/powerpc/kernel/power7-pmu.c | 2
-rw-r--r--  arch/powerpc/kernel/ppc970-pmu.c | 2
-rw-r--r--  arch/sh/kernel/cpu/sh4/perf_event.c | 2
-rw-r--r--  arch/sh/kernel/cpu/sh4a/perf_event.c | 2
-rw-r--r--  arch/sh/kernel/perf_event.c | 2
-rw-r--r--  arch/sparc/include/asm/perf_event.h | 4
-rw-r--r--  arch/sparc/kernel/nmi.c | 2
-rw-r--r--  arch/sparc/kernel/perf_event.c | 9
-rw-r--r--  arch/x86/include/asm/alternative.h | 7
-rw-r--r--  arch/x86/include/asm/irq.h | 4
-rw-r--r--  arch/x86/include/asm/kdebug.h | 2
-rw-r--r--  arch/x86/include/asm/msr-index.h | 4
-rw-r--r--  arch/x86/include/asm/nmi.h | 53
-rw-r--r--  arch/x86/include/asm/perf_event.h | 2
-rw-r--r--  arch/x86/include/asm/perf_event_p4.h | 63
-rw-r--r--  arch/x86/include/asm/smpboot_hooks.h | 1
-rw-r--r--  arch/x86/include/asm/stacktrace.h | 33
-rw-r--r--  arch/x86/include/asm/timer.h | 6
-rw-r--r--  arch/x86/kernel/alternative.c | 49
-rw-r--r--  arch/x86/kernel/apic/Makefile | 5
-rw-r--r--  arch/x86/kernel/apic/apic.c | 15
-rw-r--r--  arch/x86/kernel/apic/hw_nmi.c | 20
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 46
-rw-r--r--  arch/x86/kernel/apic/nmi.c | 567
-rw-r--r--  arch/x86/kernel/cpu/common.c | 1
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 74
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c | 16
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 26
-rw-r--r--  arch/x86/kernel/cpu/perfctr-watchdog.c | 644
-rw-r--r--  arch/x86/kernel/dumpstack.c | 12
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 25
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 24
-rw-r--r--  arch/x86/kernel/kprobes.c | 113
-rw-r--r--  arch/x86/kernel/process.c | 3
-rw-r--r--  arch/x86/kernel/smpboot.c | 22
-rw-r--r--  arch/x86/kernel/stacktrace.c | 8
-rw-r--r--  arch/x86/kernel/time.c | 18
-rw-r--r--  arch/x86/kernel/traps.c | 27
-rw-r--r--  arch/x86/mm/kmemcheck/error.c | 2
-rw-r--r--  arch/x86/oprofile/backtrace.c | 2
-rw-r--r--  arch/x86/oprofile/nmi_int.c | 3
-rw-r--r--  arch/x86/oprofile/nmi_timer_int.c | 3
-rw-r--r--  arch/x86/oprofile/op_model_amd.c | 54
-rw-r--r--  arch/x86/oprofile/op_model_p4.c | 2
-rw-r--r--  drivers/acpi/acpica/nsinit.c | 2
-rw-r--r--  drivers/watchdog/hpwdt.c | 11
-rw-r--r--  include/linux/ftrace_event.h | 14
-rw-r--r--  include/linux/kprobes.h | 4
-rw-r--r--  include/linux/nmi.h | 10
-rw-r--r--  include/linux/perf_event.h | 26
-rw-r--r--  include/linux/sched.h | 4
-rw-r--r--  include/linux/stacktrace.h | 4
-rw-r--r--  include/linux/syscalls.h | 10
-rw-r--r--  include/linux/tracepoint.h | 33
-rw-r--r--  include/trace/define_trace.h | 15
-rw-r--r--  include/trace/events/syscalls.h | 4
-rw-r--r--  include/trace/ftrace.h | 21
-rw-r--r--  init/main.c | 5
-rw-r--r--  kernel/hw_breakpoint.c | 2
-rw-r--r--  kernel/kprobes.c | 565
-rw-r--r--  kernel/perf_event.c | 573
-rw-r--r--  kernel/sched.c | 2
-rw-r--r--  kernel/sysctl.c | 16
-rw-r--r--  kernel/sysctl_binary.c | 1
-rw-r--r--  kernel/trace/trace_event_perf.c | 31
-rw-r--r--  kernel/trace/trace_events.c | 6
-rw-r--r--  kernel/trace/trace_export.c | 14
-rw-r--r--  kernel/watchdog.c | 9
-rw-r--r--  lib/Kconfig.debug | 3
-rw-r--r--  scripts/Makefile.build | 13
-rw-r--r--  tools/perf/Documentation/perf-annotate.txt | 37
-rw-r--r--  tools/perf/Documentation/perf-buildid-list.txt | 3
-rw-r--r--  tools/perf/Documentation/perf-diff.txt | 21
-rw-r--r--  tools/perf/Documentation/perf-kvm.txt | 8
-rw-r--r--  tools/perf/Documentation/perf-lock.txt | 15
-rw-r--r--  tools/perf/Documentation/perf-probe.txt | 4
-rw-r--r--  tools/perf/Documentation/perf-record.txt | 22
-rw-r--r--  tools/perf/Documentation/perf-report.txt | 55
-rw-r--r--  tools/perf/Documentation/perf-sched.txt | 18
-rw-r--r--  tools/perf/Documentation/perf-script-perl.txt (renamed from tools/perf/Documentation/perf-trace-perl.txt) | 28
-rw-r--r--  tools/perf/Documentation/perf-script-python.txt (renamed from tools/perf/Documentation/perf-trace-python.txt) | 88
-rw-r--r--  tools/perf/Documentation/perf-script.txt (renamed from tools/perf/Documentation/perf-trace.txt) | 61
-rw-r--r--  tools/perf/Documentation/perf-stat.txt | 44
-rw-r--r--  tools/perf/Documentation/perf-test.txt | 2
-rw-r--r--  tools/perf/Documentation/perf-timechart.txt | 2
-rw-r--r--  tools/perf/Documentation/perf-top.txt | 28
-rw-r--r--  tools/perf/MANIFEST | 1
-rw-r--r--  tools/perf/Makefile | 19
-rw-r--r--  tools/perf/bench/mem-memcpy-arch.h | 12
-rw-r--r--  tools/perf/bench/mem-memcpy-x86-64-asm-def.h | 4
-rw-r--r--  tools/perf/bench/mem-memcpy-x86-64-asm.S | 2
-rw-r--r--  tools/perf/bench/mem-memcpy.c | 219
-rw-r--r--  tools/perf/builtin-annotate.c | 10
-rw-r--r--  tools/perf/builtin-buildid-list.c | 3
-rw-r--r--  tools/perf/builtin-diff.c | 21
-rw-r--r--  tools/perf/builtin-inject.c | 41
-rw-r--r--  tools/perf/builtin-kmem.c | 27
-rw-r--r--  tools/perf/builtin-lock.c | 23
-rw-r--r--  tools/perf/builtin-record.c | 58
-rw-r--r--  tools/perf/builtin-report.c | 23
-rw-r--r--  tools/perf/builtin-sched.c | 33
-rw-r--r--  tools/perf/builtin-script.c (renamed from tools/perf/builtin-trace.c) | 120
-rw-r--r--  tools/perf/builtin-stat.c | 325
-rw-r--r--  tools/perf/builtin-test.c | 23
-rw-r--r--  tools/perf/builtin-timechart.c | 48
-rw-r--r--  tools/perf/builtin-top.c | 20
-rw-r--r--  tools/perf/builtin.h | 2
-rw-r--r--  tools/perf/command-list.txt | 2
-rw-r--r--  tools/perf/feature-tests.mak | 4
-rw-r--r--  tools/perf/perf.c | 2
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/Context.c | 2
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/Context.xs | 4
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/README | 4
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm | 2
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm | 4
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm | 4
-rw-r--r--  tools/perf/scripts/perl/bin/failed-syscalls-report | 2
-rw-r--r--  tools/perf/scripts/perl/bin/rw-by-file-report | 5
-rw-r--r--  tools/perf/scripts/perl/bin/rw-by-pid-report | 5
-rw-r--r--  tools/perf/scripts/perl/bin/rwtop-report | 5
-rw-r--r--  tools/perf/scripts/perl/bin/wakeup-latency-report | 5
-rw-r--r--  tools/perf/scripts/perl/bin/workqueue-stats-report | 6
-rw-r--r--  tools/perf/scripts/perl/check-perf-trace.pl | 2
-rw-r--r--  tools/perf/scripts/perl/rw-by-file.pl | 2
-rw-r--r--  tools/perf/scripts/perl/workqueue-stats.pl | 2
-rw-r--r--  tools/perf/scripts/python/Perf-Trace-Util/Context.c | 2
-rw-r--r--  tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py | 2
-rw-r--r--  tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py | 2
-rw-r--r--  tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py | 2
-rw-r--r--  tools/perf/scripts/python/bin/failed-syscalls-by-pid-report | 2
-rw-r--r--  tools/perf/scripts/python/bin/futex-contention-report | 2
-rw-r--r--  tools/perf/scripts/python/bin/netdev-times-report | 2
-rw-r--r--  tools/perf/scripts/python/bin/sched-migration-report | 2
-rw-r--r--  tools/perf/scripts/python/bin/sctop-report | 2
-rw-r--r--  tools/perf/scripts/python/bin/syscall-counts-by-pid-report | 2
-rw-r--r--  tools/perf/scripts/python/bin/syscall-counts-report | 2
-rw-r--r--  tools/perf/scripts/python/check-perf-trace.py | 2
-rw-r--r--  tools/perf/scripts/python/failed-syscalls-by-pid.py | 2
-rw-r--r--  tools/perf/scripts/python/sched-migration.py | 2
-rw-r--r--  tools/perf/scripts/python/sctop.py | 2
-rw-r--r--  tools/perf/scripts/python/syscall-counts-by-pid.py | 2
-rw-r--r--  tools/perf/scripts/python/syscall-counts.py | 2
-rw-r--r--  tools/perf/util/build-id.c | 7
-rw-r--r--  tools/perf/util/debug.c | 42
-rw-r--r--  tools/perf/util/debug.h | 2
-rw-r--r--  tools/perf/util/event.c | 358
-rw-r--r--  tools/perf/util/event.h | 30
-rw-r--r--  tools/perf/util/header.c | 43
-rw-r--r--  tools/perf/util/header.h | 2
-rw-r--r--  tools/perf/util/hist.c | 23
-rw-r--r--  tools/perf/util/hist.h | 2
-rw-r--r--  tools/perf/util/include/asm/cpufeature.h | 9
-rw-r--r--  tools/perf/util/include/asm/dwarf2.h | 11
-rw-r--r--  tools/perf/util/include/linux/bitops.h | 5
-rw-r--r--  tools/perf/util/include/linux/linkage.h | 13
-rw-r--r--  tools/perf/util/parse-events.c | 12
-rw-r--r--  tools/perf/util/parse-options.h | 4
-rw-r--r--  tools/perf/util/probe-event.c | 234
-rw-r--r--  tools/perf/util/probe-finder.c | 42
-rw-r--r--  tools/perf/util/probe-finder.h | 6
-rw-r--r--  tools/perf/util/scripting-engines/trace-event-perl.c | 6
-rw-r--r--  tools/perf/util/scripting-engines/trace-event-python.c | 4
-rw-r--r--  tools/perf/util/session.c | 564
-rw-r--r--  tools/perf/util/session.h | 25
-rw-r--r--  tools/perf/util/sort.c | 6
-rw-r--r--  tools/perf/util/symbol.c | 139
-rw-r--r--  tools/perf/util/symbol.h | 4
-rw-r--r--  tools/perf/util/ui/util.c | 16
184 files changed, 3637 insertions, 2972 deletions
diff --git a/CREDITS b/CREDITS
index 41d8e63d5165..494b6e4746d7 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2365,8 +2365,6 @@ E: acme@redhat.com
 W: http://oops.ghostprotocols.net:81/blog/
 P: 1024D/9224DF01 D5DF E3BB E3C8 BCBB F8AD 841A B6AB 4681 9224 DF01
 D: IPX, LLC, DCCP, cyc2x, wl3501_cs, net/ hacks
-S: R. Brasílio Itiberê, 4270/1010 - Água Verde
-S: 80240-060 - Curitiba - Paraná
 S: Brazil
 
 N: Karsten Merker
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 8b61c9360999..316c723a950c 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1579,20 +1579,12 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	nmi_watchdog=	[KNL,BUGS=X86] Debugging features for SMP kernels
 			Format: [panic,][num]
-			Valid num: 0,1,2
+			Valid num: 0
 			0 - turn nmi_watchdog off
-			1 - use the IO-APIC timer for the NMI watchdog
-			2 - use the local APIC for the NMI watchdog using
-			a performance counter. Note: This will use one
-			performance counter and the local APIC's performance
-			vector.
 			When panic is specified, panic when an NMI watchdog
 			timeout occurs.
 			This is useful when you use a panic=... timeout and
 			need the box quickly up again.
-			Instead of 1 and 2 it is possible to use the following
-			symbolic names: lapic and ioapic
-			Example: nmi_watchdog=2 or nmi_watchdog=panic,lapic
 
 	netpoll.carrier_timeout=
 			[NET] Specifies amount of time (in seconds) that
diff --git a/MAINTAINERS b/MAINTAINERS
index 71e40f9118df..92e5b67105f0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4612,7 +4612,7 @@ PERFORMANCE EVENTS SUBSYSTEM
 M:	Peter Zijlstra <a.p.zijlstra@chello.nl>
 M:	Paul Mackerras <paulus@samba.org>
 M:	Ingo Molnar <mingo@elte.hu>
-M:	Arnaldo Carvalho de Melo <acme@redhat.com>
+M:	Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
 S:	Supported
 F:	kernel/perf_event*.c
 F:	include/linux/perf_event.h
diff --git a/arch/alpha/include/asm/perf_event.h b/arch/alpha/include/asm/perf_event.h
index fe792ca818f6..5996e7a6757e 100644
--- a/arch/alpha/include/asm/perf_event.h
+++ b/arch/alpha/include/asm/perf_event.h
@@ -1,10 +1,4 @@
 #ifndef __ASM_ALPHA_PERF_EVENT_H
 #define __ASM_ALPHA_PERF_EVENT_H
 
-#ifdef CONFIG_PERF_EVENTS
-extern void init_hw_perf_events(void);
-#else
-static inline void init_hw_perf_events(void) { }
-#endif
-
 #endif /* __ASM_ALPHA_PERF_EVENT_H */
diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c
index 5f77afb88e89..4c8bb374eb0a 100644
--- a/arch/alpha/kernel/irq_alpha.c
+++ b/arch/alpha/kernel/irq_alpha.c
@@ -112,8 +112,6 @@ init_IRQ(void)
 	wrent(entInt, 0);
 
 	alpha_mv.init_irq();
-
-	init_hw_perf_events();
 }
 
 /*
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 1cc49683fb69..90561c45e7d8 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/kdebug.h>
 #include <linux/mutex.h>
+#include <linux/init.h>
 
 #include <asm/hwrpb.h>
 #include <asm/atomic.h>
@@ -863,13 +864,13 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
 /*
  * Init call to initialise performance events at kernel startup.
  */
-void __init init_hw_perf_events(void)
+int __init init_hw_perf_events(void)
 {
 	pr_info("Performance events: ");
 
 	if (!supported_cpu()) {
 		pr_cont("No support for your CPU.\n");
-		return;
+		return 0;
 	}
 
 	pr_cont("Supported CPU type!\n");
@@ -881,6 +882,8 @@ void __init init_hw_perf_events(void)
 	/* And set up PMU specification */
 	alpha_pmu = &ev67_pmu;
 
-	perf_pmu_register(&pmu);
-}
+	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 
+	return 0;
+}
+early_initcall(init_hw_perf_events);
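The same conversion repeats across the architectures below: init_hw_perf_events() now returns int and registers itself via early_initcall() instead of being called from arch init code, and perf_pmu_register() gains a sysfs name and a static type id. A minimal sketch of the resulting pattern (the "example_pmu" names are made up for illustration, not part of this series):

	/* Sketch only: example_pmu and example_pmu_supported() are hypothetical. */
	static int __init example_pmu_init(void)
	{
		if (!example_pmu_supported())
			return 0;	/* no PMU present is not an initcall error */

		/* New three-argument form: pmu, sysfs name, static type id. */
		perf_pmu_register(&example_pmu, "cpu", PERF_TYPE_RAW);
		return 0;
	}
	early_initcall(example_pmu_init);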
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 07a50357492a..fdfa4976b0bf 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -3034,11 +3034,11 @@ init_hw_perf_events(void)
 		pr_info("no hardware support available\n");
 	}
 
-	perf_pmu_register(&pmu);
+	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 
 	return 0;
 }
-arch_initcall(init_hw_perf_events);
+early_initcall(init_hw_perf_events);
 
 /*
  * Callchain handling code.
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 5c7c6fc07565..183e0d226669 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -1047,6 +1047,6 @@ init_hw_perf_events(void)
 
 	return 0;
 }
-arch_initcall(init_hw_perf_events);
+early_initcall(init_hw_perf_events);
 
 #endif /* defined(CONFIG_CPU_MIPS32)... */
diff --git a/arch/powerpc/kernel/e500-pmu.c b/arch/powerpc/kernel/e500-pmu.c
index 7c07de0d8943..b150b510510f 100644
--- a/arch/powerpc/kernel/e500-pmu.c
+++ b/arch/powerpc/kernel/e500-pmu.c
@@ -126,4 +126,4 @@ static int init_e500_pmu(void)
 	return register_fsl_emb_pmu(&e500_pmu);
 }
 
-arch_initcall(init_e500_pmu);
+early_initcall(init_e500_pmu);
diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c
index 09d72028f317..2cc5e0301d0b 100644
--- a/arch/powerpc/kernel/mpc7450-pmu.c
+++ b/arch/powerpc/kernel/mpc7450-pmu.c
@@ -414,4 +414,4 @@ static int init_mpc7450_pmu(void)
 	return register_power_pmu(&mpc7450_pmu);
 }
 
-arch_initcall(init_mpc7450_pmu);
+early_initcall(init_mpc7450_pmu);
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 3129c855933c..567480705789 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -1379,7 +1379,7 @@ int register_power_pmu(struct power_pmu *pmu)
 		freeze_events_kernel = MMCR0_FCHV;
 #endif /* CONFIG_PPC64 */
 
-	perf_pmu_register(&power_pmu);
+	perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW);
 	perf_cpu_notifier(power_pmu_notifier);
 
 	return 0;
diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c
index 7ecca59ddf77..4dcf5f831e9d 100644
--- a/arch/powerpc/kernel/perf_event_fsl_emb.c
+++ b/arch/powerpc/kernel/perf_event_fsl_emb.c
@@ -681,7 +681,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
 	pr_info("%s performance monitor hardware support registered\n",
 		pmu->name);
 
-	perf_pmu_register(&fsl_emb_pmu);
+	perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW);
 
 	return 0;
 }
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
index 2a361cdda635..ead8b3c2649e 100644
--- a/arch/powerpc/kernel/power4-pmu.c
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -613,4 +613,4 @@ static int init_power4_pmu(void)
 	return register_power_pmu(&power4_pmu);
 }
 
-arch_initcall(init_power4_pmu);
+early_initcall(init_power4_pmu);
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
index 199de527d411..eca0ac595cb6 100644
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -682,4 +682,4 @@ static int init_power5p_pmu(void)
 	return register_power_pmu(&power5p_pmu);
 }
 
-arch_initcall(init_power5p_pmu);
+early_initcall(init_power5p_pmu);
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
index 98b6a729a9dd..d5ff0f64a5e6 100644
--- a/arch/powerpc/kernel/power5-pmu.c
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -621,4 +621,4 @@ static int init_power5_pmu(void)
 	return register_power_pmu(&power5_pmu);
 }
 
-arch_initcall(init_power5_pmu);
+early_initcall(init_power5_pmu);
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
index 84a607bda8fb..31603927e376 100644
--- a/arch/powerpc/kernel/power6-pmu.c
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -544,4 +544,4 @@ static int init_power6_pmu(void)
 	return register_power_pmu(&power6_pmu);
 }
 
-arch_initcall(init_power6_pmu);
+early_initcall(init_power6_pmu);
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
index 852f7b7f6b40..593740fcb799 100644
--- a/arch/powerpc/kernel/power7-pmu.c
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -369,4 +369,4 @@ static int init_power7_pmu(void)
 	return register_power_pmu(&power7_pmu);
 }
 
-arch_initcall(init_power7_pmu);
+early_initcall(init_power7_pmu);
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
index 3fee685de4df..9a6e093858fe 100644
--- a/arch/powerpc/kernel/ppc970-pmu.c
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -494,4 +494,4 @@ static int init_ppc970_pmu(void)
 	return register_power_pmu(&ppc970_pmu);
 }
 
-arch_initcall(init_ppc970_pmu);
+early_initcall(init_ppc970_pmu);
diff --git a/arch/sh/kernel/cpu/sh4/perf_event.c b/arch/sh/kernel/cpu/sh4/perf_event.c
index dbf3b4bb71fe..748955df018d 100644
--- a/arch/sh/kernel/cpu/sh4/perf_event.c
+++ b/arch/sh/kernel/cpu/sh4/perf_event.c
@@ -250,4 +250,4 @@ static int __init sh7750_pmu_init(void)
 
 	return register_sh_pmu(&sh7750_pmu);
 }
-arch_initcall(sh7750_pmu_init);
+early_initcall(sh7750_pmu_init);
diff --git a/arch/sh/kernel/cpu/sh4a/perf_event.c b/arch/sh/kernel/cpu/sh4a/perf_event.c
index 580276525731..17e6bebfede0 100644
--- a/arch/sh/kernel/cpu/sh4a/perf_event.c
+++ b/arch/sh/kernel/cpu/sh4a/perf_event.c
@@ -284,4 +284,4 @@ static int __init sh4a_pmu_init(void)
 
 	return register_sh_pmu(&sh4a_pmu);
 }
-arch_initcall(sh4a_pmu_init);
+early_initcall(sh4a_pmu_init);
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 5a4b33435650..2ee21a47b5af 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -389,7 +389,7 @@ int __cpuinit register_sh_pmu(struct sh_pmu *_pmu)
 
 	WARN_ON(_pmu->num_events > MAX_HWEVENTS);
 
-	perf_pmu_register(&pmu);
+	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 	perf_cpu_notifier(sh_pmu_notifier);
 	return 0;
 }
diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h
index 6e8bfa1786da..4d3dbe3703e9 100644
--- a/arch/sparc/include/asm/perf_event.h
+++ b/arch/sparc/include/asm/perf_event.h
@@ -4,8 +4,6 @@
 #ifdef CONFIG_PERF_EVENTS
 #include <asm/ptrace.h>
 
-extern void init_hw_perf_events(void);
-
 #define perf_arch_fetch_caller_regs(regs, ip)		\
 do {							\
 	unsigned long _pstate, _asi, _pil, _i7, _fp;	\
@@ -26,8 +24,6 @@ do { \
 	(regs)->u_regs[UREG_I6] = _fp;	\
 	(regs)->u_regs[UREG_I7] = _i7;	\
 } while (0)
-#else
-static inline void init_hw_perf_events(void)	{ }
 #endif
 
 #endif
diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c
index a4bd7ba74c89..300f810142f5 100644
--- a/arch/sparc/kernel/nmi.c
+++ b/arch/sparc/kernel/nmi.c
@@ -270,8 +270,6 @@ int __init nmi_init(void)
 			atomic_set(&nmi_active, -1);
 		}
 	}
-	if (!err)
-		init_hw_perf_events();
 
 	return err;
 }
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 0d6deb55a2ae..760578687e7c 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1307,20 +1307,23 @@ static bool __init supported_pmu(void)
 	return false;
 }
 
-void __init init_hw_perf_events(void)
+int __init init_hw_perf_events(void)
 {
 	pr_info("Performance events: ");
 
 	if (!supported_pmu()) {
 		pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
-		return;
+		return 0;
 	}
 
 	pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);
 
-	perf_pmu_register(&pmu);
+	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 	register_die_notifier(&perf_event_nmi_notifier);
+
+	return 0;
 }
+early_initcall(init_hw_perf_events);
 
 void perf_callchain_kernel(struct perf_callchain_entry *entry,
 			   struct pt_regs *regs)
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 76561d20ea2f..4a2adaa9aefc 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -180,8 +180,15 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len);
  * On the local CPU you need to be protected again NMI or MCE handlers seeing an
  * inconsistent instruction while you patch.
  */
+struct text_poke_param {
+	void *addr;
+	const void *opcode;
+	size_t len;
+};
+
 extern void *text_poke(void *addr, const void *opcode, size_t len);
 extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
+extern void text_poke_smp_batch(struct text_poke_param *params, int n);
 
 #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
 #define IDEAL_NOP_SIZE_5 5
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 13b0ebaa512f..ba870bb6dd8e 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -15,10 +15,6 @@ static inline int irq_canonicalize(int irq)
 	return ((irq == 2) ? 9 : irq);
 }
 
-#ifdef CONFIG_X86_LOCAL_APIC
-# define ARCH_HAS_NMI_WATCHDOG
-#endif
-
 #ifdef CONFIG_X86_32
 extern void irq_ctx_init(int cpu);
 #else
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 5bdfca86581b..f23eb2528464 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -28,7 +28,7 @@ extern void die(const char *, struct pt_regs *,long);
 extern int __must_check __die(const char *, struct pt_regs *, long);
 extern void show_registers(struct pt_regs *regs);
 extern void show_trace(struct task_struct *t, struct pt_regs *regs,
-		       unsigned long *sp, unsigned long bp);
+		       unsigned long *sp);
 extern void __show_regs(struct pt_regs *regs, int all);
 extern void show_regs(struct pt_regs *regs);
 extern unsigned long oops_begin(void);
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 6b89f5e86021..86030f63ba02 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -123,6 +123,10 @@
 #define MSR_AMD64_IBSCTL		0xc001103a
 #define MSR_AMD64_IBSBRTARGET		0xc001103b
 
+/* Fam 15h MSRs */
+#define MSR_F15H_PERF_CTL		0xc0010200
+#define MSR_F15H_PERF_CTR		0xc0010201
+
 /* Fam 10h MSRs */
 #define MSR_FAM10H_MMIO_CONF_BASE	0xc0010058
 #define FAM10H_MMIO_CONF_ENABLE		(1<<0)
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 932f0f86b4b7..c4021b953510 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -5,41 +5,15 @@
 #include <asm/irq.h>
 #include <asm/io.h>
 
-#ifdef ARCH_HAS_NMI_WATCHDOG
-
-/**
- * do_nmi_callback
- *
- * Check to see if a callback exists and execute it.  Return 1
- * if the handler exists and was handled successfully.
- */
-int do_nmi_callback(struct pt_regs *regs, int cpu);
+#ifdef CONFIG_X86_LOCAL_APIC
 
 extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
-extern int check_nmi_watchdog(void);
-#if !defined(CONFIG_LOCKUP_DETECTOR)
-extern int nmi_watchdog_enabled;
-#endif
 extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
 extern int reserve_perfctr_nmi(unsigned int);
 extern void release_perfctr_nmi(unsigned int);
 extern int reserve_evntsel_nmi(unsigned int);
 extern void release_evntsel_nmi(unsigned int);
 
-extern void setup_apic_nmi_watchdog(void *);
-extern void stop_apic_nmi_watchdog(void *);
-extern void disable_timer_nmi_watchdog(void);
-extern void enable_timer_nmi_watchdog(void);
-extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason);
-extern void cpu_nmi_set_wd_enabled(void);
-
-extern atomic_t nmi_active;
-extern unsigned int nmi_watchdog;
-#define NMI_NONE	0
-#define NMI_IO_APIC	1
-#define NMI_LOCAL_APIC	2
-#define NMI_INVALID	3
-
 struct ctl_table;
 extern int proc_nmi_enabled(struct ctl_table *, int ,
 			void __user *, size_t *, loff_t *);
@@ -47,33 +21,8 @@ extern int unknown_nmi_panic;
 
 void arch_trigger_all_cpu_backtrace(void);
 #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
-
-static inline void localise_nmi_watchdog(void)
-{
-	if (nmi_watchdog == NMI_IO_APIC)
-		nmi_watchdog = NMI_LOCAL_APIC;
-}
-
-/* check if nmi_watchdog is active (ie was specified at boot) */
-static inline int nmi_watchdog_active(void)
-{
-	/*
-	 * actually it should be:
-	 *	return (nmi_watchdog == NMI_LOCAL_APIC ||
-	 *		nmi_watchdog == NMI_IO_APIC)
-	 * but since they are power of two we could use a
-	 * cheaper way --cvg
-	 */
-	return nmi_watchdog & (NMI_LOCAL_APIC | NMI_IO_APIC);
-}
 #endif
 
-void lapic_watchdog_stop(void);
-int lapic_watchdog_init(unsigned nmi_hz);
-int lapic_wd_event(unsigned nmi_hz);
-unsigned lapic_adjust_nmi_hz(unsigned hz);
-void disable_lapic_nmi_watchdog(void);
-void enable_lapic_nmi_watchdog(void);
 void stop_nmi(void);
 void restart_nmi(void);
 
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 550e26b1dbb3..d9d4dae305f6 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -125,7 +125,6 @@ union cpuid10_edx {
 #define IBS_OP_MAX_CNT_EXT	0x007FFFFFULL	/* not a register bit mask */
 
 #ifdef CONFIG_PERF_EVENTS
-extern void init_hw_perf_events(void);
 extern void perf_events_lapic_init(void);
 
 #define PERF_EVENT_INDEX_OFFSET			0
@@ -156,7 +155,6 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
 }
 
 #else
-static inline void init_hw_perf_events(void)		{ }
 static inline void perf_events_lapic_init(void)	{ }
 #endif
 
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index a70cd216be5d..295e2ff18a6a 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -744,14 +744,6 @@ enum P4_ESCR_EMASKS {
 };
 
 /*
- * P4 PEBS specifics (Replay Event only)
- *
- * Format (bits):
- *   0-6: metric from P4_PEBS_METRIC enum
- *    7 : reserved
- *    8 : reserved
- *    9-11 : reserved
- *
  * Note we have UOP and PEBS bits reserved for now
  * just in case if we will need them once
  */
@@ -788,5 +780,60 @@ enum P4_PEBS_METRIC {
 	P4_PEBS_METRIC__max
 };
 
+/*
+ * Notes on internal configuration of ESCR+CCCR tuples
+ *
+ * Since P4 has quite the different architecture of
+ * performance registers in compare with "architectural"
+ * once and we have on 64 bits to keep configuration
+ * of performance event, the following trick is used.
+ *
+ * 1) Since both ESCR and CCCR registers have only low
+ *    32 bits valuable, we pack them into a single 64 bit
+ *    configuration. Low 32 bits of such config correspond
+ *    to low 32 bits of CCCR register and high 32 bits
+ *    correspond to low 32 bits of ESCR register.
+ *
+ * 2) The meaning of every bit of such config field can
+ *    be found in Intel SDM but it should be noted that
+ *    we "borrow" some reserved bits for own usage and
+ *    clean them or set to a proper value when we do
+ *    a real write to hardware registers.
+ *
+ * 3) The format of bits of config is the following
+ *    and should be either 0 or set to some predefined
+ *    values:
+ *
+ *    Low 32 bits
+ *    -----------
+ *      0-6: P4_PEBS_METRIC enum
+ *     7-11: reserved
+ *       12: reserved (Enable)
+ *    13-15: reserved (ESCR select)
+ *    16-17: Active Thread
+ *       18: Compare
+ *       19: Complement
+ *    20-23: Threshold
+ *       24: Edge
+ *       25: reserved (FORCE_OVF)
+ *       26: reserved (OVF_PMI_T0)
+ *       27: reserved (OVF_PMI_T1)
+ *    28-29: reserved
+ *       30: reserved (Cascade)
+ *       31: reserved (OVF)
+ *
+ *    High 32 bits
+ *    ------------
+ *        0: reserved (T1_USR)
+ *        1: reserved (T1_OS)
+ *        2: reserved (T0_USR)
+ *        3: reserved (T0_OS)
+ *        4: Tag Enable
+ *      5-8: Tag Value
+ *     9-24: Event Mask (may use P4_ESCR_EMASK_BIT helper)
+ *    25-30: enum P4_EVENTS
+ *       31: reserved (HT thread)
+ */
+
 #endif /* PERF_EVENT_P4_H */
 
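The packing scheme documented in that comment boils down to placing the CCCR image in the low 32 bits of the 64-bit event config and the ESCR image in the high 32 bits. A hedged sketch of the pack/unpack arithmetic (helper names here are illustrative, not the header's own):

	/* Illustrative helpers for the layout documented above. */
	static inline u64 p4_config_pack(u32 escr, u32 cccr)
	{
		return ((u64)escr << 32) | cccr;
	}

	static inline u32 p4_config_unpack_escr(u64 config)
	{
		return (u32)(config >> 32);
	}

	static inline u32 p4_config_unpack_cccr(u64 config)
	{
		return (u32)(config & 0xffffffffULL);
	}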
diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h
index 1def60114906..6c22bf353f26 100644
--- a/arch/x86/include/asm/smpboot_hooks.h
+++ b/arch/x86/include/asm/smpboot_hooks.h
@@ -48,7 +48,6 @@ static inline void __init smpboot_setup_io_apic(void)
 		setup_IO_APIC();
 	else {
 		nr_ioapics = 0;
-		localise_nmi_watchdog();
 	}
 #endif
 }
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 2b16a2ad23dc..52b5c7ed3608 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -7,6 +7,7 @@
 #define _ASM_X86_STACKTRACE_H
 
 #include <linux/uaccess.h>
+#include <linux/ptrace.h>
 
 extern int kstack_depth_to_print;
 
@@ -46,7 +47,7 @@ struct stacktrace_ops {
 };
 
 void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp,
+		unsigned long *stack,
 		const struct stacktrace_ops *ops, void *data);
 
 #ifdef CONFIG_X86_32
@@ -57,13 +58,39 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
 #endif
 
+#ifdef CONFIG_FRAME_POINTER
+static inline unsigned long
+stack_frame(struct task_struct *task, struct pt_regs *regs)
+{
+	unsigned long bp;
+
+	if (regs)
+		return regs->bp;
+
+	if (task == current) {
+		/* Grab bp right from our regs */
+		get_bp(bp);
+		return bp;
+	}
+
+	/* bp is the last reg pushed by switch_to */
+	return *(unsigned long *)task->thread.sp;
+}
+#else
+static inline unsigned long
+stack_frame(struct task_struct *task, struct pt_regs *regs)
+{
+	return 0;
+}
+#endif
+
 extern void
 show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		   unsigned long *stack, unsigned long bp, char *log_lvl);
+		   unsigned long *stack, char *log_lvl);
 
 extern void
 show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		   unsigned long *sp, unsigned long bp, char *log_lvl);
+		   unsigned long *sp, char *log_lvl);
 
 extern unsigned int code_bytes;
 
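With bp dropped from the dump_trace()/show_trace_log_lvl() signatures, callers no longer thread a frame pointer through; stack_frame() derives it internally from regs, the current frame, or a switched-out task's saved stack pointer. A hedged sketch of a caller under the new signature (the ops structure is a stand-in, not from this diff):

	/* Sketch: print_trace_ops stands in for a real, filled-in stacktrace_ops. */
	static const struct stacktrace_ops print_trace_ops;

	static void example_show_trace(struct task_struct *task, struct pt_regs *regs)
	{
		/* No bp argument any more: dump_trace() calls stack_frame() itself. */
		dump_trace(task, regs, NULL, &print_trace_ops, NULL);
	}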
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 5469630b27f5..fa7b9176b76c 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -10,12 +10,6 @@
 unsigned long long native_sched_clock(void);
 extern int recalibrate_cpu_khz(void);
 
-#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
-extern int timer_ack;
-#else
-# define timer_ack (0)
-#endif
-
 extern int no_timer_check;
 
 /* Accelerators for sched_clock()
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5079f24c955a..553d0b0d639b 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -591,17 +591,21 @@ static atomic_t stop_machine_first;
 static int wrote_text;
 
 struct text_poke_params {
-	void *addr;
-	const void *opcode;
-	size_t len;
+	struct text_poke_param *params;
+	int nparams;
 };
 
 static int __kprobes stop_machine_text_poke(void *data)
 {
 	struct text_poke_params *tpp = data;
+	struct text_poke_param *p;
+	int i;
 
 	if (atomic_dec_and_test(&stop_machine_first)) {
-		text_poke(tpp->addr, tpp->opcode, tpp->len);
+		for (i = 0; i < tpp->nparams; i++) {
+			p = &tpp->params[i];
+			text_poke(p->addr, p->opcode, p->len);
+		}
 		smp_wmb();	/* Make sure other cpus see that this has run */
 		wrote_text = 1;
 	} else {
@@ -610,8 +614,12 @@ static int __kprobes stop_machine_text_poke(void *data)
 		smp_mb();	/* Load wrote_text before following execution */
 	}
 
-	flush_icache_range((unsigned long)tpp->addr,
-			   (unsigned long)tpp->addr + tpp->len);
+	for (i = 0; i < tpp->nparams; i++) {
+		p = &tpp->params[i];
+		flush_icache_range((unsigned long)p->addr,
+				   (unsigned long)p->addr + p->len);
+	}
+
 	return 0;
 }
 
@@ -631,10 +639,13 @@ static int __kprobes stop_machine_text_poke(void *data)
 void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
 {
 	struct text_poke_params tpp;
+	struct text_poke_param p;
 
-	tpp.addr = addr;
-	tpp.opcode = opcode;
-	tpp.len = len;
+	p.addr = addr;
+	p.opcode = opcode;
+	p.len = len;
+	tpp.params = &p;
+	tpp.nparams = 1;
 	atomic_set(&stop_machine_first, 1);
 	wrote_text = 0;
 	/* Use __stop_machine() because the caller already got online_cpus. */
@@ -642,6 +653,26 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
 	return addr;
 }
 
+/**
+ * text_poke_smp_batch - Update instructions on a live kernel on SMP
+ * @params: an array of text_poke parameters
+ * @n: the number of elements in params.
+ *
+ * Modify multi-byte instruction by using stop_machine() on SMP. Since the
+ * stop_machine() is heavy task, it is better to aggregate text_poke requests
+ * and do it once if possible.
+ *
+ * Note: Must be called under get_online_cpus() and text_mutex.
+ */
+void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
+{
+	struct text_poke_params tpp = {.params = params, .nparams = n};
+
+	atomic_set(&stop_machine_first, 1);
+	wrote_text = 0;
+	stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+}
+
 #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
 
 #ifdef CONFIG_X86_64
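text_poke_smp_batch() exists so a caller with many patch sites pays for a single stop_machine() rather than one per site. A hedged usage sketch under the locking rules from the kernel-doc above (the two sites and buffers are placeholders, not a real call site):

	/* Sketch only: addresses and opcode buffers are hypothetical. */
	static void example_batch_poke(void *a1, const void *i1, size_t l1,
				       void *a2, const void *i2, size_t l2)
	{
		struct text_poke_param params[] = {
			{ .addr = a1, .opcode = i1, .len = l1 },
			{ .addr = a2, .opcode = i2, .len = l2 },
		};

		get_online_cpus();
		mutex_lock(&text_mutex);
		text_poke_smp_batch(params, ARRAY_SIZE(params));	/* one stop_machine() */
		mutex_unlock(&text_mutex);
		put_online_cpus();
	}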
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 910f20b457c4..3966b564ea47 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -3,10 +3,7 @@
 #
 
 obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o probe_$(BITS).o ipi.o
-ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y)
-obj-$(CONFIG_X86_LOCAL_APIC)	+= nmi.o
-endif
-obj-$(CONFIG_HARDLOCKUP_DETECTOR)	+= hw_nmi.o
+obj-y				+= hw_nmi.o
 
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
 obj-$(CONFIG_SMP)		+= ipi.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 78218135b48e..fb7657822aad 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -31,7 +31,6 @@
 #include <linux/init.h>
 #include <linux/cpu.h>
 #include <linux/dmi.h>
-#include <linux/nmi.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
 
@@ -799,11 +798,7 @@ void __init setup_boot_APIC_clock(void)
 	 * PIT/HPET going.  Otherwise register lapic as a dummy
 	 * device.
 	 */
-	if (nmi_watchdog != NMI_IO_APIC)
-		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
-	else
-		pr_warning("APIC timer registered as dummy,"
-			" due to nmi_watchdog=%d!\n", nmi_watchdog);
+	lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
 
 	/* Setup the lapic or request the broadcast */
 	setup_APIC_timer();
@@ -1387,7 +1382,6 @@ void __cpuinit end_local_APIC_setup(void)
 	}
 #endif
 
-	setup_apic_nmi_watchdog(NULL);
 	apic_pm_activate();
 
 	/*
@@ -1758,17 +1752,10 @@ int __init APIC_init_uniprocessor(void)
 		setup_IO_APIC();
 	else {
 		nr_ioapics = 0;
-		localise_nmi_watchdog();
 	}
-#else
-	localise_nmi_watchdog();
 #endif
 
 	x86_init.timers.setup_percpu_clockev();
-#ifdef CONFIG_X86_64
-	check_nmi_watchdog();
-#endif
-
 	return 0;
 }
 
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 62f6e1e55b90..c57d0b599448 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -17,13 +17,14 @@
 #include <linux/nmi.h>
 #include <linux/module.h>
 
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
 u64 hw_nmi_get_sample_period(void)
 {
 	return (u64)(cpu_khz) * 1000 * 60;
 }
+#endif
 
-#ifdef ARCH_HAS_NMI_WATCHDOG
-
+#ifdef arch_trigger_all_cpu_backtrace
 /* For reliability, we're prepared to waste bits here. */
 static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
 
@@ -91,18 +92,3 @@ static int __init register_trigger_all_cpu_backtrace(void)
 }
 early_initcall(register_trigger_all_cpu_backtrace);
 #endif
-
-/* STUB calls to mimic old nmi_watchdog behaviour */
-#if defined(CONFIG_X86_LOCAL_APIC)
-unsigned int nmi_watchdog = NMI_NONE;
-EXPORT_SYMBOL(nmi_watchdog);
-void acpi_nmi_enable(void) { return; }
-void acpi_nmi_disable(void) { return; }
-#endif
-atomic_t nmi_active = ATOMIC_INIT(0);	/* oprofile uses this */
-EXPORT_SYMBOL(nmi_active);
-int unknown_nmi_panic;
-void cpu_nmi_set_wd_enabled(void) { return; }
-void stop_apic_nmi_watchdog(void *unused) { return; }
-void setup_apic_nmi_watchdog(void *unused) { return; }
-int __init check_nmi_watchdog(void) { return 0; }
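For reference, the sample period returned by hw_nmi_get_sample_period() above is a raw cycle count: cpu_khz * 1000 gives cycles per second, so the perf event backing the hardlockup detector overflows (and its NMI fires) roughly once per 60 seconds of unhalted cycles. A standalone arithmetic check with an illustrative clock speed:

	#include <stdio.h>

	int main(void)
	{
		unsigned long long cpu_khz = 2400000ULL;	/* e.g. a 2.4 GHz CPU */
		unsigned long long period = cpu_khz * 1000ULL * 60ULL;

		/* 2.4e9 cycles/sec * 60 sec = 1.44e11 cycles between overflows */
		printf("sample period = %llu cycles\n", period);
		return 0;
	}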
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index fadcd743a74f..16c2db8750a2 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -54,7 +54,6 @@
 #include <asm/dma.h>
 #include <asm/timer.h>
 #include <asm/i8259.h>
-#include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
 #include <asm/setup.h>
@@ -2642,24 +2641,6 @@ static void lapic_register_intr(int irq)
2642 "edge"); 2641 "edge");
2643} 2642}
2644 2643
2645static void __init setup_nmi(void)
2646{
2647 /*
2648 * Dirty trick to enable the NMI watchdog ...
2649 * We put the 8259A master into AEOI mode and
2650 * unmask on all local APICs LVT0 as NMI.
2651 *
2652 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
2653 * is from Maciej W. Rozycki - so we do not have to EOI from
2654 * the NMI handler or the timer interrupt.
2655 */
2656 apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
2657
2658 enable_NMI_through_LVT0();
2659
2660 apic_printk(APIC_VERBOSE, " done.\n");
2661}
2662
2663/* 2644/*
2664 * This looks a bit hackish but it's about the only one way of sending 2645 * This looks a bit hackish but it's about the only one way of sending
2665 * a few INTA cycles to 8259As and any associated glue logic. ICR does 2646 * a few INTA cycles to 8259As and any associated glue logic. ICR does
@@ -2765,15 +2746,6 @@ static inline void __init check_timer(void)
 	 */
 	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
 	legacy_pic->init(1);
-#ifdef CONFIG_X86_32
-	{
-		unsigned int ver;
-
-		ver = apic_read(APIC_LVR);
-		ver = GET_APIC_VERSION(ver);
-		timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
-	}
-#endif
 
 	pin1  = find_isa_irq_pin(0, mp_INT);
 	apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2821,10 +2793,6 @@ static inline void __init check_timer(void)
 			unmask_ioapic(cfg);
 	}
 	if (timer_irq_works()) {
-		if (nmi_watchdog == NMI_IO_APIC) {
-			setup_nmi();
-			legacy_pic->unmask(0);
-		}
 		if (disable_timer_pin_1 > 0)
 			clear_IO_APIC_pin(0, pin1);
 		goto out;
@@ -2850,11 +2818,6 @@ static inline void __init check_timer(void)
 	if (timer_irq_works()) {
 		apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
 		timer_through_8259 = 1;
-		if (nmi_watchdog == NMI_IO_APIC) {
-			legacy_pic->mask(0);
-			setup_nmi();
-			legacy_pic->unmask(0);
-		}
 		goto out;
 	}
 	/*
@@ -2866,15 +2829,6 @@ static inline void __init check_timer(void)
 		apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
 	}
 
-	if (nmi_watchdog == NMI_IO_APIC) {
-		apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
-			    "through the IO-APIC - disabling NMI Watchdog!\n");
-		nmi_watchdog = NMI_NONE;
-	}
-#ifdef CONFIG_X86_32
-	timer_ack = 0;
-#endif
-
 	apic_printk(APIC_QUIET, KERN_INFO
 		    "...trying to set up timer as Virtual Wire IRQ...\n");
 
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
deleted file mode 100644
index c90041ccb742..000000000000
--- a/arch/x86/kernel/apic/nmi.c
+++ /dev/null
@@ -1,567 +0,0 @@
1/*
2 * NMI watchdog support on APIC systems
3 *
4 * Started by Ingo Molnar <mingo@redhat.com>
5 *
6 * Fixes:
7 * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
8 * Mikael Pettersson : Power Management for local APIC NMI watchdog.
9 * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
10 * Pavel Machek and
11 * Mikael Pettersson : PM converted to driver model. Disable/enable API.
12 */
13
14#include <asm/apic.h>
15
16#include <linux/nmi.h>
17#include <linux/mm.h>
18#include <linux/delay.h>
19#include <linux/interrupt.h>
20#include <linux/module.h>
21#include <linux/slab.h>
22#include <linux/sysdev.h>
23#include <linux/sysctl.h>
24#include <linux/percpu.h>
25#include <linux/kprobes.h>
26#include <linux/cpumask.h>
27#include <linux/kernel_stat.h>
28#include <linux/kdebug.h>
29#include <linux/smp.h>
30
31#include <asm/i8259.h>
32#include <asm/io_apic.h>
33#include <asm/proto.h>
34#include <asm/timer.h>
35
36#include <asm/mce.h>
37
38#include <asm/mach_traps.h>
39
40int unknown_nmi_panic;
41int nmi_watchdog_enabled;
42
43/* For reliability, we're prepared to waste bits here. */
44static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
45
46/* nmi_active:
47 * >0: the lapic NMI watchdog is active, but can be disabled
48 * <0: the lapic NMI watchdog has not been set up, and cannot
49 * be enabled
50 * 0: the lapic NMI watchdog is disabled, but can be enabled
51 */
52atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
53EXPORT_SYMBOL(nmi_active);
54
55unsigned int nmi_watchdog = NMI_NONE;
56EXPORT_SYMBOL(nmi_watchdog);
57
58static int panic_on_timeout;
59
60static unsigned int nmi_hz = HZ;
61static DEFINE_PER_CPU(short, wd_enabled);
62static int endflag __initdata;
63
64static inline unsigned int get_nmi_count(int cpu)
65{
66 return per_cpu(irq_stat, cpu).__nmi_count;
67}
68
69static inline int mce_in_progress(void)
70{
71#if defined(CONFIG_X86_MCE)
72 return atomic_read(&mce_entry) > 0;
73#endif
74 return 0;
75}
76
77/*
78 * Take the local apic timer and PIT/HPET into account. We don't
79 * know which one is active, when we have highres/dyntick on
80 */
81static inline unsigned int get_timer_irqs(int cpu)
82{
83 return per_cpu(irq_stat, cpu).apic_timer_irqs +
84 per_cpu(irq_stat, cpu).irq0_irqs;
85}
86
87#ifdef CONFIG_SMP
88/*
89 * The performance counters used by NMI_LOCAL_APIC don't trigger when
90 * the CPU is idle. To make sure the NMI watchdog really ticks on all
91 * CPUs during the test make them busy.
92 */
93static __init void nmi_cpu_busy(void *data)
94{
95 local_irq_enable_in_hardirq();
96 /*
97 * Intentionally don't use cpu_relax here. This is
98 * to make sure that the performance counter really ticks,
99 * even if there is a simulator or similar that catches the
100 * pause instruction. On a real HT machine this is fine because
101 * all other CPUs are busy with "useless" delay loops and don't
102 * care if they get somewhat less cycles.
103 */
104 while (endflag == 0)
105 mb();
106}
107#endif
108
109static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count)
110{
111 printk(KERN_CONT "\n");
112
113 printk(KERN_WARNING
114 "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
115 cpu, prev_nmi_count[cpu], get_nmi_count(cpu));
116
117 printk(KERN_WARNING
118 "Please report this to bugzilla.kernel.org,\n");
119 printk(KERN_WARNING
120 "and attach the output of the 'dmesg' command.\n");
121
122 per_cpu(wd_enabled, cpu) = 0;
123 atomic_dec(&nmi_active);
124}
125
126static void __acpi_nmi_disable(void *__unused)
127{
128 apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
129}
130
131int __init check_nmi_watchdog(void)
132{
133 unsigned int *prev_nmi_count;
134 int cpu;
135
136 if (!nmi_watchdog_active() || !atomic_read(&nmi_active))
137 return 0;
138
139 prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
140 if (!prev_nmi_count)
141 goto error;
142
143 printk(KERN_INFO "Testing NMI watchdog ... ");
144
145#ifdef CONFIG_SMP
146 if (nmi_watchdog == NMI_LOCAL_APIC)
147 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
148#endif
149
150 for_each_possible_cpu(cpu)
151 prev_nmi_count[cpu] = get_nmi_count(cpu);
152 local_irq_enable();
153 mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */
154
155 for_each_online_cpu(cpu) {
156 if (!per_cpu(wd_enabled, cpu))
157 continue;
158 if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
159 report_broken_nmi(cpu, prev_nmi_count);
160 }
161 endflag = 1;
162 if (!atomic_read(&nmi_active)) {
163 kfree(prev_nmi_count);
164 atomic_set(&nmi_active, -1);
165 goto error;
166 }
167 printk("OK.\n");
168
169 /*
170 * now that we know it works we can reduce NMI frequency to
171 * something more reasonable; makes a difference in some configs
172 */
173 if (nmi_watchdog == NMI_LOCAL_APIC)
174 nmi_hz = lapic_adjust_nmi_hz(1);
175
176 kfree(prev_nmi_count);
177 return 0;
178error:
179 if (nmi_watchdog == NMI_IO_APIC) {
180 if (!timer_through_8259)
181 legacy_pic->mask(0);
182 on_each_cpu(__acpi_nmi_disable, NULL, 1);
183 }
184
185#ifdef CONFIG_X86_32
186 timer_ack = 0;
187#endif
188 return -1;
189}
190
191static int __init setup_nmi_watchdog(char *str)
192{
193 unsigned int nmi;
194
195 if (!strncmp(str, "panic", 5)) {
196 panic_on_timeout = 1;
197 str = strchr(str, ',');
198 if (!str)
199 return 1;
200 ++str;
201 }
202
203 if (!strncmp(str, "lapic", 5))
204 nmi_watchdog = NMI_LOCAL_APIC;
205 else if (!strncmp(str, "ioapic", 6))
206 nmi_watchdog = NMI_IO_APIC;
207 else {
208 get_option(&str, &nmi);
209 if (nmi >= NMI_INVALID)
210 return 0;
211 nmi_watchdog = nmi;
212 }
213
214 return 1;
215}
216__setup("nmi_watchdog=", setup_nmi_watchdog);
217
218/*
219 * Suspend/resume support
220 */
221#ifdef CONFIG_PM
222
223static int nmi_pm_active; /* nmi_active before suspend */
224
225static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
226{
227 /* only CPU0 goes here, other CPUs should be offline */
228 nmi_pm_active = atomic_read(&nmi_active);
229 stop_apic_nmi_watchdog(NULL);
230 BUG_ON(atomic_read(&nmi_active) != 0);
231 return 0;
232}
233
234static int lapic_nmi_resume(struct sys_device *dev)
235{
236 /* only CPU0 goes here, other CPUs should be offline */
237 if (nmi_pm_active > 0) {
238 setup_apic_nmi_watchdog(NULL);
239 touch_nmi_watchdog();
240 }
241 return 0;
242}
243
244static struct sysdev_class nmi_sysclass = {
245 .name = "lapic_nmi",
246 .resume = lapic_nmi_resume,
247 .suspend = lapic_nmi_suspend,
248};
249
250static struct sys_device device_lapic_nmi = {
251 .id = 0,
252 .cls = &nmi_sysclass,
253};
254
255static int __init init_lapic_nmi_sysfs(void)
256{
257 int error;
258
259 /*
260 * should really be a BUG_ON but b/c this is an
261 * init call, it just doesn't work. -dcz
262 */
263 if (nmi_watchdog != NMI_LOCAL_APIC)
264 return 0;
265
266 if (atomic_read(&nmi_active) < 0)
267 return 0;
268
269 error = sysdev_class_register(&nmi_sysclass);
270 if (!error)
271 error = sysdev_register(&device_lapic_nmi);
272 return error;
273}
274
275/* must come after the local APIC's device_initcall() */
276late_initcall(init_lapic_nmi_sysfs);
277
278#endif /* CONFIG_PM */
279
280static void __acpi_nmi_enable(void *__unused)
281{
282 apic_write(APIC_LVT0, APIC_DM_NMI);
283}
284
285/*
286 * Enable timer based NMIs on all CPUs:
287 */
288void acpi_nmi_enable(void)
289{
290 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
291 on_each_cpu(__acpi_nmi_enable, NULL, 1);
292}
293
294/*
295 * Disable timer based NMIs on all CPUs:
296 */
297void acpi_nmi_disable(void)
298{
299 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
300 on_each_cpu(__acpi_nmi_disable, NULL, 1);
301}
302
303/*
304 * This function is called as soon as the LAPIC NMI watchdog driver has
305 * everything in place and is ready to check whether the NMIs belong to it
306 */
307void cpu_nmi_set_wd_enabled(void)
308{
309 __get_cpu_var(wd_enabled) = 1;
310}
311
312void setup_apic_nmi_watchdog(void *unused)
313{
314 if (__get_cpu_var(wd_enabled))
315 return;
316
317 /* cheap hack to support suspend/resume */
318 /* if cpu0 is not active, neither should the other cpus be */
319 if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0)
320 return;
321
322 switch (nmi_watchdog) {
323 case NMI_LOCAL_APIC:
324 if (lapic_watchdog_init(nmi_hz) < 0) {
325 __get_cpu_var(wd_enabled) = 0;
326 return;
327 }
328 /* FALL THROUGH */
329 case NMI_IO_APIC:
330 __get_cpu_var(wd_enabled) = 1;
331 atomic_inc(&nmi_active);
332 }
333}
334
335void stop_apic_nmi_watchdog(void *unused)
336{
337 /* only support LOCAL and IO APICs for now */
338 if (!nmi_watchdog_active())
339 return;
340 if (__get_cpu_var(wd_enabled) == 0)
341 return;
342 if (nmi_watchdog == NMI_LOCAL_APIC)
343 lapic_watchdog_stop();
344 else
345 __acpi_nmi_disable(NULL);
346 __get_cpu_var(wd_enabled) = 0;
347 atomic_dec(&nmi_active);
348}
349
350/*
351 * The best way to detect whether a CPU has a 'hard lockup' problem
352 * is to check its local APIC timer IRQ counts. If they are not
353 * changing, that CPU has some problem.
354 *
355 * As these watchdog NMI IRQs are generated on every CPU, we only
356 * have to check the current processor.
357 *
358 * Since NMIs don't listen to _any_ locks, we have to be extremely
359 * careful not to rely on unsafe variables. The printk might lock
360 * up though, so we have to break up any console locks first ...
361 * [if more tty-related locks are added, break them up here too!]
362 */
363
364static DEFINE_PER_CPU(unsigned, last_irq_sum);
365static DEFINE_PER_CPU(long, alert_counter);
366static DEFINE_PER_CPU(int, nmi_touch);
367
368void touch_nmi_watchdog(void)
369{
370 if (nmi_watchdog_active()) {
371 unsigned cpu;
372
373 /*
374 * Tell other CPUs to reset their alert counters. We cannot
375 * do it ourselves because the alert count increase is not
376 * atomic.
377 */
378 for_each_present_cpu(cpu) {
379 if (per_cpu(nmi_touch, cpu) != 1)
380 per_cpu(nmi_touch, cpu) = 1;
381 }
382 }
383
384 /*
385 * Tickle the softlockup detector too:
386 */
387 touch_softlockup_watchdog();
388}
389EXPORT_SYMBOL(touch_nmi_watchdog);
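
Because touch_nmi_watchdog() only sets per-CPU flags, it is cheap to call from long-running code. A hedged sketch of the intended call pattern, where poll_hw() stands in for a real completion test:

/* Sketch only: a driver-style busy-wait that would otherwise look
 * like a lockup to the watchdog. poll_hw() is hypothetical. */
static void wait_for_hw(void)
{
	while (!poll_hw()) {
		cpu_relax();
		touch_nmi_watchdog();	/* asks all CPUs to reset alert counters */
	}
}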
390
391notrace __kprobes int
392nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
393{
394 /*
395 * Since current_thread_info() is always on the stack, and we
396 * always switch the stack NMI-atomically, it's safe to use
397 * smp_processor_id().
398 */
399 unsigned int sum;
400 int touched = 0;
401 int cpu = smp_processor_id();
402 int rc = 0;
403
404 sum = get_timer_irqs(cpu);
405
406 if (__get_cpu_var(nmi_touch)) {
407 __get_cpu_var(nmi_touch) = 0;
408 touched = 1;
409 }
410
411 /* We can be called before check_nmi_watchdog, hence NULL check. */
412 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
413 static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */
414
415 raw_spin_lock(&lock);
416 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
417 show_regs(regs);
418 dump_stack();
419 raw_spin_unlock(&lock);
420 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
421
422 rc = 1;
423 }
424
425 /* Could check oops_in_progress here too, but it's safer not to */
426 if (mce_in_progress())
427 touched = 1;
428
429 /* if none of the timers is firing, this cpu isn't doing much */
430 if (!touched && __get_cpu_var(last_irq_sum) == sum) {
431 /*
432 * Ayiee, looks like this CPU is stuck ...
433 * wait a few IRQs (5 seconds) before doing the oops ...
434 */
435 __this_cpu_inc(alert_counter);
436 if (__this_cpu_read(alert_counter) == 5 * nmi_hz)
437 /*
438 * die_nmi will return ONLY if NOTIFY_STOP happens.
439 */
440 die_nmi("BUG: NMI Watchdog detected LOCKUP",
441 regs, panic_on_timeout);
442 } else {
443 __get_cpu_var(last_irq_sum) = sum;
444 __this_cpu_write(alert_counter, 0);
445 }
446
447 /* see if the nmi watchdog went off */
448 if (!__get_cpu_var(wd_enabled))
449 return rc;
450 switch (nmi_watchdog) {
451 case NMI_LOCAL_APIC:
452 rc |= lapic_wd_event(nmi_hz);
453 break;
454 case NMI_IO_APIC:
455 /*
456 * We don't know how to accurately check for this,
457 * so just assume it was a watchdog timer interrupt.
458 * This matches the old behaviour.
459 */
460 rc = 1;
461 break;
462 }
463 return rc;
464}
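
Putting numbers on the threshold above: each tick compares the per-CPU timer-IRQ sum against the previous NMI's snapshot, and only after 5 * nmi_hz consecutive unchanged samples, i.e. roughly five seconds of NMIs at the current rate, does die_nmi() fire, so a single missed timer tick never panics the box.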
465
466#ifdef CONFIG_SYSCTL
467
468static void enable_ioapic_nmi_watchdog_single(void *unused)
469{
470 __get_cpu_var(wd_enabled) = 1;
471 atomic_inc(&nmi_active);
472 __acpi_nmi_enable(NULL);
473}
474
475static void enable_ioapic_nmi_watchdog(void)
476{
477 on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1);
478 touch_nmi_watchdog();
479}
480
481static void disable_ioapic_nmi_watchdog(void)
482{
483 on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
484}
485
486static int __init setup_unknown_nmi_panic(char *str)
487{
488 unknown_nmi_panic = 1;
489 return 1;
490}
491__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
492
493static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
494{
495 unsigned char reason = get_nmi_reason();
496 char buf[64];
497
498 sprintf(buf, "NMI received for unknown reason %02x\n", reason);
499 die_nmi(buf, regs, 1); /* Always panic here */
500 return 0;
501}
502
503/*
504 * proc handler for /proc/sys/kernel/nmi
505 */
506int proc_nmi_enabled(struct ctl_table *table, int write,
507 void __user *buffer, size_t *length, loff_t *ppos)
508{
509 int old_state;
510
511 nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
512 old_state = nmi_watchdog_enabled;
513 proc_dointvec(table, write, buffer, length, ppos);
514 if (!!old_state == !!nmi_watchdog_enabled)
515 return 0;
516
517 if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) {
518 printk(KERN_WARNING
519 "NMI watchdog is permanently disabled\n");
520 return -EIO;
521 }
522
523 if (nmi_watchdog == NMI_LOCAL_APIC) {
524 if (nmi_watchdog_enabled)
525 enable_lapic_nmi_watchdog();
526 else
527 disable_lapic_nmi_watchdog();
528 } else if (nmi_watchdog == NMI_IO_APIC) {
529 if (nmi_watchdog_enabled)
530 enable_ioapic_nmi_watchdog();
531 else
532 disable_ioapic_nmi_watchdog();
533 } else {
534 printk(KERN_WARNING
535 "NMI watchdog doesn't know what hardware to touch\n");
536 return -EIO;
537 }
538 return 0;
539}
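
For completeness, this is the path user space takes to reach the handler above; a minimal sketch, with error handling trimmed and the path taken from the comment above:

#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/proc/sys/kernel/nmi", O_WRONLY);

	if (fd < 0)
		return 1;
	write(fd, "1", 1);	/* "1" enables the watchdog, "0" disables it */
	close(fd);
	return 0;
}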
540
541#endif /* CONFIG_SYSCTL */
542
543int do_nmi_callback(struct pt_regs *regs, int cpu)
544{
545#ifdef CONFIG_SYSCTL
546 if (unknown_nmi_panic)
547 return unknown_nmi_panic_callback(regs, cpu);
548#endif
549 return 0;
550}
551
552void arch_trigger_all_cpu_backtrace(void)
553{
554 int i;
555
556 cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
557
558 printk(KERN_INFO "sending NMI to all CPUs:\n");
559 apic->send_IPI_all(NMI_VECTOR);
560
561 /* Wait for up to 10 seconds for all CPUs to do the backtrace */
562 for (i = 0; i < 10 * 1000; i++) {
563 if (cpumask_empty(to_cpumask(backtrace_mask)))
564 break;
565 mdelay(1);
566 }
567}
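
In practice this helper is wired to the magic SysRq backtrace key (typically SysRq-l), so assuming a standard sysrq configuration it can be exercised with "echo l > /proc/sysrq-trigger"; the mdelay() loop above then gives straggling CPUs up to ten seconds before giving up on them.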
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4b68bda30938..1d59834396bd 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -894,7 +894,6 @@ void __init identify_boot_cpu(void)
894#else 894#else
895 vgetcpu_set_mode(); 895 vgetcpu_set_mode();
896#endif 896#endif
897 init_hw_perf_events();
898} 897}
899 898
900void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) 899void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6d75b9145b13..0a360d146596 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -330,9 +330,6 @@ static bool reserve_pmc_hardware(void)
330{ 330{
331 int i; 331 int i;
332 332
333 if (nmi_watchdog == NMI_LOCAL_APIC)
334 disable_lapic_nmi_watchdog();
335
336 for (i = 0; i < x86_pmu.num_counters; i++) { 333 for (i = 0; i < x86_pmu.num_counters; i++) {
337 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) 334 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
338 goto perfctr_fail; 335 goto perfctr_fail;
@@ -355,9 +352,6 @@ perfctr_fail:
355 for (i--; i >= 0; i--) 352 for (i--; i >= 0; i--)
356 release_perfctr_nmi(x86_pmu.perfctr + i); 353 release_perfctr_nmi(x86_pmu.perfctr + i);
357 354
358 if (nmi_watchdog == NMI_LOCAL_APIC)
359 enable_lapic_nmi_watchdog();
360
361 return false; 355 return false;
362} 356}
363 357
@@ -369,9 +363,6 @@ static void release_pmc_hardware(void)
369 release_perfctr_nmi(x86_pmu.perfctr + i); 363 release_perfctr_nmi(x86_pmu.perfctr + i);
370 release_evntsel_nmi(x86_pmu.eventsel + i); 364 release_evntsel_nmi(x86_pmu.eventsel + i);
371 } 365 }
372
373 if (nmi_watchdog == NMI_LOCAL_APIC)
374 enable_lapic_nmi_watchdog();
375} 366}
376 367
377#else 368#else
@@ -384,15 +375,53 @@ static void release_pmc_hardware(void) {}
384static bool check_hw_exists(void) 375static bool check_hw_exists(void)
385{ 376{
386 u64 val, val_new = 0; 377 u64 val, val_new = 0;
387 int ret = 0; 378 int i, reg, ret = 0;
379
380 /*
381 * Check to see if the BIOS enabled any of the counters, if so
382 * complain and bail.
383 */
384 for (i = 0; i < x86_pmu.num_counters; i++) {
385 reg = x86_pmu.eventsel + i;
386 ret = rdmsrl_safe(reg, &val);
387 if (ret)
388 goto msr_fail;
389 if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
390 goto bios_fail;
391 }
388 392
393 if (x86_pmu.num_counters_fixed) {
394 reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
395 ret = rdmsrl_safe(reg, &val);
396 if (ret)
397 goto msr_fail;
398 for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
399 if (val & (0x03 << i*4))
400 goto bios_fail;
401 }
402 }
403
404 /*
405 * Now write a value and read it back to see if it matches,
406 * this is needed to detect certain hardware emulators (qemu/kvm)
407 * that don't trap on the MSR access and always return 0s.
408 */
389 val = 0xabcdUL; 409 val = 0xabcdUL;
390 ret |= checking_wrmsrl(x86_pmu.perfctr, val); 410 ret = checking_wrmsrl(x86_pmu.perfctr, val);
391 ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new); 411 ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
392 if (ret || val != val_new) 412 if (ret || val != val_new)
393 return false; 413 goto msr_fail;
394 414
395 return true; 415 return true;
416
417bios_fail:
418 printk(KERN_CONT "Broken BIOS detected, using software events only.\n");
419 printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val);
420 return false;
421
422msr_fail:
423 printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
424 return false;
396} 425}
397 426
398static void reserve_ds_buffers(void); 427static void reserve_ds_buffers(void);
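
A note on the fixed-counter probe in this hunk: MSR_ARCH_PERFMON_FIXED_CTR_CTRL packs one 4-bit control field per fixed counter, and the low two bits of each field are the ring-0/ring-3 enable bits, so testing val & (0x03 << i*4) asks whether the BIOS left fixed counter i enabled at any privilege level.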
@@ -451,7 +480,7 @@ static int x86_setup_perfctr(struct perf_event *event)
451 struct hw_perf_event *hwc = &event->hw; 480 struct hw_perf_event *hwc = &event->hw;
452 u64 config; 481 u64 config;
453 482
454 if (!hwc->sample_period) { 483 if (!is_sampling_event(event)) {
455 hwc->sample_period = x86_pmu.max_period; 484 hwc->sample_period = x86_pmu.max_period;
456 hwc->last_period = hwc->sample_period; 485 hwc->last_period = hwc->sample_period;
457 local64_set(&hwc->period_left, hwc->sample_period); 486 local64_set(&hwc->period_left, hwc->sample_period);
@@ -1362,7 +1391,7 @@ static void __init pmu_check_apic(void)
1362 pr_info("no hardware sampling interrupt available.\n"); 1391 pr_info("no hardware sampling interrupt available.\n");
1363} 1392}
1364 1393
1365void __init init_hw_perf_events(void) 1394int __init init_hw_perf_events(void)
1366{ 1395{
1367 struct event_constraint *c; 1396 struct event_constraint *c;
1368 int err; 1397 int err;
@@ -1377,20 +1406,18 @@ void __init init_hw_perf_events(void)
1377 err = amd_pmu_init(); 1406 err = amd_pmu_init();
1378 break; 1407 break;
1379 default: 1408 default:
1380 return; 1409 return 0;
1381 } 1410 }
1382 if (err != 0) { 1411 if (err != 0) {
1383 pr_cont("no PMU driver, software events only.\n"); 1412 pr_cont("no PMU driver, software events only.\n");
1384 return; 1413 return 0;
1385 } 1414 }
1386 1415
1387 pmu_check_apic(); 1416 pmu_check_apic();
1388 1417
1389 /* sanity check that the hardware exists or is emulated */ 1418 /* sanity check that the hardware exists or is emulated */
1390 if (!check_hw_exists()) { 1419 if (!check_hw_exists())
1391 pr_cont("Broken PMU hardware detected, software events only.\n"); 1420 return 0;
1392 return;
1393 }
1394 1421
1395 pr_cont("%s PMU driver.\n", x86_pmu.name); 1422 pr_cont("%s PMU driver.\n", x86_pmu.name);
1396 1423
@@ -1438,9 +1465,12 @@ void __init init_hw_perf_events(void)
1438 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); 1465 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
1439 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); 1466 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
1440 1467
1441 perf_pmu_register(&pmu); 1468 perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
1442 perf_cpu_notifier(x86_pmu_notifier); 1469 perf_cpu_notifier(x86_pmu_notifier);
1470
1471 return 0;
1443} 1472}
1473early_initcall(init_hw_perf_events);
1444 1474
1445static inline void x86_pmu_read(struct perf_event *event) 1475static inline void x86_pmu_read(struct perf_event *event)
1446{ 1476{
@@ -1686,7 +1716,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
1686 1716
1687 perf_callchain_store(entry, regs->ip); 1717 perf_callchain_store(entry, regs->ip);
1688 1718
1689 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); 1719 dump_trace(NULL, regs, NULL, &backtrace_ops, entry);
1690} 1720}
1691 1721
1692#ifdef CONFIG_COMPAT 1722#ifdef CONFIG_COMPAT
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index e421b8cd6944..67e2202a6039 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -1,7 +1,5 @@
1#ifdef CONFIG_CPU_SUP_AMD 1#ifdef CONFIG_CPU_SUP_AMD
2 2
3static DEFINE_RAW_SPINLOCK(amd_nb_lock);
4
5static __initconst const u64 amd_hw_cache_event_ids 3static __initconst const u64 amd_hw_cache_event_ids
6 [PERF_COUNT_HW_CACHE_MAX] 4 [PERF_COUNT_HW_CACHE_MAX]
7 [PERF_COUNT_HW_CACHE_OP_MAX] 5 [PERF_COUNT_HW_CACHE_OP_MAX]
@@ -275,7 +273,7 @@ done:
275 return &emptyconstraint; 273 return &emptyconstraint;
276} 274}
277 275
278static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) 276static struct amd_nb *amd_alloc_nb(int cpu)
279{ 277{
280 struct amd_nb *nb; 278 struct amd_nb *nb;
281 int i; 279 int i;
@@ -285,7 +283,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
285 if (!nb) 283 if (!nb)
286 return NULL; 284 return NULL;
287 285
288 nb->nb_id = nb_id; 286 nb->nb_id = -1;
289 287
290 /* 288 /*
291 * initialize all possible NB constraints 289 * initialize all possible NB constraints
@@ -306,7 +304,7 @@ static int amd_pmu_cpu_prepare(int cpu)
306 if (boot_cpu_data.x86_max_cores < 2) 304 if (boot_cpu_data.x86_max_cores < 2)
307 return NOTIFY_OK; 305 return NOTIFY_OK;
308 306
309 cpuc->amd_nb = amd_alloc_nb(cpu, -1); 307 cpuc->amd_nb = amd_alloc_nb(cpu);
310 if (!cpuc->amd_nb) 308 if (!cpuc->amd_nb)
311 return NOTIFY_BAD; 309 return NOTIFY_BAD;
312 310
@@ -325,8 +323,6 @@ static void amd_pmu_cpu_starting(int cpu)
325 nb_id = amd_get_nb_id(cpu); 323 nb_id = amd_get_nb_id(cpu);
326 WARN_ON_ONCE(nb_id == BAD_APICID); 324 WARN_ON_ONCE(nb_id == BAD_APICID);
327 325
328 raw_spin_lock(&amd_nb_lock);
329
330 for_each_online_cpu(i) { 326 for_each_online_cpu(i) {
331 nb = per_cpu(cpu_hw_events, i).amd_nb; 327 nb = per_cpu(cpu_hw_events, i).amd_nb;
332 if (WARN_ON_ONCE(!nb)) 328 if (WARN_ON_ONCE(!nb))
@@ -341,8 +337,6 @@ static void amd_pmu_cpu_starting(int cpu)
341 337
342 cpuc->amd_nb->nb_id = nb_id; 338 cpuc->amd_nb->nb_id = nb_id;
343 cpuc->amd_nb->refcnt++; 339 cpuc->amd_nb->refcnt++;
344
345 raw_spin_unlock(&amd_nb_lock);
346} 340}
347 341
348static void amd_pmu_cpu_dead(int cpu) 342static void amd_pmu_cpu_dead(int cpu)
@@ -354,8 +348,6 @@ static void amd_pmu_cpu_dead(int cpu)
354 348
355 cpuhw = &per_cpu(cpu_hw_events, cpu); 349 cpuhw = &per_cpu(cpu_hw_events, cpu);
356 350
357 raw_spin_lock(&amd_nb_lock);
358
359 if (cpuhw->amd_nb) { 351 if (cpuhw->amd_nb) {
360 struct amd_nb *nb = cpuhw->amd_nb; 352 struct amd_nb *nb = cpuhw->amd_nb;
361 353
@@ -364,8 +356,6 @@ static void amd_pmu_cpu_dead(int cpu)
364 356
365 cpuhw->amd_nb = NULL; 357 cpuhw->amd_nb = NULL;
366 } 358 }
367
368 raw_spin_unlock(&amd_nb_lock);
369} 359}
370 360
371static __initconst const struct x86_pmu amd_pmu = { 361static __initconst const struct x86_pmu amd_pmu = {
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index c8f5c088cad1..24e390e40f2e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -816,6 +816,32 @@ static int intel_pmu_hw_config(struct perf_event *event)
816 if (ret) 816 if (ret)
817 return ret; 817 return ret;
818 818
819 if (event->attr.precise_ip &&
820 (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
821 /*
822 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
823 * (0x003c) so that we can use it with PEBS.
824 *
825 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
826 * PEBS capable. However we can use INST_RETIRED.ANY_P
827 * (0x00c0), which is a PEBS capable event, to get the same
828 * count.
829 *
830 * With a counter mask, INST_RETIRED.ANY_P counts cycles rather
831 * than instructions: cycles in which at least CNTMASK
832 * instructions retire. Setting CNTMASK to a value (16) larger
833 * than the maximum number of instructions that can retire per
834 * cycle (4) and then inverting the condition counts all cycles
835 * that retire fewer than 16 instructions, i.e. every cycle.
836 *
837 * Thereby we gain a PEBS capable cycle counter.
838 */
839 u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */
840
841 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
842 event->hw.config = alt_config;
843 }
844
819 if (event->attr.type != PERF_TYPE_RAW) 845 if (event->attr.type != PERF_TYPE_RAW)
820 return 0; 846 return 0;
821 847
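
To see that the alternative encoding really is the event the comment describes, here is a small, hedged sanity check; the macro names below are illustrative stand-ins for the eventsel bit fields, not the kernel's own definitions:

#include <assert.h>
#include <stdint.h>

#define EVSEL(e)   ((uint64_t)(e))        /* event select, bits 0-7   */
#define UMASK(u)   ((uint64_t)(u) << 8)   /* unit mask,    bits 8-15  */
#define INV        (1ULL << 23)           /* invert counter mask      */
#define CMASK(c)   ((uint64_t)(c) << 24)  /* counter mask, bits 24-31 */

int main(void)
{
	/* INST_RETIRED.ANY_P (0xc0) + CMASK=16 + INV == the constant above */
	assert((EVSEL(0xc0) | UMASK(0) | INV | CMASK(16)) == 0x108000c0ULL);
	return 0;
}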
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index d9f4ff8fcd69..d5a236615501 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -16,32 +16,12 @@
16#include <linux/kernel.h> 16#include <linux/kernel.h>
17#include <linux/bitops.h> 17#include <linux/bitops.h>
18#include <linux/smp.h> 18#include <linux/smp.h>
19#include <linux/nmi.h> 19#include <asm/nmi.h>
20#include <linux/kprobes.h> 20#include <linux/kprobes.h>
21 21
22#include <asm/apic.h> 22#include <asm/apic.h>
23#include <asm/perf_event.h> 23#include <asm/perf_event.h>
24 24
25struct nmi_watchdog_ctlblk {
26 unsigned int cccr_msr;
27 unsigned int perfctr_msr; /* the MSR to reset in NMI handler */
28 unsigned int evntsel_msr; /* the MSR to select the events to handle */
29};
30
31/* Interface defining a CPU specific perfctr watchdog */
32struct wd_ops {
33 int (*reserve)(void);
34 void (*unreserve)(void);
35 int (*setup)(unsigned nmi_hz);
36 void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
37 void (*stop)(void);
38 unsigned perfctr;
39 unsigned evntsel;
40 u64 checkbit;
41};
42
43static const struct wd_ops *wd_ops;
44
45/* 25/*
46 * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its 26 * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
47 * offset from MSR_P4_BSU_ESCR0. 27 * offset from MSR_P4_BSU_ESCR0.
@@ -60,8 +40,6 @@ static const struct wd_ops *wd_ops;
60static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS); 40static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
61static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS); 41static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);
62 42
63static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
64
65/* converts an msr to an appropriate reservation bit */ 43/* converts an msr to an appropriate reservation bit */
66static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) 44static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
67{ 45{
@@ -172,623 +150,3 @@ void release_evntsel_nmi(unsigned int msr)
172 clear_bit(counter, evntsel_nmi_owner); 150 clear_bit(counter, evntsel_nmi_owner);
173} 151}
174EXPORT_SYMBOL(release_evntsel_nmi); 152EXPORT_SYMBOL(release_evntsel_nmi);
175
176void disable_lapic_nmi_watchdog(void)
177{
178 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
179
180 if (atomic_read(&nmi_active) <= 0)
181 return;
182
183 on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
184
185 if (wd_ops)
186 wd_ops->unreserve();
187
188 BUG_ON(atomic_read(&nmi_active) != 0);
189}
190
191void enable_lapic_nmi_watchdog(void)
192{
193 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
194
195 /* are we already enabled */
196 if (atomic_read(&nmi_active) != 0)
197 return;
198
199 /* are we lapic aware */
200 if (!wd_ops)
201 return;
202 if (!wd_ops->reserve()) {
203 printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
204 return;
205 }
206
207 on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
208 touch_nmi_watchdog();
209}
210
211/*
212 * Activate the NMI watchdog via the local APIC.
213 */
214
215static unsigned int adjust_for_32bit_ctr(unsigned int hz)
216{
217 u64 counter_val;
218 unsigned int retval = hz;
219
220 /*
221 * On Intel CPUs with P6/ARCH_PERFMON, only 32 bits of the counter
222 * are writable, with the higher bits sign-extended from bit 31.
223 * So we can only program the counter with 31-bit values; bit 31
224 * must be 1 so that bits 32..63 sign-extend to 1.
225 * Find the appropriate nmi_hz.
226 */
227 counter_val = (u64)cpu_khz * 1000;
228 do_div(counter_val, retval);
229 if (counter_val > 0x7fffffffULL) {
230 u64 count = (u64)cpu_khz * 1000;
231 do_div(count, 0x7fffffffUL);
232 retval = count + 1;
233 }
234 return retval;
235}
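
To make the arithmetic concrete: on a hypothetical 3.2 GHz CPU with nmi_hz = 1, counter_val = 3,200,000,000, which exceeds 0x7fffffff (2,147,483,647), so the function returns 3,200,000,000 / 2,147,483,647 + 1 = 2 and the watchdog runs at 2 Hz instead of the requested 1 Hz, keeping each programmed period inside the writable 31 bits.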
236
237static void write_watchdog_counter(unsigned int perfctr_msr,
238 const char *descr, unsigned nmi_hz)
239{
240 u64 count = (u64)cpu_khz * 1000;
241
242 do_div(count, nmi_hz);
243 if (descr)
244 pr_debug("setting %s to -0x%08Lx\n", descr, count);
245 wrmsrl(perfctr_msr, 0 - count);
246}
247
248static void write_watchdog_counter32(unsigned int perfctr_msr,
249 const char *descr, unsigned nmi_hz)
250{
251 u64 count = (u64)cpu_khz * 1000;
252
253 do_div(count, nmi_hz);
254 if (descr)
255 pr_debug("setting %s to -0x%08Lx\n", descr, count);
256 wrmsr(perfctr_msr, (u32)(-count), 0);
257}
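
Both helpers program the perfctr with the negative period so that it counts up and overflows, raising the NMI, after exactly cpu_khz * 1000 / nmi_hz cycles; continuing the example above, a 3.2 GHz CPU at nmi_hz = 2 gets count = 1,600,000,000, and the 32-bit variant simply truncates -count to the writable low word.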
258
259/*
260 * AMD K7/K8/Family10h/Family11h support.
261 * AMD keeps this interface nicely stable so there is not much variety
262 */
263#define K7_EVNTSEL_ENABLE (1 << 22)
264#define K7_EVNTSEL_INT (1 << 20)
265#define K7_EVNTSEL_OS (1 << 17)
266#define K7_EVNTSEL_USR (1 << 16)
267#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
268#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
269
270static int setup_k7_watchdog(unsigned nmi_hz)
271{
272 unsigned int perfctr_msr, evntsel_msr;
273 unsigned int evntsel;
274 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
275
276 perfctr_msr = wd_ops->perfctr;
277 evntsel_msr = wd_ops->evntsel;
278
279 wrmsrl(perfctr_msr, 0UL);
280
281 evntsel = K7_EVNTSEL_INT
282 | K7_EVNTSEL_OS
283 | K7_EVNTSEL_USR
284 | K7_NMI_EVENT;
285
286 /* setup the timer */
287 wrmsr(evntsel_msr, evntsel, 0);
288 write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);
289
290 /* initialize the wd struct before enabling */
291 wd->perfctr_msr = perfctr_msr;
292 wd->evntsel_msr = evntsel_msr;
293 wd->cccr_msr = 0; /* unused */
294
295 /* ok, everything is initialized, announce that we're set */
296 cpu_nmi_set_wd_enabled();
297
298 apic_write(APIC_LVTPC, APIC_DM_NMI);
299 evntsel |= K7_EVNTSEL_ENABLE;
300 wrmsr(evntsel_msr, evntsel, 0);
301
302 return 1;
303}
304
305static void single_msr_stop_watchdog(void)
306{
307 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
308
309 wrmsr(wd->evntsel_msr, 0, 0);
310}
311
312static int single_msr_reserve(void)
313{
314 if (!reserve_perfctr_nmi(wd_ops->perfctr))
315 return 0;
316
317 if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
318 release_perfctr_nmi(wd_ops->perfctr);
319 return 0;
320 }
321 return 1;
322}
323
324static void single_msr_unreserve(void)
325{
326 release_evntsel_nmi(wd_ops->evntsel);
327 release_perfctr_nmi(wd_ops->perfctr);
328}
329
330static void __kprobes
331single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
332{
333 /* start the cycle over again */
334 write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
335}
336
337static const struct wd_ops k7_wd_ops = {
338 .reserve = single_msr_reserve,
339 .unreserve = single_msr_unreserve,
340 .setup = setup_k7_watchdog,
341 .rearm = single_msr_rearm,
342 .stop = single_msr_stop_watchdog,
343 .perfctr = MSR_K7_PERFCTR0,
344 .evntsel = MSR_K7_EVNTSEL0,
345 .checkbit = 1ULL << 47,
346};
347
348/*
349 * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
350 */
351#define P6_EVNTSEL0_ENABLE (1 << 22)
352#define P6_EVNTSEL_INT (1 << 20)
353#define P6_EVNTSEL_OS (1 << 17)
354#define P6_EVNTSEL_USR (1 << 16)
355#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
356#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
357
358static int setup_p6_watchdog(unsigned nmi_hz)
359{
360 unsigned int perfctr_msr, evntsel_msr;
361 unsigned int evntsel;
362 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
363
364 perfctr_msr = wd_ops->perfctr;
365 evntsel_msr = wd_ops->evntsel;
366
367 /* KVM doesn't implement this MSR */
368 if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
369 return 0;
370
371 evntsel = P6_EVNTSEL_INT
372 | P6_EVNTSEL_OS
373 | P6_EVNTSEL_USR
374 | P6_NMI_EVENT;
375
376 /* setup the timer */
377 wrmsr(evntsel_msr, evntsel, 0);
378 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
379 write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);
380
381 /* initialize the wd struct before enabling */
382 wd->perfctr_msr = perfctr_msr;
383 wd->evntsel_msr = evntsel_msr;
384 wd->cccr_msr = 0; /* unused */
385
386 /* ok, everything is initialized, announce that we're set */
387 cpu_nmi_set_wd_enabled();
388
389 apic_write(APIC_LVTPC, APIC_DM_NMI);
390 evntsel |= P6_EVNTSEL0_ENABLE;
391 wrmsr(evntsel_msr, evntsel, 0);
392
393 return 1;
394}
395
396static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
397{
398 /*
399 * P6-based Pentium M needs to re-unmask
400 * the apic vector, but it doesn't hurt
401 * other P6 variants.
402 * ArchPerfmon/Core Duo also needs this.
403 */
404 apic_write(APIC_LVTPC, APIC_DM_NMI);
405
406 /* P6/ARCH_PERFMON has 32 bit counter write */
407 write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
408}
409
410static const struct wd_ops p6_wd_ops = {
411 .reserve = single_msr_reserve,
412 .unreserve = single_msr_unreserve,
413 .setup = setup_p6_watchdog,
414 .rearm = p6_rearm,
415 .stop = single_msr_stop_watchdog,
416 .perfctr = MSR_P6_PERFCTR0,
417 .evntsel = MSR_P6_EVNTSEL0,
418 .checkbit = 1ULL << 39,
419};
420
421/*
422 * Intel P4 performance counters.
423 * By far the most complicated of all.
424 */
425#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1 << 7)
426#define P4_ESCR_EVENT_SELECT(N) ((N) << 25)
427#define P4_ESCR_OS (1 << 3)
428#define P4_ESCR_USR (1 << 2)
429#define P4_CCCR_OVF_PMI0 (1 << 26)
430#define P4_CCCR_OVF_PMI1 (1 << 27)
431#define P4_CCCR_THRESHOLD(N) ((N) << 20)
432#define P4_CCCR_COMPLEMENT (1 << 19)
433#define P4_CCCR_COMPARE (1 << 18)
434#define P4_CCCR_REQUIRED (3 << 16)
435#define P4_CCCR_ESCR_SELECT(N) ((N) << 13)
436#define P4_CCCR_ENABLE (1 << 12)
437#define P4_CCCR_OVF (1 << 31)
438
439#define P4_CONTROLS 18
440static unsigned int p4_controls[18] = {
441 MSR_P4_BPU_CCCR0,
442 MSR_P4_BPU_CCCR1,
443 MSR_P4_BPU_CCCR2,
444 MSR_P4_BPU_CCCR3,
445 MSR_P4_MS_CCCR0,
446 MSR_P4_MS_CCCR1,
447 MSR_P4_MS_CCCR2,
448 MSR_P4_MS_CCCR3,
449 MSR_P4_FLAME_CCCR0,
450 MSR_P4_FLAME_CCCR1,
451 MSR_P4_FLAME_CCCR2,
452 MSR_P4_FLAME_CCCR3,
453 MSR_P4_IQ_CCCR0,
454 MSR_P4_IQ_CCCR1,
455 MSR_P4_IQ_CCCR2,
456 MSR_P4_IQ_CCCR3,
457 MSR_P4_IQ_CCCR4,
458 MSR_P4_IQ_CCCR5,
459};
460/*
461 * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
462 * CRU_ESCR0 (with any non-null event selector) through a complemented
463 * max threshold. [IA32-Vol3, Section 14.9.9]
464 */
465static int setup_p4_watchdog(unsigned nmi_hz)
466{
467 unsigned int perfctr_msr, evntsel_msr, cccr_msr;
468 unsigned int evntsel, cccr_val;
469 unsigned int misc_enable, dummy;
470 unsigned int ht_num;
471 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
472
473 rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
474 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
475 return 0;
476
477#ifdef CONFIG_SMP
478 /* detect which hyperthread we are on */
479 if (smp_num_siblings == 2) {
480 unsigned int ebx, apicid;
481
482 ebx = cpuid_ebx(1);
483 apicid = (ebx >> 24) & 0xff;
484 ht_num = apicid & 1;
485 } else
486#endif
487 ht_num = 0;
488
489 /*
490 * performance counters are shared resources, so
491 * assign each hyperthread its own set
492 * (re-using the ESCR0 register seems safe
493 * and keeps the cccr_val the same)
494 */
495 if (!ht_num) {
496 /* logical cpu 0 */
497 perfctr_msr = MSR_P4_IQ_PERFCTR0;
498 evntsel_msr = MSR_P4_CRU_ESCR0;
499 cccr_msr = MSR_P4_IQ_CCCR0;
500 cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
501
502 /*
503 * If we're on the kdump kernel or in some other situation, we may
504 * still have other performance counter registers set to
505 * interrupt and they'll keep interrupting forever because
506 * of the P4_CCCR_OVF quirk. So we need to ACK all the
507 * pending interrupts and disable all the registers here,
508 * before reenabling the NMI delivery. Refer to p4_rearm()
509 * about the P4_CCCR_OVF quirk.
510 */
511 if (reset_devices) {
512 unsigned int low, high;
513 int i;
514
515 for (i = 0; i < P4_CONTROLS; i++) {
516 rdmsr(p4_controls[i], low, high);
517 low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
518 wrmsr(p4_controls[i], low, high);
519 }
520 }
521 } else {
522 /* logical cpu 1 */
523 perfctr_msr = MSR_P4_IQ_PERFCTR1;
524 evntsel_msr = MSR_P4_CRU_ESCR0;
525 cccr_msr = MSR_P4_IQ_CCCR1;
526
527 /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
528 if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
529 cccr_val = P4_CCCR_OVF_PMI0;
530 else
531 cccr_val = P4_CCCR_OVF_PMI1;
532 cccr_val |= P4_CCCR_ESCR_SELECT(4);
533 }
534
535 evntsel = P4_ESCR_EVENT_SELECT(0x3F)
536 | P4_ESCR_OS
537 | P4_ESCR_USR;
538
539 cccr_val |= P4_CCCR_THRESHOLD(15)
540 | P4_CCCR_COMPLEMENT
541 | P4_CCCR_COMPARE
542 | P4_CCCR_REQUIRED;
543
544 wrmsr(evntsel_msr, evntsel, 0);
545 wrmsr(cccr_msr, cccr_val, 0);
546 write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
547
548 wd->perfctr_msr = perfctr_msr;
549 wd->evntsel_msr = evntsel_msr;
550 wd->cccr_msr = cccr_msr;
551
552 /* ok, everything is initialized, announce that we're set */
553 cpu_nmi_set_wd_enabled();
554
555 apic_write(APIC_LVTPC, APIC_DM_NMI);
556 cccr_val |= P4_CCCR_ENABLE;
557 wrmsr(cccr_msr, cccr_val, 0);
558 return 1;
559}
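
The CCCR setup above is the P4-era analogue of the CMASK/INV trick used for PEBS earlier in this patch: with COMPARE set, THRESHOLD(15) plus COMPLEMENT makes the counter increment on every cycle in which the ESCR event fires 15 or fewer times, which is every cycle, so IQ_PERFCTR0 ticks once per cycle and behaves as the clock the comment promises.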
560
561static void stop_p4_watchdog(void)
562{
563 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
564 wrmsr(wd->cccr_msr, 0, 0);
565 wrmsr(wd->evntsel_msr, 0, 0);
566}
567
568static int p4_reserve(void)
569{
570 if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
571 return 0;
572#ifdef CONFIG_SMP
573 if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
574 goto fail1;
575#endif
576 if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
577 goto fail2;
578 /* RED-PEN why is ESCR1 not reserved here? */
579 return 1;
580 fail2:
581#ifdef CONFIG_SMP
582 if (smp_num_siblings > 1)
583 release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
584 fail1:
585#endif
586 release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
587 return 0;
588}
589
590static void p4_unreserve(void)
591{
592#ifdef CONFIG_SMP
593 if (smp_num_siblings > 1)
594 release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
595#endif
596 release_evntsel_nmi(MSR_P4_CRU_ESCR0);
597 release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
598}
599
600static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
601{
602 unsigned dummy;
603 /*
604 * P4 quirks:
605 * - An overflown perfctr will assert its interrupt
606 * until the OVF flag in its CCCR is cleared.
607 * - LVTPC is masked on interrupt and must be
608 * unmasked by the LVTPC handler.
609 */
610 rdmsrl(wd->cccr_msr, dummy);
611 dummy &= ~P4_CCCR_OVF;
612 wrmsrl(wd->cccr_msr, dummy);
613 apic_write(APIC_LVTPC, APIC_DM_NMI);
614 /* start the cycle over again */
615 write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
616}
617
618static const struct wd_ops p4_wd_ops = {
619 .reserve = p4_reserve,
620 .unreserve = p4_unreserve,
621 .setup = setup_p4_watchdog,
622 .rearm = p4_rearm,
623 .stop = stop_p4_watchdog,
624 /* RED-PEN this is wrong for the other sibling */
625 .perfctr = MSR_P4_BPU_PERFCTR0,
626 .evntsel = MSR_P4_BSU_ESCR0,
627 .checkbit = 1ULL << 39,
628};
629
630/*
631 * Watchdog using the Intel architected PerfMon.
632 * Used for Core2 and hopefully all future Intel CPUs.
633 */
634#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
635#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
636
637static struct wd_ops intel_arch_wd_ops;
638
639static int setup_intel_arch_watchdog(unsigned nmi_hz)
640{
641 unsigned int ebx;
642 union cpuid10_eax eax;
643 unsigned int unused;
644 unsigned int perfctr_msr, evntsel_msr;
645 unsigned int evntsel;
646 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
647
648 /*
649 * Check whether the Architectural PerfMon supports
650 * Unhalted Core Cycles Event or not.
651 * NOTE: Corresponding bit = 0 in ebx indicates event present.
652 */
653 cpuid(10, &(eax.full), &ebx, &unused, &unused);
654 if ((eax.split.mask_length <
655 (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
656 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
657 return 0;
658
659 perfctr_msr = wd_ops->perfctr;
660 evntsel_msr = wd_ops->evntsel;
661
662 wrmsrl(perfctr_msr, 0UL);
663
664 evntsel = ARCH_PERFMON_EVENTSEL_INT
665 | ARCH_PERFMON_EVENTSEL_OS
666 | ARCH_PERFMON_EVENTSEL_USR
667 | ARCH_PERFMON_NMI_EVENT_SEL
668 | ARCH_PERFMON_NMI_EVENT_UMASK;
669
670 /* setup the timer */
671 wrmsr(evntsel_msr, evntsel, 0);
672 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
673 write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
674
675 wd->perfctr_msr = perfctr_msr;
676 wd->evntsel_msr = evntsel_msr;
677 wd->cccr_msr = 0; /* unused */
678
679 /* ok, everything is initialized, announce that we're set */
680 cpu_nmi_set_wd_enabled();
681
682 apic_write(APIC_LVTPC, APIC_DM_NMI);
683 evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE;
684 wrmsr(evntsel_msr, evntsel, 0);
685 intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
686 return 1;
687}
688
689static struct wd_ops intel_arch_wd_ops __read_mostly = {
690 .reserve = single_msr_reserve,
691 .unreserve = single_msr_unreserve,
692 .setup = setup_intel_arch_watchdog,
693 .rearm = p6_rearm,
694 .stop = single_msr_stop_watchdog,
695 .perfctr = MSR_ARCH_PERFMON_PERFCTR1,
696 .evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
697};
698
699static void probe_nmi_watchdog(void)
700{
701 switch (boot_cpu_data.x86_vendor) {
702 case X86_VENDOR_AMD:
703 if (boot_cpu_data.x86 == 6 ||
704 (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15))
705 wd_ops = &k7_wd_ops;
706 return;
707 case X86_VENDOR_INTEL:
708 /* Work around CPUs where perfctr1 doesn't have a working enable
709 * bit, as described in the following errata:
710 * AE49 Core Duo and Intel Core Solo 65 nm
711 * AN49 Intel Pentium Dual-Core
712 * AF49 Dual-Core Intel Xeon Processor LV
713 */
714 if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) ||
715 ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 &&
716 boot_cpu_data.x86_mask == 4))) {
717 intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
718 intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
719 }
720 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
721 wd_ops = &intel_arch_wd_ops;
722 break;
723 }
724 switch (boot_cpu_data.x86) {
725 case 6:
726 if (boot_cpu_data.x86_model > 13)
727 return;
728
729 wd_ops = &p6_wd_ops;
730 break;
731 case 15:
732 wd_ops = &p4_wd_ops;
733 break;
734 default:
735 return;
736 }
737 break;
738 }
739}
740
741/* Interface to nmi.c */
742
743int lapic_watchdog_init(unsigned nmi_hz)
744{
745 if (!wd_ops) {
746 probe_nmi_watchdog();
747 if (!wd_ops) {
748 printk(KERN_INFO "NMI watchdog: CPU not supported\n");
749 return -1;
750 }
751
752 if (!wd_ops->reserve()) {
753 printk(KERN_ERR
754 "NMI watchdog: cannot reserve perfctrs\n");
755 return -1;
756 }
757 }
758
759 if (!(wd_ops->setup(nmi_hz))) {
760 printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
761 raw_smp_processor_id());
762 return -1;
763 }
764
765 return 0;
766}
767
768void lapic_watchdog_stop(void)
769{
770 if (wd_ops)
771 wd_ops->stop();
772}
773
774unsigned lapic_adjust_nmi_hz(unsigned hz)
775{
776 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
777 if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
778 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
779 hz = adjust_for_32bit_ctr(hz);
780 return hz;
781}
782
783int __kprobes lapic_wd_event(unsigned nmi_hz)
784{
785 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
786 u64 ctr;
787
788 rdmsrl(wd->perfctr_msr, ctr);
789 if (ctr & wd_ops->checkbit) /* perfctr still running? */
790 return 0;
791
792 wd_ops->rearm(wd, nmi_hz);
793 return 1;
794}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 6e8752c1bd52..8474c998cbd4 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -175,21 +175,21 @@ static const struct stacktrace_ops print_trace_ops = {
175 175
176void 176void
177show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, 177show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
178 unsigned long *stack, unsigned long bp, char *log_lvl) 178 unsigned long *stack, char *log_lvl)
179{ 179{
180 printk("%sCall Trace:\n", log_lvl); 180 printk("%sCall Trace:\n", log_lvl);
181 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); 181 dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
182} 182}
183 183
184void show_trace(struct task_struct *task, struct pt_regs *regs, 184void show_trace(struct task_struct *task, struct pt_regs *regs,
185 unsigned long *stack, unsigned long bp) 185 unsigned long *stack)
186{ 186{
187 show_trace_log_lvl(task, regs, stack, bp, ""); 187 show_trace_log_lvl(task, regs, stack, "");
188} 188}
189 189
190void show_stack(struct task_struct *task, unsigned long *sp) 190void show_stack(struct task_struct *task, unsigned long *sp)
191{ 191{
192 show_stack_log_lvl(task, NULL, sp, 0, ""); 192 show_stack_log_lvl(task, NULL, sp, "");
193} 193}
194 194
195/* 195/*
@@ -210,7 +210,7 @@ void dump_stack(void)
210 init_utsname()->release, 210 init_utsname()->release,
211 (int)strcspn(init_utsname()->version, " "), 211 (int)strcspn(init_utsname()->version, " "),
212 init_utsname()->version); 212 init_utsname()->version);
213 show_trace(NULL, NULL, &stack, bp); 213 show_trace(NULL, NULL, &stack);
214} 214}
215EXPORT_SYMBOL(dump_stack); 215EXPORT_SYMBOL(dump_stack);
216 216
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 1bc7f75a5bda..74cc1eda384b 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -17,11 +17,12 @@
17#include <asm/stacktrace.h> 17#include <asm/stacktrace.h>
18 18
19 19
20void dump_trace(struct task_struct *task, struct pt_regs *regs, 20void dump_trace(struct task_struct *task,
21 unsigned long *stack, unsigned long bp, 21 struct pt_regs *regs, unsigned long *stack,
22 const struct stacktrace_ops *ops, void *data) 22 const struct stacktrace_ops *ops, void *data)
23{ 23{
24 int graph = 0; 24 int graph = 0;
25 unsigned long bp;
25 26
26 if (!task) 27 if (!task)
27 task = current; 28 task = current;
@@ -34,18 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
34 stack = (unsigned long *)task->thread.sp; 35 stack = (unsigned long *)task->thread.sp;
35 } 36 }
36 37
37#ifdef CONFIG_FRAME_POINTER 38 bp = stack_frame(task, regs);
38 if (!bp) {
39 if (task == current) {
40 /* Grab bp right from our regs */
41 get_bp(bp);
42 } else {
43 /* bp is the last reg pushed by switch_to */
44 bp = *(unsigned long *) task->thread.sp;
45 }
46 }
47#endif
48
49 for (;;) { 39 for (;;) {
50 struct thread_info *context; 40 struct thread_info *context;
51 41
@@ -65,7 +55,7 @@ EXPORT_SYMBOL(dump_trace);
65 55
66void 56void
67show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 57show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
68 unsigned long *sp, unsigned long bp, char *log_lvl) 58 unsigned long *sp, char *log_lvl)
69{ 59{
70 unsigned long *stack; 60 unsigned long *stack;
71 int i; 61 int i;
@@ -87,7 +77,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
87 touch_nmi_watchdog(); 77 touch_nmi_watchdog();
88 } 78 }
89 printk(KERN_CONT "\n"); 79 printk(KERN_CONT "\n");
90 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 80 show_trace_log_lvl(task, regs, sp, log_lvl);
91} 81}
92 82
93 83
@@ -112,8 +102,7 @@ void show_registers(struct pt_regs *regs)
112 u8 *ip; 102 u8 *ip;
113 103
114 printk(KERN_EMERG "Stack:\n"); 104 printk(KERN_EMERG "Stack:\n");
115 show_stack_log_lvl(NULL, regs, &regs->sp, 105 show_stack_log_lvl(NULL, regs, &regs->sp, KERN_EMERG);
116 0, KERN_EMERG);
117 106
118 printk(KERN_EMERG "Code: "); 107 printk(KERN_EMERG "Code: ");
119 108
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 6a340485249a..64101335de19 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -139,8 +139,8 @@ fixup_bp_irq_link(unsigned long bp, unsigned long *stack,
139 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack 139 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
140 */ 140 */
141 141
142void dump_trace(struct task_struct *task, struct pt_regs *regs, 142void dump_trace(struct task_struct *task,
143 unsigned long *stack, unsigned long bp, 143 struct pt_regs *regs, unsigned long *stack,
144 const struct stacktrace_ops *ops, void *data) 144 const struct stacktrace_ops *ops, void *data)
145{ 145{
146 const unsigned cpu = get_cpu(); 146 const unsigned cpu = get_cpu();
@@ -149,6 +149,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
149 unsigned used = 0; 149 unsigned used = 0;
150 struct thread_info *tinfo; 150 struct thread_info *tinfo;
151 int graph = 0; 151 int graph = 0;
152 unsigned long bp;
152 153
153 if (!task) 154 if (!task)
154 task = current; 155 task = current;
@@ -160,18 +161,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
160 stack = (unsigned long *)task->thread.sp; 161 stack = (unsigned long *)task->thread.sp;
161 } 162 }
162 163
163#ifdef CONFIG_FRAME_POINTER 164 bp = stack_frame(task, regs);
164 if (!bp) {
165 if (task == current) {
166 /* Grab bp right from our regs */
167 get_bp(bp);
168 } else {
169 /* bp is the last reg pushed by switch_to */
170 bp = *(unsigned long *) task->thread.sp;
171 }
172 }
173#endif
174
175 /* 165 /*
176 * Print function call entries in all stacks, starting at the 166 * Print function call entries in all stacks, starting at the
177 * current stack address. If the stacks consist of nested 167 * current stack address. If the stacks consist of nested
@@ -235,7 +225,7 @@ EXPORT_SYMBOL(dump_trace);
235 225
236void 226void
237show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 227show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
238 unsigned long *sp, unsigned long bp, char *log_lvl) 228 unsigned long *sp, char *log_lvl)
239{ 229{
240 unsigned long *irq_stack_end; 230 unsigned long *irq_stack_end;
241 unsigned long *irq_stack; 231 unsigned long *irq_stack;
@@ -279,7 +269,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
279 preempt_enable(); 269 preempt_enable();
280 270
281 printk(KERN_CONT "\n"); 271 printk(KERN_CONT "\n");
282 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 272 show_trace_log_lvl(task, regs, sp, log_lvl);
283} 273}
284 274
285void show_registers(struct pt_regs *regs) 275void show_registers(struct pt_regs *regs)
@@ -308,7 +298,7 @@ void show_registers(struct pt_regs *regs)
308 298
309 printk(KERN_EMERG "Stack:\n"); 299 printk(KERN_EMERG "Stack:\n");
310 show_stack_log_lvl(NULL, regs, (unsigned long *)sp, 300 show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
311 regs->bp, KERN_EMERG); 301 KERN_EMERG);
312 302
313 printk(KERN_EMERG "Code: "); 303 printk(KERN_EMERG "Code: ");
314 304
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 1cbd54c0df99..5940282bd2f9 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1184,6 +1184,10 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
1184{ 1184{
1185 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 1185 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
1186 1186
1187 /* This is possible if op is under delayed unoptimizing */
1188 if (kprobe_disabled(&op->kp))
1189 return;
1190
1187 preempt_disable(); 1191 preempt_disable();
1188 if (kprobe_running()) { 1192 if (kprobe_running()) {
1189 kprobes_inc_nmissed_count(&op->kp); 1193 kprobes_inc_nmissed_count(&op->kp);
@@ -1401,10 +1405,16 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
1401 return 0; 1405 return 0;
1402} 1406}
1403 1407
1404/* Replace a breakpoint (int3) with a relative jump. */ 1408#define MAX_OPTIMIZE_PROBES 256
1405int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op) 1409static struct text_poke_param *jump_poke_params;
1410static struct jump_poke_buffer {
1411 u8 buf[RELATIVEJUMP_SIZE];
1412} *jump_poke_bufs;
1413
1414static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
1415 u8 *insn_buf,
1416 struct optimized_kprobe *op)
1406{ 1417{
1407 unsigned char jmp_code[RELATIVEJUMP_SIZE];
1408 s32 rel = (s32)((long)op->optinsn.insn - 1418 s32 rel = (s32)((long)op->optinsn.insn -
1409 ((long)op->kp.addr + RELATIVEJUMP_SIZE)); 1419 ((long)op->kp.addr + RELATIVEJUMP_SIZE));
1410 1420
@@ -1412,16 +1422,79 @@ int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
1412 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, 1422 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
1413 RELATIVE_ADDR_SIZE); 1423 RELATIVE_ADDR_SIZE);
1414 1424
1415 jmp_code[0] = RELATIVEJUMP_OPCODE; 1425 insn_buf[0] = RELATIVEJUMP_OPCODE;
1416 *(s32 *)(&jmp_code[1]) = rel; 1426 *(s32 *)(&insn_buf[1]) = rel;
1427
1428 tprm->addr = op->kp.addr;
1429 tprm->opcode = insn_buf;
1430 tprm->len = RELATIVEJUMP_SIZE;
1431}
1432
1433/*
1434 * Replace breakpoints (int3) with relative jumps.
1435 * Caller must call with locking kprobe_mutex and text_mutex.
1436 */
1437void __kprobes arch_optimize_kprobes(struct list_head *oplist)
1438{
1439 struct optimized_kprobe *op, *tmp;
1440 int c = 0;
1441
1442 list_for_each_entry_safe(op, tmp, oplist, list) {
1443 WARN_ON(kprobe_disabled(&op->kp));
1444 /* Setup param */
1445 setup_optimize_kprobe(&jump_poke_params[c],
1446 jump_poke_bufs[c].buf, op);
1447 list_del_init(&op->list);
1448 if (++c >= MAX_OPTIMIZE_PROBES)
1449 break;
1450 }
1417 1451
1418 /* 1452 /*
1419 * text_poke_smp doesn't support NMI/MCE code modifying. 1453 * text_poke_smp doesn't support NMI/MCE code modifying.
1420 * However, since kprobes itself also doesn't support NMI/MCE 1454 * However, since kprobes itself also doesn't support NMI/MCE
1421 * code probing, it's not a problem. 1455 * code probing, it's not a problem.
1422 */ 1456 */
1423 text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE); 1457 text_poke_smp_batch(jump_poke_params, c);
1424 return 0; 1458}
1459
1460static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
1461 u8 *insn_buf,
1462 struct optimized_kprobe *op)
1463{
1464 /* Set int3 to first byte for kprobes */
1465 insn_buf[0] = BREAKPOINT_INSTRUCTION;
1466 memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
1467
1468 tprm->addr = op->kp.addr;
1469 tprm->opcode = insn_buf;
1470 tprm->len = RELATIVEJUMP_SIZE;
1471}
1472
1473/*
1474 * Recover original instructions and breakpoints from relative jumps.
1475 * Caller must call with locking kprobe_mutex.
1476 */
1477extern void arch_unoptimize_kprobes(struct list_head *oplist,
1478 struct list_head *done_list)
1479{
1480 struct optimized_kprobe *op, *tmp;
1481 int c = 0;
1482
1483 list_for_each_entry_safe(op, tmp, oplist, list) {
1484 /* Setup param */
1485 setup_unoptimize_kprobe(&jump_poke_params[c],
1486 jump_poke_bufs[c].buf, op);
1487 list_move(&op->list, done_list);
1488 if (++c >= MAX_OPTIMIZE_PROBES)
1489 break;
1490 }
1491
1492 /*
1493 * text_poke_smp doesn't support NMI/MCE code modifying.
1494 * However, since kprobes itself also doesn't support NMI/MCE
1495 * code probing, it's not a problem.
1496 */
1497 text_poke_smp_batch(jump_poke_params, c);
1425} 1498}
1426 1499
1427/* Replace a relative jump with a breakpoint (int3). */ 1500/* Replace a relative jump with a breakpoint (int3). */
@@ -1453,11 +1526,35 @@ static int __kprobes setup_detour_execution(struct kprobe *p,
1453 } 1526 }
1454 return 0; 1527 return 0;
1455} 1528}
1529
1530static int __kprobes init_poke_params(void)
1531{
1532 /* Allocate code buffer and parameter array */
1533 jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
1534 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
1535 if (!jump_poke_bufs)
1536 return -ENOMEM;
1537
1538 jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
1539 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
1540 if (!jump_poke_params) {
1541 kfree(jump_poke_bufs);
1542 jump_poke_bufs = NULL;
1543 return -ENOMEM;
1544 }
1545
1546 return 0;
1547}
1548#else /* !CONFIG_OPTPROBES */
1549static int __kprobes init_poke_params(void)
1550{
1551 return 0;
1552}
1456#endif 1553#endif
1457 1554
1458int __init arch_init_kprobes(void) 1555int __init arch_init_kprobes(void)
1459{ 1556{
1460 return 0; 1557 return init_poke_params();
1461} 1558}
1462 1559
1463int __kprobes arch_trampoline_kprobe(struct kprobe *p) 1560int __kprobes arch_trampoline_kprobe(struct kprobe *p)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 57d1868a86aa..96ed1aac543a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -91,8 +91,7 @@ void exit_thread(void)
91void show_regs(struct pt_regs *regs) 91void show_regs(struct pt_regs *regs)
92{ 92{
93 show_registers(regs); 93 show_registers(regs);
94 show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), 94 show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs));
95 regs->bp);
96} 95}
97 96
98void show_regs_common(void) 97void show_regs_common(void)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 083e99d1b7df..68f61ac632e1 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -281,6 +281,13 @@ static void __cpuinit smp_callin(void)
281 */ 281 */
282 smp_store_cpu_info(cpuid); 282 smp_store_cpu_info(cpuid);
283 283
284 /*
285 * This must be done before setting cpu_online_mask
286 * or calling notify_cpu_starting.
287 */
288 set_cpu_sibling_map(raw_smp_processor_id());
289 wmb();
290
284 notify_cpu_starting(cpuid); 291 notify_cpu_starting(cpuid);
285 292
286 /* 293 /*
@@ -316,16 +323,6 @@ notrace static void __cpuinit start_secondary(void *unused)
316 */ 323 */
317 check_tsc_sync_target(); 324 check_tsc_sync_target();
318 325
319 if (nmi_watchdog == NMI_IO_APIC) {
320 legacy_pic->mask(0);
321 enable_NMI_through_LVT0();
322 legacy_pic->unmask(0);
323 }
324
325 /* This must be done before setting cpu_online_mask */
326 set_cpu_sibling_map(raw_smp_processor_id());
327 wmb();
328
329 /* 326 /*
330 * We need to hold call_lock, so there is no inconsistency 327 * We need to hold call_lock, so there is no inconsistency
331 * between the time smp_call_function() determines number of 328 * between the time smp_call_function() determines number of
@@ -1061,8 +1058,6 @@ static int __init smp_sanity_check(unsigned max_cpus)
1061 printk(KERN_INFO "SMP mode deactivated.\n"); 1058 printk(KERN_INFO "SMP mode deactivated.\n");
1062 smpboot_clear_io_apic(); 1059 smpboot_clear_io_apic();
1063 1060
1064 localise_nmi_watchdog();
1065
1066 connect_bsp_APIC(); 1061 connect_bsp_APIC();
1067 setup_local_APIC(); 1062 setup_local_APIC();
1068 end_local_APIC_setup(); 1063 end_local_APIC_setup();
@@ -1196,7 +1191,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
1196#ifdef CONFIG_X86_IO_APIC 1191#ifdef CONFIG_X86_IO_APIC
1197 setup_ioapic_dest(); 1192 setup_ioapic_dest();
1198#endif 1193#endif
1199 check_nmi_watchdog();
1200 mtrr_aps_init(); 1194 mtrr_aps_init();
1201} 1195}
1202 1196
@@ -1341,8 +1335,6 @@ int native_cpu_disable(void)
1341 if (cpu == 0) 1335 if (cpu == 0)
1342 return -EBUSY; 1336 return -EBUSY;
1343 1337
1344 if (nmi_watchdog == NMI_LOCAL_APIC)
1345 stop_apic_nmi_watchdog(NULL);
1346 clear_local_APIC(); 1338 clear_local_APIC();
1347 1339
1348 cpu_disable_common(); 1340 cpu_disable_common();
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index b53c525368a7..938c8e10a19a 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -73,22 +73,22 @@ static const struct stacktrace_ops save_stack_ops_nosched = {
73 */ 73 */
74void save_stack_trace(struct stack_trace *trace) 74void save_stack_trace(struct stack_trace *trace)
75{ 75{
76 dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace); 76 dump_trace(current, NULL, NULL, &save_stack_ops, trace);
77 if (trace->nr_entries < trace->max_entries) 77 if (trace->nr_entries < trace->max_entries)
78 trace->entries[trace->nr_entries++] = ULONG_MAX; 78 trace->entries[trace->nr_entries++] = ULONG_MAX;
79} 79}
80EXPORT_SYMBOL_GPL(save_stack_trace); 80EXPORT_SYMBOL_GPL(save_stack_trace);
81 81
82void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp) 82void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs)
83{ 83{
84 dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace); 84 dump_trace(current, regs, NULL, &save_stack_ops, trace);
85 if (trace->nr_entries < trace->max_entries) 85 if (trace->nr_entries < trace->max_entries)
86 trace->entries[trace->nr_entries++] = ULONG_MAX; 86 trace->entries[trace->nr_entries++] = ULONG_MAX;
87} 87}
88 88
89void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) 89void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
90{ 90{
91 dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); 91 dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace);
92 if (trace->nr_entries < trace->max_entries) 92 if (trace->nr_entries < trace->max_entries)
93 trace->entries[trace->nr_entries++] = ULONG_MAX; 93 trace->entries[trace->nr_entries++] = ULONG_MAX;
94} 94}
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index fb5cc5e14cfa..25a28a245937 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -22,10 +22,6 @@
22#include <asm/hpet.h> 22#include <asm/hpet.h>
23#include <asm/time.h> 23#include <asm/time.h>
24 24
25#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
26int timer_ack;
27#endif
28
29#ifdef CONFIG_X86_64 25#ifdef CONFIG_X86_64
30volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; 26volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
31#endif 27#endif
@@ -63,20 +59,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
63 /* Keep nmi watchdog up to date */ 59 /* Keep nmi watchdog up to date */
64 inc_irq_stat(irq0_irqs); 60 inc_irq_stat(irq0_irqs);
65 61
66 /* Optimized out for !IO_APIC and x86_64 */
67 if (timer_ack) {
68 /*
69 * Subtle, when I/O APICs are used we have to ack timer IRQ
70 * manually to deassert NMI lines for the watchdog if run
71 * on an 82489DX-based system.
72 */
73 raw_spin_lock(&i8259A_lock);
74 outb(0x0c, PIC_MASTER_OCW3);
75 /* Ack the IRQ; AEOI will end it automatically. */
76 inb(PIC_MASTER_POLL);
77 raw_spin_unlock(&i8259A_lock);
78 }
79
80 global_clock_event->event_handler(global_clock_event); 62 global_clock_event->event_handler(global_clock_event);
81 63
82 /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ 64 /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index cb838ca42c96..bb6f04167361 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -83,6 +83,8 @@ EXPORT_SYMBOL_GPL(used_vectors);
83 83
84static int ignore_nmis; 84static int ignore_nmis;
85 85
86int unknown_nmi_panic;
87
86static inline void conditional_sti(struct pt_regs *regs) 88static inline void conditional_sti(struct pt_regs *regs)
87{ 89{
88 if (regs->flags & X86_EFLAGS_IF) 90 if (regs->flags & X86_EFLAGS_IF)
@@ -300,6 +302,13 @@ gp_in_kernel:
300 die("general protection fault", regs, error_code); 302 die("general protection fault", regs, error_code);
301} 303}
302 304
305static int __init setup_unknown_nmi_panic(char *str)
306{
307 unknown_nmi_panic = 1;
308 return 1;
309}
310__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
311
303static notrace __kprobes void 312static notrace __kprobes void
304mem_parity_error(unsigned char reason, struct pt_regs *regs) 313mem_parity_error(unsigned char reason, struct pt_regs *regs)
305{ 314{
@@ -371,7 +380,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
371 reason, smp_processor_id()); 380 reason, smp_processor_id());
372 381
373 printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); 382 printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
374 if (panic_on_unrecovered_nmi) 383 if (unknown_nmi_panic || panic_on_unrecovered_nmi)
375 panic("NMI: Not continuing"); 384 panic("NMI: Not continuing");
376 385
377 printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); 386 printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
@@ -397,20 +406,8 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
397 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) 406 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
398 == NOTIFY_STOP) 407 == NOTIFY_STOP)
399 return; 408 return;
400
401#ifndef CONFIG_LOCKUP_DETECTOR
402 /*
403 * Ok, so this is none of the documented NMI sources,
404 * so it must be the NMI watchdog.
405 */
406 if (nmi_watchdog_tick(regs, reason))
407 return;
408 if (!do_nmi_callback(regs, cpu))
409#endif /* !CONFIG_LOCKUP_DETECTOR */
410 unknown_nmi_error(reason, regs);
411#else
412 unknown_nmi_error(reason, regs);
413#endif 409#endif
410 unknown_nmi_error(reason, regs);
414 411
415 return; 412 return;
416 } 413 }
@@ -446,14 +443,12 @@ do_nmi(struct pt_regs *regs, long error_code)
446 443
447void stop_nmi(void) 444void stop_nmi(void)
448{ 445{
449 acpi_nmi_disable();
450 ignore_nmis++; 446 ignore_nmis++;
451} 447}
452 448
453void restart_nmi(void) 449void restart_nmi(void)
454{ 450{
455 ignore_nmis--; 451 ignore_nmis--;
456 acpi_nmi_enable();
457} 452}
458 453
459/* May run on IST stack. */ 454/* May run on IST stack. */
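
With the watchdog hooks gone, the one extension point left ahead of
unknown_nmi_error() is the DIE_NMI die-notifier chain invoked above. A
subsystem that wants to claim otherwise-unknown NMIs (hpwdt below does
exactly this) registers roughly as follows; the names here are
illustrative, only the notifier API itself is real:

#include <linux/kdebug.h>
#include <linux/notifier.h>

static int my_nmi_handler(struct notifier_block *nb,
			  unsigned long val, void *data)
{
	if (val != DIE_NMI)
		return NOTIFY_DONE;

	/* Claim the NMI so unknown_nmi_error() is never reached */
	return NOTIFY_STOP;
}

static struct notifier_block my_nmi_nb = {
	.notifier_call = my_nmi_handler,
};

static int __init my_nmi_init(void)
{
	return register_die_notifier(&my_nmi_nb);
}
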
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c
index af3b6c8a436f..704a37cedddb 100644
--- a/arch/x86/mm/kmemcheck/error.c
+++ b/arch/x86/mm/kmemcheck/error.c
@@ -185,7 +185,7 @@ void kmemcheck_error_save(enum kmemcheck_shadow state,
185 e->trace.entries = e->trace_entries; 185 e->trace.entries = e->trace_entries;
186 e->trace.max_entries = ARRAY_SIZE(e->trace_entries); 186 e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
187 e->trace.skip = 0; 187 e->trace.skip = 0;
188 save_stack_trace_bp(&e->trace, regs->bp); 188 save_stack_trace_regs(&e->trace, regs);
189 189
190 /* Round address down to nearest 16 bytes */ 190 /* Round address down to nearest 16 bytes */
191 shadow_copy = kmemcheck_shadow_lookup(address 191 shadow_copy = kmemcheck_shadow_lookup(address
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 2d49d4e19a36..72cbec14d783 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -126,7 +126,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
126 if (!user_mode_vm(regs)) { 126 if (!user_mode_vm(regs)) {
127 unsigned long stack = kernel_stack_pointer(regs); 127 unsigned long stack = kernel_stack_pointer(regs);
128 if (depth) 128 if (depth)
129 dump_trace(NULL, regs, (unsigned long *)stack, 0, 129 dump_trace(NULL, regs, (unsigned long *)stack,
130 &backtrace_ops, &depth); 130 &backtrace_ops, &depth);
131 return; 131 return;
132 } 132 }
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 4e8baad36d37..358c8b9c96a7 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -732,6 +732,9 @@ int __init op_nmi_init(struct oprofile_operations *ops)
732 case 0x14: 732 case 0x14:
733 cpu_type = "x86-64/family14h"; 733 cpu_type = "x86-64/family14h";
734 break; 734 break;
735 case 0x15:
736 cpu_type = "x86-64/family15h";
737 break;
735 default: 738 default:
736 return -ENODEV; 739 return -ENODEV;
737 } 740 }
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c
index e3ecb71b5790..0636dd93cef8 100644
--- a/arch/x86/oprofile/nmi_timer_int.c
+++ b/arch/x86/oprofile/nmi_timer_int.c
@@ -58,9 +58,6 @@ static void timer_stop(void)
58 58
59int __init op_nmi_timer_init(struct oprofile_operations *ops) 59int __init op_nmi_timer_init(struct oprofile_operations *ops)
60{ 60{
61 if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0))
62 return -ENODEV;
63
64 ops->start = timer_start; 61 ops->start = timer_start;
65 ops->stop = timer_stop; 62 ops->stop = timer_stop;
66 ops->cpu_type = "timer"; 63 ops->cpu_type = "timer";
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index a011bcc0f943..f2984d43a6b3 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -29,11 +29,12 @@
29#include "op_x86_model.h" 29#include "op_x86_model.h"
30#include "op_counter.h" 30#include "op_counter.h"
31 31
32#define NUM_COUNTERS 4 32#define NUM_COUNTERS 4
33#define NUM_COUNTERS_F15H 6
33#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX 34#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
34#define NUM_VIRT_COUNTERS 32 35#define NUM_VIRT_COUNTERS 32
35#else 36#else
36#define NUM_VIRT_COUNTERS NUM_COUNTERS 37#define NUM_VIRT_COUNTERS 0
37#endif 38#endif
38 39
39#define OP_EVENT_MASK 0x0FFF 40#define OP_EVENT_MASK 0x0FFF
@@ -41,7 +42,8 @@
41 42
42#define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21)) 43#define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21))
43 44
44static unsigned long reset_value[NUM_VIRT_COUNTERS]; 45static int num_counters;
46static unsigned long reset_value[OP_MAX_COUNTER];
45 47
46#define IBS_FETCH_SIZE 6 48#define IBS_FETCH_SIZE 6
47#define IBS_OP_SIZE 12 49#define IBS_OP_SIZE 12
@@ -387,7 +389,7 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
387 int i; 389 int i;
388 390
389 /* enable active counters */ 391 /* enable active counters */
390 for (i = 0; i < NUM_COUNTERS; ++i) { 392 for (i = 0; i < num_counters; ++i) {
391 int virt = op_x86_phys_to_virt(i); 393 int virt = op_x86_phys_to_virt(i);
392 if (!reset_value[virt]) 394 if (!reset_value[virt])
393 continue; 395 continue;
@@ -406,7 +408,7 @@ static void op_amd_shutdown(struct op_msrs const * const msrs)
406{ 408{
407 int i; 409 int i;
408 410
409 for (i = 0; i < NUM_COUNTERS; ++i) { 411 for (i = 0; i < num_counters; ++i) {
410 if (!msrs->counters[i].addr) 412 if (!msrs->counters[i].addr)
411 continue; 413 continue;
412 release_perfctr_nmi(MSR_K7_PERFCTR0 + i); 414 release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
@@ -418,7 +420,7 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
418{ 420{
419 int i; 421 int i;
420 422
421 for (i = 0; i < NUM_COUNTERS; i++) { 423 for (i = 0; i < num_counters; i++) {
422 if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) 424 if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
423 goto fail; 425 goto fail;
424 if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) { 426 if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) {
@@ -426,8 +428,13 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
426 goto fail; 428 goto fail;
427 } 429 }
428 /* both registers must be reserved */ 430 /* both registers must be reserved */
429 msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; 431 if (num_counters == NUM_COUNTERS_F15H) {
430 msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; 432 msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1);
433 msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1);
434 } else {
435 msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
436 msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
437 }
431 continue; 438 continue;
432 fail: 439 fail:
433 if (!counter_config[i].enabled) 440 if (!counter_config[i].enabled)
@@ -447,7 +454,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
447 int i; 454 int i;
448 455
449 /* setup reset_value */ 456 /* setup reset_value */
450 for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { 457 for (i = 0; i < OP_MAX_COUNTER; ++i) {
451 if (counter_config[i].enabled 458 if (counter_config[i].enabled
452 && msrs->counters[op_x86_virt_to_phys(i)].addr) 459 && msrs->counters[op_x86_virt_to_phys(i)].addr)
453 reset_value[i] = counter_config[i].count; 460 reset_value[i] = counter_config[i].count;
@@ -456,7 +463,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
456 } 463 }
457 464
458 /* clear all counters */ 465 /* clear all counters */
459 for (i = 0; i < NUM_COUNTERS; ++i) { 466 for (i = 0; i < num_counters; ++i) {
460 if (!msrs->controls[i].addr) 467 if (!msrs->controls[i].addr)
461 continue; 468 continue;
462 rdmsrl(msrs->controls[i].addr, val); 469 rdmsrl(msrs->controls[i].addr, val);
@@ -472,7 +479,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
472 } 479 }
473 480
474 /* enable active counters */ 481 /* enable active counters */
475 for (i = 0; i < NUM_COUNTERS; ++i) { 482 for (i = 0; i < num_counters; ++i) {
476 int virt = op_x86_phys_to_virt(i); 483 int virt = op_x86_phys_to_virt(i);
477 if (!reset_value[virt]) 484 if (!reset_value[virt])
478 continue; 485 continue;
@@ -503,7 +510,7 @@ static int op_amd_check_ctrs(struct pt_regs * const regs,
503 u64 val; 510 u64 val;
504 int i; 511 int i;
505 512
506 for (i = 0; i < NUM_COUNTERS; ++i) { 513 for (i = 0; i < num_counters; ++i) {
507 int virt = op_x86_phys_to_virt(i); 514 int virt = op_x86_phys_to_virt(i);
508 if (!reset_value[virt]) 515 if (!reset_value[virt])
509 continue; 516 continue;
@@ -526,7 +533,7 @@ static void op_amd_start(struct op_msrs const * const msrs)
526 u64 val; 533 u64 val;
527 int i; 534 int i;
528 535
529 for (i = 0; i < NUM_COUNTERS; ++i) { 536 for (i = 0; i < num_counters; ++i) {
530 if (!reset_value[op_x86_phys_to_virt(i)]) 537 if (!reset_value[op_x86_phys_to_virt(i)])
531 continue; 538 continue;
532 rdmsrl(msrs->controls[i].addr, val); 539 rdmsrl(msrs->controls[i].addr, val);
@@ -546,7 +553,7 @@ static void op_amd_stop(struct op_msrs const * const msrs)
546 * Subtle: stop on all counters to avoid race with setting our 553 * Subtle: stop on all counters to avoid race with setting our
547 * pm callback 554 * pm callback
548 */ 555 */
549 for (i = 0; i < NUM_COUNTERS; ++i) { 556 for (i = 0; i < num_counters; ++i) {
550 if (!reset_value[op_x86_phys_to_virt(i)]) 557 if (!reset_value[op_x86_phys_to_virt(i)])
551 continue; 558 continue;
552 rdmsrl(msrs->controls[i].addr, val); 559 rdmsrl(msrs->controls[i].addr, val);
@@ -698,18 +705,29 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
698 return 0; 705 return 0;
699} 706}
700 707
708struct op_x86_model_spec op_amd_spec;
709
701static int op_amd_init(struct oprofile_operations *ops) 710static int op_amd_init(struct oprofile_operations *ops)
702{ 711{
703 init_ibs(); 712 init_ibs();
704 create_arch_files = ops->create_files; 713 create_arch_files = ops->create_files;
705 ops->create_files = setup_ibs_files; 714 ops->create_files = setup_ibs_files;
715
716 if (boot_cpu_data.x86 == 0x15) {
717 num_counters = NUM_COUNTERS_F15H;
718 } else {
719 num_counters = NUM_COUNTERS;
720 }
721
722 op_amd_spec.num_counters = num_counters;
723 op_amd_spec.num_controls = num_counters;
724 op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS);
725
706 return 0; 726 return 0;
707} 727}
708 728
709struct op_x86_model_spec op_amd_spec = { 729struct op_x86_model_spec op_amd_spec = {
710 .num_counters = NUM_COUNTERS, 730 /* num_counters/num_controls filled in at runtime */
711 .num_controls = NUM_COUNTERS,
712 .num_virt_counters = NUM_VIRT_COUNTERS,
713 .reserved = MSR_AMD_EVENTSEL_RESERVED, 731 .reserved = MSR_AMD_EVENTSEL_RESERVED,
714 .event_mask = OP_EVENT_MASK, 732 .event_mask = OP_EVENT_MASK,
715 .init = op_amd_init, 733 .init = op_amd_init,
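
The i << 1 above is the point of the family-15h branch: the new
PERF_CTL/PERF_CTR MSRs are interleaved in control/counter pairs with a
stride of two, while the K7-style banks are contiguous. A standalone
sketch of the address math (constants as defined in msr-index.h):

#include <stdio.h>

#define MSR_K7_EVNTSEL0		0xc0010000
#define MSR_K7_PERFCTR0		0xc0010004
#define MSR_F15H_PERF_CTL	0xc0010200
#define MSR_F15H_PERF_CTR	0xc0010201

int main(void)
{
	int i;

	for (i = 0; i < 6; i++)	/* NUM_COUNTERS_F15H */
		printf("f15h %d: ctl=%#x ctr=%#x\n", i,
		       MSR_F15H_PERF_CTL + (i << 1),
		       MSR_F15H_PERF_CTR + (i << 1));
	for (i = 0; i < 4; i++)	/* NUM_COUNTERS */
		printf("k7   %d: ctl=%#x ctr=%#x\n", i,
		       MSR_K7_EVNTSEL0 + i, MSR_K7_PERFCTR0 + i);
	return 0;
}
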
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index 182558dd5515..9fadec074142 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -11,7 +11,7 @@
11#include <linux/oprofile.h> 11#include <linux/oprofile.h>
12#include <linux/smp.h> 12#include <linux/smp.h>
13#include <linux/ptrace.h> 13#include <linux/ptrace.h>
14#include <linux/nmi.h> 14#include <asm/nmi.h>
15#include <asm/msr.h> 15#include <asm/msr.h>
16#include <asm/fixmap.h> 16#include <asm/fixmap.h>
17#include <asm/apic.h> 17#include <asm/apic.h>
diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c
index 660a2728908d..0cac7ec0d2ec 100644
--- a/drivers/acpi/acpica/nsinit.c
+++ b/drivers/acpi/acpica/nsinit.c
@@ -577,9 +577,7 @@ acpi_ns_init_one_device(acpi_handle obj_handle,
577 * as possible (without an NMI being received in the middle of 577 * as possible (without an NMI being received in the middle of
578 * this) - so disable NMIs and initialize the device: 578 * this) - so disable NMIs and initialize the device:
579 */ 579 */
580 acpi_nmi_disable();
581 status = acpi_ns_evaluate(info); 580 status = acpi_ns_evaluate(info);
582 acpi_nmi_enable();
583 581
584 if (ACPI_SUCCESS(status)) { 582 if (ACPI_SUCCESS(status)) {
585 walk_info->num_INI++; 583 walk_info->num_INI++;
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index 3d77116e4634..dea7b5bf6e2c 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -642,19 +642,14 @@ static struct notifier_block die_notifier = {
642 */ 642 */
643 643
644#ifdef CONFIG_HPWDT_NMI_DECODING 644#ifdef CONFIG_HPWDT_NMI_DECODING
645#ifdef ARCH_HAS_NMI_WATCHDOG 645#ifdef CONFIG_X86_LOCAL_APIC
646static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev) 646static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev)
647{ 647{
648 /* 648 /*
649 * If nmi_watchdog is turned off then we can turn on 649 * If nmi_watchdog is turned off then we can turn on
650 * our nmi decoding capability. 650 * our nmi decoding capability.
651 */ 651 */
652 if (!nmi_watchdog_active()) 652 hpwdt_nmi_decoding = 1;
653 hpwdt_nmi_decoding = 1;
654 else
655 dev_warn(&dev->dev, "NMI decoding is disabled. To enable this "
656 "functionality you must reboot with nmi_watchdog=0 "
657 "and load the hpwdt driver with priority=1.\n");
658} 653}
659#else 654#else
660static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev) 655static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev)
@@ -662,7 +657,7 @@ static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev)
662 dev_warn(&dev->dev, "NMI decoding is disabled. " 657 dev_warn(&dev->dev, "NMI decoding is disabled. "
663 "Your kernel does not support a NMI Watchdog.\n"); 658 "Your kernel does not support a NMI Watchdog.\n");
664} 659}
665#endif /* ARCH_HAS_NMI_WATCHDOG */ 660#endif /* CONFIG_X86_LOCAL_APIC */
666 661
667static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev) 662static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev)
668{ 663{
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 8beabb958f61..47e3997f7b5c 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -154,12 +154,14 @@ enum {
154 TRACE_EVENT_FL_ENABLED_BIT, 154 TRACE_EVENT_FL_ENABLED_BIT,
155 TRACE_EVENT_FL_FILTERED_BIT, 155 TRACE_EVENT_FL_FILTERED_BIT,
156 TRACE_EVENT_FL_RECORDED_CMD_BIT, 156 TRACE_EVENT_FL_RECORDED_CMD_BIT,
157 TRACE_EVENT_FL_CAP_ANY_BIT,
157}; 158};
158 159
159enum { 160enum {
160 TRACE_EVENT_FL_ENABLED = (1 << TRACE_EVENT_FL_ENABLED_BIT), 161 TRACE_EVENT_FL_ENABLED = (1 << TRACE_EVENT_FL_ENABLED_BIT),
161 TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), 162 TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT),
162 TRACE_EVENT_FL_RECORDED_CMD = (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT), 163 TRACE_EVENT_FL_RECORDED_CMD = (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT),
164 TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
163}; 165};
164 166
165struct ftrace_event_call { 167struct ftrace_event_call {
@@ -196,6 +198,14 @@ struct ftrace_event_call {
196#endif 198#endif
197}; 199};
198 200
201#define __TRACE_EVENT_FLAGS(name, value) \
202 static int __init trace_init_flags_##name(void) \
203 { \
204 event_##name.flags = value; \
205 return 0; \
206 } \
207 early_initcall(trace_init_flags_##name);
208
199#define PERF_MAX_TRACE_SIZE 2048 209#define PERF_MAX_TRACE_SIZE 2048
200 210
201#define MAX_FILTER_PRED 32 211#define MAX_FILTER_PRED 32
@@ -215,6 +225,10 @@ enum {
215 FILTER_PTR_STRING, 225 FILTER_PTR_STRING,
216}; 226};
217 227
228#define EVENT_STORAGE_SIZE 128
229extern struct mutex event_storage_mutex;
230extern char event_storage[EVENT_STORAGE_SIZE];
231
218extern int trace_event_raw_init(struct ftrace_event_call *call); 232extern int trace_event_raw_init(struct ftrace_event_call *call);
219extern int trace_define_field(struct ftrace_event_call *call, const char *type, 233extern int trace_define_field(struct ftrace_event_call *call, const char *type,
220 const char *name, int offset, int size, 234 const char *name, int offset, int size,
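
__TRACE_EVENT_FLAGS() works by generating an early initcall that pokes
the flags into the event structure after it has been defined; for a
hypothetical event "foo" flagged TRACE_EVENT_FL_CAP_ANY, the macro
expands to:

static int __init trace_init_flags_foo(void)
{
	event_foo.flags = TRACE_EVENT_FL_CAP_ANY;
	return 0;
}
early_initcall(trace_init_flags_foo);
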
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index e7d1b2e0070d..b78edb58ee66 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -275,7 +275,9 @@ extern int arch_prepared_optinsn(struct arch_optimized_insn *optinsn);
275extern int arch_check_optimized_kprobe(struct optimized_kprobe *op); 275extern int arch_check_optimized_kprobe(struct optimized_kprobe *op);
276extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op); 276extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op);
277extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op); 277extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op);
278extern int arch_optimize_kprobe(struct optimized_kprobe *op); 278extern void arch_optimize_kprobes(struct list_head *oplist);
279extern void arch_unoptimize_kprobes(struct list_head *oplist,
280 struct list_head *done_list);
279extern void arch_unoptimize_kprobe(struct optimized_kprobe *op); 281extern void arch_unoptimize_kprobe(struct optimized_kprobe *op);
280extern kprobe_opcode_t *get_optinsn_slot(void); 282extern kprobe_opcode_t *get_optinsn_slot(void);
281extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty); 283extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty);
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 06aab5eee134..c536f8545f74 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -14,22 +14,14 @@
14 * may be used to reset the timeout - for code which intentionally 14 * may be used to reset the timeout - for code which intentionally
15 * disables interrupts for a long time. This call is stateless. 15 * disables interrupts for a long time. This call is stateless.
16 */ 16 */
17#ifdef ARCH_HAS_NMI_WATCHDOG 17#if defined(ARCH_HAS_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
18#include <asm/nmi.h> 18#include <asm/nmi.h>
19extern void touch_nmi_watchdog(void); 19extern void touch_nmi_watchdog(void);
20extern void acpi_nmi_disable(void);
21extern void acpi_nmi_enable(void);
22#else 20#else
23#ifndef CONFIG_HARDLOCKUP_DETECTOR
24static inline void touch_nmi_watchdog(void) 21static inline void touch_nmi_watchdog(void)
25{ 22{
26 touch_softlockup_watchdog(); 23 touch_softlockup_watchdog();
27} 24}
28#else
29extern void touch_nmi_watchdog(void);
30#endif
31static inline void acpi_nmi_disable(void) { }
32static inline void acpi_nmi_enable(void) { }
33#endif 25#endif
34 26
35/* 27/*
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4f1279e105ee..dda5b0a3ff60 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -215,8 +215,9 @@ struct perf_event_attr {
215 */ 215 */
216 precise_ip : 2, /* skid constraint */ 216 precise_ip : 2, /* skid constraint */
217 mmap_data : 1, /* non-exec mmap data */ 217 mmap_data : 1, /* non-exec mmap data */
218 sample_id_all : 1, /* sample_type all events */
218 219
219 __reserved_1 : 46; 220 __reserved_1 : 45;
220 221
221 union { 222 union {
222 __u32 wakeup_events; /* wakeup every n events */ 223 __u32 wakeup_events; /* wakeup every n events */
@@ -327,6 +328,15 @@ struct perf_event_header {
327enum perf_event_type { 328enum perf_event_type {
328 329
329 /* 330 /*
331 * If perf_event_attr.sample_id_all is set then all event types will
332 * have the sample_type selected fields related to where/when
333 * (identity) an event took place (TID, TIME, ID, CPU, STREAM_ID)
334 * described in PERF_RECORD_SAMPLE below; they will be stashed just after
335 * the perf_event_header and the fields already present for the event,
336 * i.e. at the end of the payload. That way a newer perf.data
337 * file will be supported by older perf tools, with these new optional
338 * fields being ignored.
339 *
330 * The MMAP events record the PROT_EXEC mappings so that we can 340 * The MMAP events record the PROT_EXEC mappings so that we can
331 * correlate userspace IPs to code. They have the following structure: 341 * correlate userspace IPs to code. They have the following structure:
332 * 342 *
@@ -578,6 +588,10 @@ struct perf_event;
578struct pmu { 588struct pmu {
579 struct list_head entry; 589 struct list_head entry;
580 590
591 struct device *dev;
592 char *name;
593 int type;
594
581 int * __percpu pmu_disable_count; 595 int * __percpu pmu_disable_count;
582 struct perf_cpu_context * __percpu pmu_cpu_context; 596 struct perf_cpu_context * __percpu pmu_cpu_context;
583 int task_ctx_nr; 597 int task_ctx_nr;
@@ -758,6 +772,9 @@ struct perf_event {
758 u64 shadow_ctx_time; 772 u64 shadow_ctx_time;
759 773
760 struct perf_event_attr attr; 774 struct perf_event_attr attr;
775 u16 header_size;
776 u16 id_header_size;
777 u16 read_size;
761 struct hw_perf_event hw; 778 struct hw_perf_event hw;
762 779
763 struct perf_event_context *ctx; 780 struct perf_event_context *ctx;
@@ -903,7 +920,7 @@ struct perf_output_handle {
903 920
904#ifdef CONFIG_PERF_EVENTS 921#ifdef CONFIG_PERF_EVENTS
905 922
906extern int perf_pmu_register(struct pmu *pmu); 923extern int perf_pmu_register(struct pmu *pmu, char *name, int type);
907extern void perf_pmu_unregister(struct pmu *pmu); 924extern void perf_pmu_unregister(struct pmu *pmu);
908 925
909extern int perf_num_counters(void); 926extern int perf_num_counters(void);
@@ -970,6 +987,11 @@ extern int perf_event_overflow(struct perf_event *event, int nmi,
970 struct perf_sample_data *data, 987 struct perf_sample_data *data,
971 struct pt_regs *regs); 988 struct pt_regs *regs);
972 989
990static inline bool is_sampling_event(struct perf_event *event)
991{
992 return event->attr.sample_period != 0;
993}
994
973/* 995/*
974 * Return 1 for a software event, 0 for a hardware event 996 * Return 1 for a software event, 0 for a hardware event
975 */ 997 */
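
Because the identity fields ride at the end of every non-sample record,
a reader that knows the event's sample_type can locate them by walking
back from the record's total size. A hedged userspace sketch (the
helpers are ours; the kernel emits the words in the order TID, TIME,
ID, STREAM_ID, CPU):

#include <stddef.h>
#include <stdint.h>
#include <linux/perf_event.h>

/* How many trailing u64 identity words a non-sample record carries
 * when attr.sample_id_all is set (TID and CPU each pack two u32s). */
static size_t id_words(uint64_t sample_type)
{
	size_t n = 0;

	if (sample_type & PERF_SAMPLE_TID)       n++;
	if (sample_type & PERF_SAMPLE_TIME)      n++;
	if (sample_type & PERF_SAMPLE_ID)        n++;
	if (sample_type & PERF_SAMPLE_STREAM_ID) n++;
	if (sample_type & PERF_SAMPLE_CPU)       n++;
	return n;
}

/* Point at the identity block of a complete record */
static const uint64_t *id_block(const struct perf_event_header *hdr,
				uint64_t sample_type)
{
	const char *end = (const char *)hdr + hdr->size;

	return (const uint64_t *)end - id_words(sample_type);
}
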
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 223874538b33..a99d735db3df 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -316,6 +316,7 @@ extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
316 size_t *lenp, loff_t *ppos); 316 size_t *lenp, loff_t *ppos);
317extern unsigned int softlockup_panic; 317extern unsigned int softlockup_panic;
318extern int softlockup_thresh; 318extern int softlockup_thresh;
319void lockup_detector_init(void);
319#else 320#else
320static inline void touch_softlockup_watchdog(void) 321static inline void touch_softlockup_watchdog(void)
321{ 322{
@@ -326,6 +327,9 @@ static inline void touch_softlockup_watchdog_sync(void)
326static inline void touch_all_softlockup_watchdogs(void) 327static inline void touch_all_softlockup_watchdogs(void)
327{ 328{
328} 329}
330static inline void lockup_detector_init(void)
331{
332}
329#endif 333#endif
330 334
331#ifdef CONFIG_DETECT_HUNG_TASK 335#ifdef CONFIG_DETECT_HUNG_TASK
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index 51efbef38fb0..25310f1d7f37 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -2,6 +2,7 @@
2#define __LINUX_STACKTRACE_H 2#define __LINUX_STACKTRACE_H
3 3
4struct task_struct; 4struct task_struct;
5struct pt_regs;
5 6
6#ifdef CONFIG_STACKTRACE 7#ifdef CONFIG_STACKTRACE
7struct task_struct; 8struct task_struct;
@@ -13,7 +14,8 @@ struct stack_trace {
13}; 14};
14 15
15extern void save_stack_trace(struct stack_trace *trace); 16extern void save_stack_trace(struct stack_trace *trace);
16extern void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp); 17extern void save_stack_trace_regs(struct stack_trace *trace,
18 struct pt_regs *regs);
17extern void save_stack_trace_tsk(struct task_struct *tsk, 19extern void save_stack_trace_tsk(struct task_struct *tsk,
18 struct stack_trace *trace); 20 struct stack_trace *trace);
19 21
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index cacc27a0e285..18cd0684fc4e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -127,8 +127,6 @@ extern struct trace_event_functions exit_syscall_print_funcs;
127#define SYSCALL_TRACE_ENTER_EVENT(sname) \ 127#define SYSCALL_TRACE_ENTER_EVENT(sname) \
128 static struct syscall_metadata \ 128 static struct syscall_metadata \
129 __attribute__((__aligned__(4))) __syscall_meta_##sname; \ 129 __attribute__((__aligned__(4))) __syscall_meta_##sname; \
130 static struct ftrace_event_call \
131 __attribute__((__aligned__(4))) event_enter_##sname; \
132 static struct ftrace_event_call __used \ 130 static struct ftrace_event_call __used \
133 __attribute__((__aligned__(4))) \ 131 __attribute__((__aligned__(4))) \
134 __attribute__((section("_ftrace_events"))) \ 132 __attribute__((section("_ftrace_events"))) \
@@ -137,13 +135,12 @@ extern struct trace_event_functions exit_syscall_print_funcs;
137 .class = &event_class_syscall_enter, \ 135 .class = &event_class_syscall_enter, \
138 .event.funcs = &enter_syscall_print_funcs, \ 136 .event.funcs = &enter_syscall_print_funcs, \
139 .data = (void *)&__syscall_meta_##sname,\ 137 .data = (void *)&__syscall_meta_##sname,\
140 } 138 }; \
139 __TRACE_EVENT_FLAGS(enter_##sname, TRACE_EVENT_FL_CAP_ANY)
141 140
142#define SYSCALL_TRACE_EXIT_EVENT(sname) \ 141#define SYSCALL_TRACE_EXIT_EVENT(sname) \
143 static struct syscall_metadata \ 142 static struct syscall_metadata \
144 __attribute__((__aligned__(4))) __syscall_meta_##sname; \ 143 __attribute__((__aligned__(4))) __syscall_meta_##sname; \
145 static struct ftrace_event_call \
146 __attribute__((__aligned__(4))) event_exit_##sname; \
147 static struct ftrace_event_call __used \ 144 static struct ftrace_event_call __used \
148 __attribute__((__aligned__(4))) \ 145 __attribute__((__aligned__(4))) \
149 __attribute__((section("_ftrace_events"))) \ 146 __attribute__((section("_ftrace_events"))) \
@@ -152,7 +149,8 @@ extern struct trace_event_functions exit_syscall_print_funcs;
152 .class = &event_class_syscall_exit, \ 149 .class = &event_class_syscall_exit, \
153 .event.funcs = &exit_syscall_print_funcs, \ 150 .event.funcs = &exit_syscall_print_funcs, \
154 .data = (void *)&__syscall_meta_##sname,\ 151 .data = (void *)&__syscall_meta_##sname,\
155 } 152 }; \
153 __TRACE_EVENT_FLAGS(exit_##sname, TRACE_EVENT_FL_CAP_ANY)
156 154
157#define SYSCALL_METADATA(sname, nb) \ 155#define SYSCALL_METADATA(sname, nb) \
158 SYSCALL_TRACE_ENTER_EVENT(sname); \ 156 SYSCALL_TRACE_ENTER_EVENT(sname); \
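
Two things change per event here: the initializer is now closed with };
inside the macro itself (the flags initcall has to follow it in the
same expansion), and every syscall event is tagged
TRACE_EVENT_FL_CAP_ANY. For a hypothetical sname of _openat, the tail
of the expansion becomes roughly:

static struct ftrace_event_call __used
__attribute__((__aligned__(4)))
__attribute__((section("_ftrace_events")))
event_enter__openat = {
	/* .name, .class, .event.funcs, .data as in the hunk above */
};
__TRACE_EVENT_FLAGS(enter__openat, TRACE_EVENT_FL_CAP_ANY)
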
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index a4a90b6726ce..d3e4f87e95c0 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -106,6 +106,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
106 106
107#define TP_PROTO(args...) args 107#define TP_PROTO(args...) args
108#define TP_ARGS(args...) args 108#define TP_ARGS(args...) args
109#define TP_CONDITION(args...) args
109 110
110#ifdef CONFIG_TRACEPOINTS 111#ifdef CONFIG_TRACEPOINTS
111 112
@@ -119,12 +120,14 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
119 * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just 120 * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just
120 * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto". 121 * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto".
121 */ 122 */
122#define __DO_TRACE(tp, proto, args) \ 123#define __DO_TRACE(tp, proto, args, cond) \
123 do { \ 124 do { \
124 struct tracepoint_func *it_func_ptr; \ 125 struct tracepoint_func *it_func_ptr; \
125 void *it_func; \ 126 void *it_func; \
126 void *__data; \ 127 void *__data; \
127 \ 128 \
129 if (!(cond)) \
130 return; \
128 rcu_read_lock_sched_notrace(); \ 131 rcu_read_lock_sched_notrace(); \
129 it_func_ptr = rcu_dereference_sched((tp)->funcs); \ 132 it_func_ptr = rcu_dereference_sched((tp)->funcs); \
130 if (it_func_ptr) { \ 133 if (it_func_ptr) { \
@@ -142,7 +145,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
142 * not add unwanted padding between the beginning of the section and the 145 * not add unwanted padding between the beginning of the section and the
143 * structure. Force alignment to the same alignment as the section start. 146 * structure. Force alignment to the same alignment as the section start.
144 */ 147 */
145#define __DECLARE_TRACE(name, proto, args, data_proto, data_args) \ 148#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
146 extern struct tracepoint __tracepoint_##name; \ 149 extern struct tracepoint __tracepoint_##name; \
147 static inline void trace_##name(proto) \ 150 static inline void trace_##name(proto) \
148 { \ 151 { \
@@ -151,7 +154,8 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
151do_trace: \ 154do_trace: \
152 __DO_TRACE(&__tracepoint_##name, \ 155 __DO_TRACE(&__tracepoint_##name, \
153 TP_PROTO(data_proto), \ 156 TP_PROTO(data_proto), \
154 TP_ARGS(data_args)); \ 157 TP_ARGS(data_args), \
158 TP_CONDITION(cond)); \
155 } \ 159 } \
156 static inline int \ 160 static inline int \
157 register_trace_##name(void (*probe)(data_proto), void *data) \ 161 register_trace_##name(void (*probe)(data_proto), void *data) \
@@ -186,7 +190,7 @@ do_trace: \
186 EXPORT_SYMBOL(__tracepoint_##name) 190 EXPORT_SYMBOL(__tracepoint_##name)
187 191
188#else /* !CONFIG_TRACEPOINTS */ 192#else /* !CONFIG_TRACEPOINTS */
189#define __DECLARE_TRACE(name, proto, args, data_proto, data_args) \ 193#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
190 static inline void trace_##name(proto) \ 194 static inline void trace_##name(proto) \
191 { } \ 195 { } \
192 static inline int \ 196 static inline int \
@@ -227,13 +231,20 @@ do_trace: \
227 * "void *__data, proto" as the callback prototype. 231 * "void *__data, proto" as the callback prototype.
228 */ 232 */
229#define DECLARE_TRACE_NOARGS(name) \ 233#define DECLARE_TRACE_NOARGS(name) \
230 __DECLARE_TRACE(name, void, , void *__data, __data) 234 __DECLARE_TRACE(name, void, , 1, void *__data, __data)
231 235
232#define DECLARE_TRACE(name, proto, args) \ 236#define DECLARE_TRACE(name, proto, args) \
233 __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ 237 __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), 1, \
234 PARAMS(void *__data, proto), \ 238 PARAMS(void *__data, proto), \
235 PARAMS(__data, args)) 239 PARAMS(__data, args))
236 240
241#define DECLARE_TRACE_CONDITION(name, proto, args, cond) \
242 __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), PARAMS(cond), \
243 PARAMS(void *__data, proto), \
244 PARAMS(__data, args))
245
246#define TRACE_EVENT_FLAGS(event, flag)
247
237#endif /* DECLARE_TRACE */ 248#endif /* DECLARE_TRACE */
238 249
239#ifndef TRACE_EVENT 250#ifndef TRACE_EVENT
@@ -347,11 +358,21 @@ do_trace: \
347 DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) 358 DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
348#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ 359#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
349 DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) 360 DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
361#define DEFINE_EVENT_CONDITION(template, name, proto, \
362 args, cond) \
363 DECLARE_TRACE_CONDITION(name, PARAMS(proto), \
364 PARAMS(args), PARAMS(cond))
350 365
351#define TRACE_EVENT(name, proto, args, struct, assign, print) \ 366#define TRACE_EVENT(name, proto, args, struct, assign, print) \
352 DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) 367 DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
353#define TRACE_EVENT_FN(name, proto, args, struct, \ 368#define TRACE_EVENT_FN(name, proto, args, struct, \
354 assign, print, reg, unreg) \ 369 assign, print, reg, unreg) \
355 DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) 370 DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
371#define TRACE_EVENT_CONDITION(name, proto, args, cond, \
372 struct, assign, print) \
373 DECLARE_TRACE_CONDITION(name, PARAMS(proto), \
374 PARAMS(args), PARAMS(cond))
375
376#define TRACE_EVENT_FLAGS(event, flag)
356 377
357#endif /* ifdef TRACE_EVENT (see note above) */ 378#endif /* ifdef TRACE_EVENT (see note above) */
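
TP_CONDITION() is evaluated before the rcu_read_lock_sched_notrace()
in __DO_TRACE(), so a tracepoint that usually fires with an
uninteresting argument skips the probe walk entirely. A hypothetical
event built with the new macro (the event and its argument are made up
for illustration):

TRACE_EVENT_CONDITION(mm_reclaim_end,

	TP_PROTO(unsigned long nr_reclaimed),

	TP_ARGS(nr_reclaimed),

	TP_CONDITION(nr_reclaimed != 0),

	TP_STRUCT__entry(
		__field(unsigned long, nr_reclaimed)
	),

	TP_fast_assign(
		__entry->nr_reclaimed = nr_reclaimed;
	),

	TP_printk("nr_reclaimed=%lu", __entry->nr_reclaimed)
);
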
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index 1dfab5401511..b0b4eb24d592 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -26,6 +26,15 @@
26#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ 26#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
27 DEFINE_TRACE(name) 27 DEFINE_TRACE(name)
28 28
29#undef TRACE_EVENT_CONDITION
30#define TRACE_EVENT_CONDITION(name, proto, args, cond, tstruct, assign, print) \
31 TRACE_EVENT(name, \
32 PARAMS(proto), \
33 PARAMS(args), \
34 PARAMS(tstruct), \
35 PARAMS(assign), \
36 PARAMS(print))
37
29#undef TRACE_EVENT_FN 38#undef TRACE_EVENT_FN
30#define TRACE_EVENT_FN(name, proto, args, tstruct, \ 39#define TRACE_EVENT_FN(name, proto, args, tstruct, \
31 assign, print, reg, unreg) \ 40 assign, print, reg, unreg) \
@@ -39,6 +48,10 @@
39#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ 48#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
40 DEFINE_TRACE(name) 49 DEFINE_TRACE(name)
41 50
51#undef DEFINE_EVENT_CONDITION
52#define DEFINE_EVENT_CONDITION(template, name, proto, args, cond) \
53 DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
54
42#undef DECLARE_TRACE 55#undef DECLARE_TRACE
43#define DECLARE_TRACE(name, proto, args) \ 56#define DECLARE_TRACE(name, proto, args) \
44 DEFINE_TRACE(name) 57 DEFINE_TRACE(name)
@@ -75,9 +88,11 @@
75 88
76#undef TRACE_EVENT 89#undef TRACE_EVENT
77#undef TRACE_EVENT_FN 90#undef TRACE_EVENT_FN
91#undef TRACE_EVENT_CONDITION
78#undef DECLARE_EVENT_CLASS 92#undef DECLARE_EVENT_CLASS
79#undef DEFINE_EVENT 93#undef DEFINE_EVENT
80#undef DEFINE_EVENT_PRINT 94#undef DEFINE_EVENT_PRINT
95#undef DEFINE_EVENT_CONDITION
81#undef TRACE_HEADER_MULTI_READ 96#undef TRACE_HEADER_MULTI_READ
82#undef DECLARE_TRACE 97#undef DECLARE_TRACE
83 98
diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h
index fb726ac7caee..5a4c04a75b3d 100644
--- a/include/trace/events/syscalls.h
+++ b/include/trace/events/syscalls.h
@@ -40,6 +40,8 @@ TRACE_EVENT_FN(sys_enter,
40 syscall_regfunc, syscall_unregfunc 40 syscall_regfunc, syscall_unregfunc
41); 41);
42 42
43TRACE_EVENT_FLAGS(sys_enter, TRACE_EVENT_FL_CAP_ANY)
44
43TRACE_EVENT_FN(sys_exit, 45TRACE_EVENT_FN(sys_exit,
44 46
45 TP_PROTO(struct pt_regs *regs, long ret), 47 TP_PROTO(struct pt_regs *regs, long ret),
@@ -62,6 +64,8 @@ TRACE_EVENT_FN(sys_exit,
62 syscall_regfunc, syscall_unregfunc 64 syscall_regfunc, syscall_unregfunc
63); 65);
64 66
67TRACE_EVENT_FLAGS(sys_exit, TRACE_EVENT_FL_CAP_ANY)
68
65#endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */ 69#endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */
66 70
67#endif /* _TRACE_EVENTS_SYSCALLS_H */ 71#endif /* _TRACE_EVENTS_SYSCALLS_H */
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index a9377c0083ad..e16610c208c9 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -82,6 +82,10 @@
82 TRACE_EVENT(name, PARAMS(proto), PARAMS(args), \ 82 TRACE_EVENT(name, PARAMS(proto), PARAMS(args), \
83 PARAMS(tstruct), PARAMS(assign), PARAMS(print)) \ 83 PARAMS(tstruct), PARAMS(assign), PARAMS(print)) \
84 84
85#undef TRACE_EVENT_FLAGS
86#define TRACE_EVENT_FLAGS(name, value) \
87 __TRACE_EVENT_FLAGS(name, value)
88
85#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) 89#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
86 90
87 91
@@ -129,6 +133,9 @@
129#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ 133#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
130 DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) 134 DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
131 135
136#undef TRACE_EVENT_FLAGS
137#define TRACE_EVENT_FLAGS(event, flag)
138
132#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) 139#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
133 140
134/* 141/*
@@ -289,13 +296,19 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = { \
289 296
290#undef __array 297#undef __array
291#define __array(type, item, len) \ 298#define __array(type, item, len) \
292 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ 299 do { \
293 ret = trace_define_field(event_call, #type "[" #len "]", #item, \ 300 mutex_lock(&event_storage_mutex); \
301 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
302 snprintf(event_storage, sizeof(event_storage), \
303 "%s[%d]", #type, len); \
304 ret = trace_define_field(event_call, event_storage, #item, \
294 offsetof(typeof(field), item), \ 305 offsetof(typeof(field), item), \
295 sizeof(field.item), \ 306 sizeof(field.item), \
296 is_signed_type(type), FILTER_OTHER); \ 307 is_signed_type(type), FILTER_OTHER); \
297 if (ret) \ 308 mutex_unlock(&event_storage_mutex); \
298 return ret; 309 if (ret) \
310 return ret; \
311 } while (0);
299 312
300#undef __dynamic_array 313#undef __dynamic_array
301#define __dynamic_array(type, item, len) \ 314#define __dynamic_array(type, item, len) \
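
The snprintf() detour exists because #len stringifies the unexpanded
macro argument: an event declaring __array(char, comm, TASK_COMM_LEN)
used to register its field type as "char[TASK_COMM_LEN]", while the new
code evaluates len and stores "char[16]". The shared event_storage
buffer spares each event's init function a 128-byte stack array, and
the mutex serializes its (init-time) users. A userspace illustration of
the stringification difference (names are ours):

#include <stdio.h>

#define TASK_COMM_LEN 16

#define OLD_TYPE(type, len)	#type "[" #len "]"

int main(void)
{
	char buf[128];

	/* old: the macro name survives stringification */
	printf("old: %s\n", OLD_TYPE(char, TASK_COMM_LEN));

	/* new: the value is evaluated before formatting */
	snprintf(buf, sizeof(buf), "%s[%d]", "char", TASK_COMM_LEN);
	printf("new: %s\n", buf);
	return 0;
}
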
diff --git a/init/main.c b/init/main.c
index 8646401f7a0e..ea51770c0170 100644
--- a/init/main.c
+++ b/init/main.c
@@ -67,6 +67,7 @@
67#include <linux/sfi.h> 67#include <linux/sfi.h>
68#include <linux/shmem_fs.h> 68#include <linux/shmem_fs.h>
69#include <linux/slab.h> 69#include <linux/slab.h>
70#include <linux/perf_event.h>
70 71
71#include <asm/io.h> 72#include <asm/io.h>
72#include <asm/bugs.h> 73#include <asm/bugs.h>
@@ -603,6 +604,8 @@ asmlinkage void __init start_kernel(void)
603 "enabled *very* early, fixing it\n"); 604 "enabled *very* early, fixing it\n");
604 local_irq_disable(); 605 local_irq_disable();
605 } 606 }
607 idr_init_cache();
608 perf_event_init();
606 rcu_init(); 609 rcu_init();
607 radix_tree_init(); 610 radix_tree_init();
608 /* init some links before init_ISA_irqs() */ 611 /* init some links before init_ISA_irqs() */
@@ -658,7 +661,6 @@ asmlinkage void __init start_kernel(void)
658 enable_debug_pagealloc(); 661 enable_debug_pagealloc();
659 kmemleak_init(); 662 kmemleak_init();
660 debug_objects_mem_init(); 663 debug_objects_mem_init();
661 idr_init_cache();
662 setup_per_cpu_pageset(); 664 setup_per_cpu_pageset();
663 numa_policy_init(); 665 numa_policy_init();
664 if (late_time_init) 666 if (late_time_init)
@@ -882,6 +884,7 @@ static int __init kernel_init(void * unused)
882 smp_prepare_cpus(setup_max_cpus); 884 smp_prepare_cpus(setup_max_cpus);
883 885
884 do_pre_smp_initcalls(); 886 do_pre_smp_initcalls();
887 lockup_detector_init();
885 888
886 smp_init(); 889 smp_init();
887 sched_init_smp(); 890 sched_init_smp();
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index e5325825aeb6..086adf25a55e 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -641,7 +641,7 @@ int __init init_hw_breakpoint(void)
641 641
642 constraints_initialized = 1; 642 constraints_initialized = 1;
643 643
644 perf_pmu_register(&perf_breakpoint); 644 perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);
645 645
646 return register_die_notifier(&hw_breakpoint_exceptions_nb); 646 return register_die_notifier(&hw_breakpoint_exceptions_nb);
647 647
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9737a76e106f..7663e5df0e6f 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -354,13 +354,20 @@ static inline int kprobe_aggrprobe(struct kprobe *p)
354 return p->pre_handler == aggr_pre_handler; 354 return p->pre_handler == aggr_pre_handler;
355} 355}
356 356
357/* Return true(!0) if the kprobe is unused */
358static inline int kprobe_unused(struct kprobe *p)
359{
360 return kprobe_aggrprobe(p) && kprobe_disabled(p) &&
361 list_empty(&p->list);
362}
363
357/* 364/*
358 * Keep all fields in the kprobe consistent 365 * Keep all fields in the kprobe consistent
359 */ 366 */
360static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p) 367static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p)
361{ 368{
362 memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t)); 369 memcpy(&p->opcode, &ap->opcode, sizeof(kprobe_opcode_t));
363 memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn)); 370 memcpy(&p->ainsn, &ap->ainsn, sizeof(struct arch_specific_insn));
364} 371}
365 372
366#ifdef CONFIG_OPTPROBES 373#ifdef CONFIG_OPTPROBES
@@ -384,6 +391,17 @@ void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
384 } 391 }
385} 392}
386 393
394/* Free optimized instructions and optimized_kprobe */
395static __kprobes void free_aggr_kprobe(struct kprobe *p)
396{
397 struct optimized_kprobe *op;
398
399 op = container_of(p, struct optimized_kprobe, kp);
400 arch_remove_optimized_kprobe(op);
401 arch_remove_kprobe(p);
402 kfree(op);
403}
404
387/* Return true(!0) if the kprobe is ready for optimization. */ 405/* Return true(!0) if the kprobe is ready for optimization. */
388static inline int kprobe_optready(struct kprobe *p) 406static inline int kprobe_optready(struct kprobe *p)
389{ 407{
@@ -397,6 +415,33 @@ static inline int kprobe_optready(struct kprobe *p)
397 return 0; 415 return 0;
398} 416}
399 417
418/* Return true(!0) if the kprobe is disarmed. Note: p must be on hash list */
419static inline int kprobe_disarmed(struct kprobe *p)
420{
421 struct optimized_kprobe *op;
422
423 /* If the kprobe is not an aggr/opt probe, just report whether it is disabled */
424 if (!kprobe_aggrprobe(p))
425 return kprobe_disabled(p);
426
427 op = container_of(p, struct optimized_kprobe, kp);
428
429 return kprobe_disabled(p) && list_empty(&op->list);
430}
431
432/* Return true(!0) if the probe is queued on (un)optimizing lists */
433static int __kprobes kprobe_queued(struct kprobe *p)
434{
435 struct optimized_kprobe *op;
436
437 if (kprobe_aggrprobe(p)) {
438 op = container_of(p, struct optimized_kprobe, kp);
439 if (!list_empty(&op->list))
440 return 1;
441 }
442 return 0;
443}
444
400/* 445/*
401 * Return an optimized kprobe whose optimizing code replaces 446 * Return an optimized kprobe whose optimizing code replaces
402 * instructions including addr (exclude breakpoint). 447 * instructions including addr (exclude breakpoint).
@@ -422,30 +467,23 @@ static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)
422 467
423/* Optimization staging list, protected by kprobe_mutex */ 468/* Optimization staging list, protected by kprobe_mutex */
424static LIST_HEAD(optimizing_list); 469static LIST_HEAD(optimizing_list);
470static LIST_HEAD(unoptimizing_list);
425 471
426static void kprobe_optimizer(struct work_struct *work); 472static void kprobe_optimizer(struct work_struct *work);
427static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer); 473static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
474static DECLARE_COMPLETION(optimizer_comp);
428#define OPTIMIZE_DELAY 5 475#define OPTIMIZE_DELAY 5
429 476
430/* Kprobe jump optimizer */ 477/*
431static __kprobes void kprobe_optimizer(struct work_struct *work) 478 * Optimize (replace a breakpoint with a jump) kprobes listed on
479 * optimizing_list.
480 */
481static __kprobes void do_optimize_kprobes(void)
432{ 482{
433 struct optimized_kprobe *op, *tmp; 483 /* Optimization is never done while kprobes are disarmed */
434 484 if (kprobes_all_disarmed || !kprobes_allow_optimization ||
435 /* Lock modules while optimizing kprobes */ 485 list_empty(&optimizing_list))
436 mutex_lock(&module_mutex); 486 return;
437 mutex_lock(&kprobe_mutex);
438 if (kprobes_all_disarmed || !kprobes_allow_optimization)
439 goto end;
440
441 /*
442 * Wait for quiesence period to ensure all running interrupts
443 * are done. Because optprobe may modify multiple instructions
444 * there is a chance that Nth instruction is interrupted. In that
445 * case, running interrupt can return to 2nd-Nth byte of jump
446 * instruction. This wait is for avoiding it.
447 */
448 synchronize_sched();
449 487
450 /* 488 /*
451 * The optimization/unoptimization refers online_cpus via 489 * The optimization/unoptimization refers online_cpus via
@@ -459,17 +497,111 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
459 */ 497 */
460 get_online_cpus(); 498 get_online_cpus();
461 mutex_lock(&text_mutex); 499 mutex_lock(&text_mutex);
462 list_for_each_entry_safe(op, tmp, &optimizing_list, list) { 500 arch_optimize_kprobes(&optimizing_list);
463 WARN_ON(kprobe_disabled(&op->kp)); 501 mutex_unlock(&text_mutex);
464 if (arch_optimize_kprobe(op) < 0) 502 put_online_cpus();
465 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; 503}
466 list_del_init(&op->list); 504
505/*
506 * Unoptimize (replace a jump with a breakpoint and remove the breakpoint
507 * if need) kprobes listed on unoptimizing_list.
508 */
509static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
510{
511 struct optimized_kprobe *op, *tmp;
512
513 /* Unoptimization must be done anytime */
514 if (list_empty(&unoptimizing_list))
515 return;
516
517 /* Ditto to do_optimize_kprobes */
518 get_online_cpus();
519 mutex_lock(&text_mutex);
520 arch_unoptimize_kprobes(&unoptimizing_list, free_list);
521 /* Loop free_list for disarming */
522 list_for_each_entry_safe(op, tmp, free_list, list) {
523 /* Disarm probes if marked disabled */
524 if (kprobe_disabled(&op->kp))
525 arch_disarm_kprobe(&op->kp);
526 if (kprobe_unused(&op->kp)) {
527 /*
528 * Remove unused probes from hash list. After waiting
529 * for synchronization, these probes are reclaimed.
530 * (reclaiming is done by do_free_cleaned_kprobes.)
531 */
532 hlist_del_rcu(&op->kp.hlist);
533 } else
534 list_del_init(&op->list);
467 } 535 }
468 mutex_unlock(&text_mutex); 536 mutex_unlock(&text_mutex);
469 put_online_cpus(); 537 put_online_cpus();
470end: 538}
539
540/* Reclaim all kprobes on the free_list */
541static __kprobes void do_free_cleaned_kprobes(struct list_head *free_list)
542{
543 struct optimized_kprobe *op, *tmp;
544
545 list_for_each_entry_safe(op, tmp, free_list, list) {
546 BUG_ON(!kprobe_unused(&op->kp));
547 list_del_init(&op->list);
548 free_aggr_kprobe(&op->kp);
549 }
550}
551
552/* Start the optimizer after OPTIMIZE_DELAY has passed */
553static __kprobes void kick_kprobe_optimizer(void)
554{
555 if (!delayed_work_pending(&optimizing_work))
556 schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
557}
558
559/* Kprobe jump optimizer */
560static __kprobes void kprobe_optimizer(struct work_struct *work)
561{
562 LIST_HEAD(free_list);
563
564 /* Lock modules while optimizing kprobes */
565 mutex_lock(&module_mutex);
566 mutex_lock(&kprobe_mutex);
567
568 /*
569 * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
570 * kprobes before waiting for the quiescence period.
571 */
572 do_unoptimize_kprobes(&free_list);
573
574 /*
575 * Step 2: Wait for the quiescence period to ensure all running
576 * interrupts are done. Because an optprobe may modify multiple
577 * instructions, there is a chance that the Nth instruction is
578 * interrupted. In that case, a running interrupt handler can return
579 * into the 2nd-Nth byte of the jump instruction; this wait avoids it.
580 */
581 synchronize_sched();
582
583 /* Step 3: Optimize kprobes after the quiescence period */
584 do_optimize_kprobes();
585
586 /* Step 4: Free cleaned kprobes after the quiescence period */
587 do_free_cleaned_kprobes(&free_list);
588
471 mutex_unlock(&kprobe_mutex); 589 mutex_unlock(&kprobe_mutex);
472 mutex_unlock(&module_mutex); 590 mutex_unlock(&module_mutex);
591
592 /* Step 5: Kick optimizer again if needed */
593 if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
594 kick_kprobe_optimizer();
595 else
596 /* Wake up all waiters */
597 complete_all(&optimizer_comp);
598}
599
600/* Wait for optimization and unoptimization to complete */
601static __kprobes void wait_for_kprobe_optimizer(void)
602{
603 if (delayed_work_pending(&optimizing_work))
604 wait_for_completion(&optimizer_comp);
473} 605}
474 606
475/* Optimize kprobe if p is ready to be optimized */ 607/* Optimize kprobe if p is ready to be optimized */
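
Stripped of the kprobe specifics, the plumbing added above is a
standard delayed-work batching pattern: queue items, let one worker
drain them after a delay, re-kick if more arrived, and let callers
wait on a completion. A minimal sketch using only stock primitives
(all names here are ours):

#include <linux/workqueue.h>
#include <linux/completion.h>
#include <linux/list.h>
#include <linux/mutex.h>

static LIST_HEAD(pending);		/* items queued for the worker */
static DEFINE_MUTEX(pending_lock);
static void drain_fn(struct work_struct *work);
static DECLARE_DELAYED_WORK(drain_work, drain_fn);
static DECLARE_COMPLETION(drain_done);

static void kick_drain(void)
{
	if (!delayed_work_pending(&drain_work))
		schedule_delayed_work(&drain_work, 5);	/* jiffies */
}

static void wait_for_drain(void)
{
	if (delayed_work_pending(&drain_work))
		wait_for_completion(&drain_done);
}

static void drain_fn(struct work_struct *work)
{
	mutex_lock(&pending_lock);
	/* ... process and empty 'pending' here ... */
	mutex_unlock(&pending_lock);

	if (!list_empty(&pending))	/* new work arrived meanwhile */
		kick_drain();
	else
		complete_all(&drain_done);
}
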
@@ -495,42 +627,99 @@ static __kprobes void optimize_kprobe(struct kprobe *p)
495 /* Check if it is already optimized. */ 627 /* Check if it is already optimized. */
496 if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) 628 if (op->kp.flags & KPROBE_FLAG_OPTIMIZED)
497 return; 629 return;
498
499 op->kp.flags |= KPROBE_FLAG_OPTIMIZED; 630 op->kp.flags |= KPROBE_FLAG_OPTIMIZED;
500 list_add(&op->list, &optimizing_list); 631
501 if (!delayed_work_pending(&optimizing_work)) 632 if (!list_empty(&op->list))
502 schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY); 633 /* This is under unoptimizing. Just dequeue the probe */
634 list_del_init(&op->list);
635 else {
636 list_add(&op->list, &optimizing_list);
637 kick_kprobe_optimizer();
638 }
639}
640
641/* Short cut to direct unoptimizing */
642static __kprobes void force_unoptimize_kprobe(struct optimized_kprobe *op)
643{
644 get_online_cpus();
645 arch_unoptimize_kprobe(op);
646 put_online_cpus();
647 if (kprobe_disabled(&op->kp))
648 arch_disarm_kprobe(&op->kp);
503} 649}
504 650
505/* Unoptimize a kprobe if p is optimized */ 651/* Unoptimize a kprobe if p is optimized */
506static __kprobes void unoptimize_kprobe(struct kprobe *p) 652static __kprobes void unoptimize_kprobe(struct kprobe *p, bool force)
507{ 653{
508 struct optimized_kprobe *op; 654 struct optimized_kprobe *op;
509 655
510 if ((p->flags & KPROBE_FLAG_OPTIMIZED) && kprobe_aggrprobe(p)) { 656 if (!kprobe_aggrprobe(p) || kprobe_disarmed(p))
511 op = container_of(p, struct optimized_kprobe, kp); 657 return; /* This is not an optprobe nor optimized */
512 if (!list_empty(&op->list)) 658
513 /* Dequeue from the optimization queue */ 659 op = container_of(p, struct optimized_kprobe, kp);
660 if (!kprobe_optimized(p)) {
661 /* Unoptimized or unoptimizing case */
662 if (force && !list_empty(&op->list)) {
663 /*
664 * Only if this is an unoptimizing kprobe and force is set,
665 * forcibly unoptimize it. (There is no need to unoptimize an
666 * already-unoptimized kprobe again :)
667 */
514 list_del_init(&op->list); 668 list_del_init(&op->list);
515 else 669 force_unoptimize_kprobe(op);
516 /* Replace jump with break */ 670 }
517 arch_unoptimize_kprobe(op); 671 return;
518 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; 672 }
673
674 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
675 if (!list_empty(&op->list)) {
676 /* Dequeue from the optimization queue */
677 list_del_init(&op->list);
678 return;
679 }
680 /* Optimized kprobe case */
681 if (force)
682 /* Forcibly update the code: this is a special case */
683 force_unoptimize_kprobe(op);
684 else {
685 list_add(&op->list, &unoptimizing_list);
686 kick_kprobe_optimizer();
519 } 687 }
520} 688}
521 689
690/* Cancel unoptimizing so the kprobe can be reused */
691static void reuse_unused_kprobe(struct kprobe *ap)
692{
693 struct optimized_kprobe *op;
694
695 BUG_ON(!kprobe_unused(ap));
696 /*
697 * An unused kprobe MUST be in the middle of delayed unoptimizing
698 * (meaning there is still a relative jump in place) and disabled.
699 */
700 op = container_of(ap, struct optimized_kprobe, kp);
701 if (unlikely(list_empty(&op->list)))
702 printk(KERN_WARNING "Warning: found a stray unused "
703 "aggrprobe@%p\n", ap->addr);
704 /* Enable the probe again */
705 ap->flags &= ~KPROBE_FLAG_DISABLED;
706 /* Optimize it again (remove from op->list) */
707 BUG_ON(!kprobe_optready(ap));
708 optimize_kprobe(ap);
709}
710
522/* Remove optimized instructions */ 711/* Remove optimized instructions */
523static void __kprobes kill_optimized_kprobe(struct kprobe *p) 712static void __kprobes kill_optimized_kprobe(struct kprobe *p)
524{ 713{
525 struct optimized_kprobe *op; 714 struct optimized_kprobe *op;
526 715
527 op = container_of(p, struct optimized_kprobe, kp); 716 op = container_of(p, struct optimized_kprobe, kp);
528 if (!list_empty(&op->list)) { 717 if (!list_empty(&op->list))
529 /* Dequeue from the optimization queue */ 718 /* Dequeue from the (un)optimization queue */
530 list_del_init(&op->list); 719 list_del_init(&op->list);
531 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; 720
532 } 721 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
533 /* Don't unoptimize, because the target code will be freed. */ 722 /* Don't touch the code, because it is already freed. */
534 arch_remove_optimized_kprobe(op); 723 arch_remove_optimized_kprobe(op);
535} 724}
536 725
@@ -543,16 +732,6 @@ static __kprobes void prepare_optimized_kprobe(struct kprobe *p)
543 arch_prepare_optimized_kprobe(op); 732 arch_prepare_optimized_kprobe(op);
544} 733}
545 734
546/* Free optimized instructions and optimized_kprobe */
547static __kprobes void free_aggr_kprobe(struct kprobe *p)
548{
549 struct optimized_kprobe *op;
550
551 op = container_of(p, struct optimized_kprobe, kp);
552 arch_remove_optimized_kprobe(op);
553 kfree(op);
554}
555
556/* Allocate new optimized_kprobe and try to prepare optimized instructions */ 735/* Allocate new optimized_kprobe and try to prepare optimized instructions */
557static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p) 736static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
558{ 737{
@@ -587,7 +766,8 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
587 op = container_of(ap, struct optimized_kprobe, kp); 766 op = container_of(ap, struct optimized_kprobe, kp);
588 if (!arch_prepared_optinsn(&op->optinsn)) { 767 if (!arch_prepared_optinsn(&op->optinsn)) {
589 /* If failed to setup optimizing, fallback to kprobe */ 768 /* If failed to setup optimizing, fallback to kprobe */
590 free_aggr_kprobe(ap); 769 arch_remove_optimized_kprobe(op);
770 kfree(op);
591 return; 771 return;
592 } 772 }
593 773
@@ -631,21 +811,16 @@ static void __kprobes unoptimize_all_kprobes(void)
631 return; 811 return;
632 812
633 kprobes_allow_optimization = false; 813 kprobes_allow_optimization = false;
634 printk(KERN_INFO "Kprobes globally unoptimized\n");
635 get_online_cpus(); /* For avoiding text_mutex deadlock */
636 mutex_lock(&text_mutex);
637 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 814 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
638 head = &kprobe_table[i]; 815 head = &kprobe_table[i];
639 hlist_for_each_entry_rcu(p, node, head, hlist) { 816 hlist_for_each_entry_rcu(p, node, head, hlist) {
640 if (!kprobe_disabled(p)) 817 if (!kprobe_disabled(p))
641 unoptimize_kprobe(p); 818 unoptimize_kprobe(p, false);
642 } 819 }
643 } 820 }
644 821 /* Wait for unoptimizing completion */
645 mutex_unlock(&text_mutex); 822 wait_for_kprobe_optimizer();
646 put_online_cpus(); 823 printk(KERN_INFO "Kprobes globally unoptimized\n");
647 /* Allow all currently running kprobes to complete */
648 synchronize_sched();
649} 824}
650 825
651int sysctl_kprobes_optimization; 826int sysctl_kprobes_optimization;
@@ -669,44 +844,60 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
 }
 #endif /* CONFIG_SYSCTL */
 
+/* Put a breakpoint for a probe. Must be called with text_mutex locked */
 static void __kprobes __arm_kprobe(struct kprobe *p)
 {
-	struct kprobe *old_p;
+	struct kprobe *_p;
 
 	/* Check collision with other optimized kprobes */
-	old_p = get_optimized_kprobe((unsigned long)p->addr);
-	if (unlikely(old_p))
-		unoptimize_kprobe(old_p); /* Fallback to unoptimized kprobe */
+	_p = get_optimized_kprobe((unsigned long)p->addr);
+	if (unlikely(_p))
+		/* Fallback to unoptimized kprobe */
+		unoptimize_kprobe(_p, true);
 
 	arch_arm_kprobe(p);
 	optimize_kprobe(p);	/* Try to optimize (add kprobe to a list) */
 }
 
-static void __kprobes __disarm_kprobe(struct kprobe *p)
+/* Remove the breakpoint of a probe. Must be called with text_mutex locked */
+static void __kprobes __disarm_kprobe(struct kprobe *p, bool reopt)
 {
-	struct kprobe *old_p;
+	struct kprobe *_p;
 
-	unoptimize_kprobe(p);	/* Try to unoptimize */
-	arch_disarm_kprobe(p);
+	unoptimize_kprobe(p, false);	/* Try to unoptimize */
 
-	/* If another kprobe was blocked, optimize it. */
-	old_p = get_optimized_kprobe((unsigned long)p->addr);
-	if (unlikely(old_p))
-		optimize_kprobe(old_p);
+	if (!kprobe_queued(p)) {
+		arch_disarm_kprobe(p);
+		/* If another kprobe was blocked, optimize it. */
+		_p = get_optimized_kprobe((unsigned long)p->addr);
+		if (unlikely(_p) && reopt)
+			optimize_kprobe(_p);
+	}
+	/* TODO: reoptimize others after unoptimized this probe */
 }
 
 #else /* !CONFIG_OPTPROBES */
 
 #define optimize_kprobe(p)			do {} while (0)
-#define unoptimize_kprobe(p)			do {} while (0)
+#define unoptimize_kprobe(p, f)			do {} while (0)
 #define kill_optimized_kprobe(p)		do {} while (0)
 #define prepare_optimized_kprobe(p)		do {} while (0)
 #define try_to_optimize_kprobe(p)		do {} while (0)
 #define __arm_kprobe(p)				arch_arm_kprobe(p)
-#define __disarm_kprobe(p)			arch_disarm_kprobe(p)
+#define __disarm_kprobe(p, o)			arch_disarm_kprobe(p)
+#define kprobe_disarmed(p)			kprobe_disabled(p)
+#define wait_for_kprobe_optimizer()		do {} while (0)
+
+/* There should be no unused kprobes can be reused without optimization */
+static void reuse_unused_kprobe(struct kprobe *ap)
+{
+	printk(KERN_ERR "Error: There should be no unused kprobe here.\n");
+	BUG_ON(kprobe_unused(ap));
+}
 
 static __kprobes void free_aggr_kprobe(struct kprobe *p)
 {
+	arch_remove_kprobe(p);
 	kfree(p);
 }
 
@@ -732,11 +923,10 @@ static void __kprobes arm_kprobe(struct kprobe *kp)
 /* Disarm a kprobe with text_mutex */
 static void __kprobes disarm_kprobe(struct kprobe *kp)
 {
-	get_online_cpus();	/* For avoiding text_mutex deadlock */
+	/* Ditto */
 	mutex_lock(&text_mutex);
-	__disarm_kprobe(kp);
+	__disarm_kprobe(kp, true);
 	mutex_unlock(&text_mutex);
-	put_online_cpus();
 }
 
 /*
@@ -942,7 +1132,7 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
 	BUG_ON(kprobe_gone(ap) || kprobe_gone(p));
 
 	if (p->break_handler || p->post_handler)
-		unoptimize_kprobe(ap);	/* Fall back to normal kprobe */
+		unoptimize_kprobe(ap, true);	/* Fall back to normal kprobe */
 
 	if (p->break_handler) {
 		if (ap->break_handler)
@@ -993,19 +1183,21 @@ static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
  * This is the second or subsequent kprobe at the address - handle
  * the intricacies
  */
-static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
+static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,
 					  struct kprobe *p)
 {
 	int ret = 0;
-	struct kprobe *ap = old_p;
+	struct kprobe *ap = orig_p;
 
-	if (!kprobe_aggrprobe(old_p)) {
-		/* If old_p is not an aggr_kprobe, create new aggr_kprobe. */
-		ap = alloc_aggr_kprobe(old_p);
+	if (!kprobe_aggrprobe(orig_p)) {
+		/* If orig_p is not an aggr_kprobe, create new aggr_kprobe. */
+		ap = alloc_aggr_kprobe(orig_p);
 		if (!ap)
 			return -ENOMEM;
-		init_aggr_kprobe(ap, old_p);
-	}
+		init_aggr_kprobe(ap, orig_p);
+	} else if (kprobe_unused(ap))
+		/* This probe is going to die. Rescue it */
+		reuse_unused_kprobe(ap);
 
 	if (kprobe_gone(ap)) {
 		/*
@@ -1039,23 +1231,6 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
 	return add_new_kprobe(ap, p);
 }
 
-/* Try to disable aggr_kprobe, and return 1 if succeeded.*/
-static int __kprobes try_to_disable_aggr_kprobe(struct kprobe *p)
-{
-	struct kprobe *kp;
-
-	list_for_each_entry_rcu(kp, &p->list, list) {
-		if (!kprobe_disabled(kp))
-			/*
-			 * There is an active probe on the list.
-			 * We can't disable aggr_kprobe.
-			 */
-			return 0;
-	}
-	p->flags |= KPROBE_FLAG_DISABLED;
-	return 1;
-}
-
 static int __kprobes in_kprobes_functions(unsigned long addr)
 {
 	struct kprobe_blackpoint *kb;
@@ -1098,34 +1273,33 @@ static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
 /* Check passed kprobe is valid and return kprobe in kprobe_table. */
 static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
 {
-	struct kprobe *old_p, *list_p;
+	struct kprobe *ap, *list_p;
 
-	old_p = get_kprobe(p->addr);
-	if (unlikely(!old_p))
+	ap = get_kprobe(p->addr);
+	if (unlikely(!ap))
 		return NULL;
 
-	if (p != old_p) {
-		list_for_each_entry_rcu(list_p, &old_p->list, list)
+	if (p != ap) {
+		list_for_each_entry_rcu(list_p, &ap->list, list)
 			if (list_p == p)
 			/* kprobe p is a valid probe */
 				goto valid;
 		return NULL;
 	}
 valid:
-	return old_p;
+	return ap;
 }
 
 /* Return error if the kprobe is being re-registered */
 static inline int check_kprobe_rereg(struct kprobe *p)
 {
 	int ret = 0;
-	struct kprobe *old_p;
 
 	mutex_lock(&kprobe_mutex);
-	old_p = __get_valid_kprobe(p);
-	if (old_p)
+	if (__get_valid_kprobe(p))
 		ret = -EINVAL;
 	mutex_unlock(&kprobe_mutex);
+
 	return ret;
 }
 
@@ -1229,67 +1403,121 @@ fail_with_jump_label:
 }
 EXPORT_SYMBOL_GPL(register_kprobe);
 
+/* Check if all probes on the aggrprobe are disabled */
+static int __kprobes aggr_kprobe_disabled(struct kprobe *ap)
+{
+	struct kprobe *kp;
+
+	list_for_each_entry_rcu(kp, &ap->list, list)
+		if (!kprobe_disabled(kp))
+			/*
+			 * There is an active probe on the list.
+			 * We can't disable this ap.
+			 */
+			return 0;
+
+	return 1;
+}
+
+/* Disable one kprobe: Make sure called under kprobe_mutex is locked */
+static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p)
+{
+	struct kprobe *orig_p;
+
+	/* Get an original kprobe for return */
+	orig_p = __get_valid_kprobe(p);
+	if (unlikely(orig_p == NULL))
+		return NULL;
+
+	if (!kprobe_disabled(p)) {
+		/* Disable probe if it is a child probe */
+		if (p != orig_p)
+			p->flags |= KPROBE_FLAG_DISABLED;
+
+		/* Try to disarm and disable this/parent probe */
+		if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
+			disarm_kprobe(orig_p);
+			orig_p->flags |= KPROBE_FLAG_DISABLED;
+		}
+	}
+
+	return orig_p;
+}
+
 /*
  * Unregister a kprobe without a scheduler synchronization.
  */
 static int __kprobes __unregister_kprobe_top(struct kprobe *p)
 {
-	struct kprobe *old_p, *list_p;
+	struct kprobe *ap, *list_p;
 
-	old_p = __get_valid_kprobe(p);
-	if (old_p == NULL)
+	/* Disable kprobe. This will disarm it if needed. */
+	ap = __disable_kprobe(p);
+	if (ap == NULL)
 		return -EINVAL;
 
-	if (old_p == p ||
-	    (kprobe_aggrprobe(old_p) &&
-	     list_is_singular(&old_p->list))) {
+	if (ap == p)
 		/*
-		 * Only probe on the hash list. Disarm only if kprobes are
-		 * enabled and not gone - otherwise, the breakpoint would
-		 * already have been removed. We save on flushing icache.
+		 * This probe is an independent(and non-optimized) kprobe
+		 * (not an aggrprobe). Remove from the hash list.
 		 */
-		if (!kprobes_all_disarmed && !kprobe_disabled(old_p))
-			disarm_kprobe(old_p);
-		hlist_del_rcu(&old_p->hlist);
-	} else {
+		goto disarmed;
+
+	/* Following process expects this probe is an aggrprobe */
+	WARN_ON(!kprobe_aggrprobe(ap));
+
+	if (list_is_singular(&ap->list) && kprobe_disarmed(ap))
+		/*
+		 * !disarmed could be happen if the probe is under delayed
+		 * unoptimizing.
+		 */
+		goto disarmed;
+	else {
+		/* If disabling probe has special handlers, update aggrprobe */
 		if (p->break_handler && !kprobe_gone(p))
-			old_p->break_handler = NULL;
+			ap->break_handler = NULL;
 		if (p->post_handler && !kprobe_gone(p)) {
-			list_for_each_entry_rcu(list_p, &old_p->list, list) {
+			list_for_each_entry_rcu(list_p, &ap->list, list) {
 				if ((list_p != p) && (list_p->post_handler))
 					goto noclean;
 			}
-			old_p->post_handler = NULL;
+			ap->post_handler = NULL;
 		}
 noclean:
+		/*
+		 * Remove from the aggrprobe: this path will do nothing in
+		 * __unregister_kprobe_bottom().
+		 */
 		list_del_rcu(&p->list);
-		if (!kprobe_disabled(old_p)) {
-			try_to_disable_aggr_kprobe(old_p);
-			if (!kprobes_all_disarmed) {
-				if (kprobe_disabled(old_p))
-					disarm_kprobe(old_p);
-				else
-					/* Try to optimize this probe again */
-					optimize_kprobe(old_p);
-			}
-		}
+		if (!kprobe_disabled(ap) && !kprobes_all_disarmed)
+			/*
+			 * Try to optimize this probe again, because post
+			 * handler may have been changed.
+			 */
+			optimize_kprobe(ap);
 	}
 	return 0;
+
+disarmed:
+	BUG_ON(!kprobe_disarmed(ap));
+	hlist_del_rcu(&ap->hlist);
+	return 0;
 }
 
 static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
 {
-	struct kprobe *old_p;
+	struct kprobe *ap;
 
 	if (list_empty(&p->list))
+		/* This is an independent kprobe */
 		arch_remove_kprobe(p);
 	else if (list_is_singular(&p->list)) {
-		/* "p" is the last child of an aggr_kprobe */
-		old_p = list_entry(p->list.next, struct kprobe, list);
+		/* This is the last child of an aggrprobe */
+		ap = list_entry(p->list.next, struct kprobe, list);
 		list_del(&p->list);
-		arch_remove_kprobe(old_p);
-		free_aggr_kprobe(old_p);
+		free_aggr_kprobe(ap);
 	}
+	/* Otherwise, do nothing. */
 }
 
 int __kprobes register_kprobes(struct kprobe **kps, int num)
@@ -1607,29 +1835,13 @@ static void __kprobes kill_kprobe(struct kprobe *p)
 int __kprobes disable_kprobe(struct kprobe *kp)
 {
 	int ret = 0;
-	struct kprobe *p;
 
 	mutex_lock(&kprobe_mutex);
 
-	/* Check whether specified probe is valid. */
-	p = __get_valid_kprobe(kp);
-	if (unlikely(p == NULL)) {
+	/* Disable this kprobe */
+	if (__disable_kprobe(kp) == NULL)
 		ret = -EINVAL;
-		goto out;
-	}
 
-	/* If the probe is already disabled (or gone), just return */
-	if (kprobe_disabled(kp))
-		goto out;
-
-	kp->flags |= KPROBE_FLAG_DISABLED;
-	if (p != kp)
-		/* When kp != p, p is always enabled. */
-		try_to_disable_aggr_kprobe(p);
-
-	if (!kprobes_all_disarmed && kprobe_disabled(p))
-		disarm_kprobe(p);
-out:
 	mutex_unlock(&kprobe_mutex);
 	return ret;
 }
@@ -1927,36 +2139,27 @@ static void __kprobes disarm_all_kprobes(void)
 	mutex_lock(&kprobe_mutex);
 
 	/* If kprobes are already disarmed, just return */
-	if (kprobes_all_disarmed)
-		goto already_disabled;
+	if (kprobes_all_disarmed) {
+		mutex_unlock(&kprobe_mutex);
+		return;
+	}
 
 	kprobes_all_disarmed = true;
 	printk(KERN_INFO "Kprobes globally disabled\n");
 
-	/*
-	 * Here we call get_online_cpus() for avoiding text_mutex deadlock,
-	 * because disarming may also unoptimize kprobes.
-	 */
-	get_online_cpus();
 	mutex_lock(&text_mutex);
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		head = &kprobe_table[i];
 		hlist_for_each_entry_rcu(p, node, head, hlist) {
 			if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
-				__disarm_kprobe(p);
+				__disarm_kprobe(p, false);
 		}
 	}
-
 	mutex_unlock(&text_mutex);
-	put_online_cpus();
 	mutex_unlock(&kprobe_mutex);
-	/* Allow all currently running kprobes to complete */
-	synchronize_sched();
-	return;
 
-already_disabled:
-	mutex_unlock(&kprobe_mutex);
-	return;
+	/* Wait for disarming all kprobes by optimizer */
+	wait_for_kprobe_optimizer();
 }
 
 /*
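
[Editor's note: the kprobes hunks above funnel disable_kprobe() and __unregister_kprobe_top() through the new __disable_kprobe(), and defer the actual breakpoint/jump removal to the optimizer thread via wait_for_kprobe_optimizer(). A minimal module sketch of the user-visible API these paths serve; it is illustrative only, not part of the patch, and the probed symbol is an arbitrary choice:]

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>

static int handler_pre(struct kprobe *p, struct pt_regs *regs)
{
	pr_info("kprobe hit at %p\n", p->addr);
	return 0;
}

static struct kprobe kp = {
	.symbol_name = "do_fork",	/* illustrative target */
	.pre_handler = handler_pre,
};

static int __init kp_init(void)
{
	int ret = register_kprobe(&kp);	/* may later be optimized to a jump */

	if (ret < 0)
		return ret;

	disable_kprobe(&kp);	/* now routed through __disable_kprobe() */
	enable_kprobe(&kp);
	return 0;
}

static void __exit kp_exit(void)
{
	/* Removal of the breakpoint/jump may be batched by the optimizer. */
	unregister_kprobe(&kp);
}

module_init(kp_init);
module_exit(kp_exit);
MODULE_LICENSE("GPL");
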
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 2870feee81dd..11847bf1e8cc 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -13,6 +13,7 @@
 #include <linux/mm.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
+#include <linux/idr.h>
 #include <linux/file.h>
 #include <linux/poll.h>
 #include <linux/slab.h>
@@ -21,7 +22,9 @@
 #include <linux/dcache.h>
 #include <linux/percpu.h>
 #include <linux/ptrace.h>
+#include <linux/reboot.h>
 #include <linux/vmstat.h>
+#include <linux/device.h>
 #include <linux/vmalloc.h>
 #include <linux/hardirq.h>
 #include <linux/rculist.h>
@@ -133,6 +136,28 @@ static void unclone_ctx(struct perf_event_context *ctx)
 	}
 }
 
+static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
+{
+	/*
+	 * only top level events have the pid namespace they were created in
+	 */
+	if (event->parent)
+		event = event->parent;
+
+	return task_tgid_nr_ns(p, event->ns);
+}
+
+static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
+{
+	/*
+	 * only top level events have the pid namespace they were created in
+	 */
+	if (event->parent)
+		event = event->parent;
+
+	return task_pid_nr_ns(p, event->ns);
+}
+
 /*
  * If we inherit events we want to return the parent event id
  * to userspace.
@@ -312,9 +337,84 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 	ctx->nr_stat++;
 }
 
+/*
+ * Called at perf_event creation and when events are attached/detached from a
+ * group.
+ */
+static void perf_event__read_size(struct perf_event *event)
+{
+	int entry = sizeof(u64); /* value */
+	int size = 0;
+	int nr = 1;
+
+	if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		size += sizeof(u64);
+
+	if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		size += sizeof(u64);
+
+	if (event->attr.read_format & PERF_FORMAT_ID)
+		entry += sizeof(u64);
+
+	if (event->attr.read_format & PERF_FORMAT_GROUP) {
+		nr += event->group_leader->nr_siblings;
+		size += sizeof(u64);
+	}
+
+	size += entry * nr;
+	event->read_size = size;
+}
+
+static void perf_event__header_size(struct perf_event *event)
+{
+	struct perf_sample_data *data;
+	u64 sample_type = event->attr.sample_type;
+	u16 size = 0;
+
+	perf_event__read_size(event);
+
+	if (sample_type & PERF_SAMPLE_IP)
+		size += sizeof(data->ip);
+
+	if (sample_type & PERF_SAMPLE_ADDR)
+		size += sizeof(data->addr);
+
+	if (sample_type & PERF_SAMPLE_PERIOD)
+		size += sizeof(data->period);
+
+	if (sample_type & PERF_SAMPLE_READ)
+		size += event->read_size;
+
+	event->header_size = size;
+}
+
+static void perf_event__id_header_size(struct perf_event *event)
+{
+	struct perf_sample_data *data;
+	u64 sample_type = event->attr.sample_type;
+	u16 size = 0;
+
+	if (sample_type & PERF_SAMPLE_TID)
+		size += sizeof(data->tid_entry);
+
+	if (sample_type & PERF_SAMPLE_TIME)
+		size += sizeof(data->time);
+
+	if (sample_type & PERF_SAMPLE_ID)
+		size += sizeof(data->id);
+
+	if (sample_type & PERF_SAMPLE_STREAM_ID)
+		size += sizeof(data->stream_id);
+
+	if (sample_type & PERF_SAMPLE_CPU)
+		size += sizeof(data->cpu_entry);
+
+	event->id_header_size = size;
+}
+
 static void perf_group_attach(struct perf_event *event)
 {
-	struct perf_event *group_leader = event->group_leader;
+	struct perf_event *group_leader = event->group_leader, *pos;
 
 	/*
 	 * We can have double attach due to group movement in perf_event_open.
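
[Editor's note: perf_event__read_size() caches the byte count that a read() on the event will return, replacing the perf_event_read_size() helper removed further down. A self-contained user-space replica of the arithmetic, for one worked case:]

#include <stdio.h>
#include <stdint.h>

/* Values from the perf ABI (include/linux/perf_event.h). */
#define PERF_FORMAT_TOTAL_TIME_ENABLED	(1U << 0)
#define PERF_FORMAT_TOTAL_TIME_RUNNING	(1U << 1)
#define PERF_FORMAT_ID			(1U << 2)
#define PERF_FORMAT_GROUP		(1U << 3)

static int read_size(uint64_t read_format, int nr_siblings)
{
	int entry = sizeof(uint64_t);	/* the counter value itself */
	int size = 0;
	int nr = 1;

	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		size += sizeof(uint64_t);
	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		size += sizeof(uint64_t);
	if (read_format & PERF_FORMAT_ID)
		entry += sizeof(uint64_t);	/* each entry gains an id */
	if (read_format & PERF_FORMAT_GROUP) {
		nr += nr_siblings;		/* one entry per group member */
		size += sizeof(uint64_t);	/* the leading "nr" field */
	}
	return size + entry * nr;
}

int main(void)
{
	uint64_t fmt = PERF_FORMAT_GROUP | PERF_FORMAT_ID |
		       PERF_FORMAT_TOTAL_TIME_ENABLED;

	/* 8 (time_enabled) + 8 (nr) + 3 * (8 value + 8 id) = 64 bytes */
	printf("read_size = %d\n", read_size(fmt, 2));
	return 0;
}
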
@@ -333,6 +433,11 @@ static void perf_group_attach(struct perf_event *event)
 
 	list_add_tail(&event->group_entry, &group_leader->sibling_list);
 	group_leader->nr_siblings++;
+
+	perf_event__header_size(group_leader);
+
+	list_for_each_entry(pos, &group_leader->sibling_list, group_entry)
+		perf_event__header_size(pos);
 }
 
 /*
@@ -391,7 +496,7 @@ static void perf_group_detach(struct perf_event *event)
 	if (event->group_leader != event) {
 		list_del_init(&event->group_entry);
 		event->group_leader->nr_siblings--;
-		return;
+		goto out;
 	}
 
 	if (!list_empty(&event->group_entry))
@@ -410,6 +515,12 @@ static void perf_group_detach(struct perf_event *event)
 		/* Inherit group flags from the previous leader */
 		sibling->group_flags = event->group_flags;
 	}
+
+out:
+	perf_event__header_size(event->group_leader);
+
+	list_for_each_entry(tmp, &event->group_leader->sibling_list, group_entry)
+		perf_event__header_size(tmp);
 }
 
 static inline int
@@ -1073,7 +1184,7 @@ static int perf_event_refresh(struct perf_event *event, int refresh)
 	/*
 	 * not supported on inherited events
 	 */
-	if (event->attr.inherit)
+	if (event->attr.inherit || !is_sampling_event(event))
 		return -EINVAL;
 
 	atomic_add(refresh, &event->event_limit);
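
[Editor's note: this and several later hunks replace open-coded hwc->sample_period tests with is_sampling_event(). For reference, the helper added to include/linux/perf_event.h in this series is essentially the following; reproduced from memory, so treat it as indicative:]

static inline bool is_sampling_event(struct perf_event *event)
{
	return event->attr.sample_period != 0;
}
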
@@ -2289,31 +2400,6 @@ static int perf_release(struct inode *inode, struct file *file)
 	return perf_event_release_kernel(event);
 }
 
-static int perf_event_read_size(struct perf_event *event)
-{
-	int entry = sizeof(u64); /* value */
-	int size = 0;
-	int nr = 1;
-
-	if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
-		size += sizeof(u64);
-
-	if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
-		size += sizeof(u64);
-
-	if (event->attr.read_format & PERF_FORMAT_ID)
-		entry += sizeof(u64);
-
-	if (event->attr.read_format & PERF_FORMAT_GROUP) {
-		nr += event->group_leader->nr_siblings;
-		size += sizeof(u64);
-	}
-
-	size += entry * nr;
-
-	return size;
-}
-
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
 	struct perf_event *child;
@@ -2428,7 +2514,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
 	if (event->state == PERF_EVENT_STATE_ERROR)
 		return 0;
 
-	if (count < perf_event_read_size(event))
+	if (count < event->read_size)
 		return -ENOSPC;
 
 	WARN_ON_ONCE(event->ctx->parent_ctx);
@@ -2514,7 +2600,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
 	int ret = 0;
 	u64 value;
 
-	if (!event->attr.sample_period)
+	if (!is_sampling_event(event))
 		return -EINVAL;
 
 	if (copy_from_user(&value, arg, sizeof(value)))
@@ -3305,6 +3391,73 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle,
 	} while (len);
 }
 
+static void __perf_event_header__init_id(struct perf_event_header *header,
+					 struct perf_sample_data *data,
+					 struct perf_event *event)
+{
+	u64 sample_type = event->attr.sample_type;
+
+	data->type = sample_type;
+	header->size += event->id_header_size;
+
+	if (sample_type & PERF_SAMPLE_TID) {
+		/* namespace issues */
+		data->tid_entry.pid = perf_event_pid(event, current);
+		data->tid_entry.tid = perf_event_tid(event, current);
+	}
+
+	if (sample_type & PERF_SAMPLE_TIME)
+		data->time = perf_clock();
+
+	if (sample_type & PERF_SAMPLE_ID)
+		data->id = primary_event_id(event);
+
+	if (sample_type & PERF_SAMPLE_STREAM_ID)
+		data->stream_id = event->id;
+
+	if (sample_type & PERF_SAMPLE_CPU) {
+		data->cpu_entry.cpu = raw_smp_processor_id();
+		data->cpu_entry.reserved = 0;
+	}
+}
+
+static void perf_event_header__init_id(struct perf_event_header *header,
+				       struct perf_sample_data *data,
+				       struct perf_event *event)
+{
+	if (event->attr.sample_id_all)
+		__perf_event_header__init_id(header, data, event);
+}
+
+static void __perf_event__output_id_sample(struct perf_output_handle *handle,
+					   struct perf_sample_data *data)
+{
+	u64 sample_type = data->type;
+
+	if (sample_type & PERF_SAMPLE_TID)
+		perf_output_put(handle, data->tid_entry);
+
+	if (sample_type & PERF_SAMPLE_TIME)
+		perf_output_put(handle, data->time);
+
+	if (sample_type & PERF_SAMPLE_ID)
+		perf_output_put(handle, data->id);
+
+	if (sample_type & PERF_SAMPLE_STREAM_ID)
+		perf_output_put(handle, data->stream_id);
+
+	if (sample_type & PERF_SAMPLE_CPU)
+		perf_output_put(handle, data->cpu_entry);
+}
+
+static void perf_event__output_id_sample(struct perf_event *event,
+					 struct perf_output_handle *handle,
+					 struct perf_sample_data *sample)
+{
+	if (event->attr.sample_id_all)
+		__perf_event__output_id_sample(handle, sample);
+}
+
 int perf_output_begin(struct perf_output_handle *handle,
 		      struct perf_event *event, unsigned int size,
 		      int nmi, int sample)
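
[Editor's note: the block above implements attr.sample_id_all: when set, the TID/TIME/ID/STREAM_ID/CPU fields selected in sample_type are appended to non-sample records (MMAP, COMM, EXIT, LOST, ...) as well. A hedged user-space sketch requesting it; this is not taken from the patch, and it simply fails with EINVAL on kernels that predate the flag:]

#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_CPU_CLOCK;
	attr.sample_period = 100000;
	attr.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_CPU;
	attr.comm = 1;			/* emit PERF_RECORD_COMM */
	attr.mmap = 1;			/* emit PERF_RECORD_MMAP */
	attr.sample_id_all = 1;		/* append the id trailer to them */

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		perror("perf_event_open");
	else
		close(fd);
	return 0;
}
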
@@ -3312,6 +3465,7 @@ int perf_output_begin(struct perf_output_handle *handle,
 	struct perf_buffer *buffer;
 	unsigned long tail, offset, head;
 	int have_lost;
+	struct perf_sample_data sample_data;
 	struct {
 		struct perf_event_header header;
 		u64 id;
@@ -3338,8 +3492,12 @@ int perf_output_begin(struct perf_output_handle *handle,
 		goto out;
 
 	have_lost = local_read(&buffer->lost);
-	if (have_lost)
-		size += sizeof(lost_event);
+	if (have_lost) {
+		lost_event.header.size = sizeof(lost_event);
+		perf_event_header__init_id(&lost_event.header, &sample_data,
+					   event);
+		size += lost_event.header.size;
+	}
 
 	perf_output_get_handle(handle);
 
@@ -3370,11 +3528,11 @@ int perf_output_begin(struct perf_output_handle *handle,
 	if (have_lost) {
 		lost_event.header.type = PERF_RECORD_LOST;
 		lost_event.header.misc = 0;
-		lost_event.header.size = sizeof(lost_event);
 		lost_event.id = event->id;
 		lost_event.lost = local_xchg(&buffer->lost, 0);
 
 		perf_output_put(handle, lost_event);
+		perf_event__output_id_sample(event, handle, &sample_data);
 	}
 
 	return 0;
@@ -3407,28 +3565,6 @@ void perf_output_end(struct perf_output_handle *handle)
 	rcu_read_unlock();
 }
 
-static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
-{
-	/*
-	 * only top level events have the pid namespace they were created in
-	 */
-	if (event->parent)
-		event = event->parent;
-
-	return task_tgid_nr_ns(p, event->ns);
-}
-
-static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
-{
-	/*
-	 * only top level events have the pid namespace they were created in
-	 */
-	if (event->parent)
-		event = event->parent;
-
-	return task_pid_nr_ns(p, event->ns);
-}
-
 static void perf_output_read_one(struct perf_output_handle *handle,
 				 struct perf_event *event,
 				 u64 enabled, u64 running)
@@ -3603,61 +3739,16 @@ void perf_prepare_sample(struct perf_event_header *header,
 {
 	u64 sample_type = event->attr.sample_type;
 
-	data->type = sample_type;
-
 	header->type = PERF_RECORD_SAMPLE;
-	header->size = sizeof(*header);
+	header->size = sizeof(*header) + event->header_size;
 
 	header->misc = 0;
 	header->misc |= perf_misc_flags(regs);
 
-	if (sample_type & PERF_SAMPLE_IP) {
-		data->ip = perf_instruction_pointer(regs);
-
-		header->size += sizeof(data->ip);
-	}
-
-	if (sample_type & PERF_SAMPLE_TID) {
-		/* namespace issues */
-		data->tid_entry.pid = perf_event_pid(event, current);
-		data->tid_entry.tid = perf_event_tid(event, current);
-
-		header->size += sizeof(data->tid_entry);
-	}
-
-	if (sample_type & PERF_SAMPLE_TIME) {
-		data->time = perf_clock();
-
-		header->size += sizeof(data->time);
-	}
-
-	if (sample_type & PERF_SAMPLE_ADDR)
-		header->size += sizeof(data->addr);
-
-	if (sample_type & PERF_SAMPLE_ID) {
-		data->id = primary_event_id(event);
-
-		header->size += sizeof(data->id);
-	}
-
-	if (sample_type & PERF_SAMPLE_STREAM_ID) {
-		data->stream_id = event->id;
-
-		header->size += sizeof(data->stream_id);
-	}
-
-	if (sample_type & PERF_SAMPLE_CPU) {
-		data->cpu_entry.cpu = raw_smp_processor_id();
-		data->cpu_entry.reserved = 0;
-
-		header->size += sizeof(data->cpu_entry);
-	}
-
-	if (sample_type & PERF_SAMPLE_PERIOD)
-		header->size += sizeof(data->period);
+	__perf_event_header__init_id(header, data, event);
 
-	if (sample_type & PERF_SAMPLE_READ)
-		header->size += perf_event_read_size(event);
+	if (sample_type & PERF_SAMPLE_IP)
+		data->ip = perf_instruction_pointer(regs);
 
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		int size = 1;
@@ -3722,23 +3813,26 @@ perf_event_read_event(struct perf_event *event,
 			      struct task_struct *task)
 {
 	struct perf_output_handle handle;
+	struct perf_sample_data sample;
 	struct perf_read_event read_event = {
 		.header = {
 			.type = PERF_RECORD_READ,
 			.misc = 0,
-			.size = sizeof(read_event) + perf_event_read_size(event),
+			.size = sizeof(read_event) + event->read_size,
 		},
 		.pid = perf_event_pid(event, task),
 		.tid = perf_event_tid(event, task),
 	};
 	int ret;
 
+	perf_event_header__init_id(&read_event.header, &sample, event);
 	ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0);
 	if (ret)
 		return;
 
 	perf_output_put(&handle, read_event);
 	perf_output_read(&handle, event);
+	perf_event__output_id_sample(event, &handle, &sample);
 
 	perf_output_end(&handle);
 }
@@ -3768,14 +3862,16 @@ static void perf_event_task_output(struct perf_event *event,
 				   struct perf_task_event *task_event)
 {
 	struct perf_output_handle handle;
+	struct perf_sample_data sample;
 	struct task_struct *task = task_event->task;
-	int size, ret;
+	int ret, size = task_event->event_id.header.size;
 
-	size  = task_event->event_id.header.size;
-	ret = perf_output_begin(&handle, event, size, 0, 0);
+	perf_event_header__init_id(&task_event->event_id.header, &sample, event);
 
+	ret = perf_output_begin(&handle, event,
+				task_event->event_id.header.size, 0, 0);
 	if (ret)
-		return;
+		goto out;
 
 	task_event->event_id.pid = perf_event_pid(event, task);
 	task_event->event_id.ppid = perf_event_pid(event, current);
@@ -3785,7 +3881,11 @@ static void perf_event_task_output(struct perf_event *event,
 
 	perf_output_put(&handle, task_event->event_id);
 
+	perf_event__output_id_sample(event, &handle, &sample);
+
 	perf_output_end(&handle);
+out:
+	task_event->event_id.header.size = size;
 }
 
 static int perf_event_task_match(struct perf_event *event)
@@ -3900,11 +4000,16 @@ static void perf_event_comm_output(struct perf_event *event,
 				   struct perf_comm_event *comm_event)
 {
 	struct perf_output_handle handle;
+	struct perf_sample_data sample;
 	int size = comm_event->event_id.header.size;
-	int ret = perf_output_begin(&handle, event, size, 0, 0);
+	int ret;
+
+	perf_event_header__init_id(&comm_event->event_id.header, &sample, event);
+	ret = perf_output_begin(&handle, event,
+				comm_event->event_id.header.size, 0, 0);
 
 	if (ret)
-		return;
+		goto out;
 
 	comm_event->event_id.pid = perf_event_pid(event, comm_event->task);
 	comm_event->event_id.tid = perf_event_tid(event, comm_event->task);
@@ -3912,7 +4017,12 @@ static void perf_event_comm_output(struct perf_event *event,
 	perf_output_put(&handle, comm_event->event_id);
 	perf_output_copy(&handle, comm_event->comm,
 				   comm_event->comm_size);
+
+	perf_event__output_id_sample(event, &handle, &sample);
+
 	perf_output_end(&handle);
+out:
+	comm_event->event_id.header.size = size;
 }
 
 static int perf_event_comm_match(struct perf_event *event)
@@ -3957,7 +4067,6 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
 	comm_event->comm_size = size;
 
 	comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
-
 	rcu_read_lock();
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
 		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
@@ -4038,11 +4147,15 @@ static void perf_event_mmap_output(struct perf_event *event,
 				   struct perf_mmap_event *mmap_event)
 {
 	struct perf_output_handle handle;
+	struct perf_sample_data sample;
 	int size = mmap_event->event_id.header.size;
-	int ret = perf_output_begin(&handle, event, size, 0, 0);
+	int ret;
 
+	perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
+	ret = perf_output_begin(&handle, event,
+				mmap_event->event_id.header.size, 0, 0);
 	if (ret)
-		return;
+		goto out;
 
 	mmap_event->event_id.pid = perf_event_pid(event, current);
 	mmap_event->event_id.tid = perf_event_tid(event, current);
@@ -4050,7 +4163,12 @@ static void perf_event_mmap_output(struct perf_event *event,
 	perf_output_put(&handle, mmap_event->event_id);
 	perf_output_copy(&handle, mmap_event->file_name,
 				   mmap_event->file_size);
+
+	perf_event__output_id_sample(event, &handle, &sample);
+
 	perf_output_end(&handle);
+out:
+	mmap_event->event_id.header.size = size;
 }
 
 static int perf_event_mmap_match(struct perf_event *event,
@@ -4205,6 +4323,7 @@ void perf_event_mmap(struct vm_area_struct *vma)
 static void perf_log_throttle(struct perf_event *event, int enable)
 {
 	struct perf_output_handle handle;
+	struct perf_sample_data sample;
 	int ret;
 
 	struct {
@@ -4226,11 +4345,15 @@ static void perf_log_throttle(struct perf_event *event, int enable)
 	if (enable)
 		throttle_event.header.type = PERF_RECORD_UNTHROTTLE;
 
-	ret = perf_output_begin(&handle, event, sizeof(throttle_event), 1, 0);
+	perf_event_header__init_id(&throttle_event.header, &sample, event);
+
+	ret = perf_output_begin(&handle, event,
+				throttle_event.header.size, 1, 0);
 	if (ret)
 		return;
 
 	perf_output_put(&handle, throttle_event);
+	perf_event__output_id_sample(event, &handle, &sample);
 	perf_output_end(&handle);
 }
 
@@ -4246,6 +4369,13 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
 	struct hw_perf_event *hwc = &event->hw;
 	int ret = 0;
 
+	/*
+	 * Non-sampling counters might still use the PMI to fold short
+	 * hardware counters, ignore those.
+	 */
+	if (unlikely(!is_sampling_event(event)))
+		return 0;
+
 	if (!throttle) {
 		hwc->interrupts++;
 	} else {
@@ -4391,7 +4521,7 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
 	if (!regs)
 		return;
 
-	if (!hwc->sample_period)
+	if (!is_sampling_event(event))
 		return;
 
 	if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
@@ -4554,7 +4684,7 @@ static int perf_swevent_add(struct perf_event *event, int flags)
 	struct hw_perf_event *hwc = &event->hw;
 	struct hlist_head *head;
 
-	if (hwc->sample_period) {
+	if (is_sampling_event(event)) {
 		hwc->last_period = hwc->sample_period;
 		perf_swevent_set_period(event);
 	}
@@ -4811,15 +4941,6 @@ static int perf_tp_event_init(struct perf_event *event)
 	if (event->attr.type != PERF_TYPE_TRACEPOINT)
 		return -ENOENT;
 
-	/*
-	 * Raw tracepoint data is a severe data leak, only allow root to
-	 * have these.
-	 */
-	if ((event->attr.sample_type & PERF_SAMPLE_RAW) &&
-	    perf_paranoid_tracepoint_raw() &&
-	    !capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
 	err = perf_trace_init(event);
 	if (err)
 		return err;
@@ -4842,7 +4963,7 @@ static struct pmu perf_tracepoint = {
 
 static inline void perf_tp_register(void)
 {
-	perf_pmu_register(&perf_tracepoint);
+	perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT);
 }
 
 static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@@ -4932,31 +5053,33 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
 static void perf_swevent_start_hrtimer(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
+	s64 period;
+
+	if (!is_sampling_event(event))
+		return;
 
 	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hwc->hrtimer.function = perf_swevent_hrtimer;
-	if (hwc->sample_period) {
-		s64 period = local64_read(&hwc->period_left);
 
-		if (period) {
-			if (period < 0)
-				period = 10000;
+	period = local64_read(&hwc->period_left);
+	if (period) {
+		if (period < 0)
+			period = 10000;
 
 		local64_set(&hwc->period_left, 0);
 	} else {
 		period = max_t(u64, 10000, hwc->sample_period);
 	}
 	__hrtimer_start_range_ns(&hwc->hrtimer,
 				ns_to_ktime(period), 0,
 				HRTIMER_MODE_REL_PINNED, 0);
-	}
 }
 
 static void perf_swevent_cancel_hrtimer(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 
-	if (hwc->sample_period) {
+	if (is_sampling_event(event)) {
 		ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
 		local64_set(&hwc->period_left, ktime_to_ns(remaining));
 
@@ -5184,8 +5307,61 @@ static void free_pmu_context(struct pmu *pmu)
 out:
 	mutex_unlock(&pmus_lock);
 }
+static struct idr pmu_idr;
+
+static ssize_t
+type_show(struct device *dev, struct device_attribute *attr, char *page)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+
+	return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type);
+}
+
+static struct device_attribute pmu_dev_attrs[] = {
+	__ATTR_RO(type),
+	__ATTR_NULL,
+};
+
+static int pmu_bus_running;
+static struct bus_type pmu_bus = {
+	.name		= "event_source",
+	.dev_attrs	= pmu_dev_attrs,
+};
+
+static void pmu_dev_release(struct device *dev)
+{
+	kfree(dev);
+}
+
+static int pmu_dev_alloc(struct pmu *pmu)
+{
+	int ret = -ENOMEM;
+
+	pmu->dev = kzalloc(sizeof(struct device), GFP_KERNEL);
+	if (!pmu->dev)
+		goto out;
+
+	device_initialize(pmu->dev);
+	ret = dev_set_name(pmu->dev, "%s", pmu->name);
+	if (ret)
+		goto free_dev;
+
+	dev_set_drvdata(pmu->dev, pmu);
+	pmu->dev->bus = &pmu_bus;
+	pmu->dev->release = pmu_dev_release;
+	ret = device_add(pmu->dev);
+	if (ret)
+		goto free_dev;
+
+out:
+	return ret;
+
+free_dev:
+	put_device(pmu->dev);
+	goto out;
+}
 
-int perf_pmu_register(struct pmu *pmu)
+int perf_pmu_register(struct pmu *pmu, char *name, int type)
 {
 	int cpu, ret;
 
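
[Editor's note: pmu_dev_alloc() publishes each named PMU under /sys/bus/event_source/devices/<name> with a "type" attribute. A small sketch, assuming a kernel carrying this patch, of how user space is expected to consume it; the value read goes straight into perf_event_attr.type:]

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/bus/event_source/devices/software/type", "r");
	int type;

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fscanf(f, "%d", &type) != 1) {
		fclose(f);
		return 1;
	}
	fclose(f);
	printf("'software' PMU type = %d\n", type);	/* PERF_TYPE_SOFTWARE */
	return 0;
}
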
@@ -5195,13 +5371,38 @@ int perf_pmu_register(struct pmu *pmu)
 	if (!pmu->pmu_disable_count)
 		goto unlock;
 
+	pmu->type = -1;
+	if (!name)
+		goto skip_type;
+	pmu->name = name;
+
+	if (type < 0) {
+		int err = idr_pre_get(&pmu_idr, GFP_KERNEL);
+		if (!err)
+			goto free_pdc;
+
+		err = idr_get_new_above(&pmu_idr, pmu, PERF_TYPE_MAX, &type);
+		if (err) {
+			ret = err;
+			goto free_pdc;
+		}
+	}
+	pmu->type = type;
+
+	if (pmu_bus_running) {
+		ret = pmu_dev_alloc(pmu);
+		if (ret)
+			goto free_idr;
+	}
+
+skip_type:
 	pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr);
 	if (pmu->pmu_cpu_context)
 		goto got_cpu_context;
 
 	pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context);
 	if (!pmu->pmu_cpu_context)
-		goto free_pdc;
+		goto free_dev;
 
 	for_each_possible_cpu(cpu) {
 		struct perf_cpu_context *cpuctx;
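
[Editor's note: the idr usage above follows the two-step pattern of the pre-idr_alloc() kernel API: idr_pre_get() preloads memory and returns 0 on failure, then idr_get_new_above() hands out an id above PERF_TYPE_MAX so the fixed perf types keep their historical numbers. A condensed sketch of that pattern; the function name here is hypothetical:]

static int assign_dynamic_type(struct idr *idr, void *ptr)
{
	int id, err;

	if (!idr_pre_get(idr, GFP_KERNEL))	/* 0 means preload failed */
		return -ENOMEM;

	err = idr_get_new_above(idr, ptr, PERF_TYPE_MAX, &id);
	if (err)
		return err;	/* e.g. -EAGAIN if the preload raced */

	return id;
}
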
@@ -5245,6 +5446,14 @@ unlock:
 
 	return ret;
 
+free_dev:
+	device_del(pmu->dev);
+	put_device(pmu->dev);
+
+free_idr:
+	if (pmu->type >= PERF_TYPE_MAX)
+		idr_remove(&pmu_idr, pmu->type);
+
 free_pdc:
 	free_percpu(pmu->pmu_disable_count);
 	goto unlock;
@@ -5264,6 +5473,10 @@ void perf_pmu_unregister(struct pmu *pmu)
 	synchronize_rcu();
 
 	free_percpu(pmu->pmu_disable_count);
+	if (pmu->type >= PERF_TYPE_MAX)
+		idr_remove(&pmu_idr, pmu->type);
+	device_del(pmu->dev);
+	put_device(pmu->dev);
 	free_pmu_context(pmu);
 }
 
@@ -5273,6 +5486,13 @@ struct pmu *perf_init_event(struct perf_event *event)
 	int idx;
 
 	idx = srcu_read_lock(&pmus_srcu);
+
+	rcu_read_lock();
+	pmu = idr_find(&pmu_idr, event->attr.type);
+	rcu_read_unlock();
+	if (pmu)
+		goto unlock;
+
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
 		int ret = pmu->event_init(event);
 		if (!ret)
@@ -5738,6 +5958,12 @@ SYSCALL_DEFINE5(perf_event_open,
 	mutex_unlock(&current->perf_event_mutex);
 
 	/*
+	 * Precalculate sample_data sizes
+	 */
+	perf_event__header_size(event);
+	perf_event__id_header_size(event);
+
+	/*
 	 * Drop the reference on the group_event after placing the
 	 * new event on the sibling_list. This ensures destruction
 	 * of the group leader will find the pointer to itself in
@@ -6090,6 +6316,12 @@ inherit_event(struct perf_event *parent_event,
 	child_event->overflow_handler = parent_event->overflow_handler;
 
 	/*
+	 * Precalculate sample_data sizes
+	 */
+	perf_event__header_size(child_event);
+	perf_event__id_header_size(child_event);
+
+	/*
 	 * Link it up in the child's context:
 	 */
 	raw_spin_lock_irqsave(&child_ctx->lock, flags);
@@ -6320,7 +6552,7 @@ static void __cpuinit perf_event_init_cpu(int cpu)
 	mutex_unlock(&swhash->hlist_mutex);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC
 static void perf_pmu_rotate_stop(struct pmu *pmu)
 {
 	struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
@@ -6374,6 +6606,26 @@ static void perf_event_exit_cpu(int cpu)
 static inline void perf_event_exit_cpu(int cpu) { }
 #endif
 
+static int
+perf_reboot(struct notifier_block *notifier, unsigned long val, void *v)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		perf_event_exit_cpu(cpu);
+
+	return NOTIFY_OK;
+}
+
+/*
+ * Run the perf reboot notifier at the very last possible moment so that
+ * the generic watchdog code runs as long as possible.
+ */
+static struct notifier_block perf_reboot_notifier = {
+	.notifier_call = perf_reboot,
+	.priority = INT_MIN,
+};
+
 static int __cpuinit
 perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 {
@@ -6402,14 +6654,45 @@ void __init perf_event_init(void)
 {
 	int ret;
 
+	idr_init(&pmu_idr);
+
 	perf_event_init_all_cpus();
 	init_srcu_struct(&pmus_srcu);
-	perf_pmu_register(&perf_swevent);
-	perf_pmu_register(&perf_cpu_clock);
-	perf_pmu_register(&perf_task_clock);
+	perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE);
+	perf_pmu_register(&perf_cpu_clock, NULL, -1);
+	perf_pmu_register(&perf_task_clock, NULL, -1);
 	perf_tp_register();
 	perf_cpu_notifier(perf_cpu_notify);
+	register_reboot_notifier(&perf_reboot_notifier);
 
 	ret = init_hw_breakpoint();
 	WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
 }
+
+static int __init perf_event_sysfs_init(void)
+{
+	struct pmu *pmu;
+	int ret;
+
+	mutex_lock(&pmus_lock);
+
+	ret = bus_register(&pmu_bus);
+	if (ret)
+		goto unlock;
+
+	list_for_each_entry(pmu, &pmus, entry) {
+		if (!pmu->name || pmu->type < 0)
+			continue;
+
+		ret = pmu_dev_alloc(pmu);
+		WARN(ret, "Failed to register pmu: %s, reason %d\n", pmu->name, ret);
+	}
+	pmu_bus_running = 1;
+	ret = 0;
+
+unlock:
+	mutex_unlock(&pmus_lock);
+
+	return ret;
+}
+device_initcall(perf_event_sysfs_init);
diff --git a/kernel/sched.c b/kernel/sched.c
index 297d1a0eedb0..c68cead94dd7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -8293,8 +8293,6 @@ void __init sched_init(void)
 	zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
 #endif /* SMP */
 
-	perf_event_init();
-
 	scheduler_running = 1;
 }
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5abfa1518554..46404414d8a7 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -745,21 +745,21 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
-#endif
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR)
 	{
-		.procname	= "unknown_nmi_panic",
-		.data		= &unknown_nmi_panic,
+		.procname	= "nmi_watchdog",
+		.data		= &watchdog_enabled,
 		.maxlen		= sizeof (int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dowatchdog_enabled,
 	},
+#endif
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 	{
-		.procname	= "nmi_watchdog",
-		.data		= &nmi_watchdog_enabled,
+		.procname	= "unknown_nmi_panic",
+		.data		= &unknown_nmi_panic,
 		.maxlen		= sizeof (int),
 		.mode		= 0644,
-		.proc_handler	= proc_nmi_enabled,
+		.proc_handler	= proc_dointvec,
 	},
 #endif
 #if defined(CONFIG_X86)
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 1357c5786064..4b2545a136ff 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -136,7 +136,6 @@ static const struct bin_table bin_kern_table[] = {
 	{ CTL_INT,	KERN_IA64_UNALIGNED,	"ignore-unaligned-usertrap" },
 	{ CTL_INT,	KERN_COMPAT_LOG,	"compat-log" },
 	{ CTL_INT,	KERN_MAX_LOCK_DEPTH,	"max_lock_depth" },
-	{ CTL_INT,	KERN_NMI_WATCHDOG,	"nmi_watchdog" },
 	{ CTL_INT,	KERN_PANIC_ON_NMI,	"panic_on_unrecovered_nmi" },
 	{}
 };
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 39c059ca670e..19a359d5e6d5 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -21,17 +21,46 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
 /* Count the events in use (per event id, not per instance) */
 static int	total_ref_count;
 
+static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
+				 struct perf_event *p_event)
+{
+	/* No tracing, just counting, so no obvious leak */
+	if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
+		return 0;
+
+	/* Some events are ok to be traced by non-root users... */
+	if (p_event->attach_state == PERF_ATTACH_TASK) {
+		if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
+			return 0;
+	}
+
+	/*
+	 * ...otherwise raw tracepoint data can be a severe data leak,
+	 * only allow root to have these.
+	 */
+	if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return 0;
+}
+
 static int perf_trace_event_init(struct ftrace_event_call *tp_event,
 				 struct perf_event *p_event)
 {
 	struct hlist_head __percpu *list;
-	int ret = -ENOMEM;
+	int ret;
 	int cpu;
 
+	ret = perf_trace_event_perm(tp_event, p_event);
+	if (ret)
+		return ret;
+
 	p_event->tp_event = tp_event;
 	if (tp_event->perf_refcount++ > 0)
 		return 0;
 
+	ret = -ENOMEM;
+
 	list = alloc_percpu(struct hlist_head);
 	if (!list)
 		goto fail;
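
[Editor's note: perf_trace_event_perm() relaxes the blanket root check removed from perf_tp_event_init() earlier in this series: counting-only events always pass, and per-task events pass when the tracepoint is flagged TRACE_EVENT_FL_CAP_ANY. A hedged user-space sketch of the path being checked; the debugfs mount point is an assumption, and a non-CAP_ANY event still yields EPERM for non-root:]

#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	FILE *f;
	int id, fd;

	f = fopen("/sys/kernel/debug/tracing/events/sched/sched_switch/id", "r");
	if (!f)
		return 1;
	if (fscanf(f, "%d", &id) != 1) {
		fclose(f);
		return 1;
	}
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_TRACEPOINT;
	attr.config = id;
	attr.sample_type = PERF_SAMPLE_RAW;	/* triggers the perm check */
	attr.sample_period = 1;

	/* pid = 0, cpu = -1: per-task attach, the PERF_ATTACH_TASK case */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	printf("fd = %d\n", fd);
	return 0;
}
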
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 0725eeab1937..35fde09b81de 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -27,6 +27,12 @@
 
 DEFINE_MUTEX(event_mutex);
 
+DEFINE_MUTEX(event_storage_mutex);
+EXPORT_SYMBOL_GPL(event_storage_mutex);
+
+char event_storage[EVENT_STORAGE_SIZE];
+EXPORT_SYMBOL_GPL(event_storage);
+
 LIST_HEAD(ftrace_events);
 LIST_HEAD(ftrace_common_fields);
 
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 4ba44deaac25..4b74d71705c0 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -83,13 +83,19 @@ static void __always_unused ____ftrace_check_##name(void) \
83 83
84#undef __array 84#undef __array
85#define __array(type, item, len) \ 85#define __array(type, item, len) \
86 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ 86 do { \
87 ret = trace_define_field(event_call, #type "[" #len "]", #item, \ 87 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
88 mutex_lock(&event_storage_mutex); \
89 snprintf(event_storage, sizeof(event_storage), \
90 "%s[%d]", #type, len); \
91 ret = trace_define_field(event_call, event_storage, #item, \
88 offsetof(typeof(field), item), \ 92 offsetof(typeof(field), item), \
89 sizeof(field.item), \ 93 sizeof(field.item), \
90 is_signed_type(type), FILTER_OTHER); \ 94 is_signed_type(type), FILTER_OTHER); \
91 if (ret) \ 95 mutex_unlock(&event_storage_mutex); \
92 return ret; 96 if (ret) \
97 return ret; \
98 } while (0);
93 99
94#undef __array_desc 100#undef __array_desc
95#define __array_desc(type, container, item, len) \ 101#define __array_desc(type, container, item, len) \
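Two details in the __array() rewrite are worth spelling out: wrapping the body in do { ... } while (0) makes the multi-statement macro safe to use as a single statement (e.g. after an unbraced if), and formatting the "type[len]" string into the shared event_storage buffer only under event_storage_mutex (introduced in the trace_events.c hunk above) keeps concurrent field definitions from clobbering one another. A userspace analogue of the same two techniques, assuming pthreads and illustrative names:

----
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t storage_mutex = PTHREAD_MUTEX_INITIALIZER;
static char storage[128];

/* do { ... } while (0) keeps this a single statement; the mutex
 * serializes use of the shared static buffer. */
#define DEFINE_ARRAY_FIELD(type, len, consume)			\
	do {							\
		pthread_mutex_lock(&storage_mutex);		\
		snprintf(storage, sizeof(storage),		\
			 "%s[%d]", #type, (len));		\
		consume(storage); /* use before unlocking */	\
		pthread_mutex_unlock(&storage_mutex);		\
	} while (0)

static void print_field(const char *s)
{
	puts(s);
}

int main(void)
{
	DEFINE_ARRAY_FIELD(char, 16, print_field);	/* prints "char[16]" */
	return 0;
}
----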
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 6e3c41a4024c..eb17e143b5da 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -57,6 +57,8 @@ static int __init hardlockup_panic_setup(char *str)
57{ 57{
58 if (!strncmp(str, "panic", 5)) 58 if (!strncmp(str, "panic", 5))
59 hardlockup_panic = 1; 59 hardlockup_panic = 1;
60 else if (!strncmp(str, "0", 1))
61 no_watchdog = 1;
60 return 1; 62 return 1;
61} 63}
62__setup("nmi_watchdog=", hardlockup_panic_setup); 64__setup("nmi_watchdog=", hardlockup_panic_setup);
@@ -547,13 +549,13 @@ static struct notifier_block __cpuinitdata cpu_nfb = {
547 .notifier_call = cpu_callback 549 .notifier_call = cpu_callback
548}; 550};
549 551
550static int __init spawn_watchdog_task(void) 552void __init lockup_detector_init(void)
551{ 553{
552 void *cpu = (void *)(long)smp_processor_id(); 554 void *cpu = (void *)(long)smp_processor_id();
553 int err; 555 int err;
554 556
555 if (no_watchdog) 557 if (no_watchdog)
556 return 0; 558 return;
557 559
558 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); 560 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
559 WARN_ON(notifier_to_errno(err)); 561 WARN_ON(notifier_to_errno(err));
@@ -561,6 +563,5 @@ static int __init spawn_watchdog_task(void)
561 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); 563 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
562 register_cpu_notifier(&cpu_nfb); 564 register_cpu_notifier(&cpu_nfb);
563 565
564 return 0; 566 return;
565} 567}
566early_initcall(spawn_watchdog_task);
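Two behavioural notes on the watchdog hunks: 'nmi_watchdog=0' on the command line now disables the detector outright, and the renamed lockup_detector_init() is no longer an early_initcall, so it must be invoked explicitly during boot (presumably from init/main.c, which this series also touches per the diffstat). The parameter parsing itself is simple enough to exercise standalone:

----
#include <stdio.h>
#include <string.h>

/* Userspace sketch of the nmi_watchdog= parsing above: "panic"
 * promotes hard lockups to panics, "0" disables the detector. */
static int hardlockup_panic, no_watchdog;

static int hardlockup_panic_setup(char *str)
{
	if (!strncmp(str, "panic", 5))
		hardlockup_panic = 1;
	else if (!strncmp(str, "0", 1))
		no_watchdog = 1;
	return 1;
}

int main(void)
{
	hardlockup_panic_setup("0");
	printf("panic=%d no_watchdog=%d\n", hardlockup_panic, no_watchdog);
	return 0;
}
----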
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 28b42b9274d0..2d05adb98401 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -173,7 +173,8 @@ config LOCKUP_DETECTOR
173 An NMI is generated every 60 seconds or so to check for hardlockups. 173 An NMI is generated every 60 seconds or so to check for hardlockups.
174 174
175config HARDLOCKUP_DETECTOR 175config HARDLOCKUP_DETECTOR
176 def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI 176 def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \
177 !ARCH_HAS_NMI_WATCHDOG
177 178
178config BOOTPARAM_SOFTLOCKUP_PANIC 179config BOOTPARAM_SOFTLOCKUP_PANIC
179 bool "Panic (Reboot) On Soft Lockups" 180 bool "Panic (Reboot) On Soft Lockups"
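The Kconfig tweak keeps the perf-based hard-lockup detector off architectures that already provide their own NMI watchdog. Expressed as a compile-time gate it is roughly the following (a sketch; the macro names mirror the Kconfig symbols, the function is invented for illustration):

----
#if defined(CONFIG_LOCKUP_DETECTOR) && defined(CONFIG_PERF_EVENTS) && \
    defined(CONFIG_HAVE_PERF_EVENTS_NMI) && \
    !defined(CONFIG_ARCH_HAS_NMI_WATCHDOG)
/* the perf-based hard-lockup detector is built */
static void sketch_start_hardlockup_detector(void) { /* ... */ }
#else
/* the architecture's own NMI watchdog is used instead */
static void sketch_start_hardlockup_detector(void) { }
#endif
----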
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 5ad25e17b6cb..4eb99ab34053 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -214,17 +214,22 @@ ifdef BUILD_C_RECORDMCOUNT
214# The empty.o file is created in the make process in order to determine 214# The empty.o file is created in the make process in order to determine
215# the target endianness and word size. It is made before all other C 215# the target endianness and word size. It is made before all other C
216# files, including recordmcount. 216# files, including recordmcount.
217cmd_record_mcount = if [ $(@) != "scripts/mod/empty.o" ]; then \ 217sub_cmd_record_mcount = \
218 $(objtree)/scripts/recordmcount "$(@)"; \ 218 if [ $(@) != "scripts/mod/empty.o" ]; then \
219 fi; 219 $(objtree)/scripts/recordmcount "$(@)"; \
220 fi;
220else 221else
221cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ 222sub_cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
222 "$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \ 223 "$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \
223 "$(if $(CONFIG_64BIT),64,32)" \ 224 "$(if $(CONFIG_64BIT),64,32)" \
224 "$(OBJDUMP)" "$(OBJCOPY)" "$(CC) $(KBUILD_CFLAGS)" \ 225 "$(OBJDUMP)" "$(OBJCOPY)" "$(CC) $(KBUILD_CFLAGS)" \
225 "$(LD)" "$(NM)" "$(RM)" "$(MV)" \ 226 "$(LD)" "$(NM)" "$(RM)" "$(MV)" \
226 "$(if $(part-of-module),1,0)" "$(@)"; 227 "$(if $(part-of-module),1,0)" "$(@)";
227endif 228endif
229cmd_record_mcount = \
230 if [ "$(findstring -pg,$(_c_flags))" = "-pg" ]; then \
231 $(sub_cmd_record_mcount) \
232 fi;
228endif 233endif
229 234
230define rule_cc_o_c 235define rule_cc_o_c
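The new cmd_record_mcount wrapper only runs the real recording step when the object was actually built with -pg, because only then does the compiler emit the call sites that recordmcount harvests for ftrace. A tiny C illustration of what that guard keys on:

----
/* Build with "gcc -pg -S mcount_sketch.c" and the generated assembly
 * gains a call to mcount() (or a per-arch equivalent) in each function
 * prologue; those are the sites recordmcount collects. */
void traced_function(void)
{
	/* body irrelevant; -pg adds the profiling call in the prologue */
}

int main(void)
{
	traced_function();
	return 0;
}
----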
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index b2c63309a651..6f5a498608b2 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -24,12 +24,47 @@ OPTIONS
24--input=:: 24--input=::
25 Input file name. (default: perf.data) 25 Input file name. (default: perf.data)
26 26
27-d::
28--dsos=<dso[,dso...]>::
29 Only consider symbols in these dsos.
30-s::
31--symbol=<symbol>::
32 Symbol to annotate.
33
34-f::
35--force::
36 Don't complain, do it.
37
38-v::
39--verbose::
40 Be more verbose. (Show symbol address, etc)
41
42-D::
43--dump-raw-trace::
44 Dump raw trace in ASCII.
45
46-k::
47--vmlinux=<file>::
48 vmlinux pathname.
49
50-m::
51--modules::
52 Load module symbols. WARNING: use only with -k and LIVE kernel.
53
54-l::
55--print-line::
56 Print matching source lines (may be slow).
57
58-P::
59--full-paths::
60 Don't shorten the displayed pathnames.
61
27--stdio:: Use the stdio interface. 62--stdio:: Use the stdio interface.
28 63
29--tui:: Use the TUI interface Use of --tui requires a tty, if one is not 64--tui:: Use the TUI interface Use of --tui requires a tty, if one is not
30 present, as when piping to other commands, the stdio interface is 65 present, as when piping to other commands, the stdio interface is
31 used. This interface starts by centering on the line with more 66 used. This interface starts by centering on the line with more
32 samples, TAB/UNTAB cycles thru the lines with more samples. 67 samples, TAB/UNTAB cycles through the lines with more samples.
33 68
34SEE ALSO 69SEE ALSO
35-------- 70--------
diff --git a/tools/perf/Documentation/perf-buildid-list.txt b/tools/perf/Documentation/perf-buildid-list.txt
index 01b642c0bf8f..5eaac6f26d51 100644
--- a/tools/perf/Documentation/perf-buildid-list.txt
+++ b/tools/perf/Documentation/perf-buildid-list.txt
@@ -18,6 +18,9 @@ perf report.
18 18
19OPTIONS 19OPTIONS
20------- 20-------
21-H::
22--with-hits::
23 Show only DSOs with hits.
21-i:: 24-i::
22--input=:: 25--input=::
23 Input file name. (default: perf.data) 26 Input file name. (default: perf.data)
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index 20d97d84ea1c..74d7481ed7a6 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -19,6 +19,18 @@ If no parameters are passed it will assume perf.data.old and perf.data.
19 19
20OPTIONS 20OPTIONS
21------- 21-------
22-M::
23--displacement::
24 Show position displacement relative to baseline.
25
26-D::
27--dump-raw-trace::
28 Dump raw trace in ASCII.
29
30-m::
31--modules::
32 Load module symbols. WARNING: use only with -k and LIVE kernel
33
22-d:: 34-d::
23--dsos=:: 35--dsos=::
24 Only consider symbols in these dsos. CSV that understands 36 Only consider symbols in these dsos. CSV that understands
@@ -42,7 +54,7 @@ OPTIONS
42--field-separator=:: 54--field-separator=::
43 55
44 Use a special separator character and don't pad with spaces, replacing 56 Use a special separator character and don't pad with spaces, replacing
45 all occurances of this separator in symbol names (and other output) 57 all occurrences of this separator in symbol names (and other output)
46 with a '.' character, so that it is the only invalid separator. 58 with a '.' character, so that it is the only invalid separator.
47 59
48-v:: 60-v::
@@ -50,6 +62,13 @@ OPTIONS
50 Be verbose, for instance, show the raw counts in addition to the 62 Be verbose, for instance, show the raw counts in addition to the
51 diff. 63 diff.
52 64
65-f::
66--force::
67 Don't complain, do it.
68
69--symfs=<directory>::
70 Look for files with symbols relative to this directory.
71
53SEE ALSO 72SEE ALSO
54-------- 73--------
55linkperf:perf-record[1] 74linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-kvm.txt b/tools/perf/Documentation/perf-kvm.txt
index d004e19fe6d6..dd84cb2f0a88 100644
--- a/tools/perf/Documentation/perf-kvm.txt
+++ b/tools/perf/Documentation/perf-kvm.txt
@@ -22,7 +22,7 @@ There are a couple of variants of perf kvm:
22 a performance counter profile of guest os in realtime 22 a performance counter profile of guest os in realtime
23 of an arbitrary workload. 23 of an arbitrary workload.
24 24
25 'perf kvm record <command>' to record the performance couinter profile 25 'perf kvm record <command>' to record the performance counter profile
26 of an arbitrary workload and save it into a perf data file. If both 26 of an arbitrary workload and save it into a perf data file. If both
27 --host and --guest are input, the perf data file name is perf.data.kvm. 27 --host and --guest are input, the perf data file name is perf.data.kvm.
28 If there is no --host but --guest, the file name is perf.data.guest. 28 If there is no --host but --guest, the file name is perf.data.guest.
@@ -40,6 +40,12 @@ There are a couple of variants of perf kvm:
40 40
41OPTIONS 41OPTIONS
42------- 42-------
43-i::
44--input=::
45 Input file name.
46-o::
47--output::
48 Output file name.
43--host=:: 49--host=::
44 Collect host side performance profile. 50 Collect host side performance profile.
45--guest=:: 51--guest=::
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index b317102138c8..921de259ea10 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -24,6 +24,21 @@ and statistics with this 'perf lock' command.
24 24
25 'perf lock report' reports statistical data. 25 'perf lock report' reports statistical data.
26 26
27OPTIONS
28-------
29
30-i::
31--input=<file>::
32 Input file name.
33
34-v::
35--verbose::
36 Be more verbose (show symbol address, etc).
37
38-D::
39--dump-raw-trace::
40 Dump raw trace in ASCII.
41
27SEE ALSO 42SEE ALSO
28-------- 43--------
29linkperf:perf[1] 44linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 62de1b7f4e76..86b797a35aa6 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -115,9 +115,9 @@ Each probe argument follows below syntax.
115 115
116LINE SYNTAX 116LINE SYNTAX
117----------- 117-----------
118Line range is descripted by following syntax. 118Line range is described by following syntax.
119 119
120 "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]" 120 "FUNC[:RLN[+NUM|-RLN2]]|SRC[:ALN[+NUM|-ALN2]]"
121 121
122FUNC specifies the function name of showing lines. 'RLN' is the start line 122FUNC specifies the function name of showing lines. 'RLN' is the start line
123number from function entry line, and 'RLN2' is the end line number. As same as 123number from function entry line, and 'RLN2' is the end line number. As same as
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index a91f9f9e6e5c..52462ae26455 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -39,15 +39,24 @@ OPTIONS
39 be passed as follows: '\mem:addr[:[r][w][x]]'. 39 be passed as follows: '\mem:addr[:[r][w][x]]'.
40 If you want to profile read-write accesses in 0x1000, just set 40 If you want to profile read-write accesses in 0x1000, just set
41 'mem:0x1000:rw'. 41 'mem:0x1000:rw'.
42
43--filter=<filter>::
44 Event filter.
45
42-a:: 46-a::
43 System-wide collection. 47--all-cpus::
48 System-wide collection from all CPUs.
44 49
45-l:: 50-l::
46 Scale counter values. 51 Scale counter values.
47 52
48-p:: 53-p::
49--pid=:: 54--pid=::
50 Record events on existing pid. 55 Record events on existing process ID.
56
57-t::
58--tid=::
59 Record events on existing thread ID.
51 60
52-r:: 61-r::
53--realtime=:: 62--realtime=::
@@ -99,6 +108,11 @@ OPTIONS
99--data:: 108--data::
100 Sample addresses. 109 Sample addresses.
101 110
111-T::
112--timestamp::
113 Sample timestamps. Use it with 'perf report -D' to see the timestamps,
114 for instance.
115
102-n:: 116-n::
103--no-samples:: 117--no-samples::
104 Don't sample. 118 Don't sample.
@@ -109,8 +123,8 @@ Collect raw sample records from all opened counters (default for tracepoint coun
109 123
110-C:: 124-C::
111--cpu:: 125--cpu::
112Collect samples only on the list of cpus provided. Multiple CPUs can be provided as a 126Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a
113comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. 127comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
114In per-thread mode with inheritance mode on (default), samples are captured only when 128In per-thread mode with inheritance mode on (default), samples are captured only when
115the thread executes on the designated CPUs. Default is to monitor all CPUs. 129the thread executes on the designated CPUs. Default is to monitor all CPUs.
116 130
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 12052c9ed0ba..8ba03d6e5398 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -20,6 +20,11 @@ OPTIONS
20-i:: 20-i::
21--input=:: 21--input=::
22 Input file name. (default: perf.data) 22 Input file name. (default: perf.data)
23
24-v::
25--verbose::
26 Be more verbose. (show symbol address, etc)
27
23-d:: 28-d::
24--dsos=:: 29--dsos=::
25 Only consider symbols in these dsos. CSV that understands 30 Only consider symbols in these dsos. CSV that understands
@@ -27,6 +32,10 @@ OPTIONS
27-n:: 32-n::
28--show-nr-samples:: 33--show-nr-samples::
29 Show the number of samples for each symbol 34 Show the number of samples for each symbol
35
36--showcpuutilization::
37 Show sample percentage for different cpu modes.
38
30-T:: 39-T::
31--threads:: 40--threads::
32 Show per-thread event counters 41 Show per-thread event counters
@@ -39,12 +48,24 @@ OPTIONS
39 Only consider these symbols. CSV that understands 48 Only consider these symbols. CSV that understands
40 file://filename entries. 49 file://filename entries.
41 50
51-U::
52--hide-unresolved::
53 Only display entries resolved to a symbol.
54
42-s:: 55-s::
43--sort=:: 56--sort=::
44 Sort by key(s): pid, comm, dso, symbol, parent. 57 Sort by key(s): pid, comm, dso, symbol, parent.
45 58
59-p::
60--parent=<regex>::
61 regex filter to identify parent, see: '--sort parent'
62
63-x::
64--exclude-other::
65 Only display entries with parent-match.
66
46-w:: 67-w::
47--field-width=:: 68--column-widths=<width[,width...]>::
48 Force each column width to the provided list, for large terminal 69 Force each column width to the provided list, for large terminal
49 readability. 70 readability.
50 71
@@ -52,19 +73,26 @@ OPTIONS
52--field-separator=:: 73--field-separator=::
53 74
54 Use a special separator character and don't pad with spaces, replacing 75 Use a special separator character and don't pad with spaces, replacing
55 all occurances of this separator in symbol names (and other output) 76 all occurrences of this separator in symbol names (and other output)
56 with a '.' character, so that it is the only invalid separator. 77 with a '.' character, so that it is the only invalid separator.
57 78
79-D::
80--dump-raw-trace::
81 Dump raw trace in ASCII.
82
58-g [type,min]:: 83-g [type,min]::
59--call-graph:: 84--call-graph::
60 Display callchains using type and min percent threshold. 85 Display call chains using type and min percent threshold.
61 type can be either: 86 type can be either:
62 - flat: single column, linear exposure of callchains. 87 - flat: single column, linear exposure of call chains.
63 - graph: use a graph tree, displaying absolute overhead rates. 88 - graph: use a graph tree, displaying absolute overhead rates.
64 - fractal: like graph, but displays relative rates. Each branch of 89 - fractal: like graph, but displays relative rates. Each branch of
65 the tree is considered as a new profiled object. + 90 the tree is considered as a new profiled object. +
66 Default: fractal,0.5. 91 Default: fractal,0.5.
67 92
93--pretty=<key>::
94 Pretty printing style. key: normal, raw
95
68--stdio:: Use the stdio interface. 96--stdio:: Use the stdio interface.
69 97
70--tui:: Use the TUI interface, that is integrated with annotate and allows 98--tui:: Use the TUI interface, that is integrated with annotate and allows
@@ -72,6 +100,25 @@ OPTIONS
72 requires a tty, if one is not present, as when piping to other 100 requires a tty, if one is not present, as when piping to other
73 commands, the stdio interface is used. 101 commands, the stdio interface is used.
74 102
103-k::
104--vmlinux=<file>::
105 vmlinux pathname
106
107--kallsyms=<file>::
108 kallsyms pathname
109
110-m::
111--modules::
112 Load module symbols. WARNING: This should only be used with -k and
113 a LIVE kernel.
114
115-f::
116--force::
117 Don't complain, do it.
118
119--symfs=<directory>::
120 Look for files with symbols relative to this directory.
121
75SEE ALSO 122SEE ALSO
76-------- 123--------
77linkperf:perf-stat[1] 124linkperf:perf-stat[1]
diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt
index 8417644a6166..46822d5fde1c 100644
--- a/tools/perf/Documentation/perf-sched.txt
+++ b/tools/perf/Documentation/perf-sched.txt
@@ -8,11 +8,11 @@ perf-sched - Tool to trace/measure scheduler properties (latencies)
8SYNOPSIS 8SYNOPSIS
9-------- 9--------
10[verse] 10[verse]
11'perf sched' {record|latency|replay|trace} 11'perf sched' {record|latency|map|replay|trace}
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
15There are four variants of perf sched: 15There are five variants of perf sched:
16 16
17 'perf sched record <command>' to record the scheduling events 17 'perf sched record <command>' to record the scheduling events
18 of an arbitrary workload. 18 of an arbitrary workload.
@@ -30,8 +30,22 @@ There are four variants of perf sched:
30 of the workload as it occurred when it was recorded - and can repeat 30 of the workload as it occurred when it was recorded - and can repeat
31 it a number of times, measuring its performance.) 31 it a number of times, measuring its performance.)
32 32
33 'perf sched map' to print a textual context-switching outline of
34 workload captured via perf sched record. Columns stand for
35 individual CPUs, and the two-letter shortcuts stand for tasks that
36 are running on a CPU. A '*' denotes the CPU that had the event, and
37 a dot signals an idle CPU.
38
33OPTIONS 39OPTIONS
34------- 40-------
41-i::
42--input=<file>::
43 Input file name. (default: perf.data)
44
45-v::
46--verbose::
47 Be more verbose. (show symbol address, etc)
48
35-D:: 49-D::
36--dump-raw-trace=:: 50--dump-raw-trace=::
37 Display verbose dump of the sched data. 51 Display verbose dump of the sched data.
diff --git a/tools/perf/Documentation/perf-trace-perl.txt b/tools/perf/Documentation/perf-script-perl.txt
index ee6525ee6d69..5bb41e55a3ac 100644
--- a/tools/perf/Documentation/perf-trace-perl.txt
+++ b/tools/perf/Documentation/perf-script-perl.txt
@@ -1,19 +1,19 @@
1perf-trace-perl(1) 1perf-script-perl(1)
2================== 2==================
3 3
4NAME 4NAME
5---- 5----
6perf-trace-perl - Process trace data with a Perl script 6perf-script-perl - Process trace data with a Perl script
7 7
8SYNOPSIS 8SYNOPSIS
9-------- 9--------
10[verse] 10[verse]
11'perf trace' [-s [Perl]:script[.pl] ] 11'perf script' [-s [Perl]:script[.pl] ]
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
15 15
16This perf trace option is used to process perf trace data using perf's 16This perf script option is used to process perf script data using perf's
17built-in Perl interpreter. It reads and processes the input file and 17built-in Perl interpreter. It reads and processes the input file and
18displays the results of the trace analysis implemented in the given 18displays the results of the trace analysis implemented in the given
19Perl script, if any. 19Perl script, if any.
@@ -21,7 +21,7 @@ Perl script, if any.
21STARTER SCRIPTS 21STARTER SCRIPTS
22--------------- 22---------------
23 23
24You can avoid reading the rest of this document by running 'perf trace 24You can avoid reading the rest of this document by running 'perf script
25-g perl' in the same directory as an existing perf.data trace file. 25-g perl' in the same directory as an existing perf.data trace file.
26That will generate a starter script containing a handler for each of 26That will generate a starter script containing a handler for each of
27the event types in the trace file; it simply prints every available 27the event types in the trace file; it simply prints every available
@@ -30,13 +30,13 @@ field for each event in the trace file.
30You can also look at the existing scripts in 30You can also look at the existing scripts in
31~/libexec/perf-core/scripts/perl for typical examples showing how to 31~/libexec/perf-core/scripts/perl for typical examples showing how to
32do basic things like aggregate event data, print results, etc. Also, 32do basic things like aggregate event data, print results, etc. Also,
33the check-perf-trace.pl script, while not interesting for its results, 33the check-perf-script.pl script, while not interesting for its results,
34attempts to exercise all of the main scripting features. 34attempts to exercise all of the main scripting features.
35 35
36EVENT HANDLERS 36EVENT HANDLERS
37-------------- 37--------------
38 38
39When perf trace is invoked using a trace script, a user-defined 39When perf script is invoked using a trace script, a user-defined
40'handler function' is called for each event in the trace. If there's 40'handler function' is called for each event in the trace. If there's
41no handler function defined for a given event type, the event is 41no handler function defined for a given event type, the event is
42ignored (or passed to a 'trace_handled' function, see below) and the 42ignored (or passed to a 'trace_handled' function, see below) and the
@@ -112,13 +112,13 @@ write a useful trace script. The sections below cover the rest.
112SCRIPT LAYOUT 112SCRIPT LAYOUT
113------------- 113-------------
114 114
115Every perf trace Perl script should start by setting up a Perl module 115Every perf script Perl script should start by setting up a Perl module
116search path and 'use'ing a few support modules (see module 116search path and 'use'ing a few support modules (see module
117descriptions below): 117descriptions below):
118 118
119---- 119----
120 use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib"; 120 use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/perf-script-Util/lib";
121 use lib "./Perf-Trace-Util/lib"; 121 use lib "./perf-script-Util/lib";
122 use Perf::Trace::Core; 122 use Perf::Trace::Core;
123 use Perf::Trace::Context; 123 use Perf::Trace::Context;
124 use Perf::Trace::Util; 124 use Perf::Trace::Util;
@@ -162,7 +162,7 @@ sub trace_unhandled
162---- 162----
163 163
164The remaining sections provide descriptions of each of the available 164The remaining sections provide descriptions of each of the available
165built-in perf trace Perl modules and their associated functions. 165built-in perf script Perl modules and their associated functions.
166 166
167AVAILABLE MODULES AND FUNCTIONS 167AVAILABLE MODULES AND FUNCTIONS
168------------------------------- 168-------------------------------
@@ -170,7 +170,7 @@ AVAILABLE MODULES AND FUNCTIONS
170The following sections describe the functions and variables available 170The following sections describe the functions and variables available
171via the various Perf::Trace::* Perl modules. To use the functions and 171via the various Perf::Trace::* Perl modules. To use the functions and
172variables from the given module, add the corresponding 'use 172variables from the given module, add the corresponding 'use
173Perf::Trace::XXX' line to your perf trace script. 173Perf::Trace::XXX' line to your perf script script.
174 174
175Perf::Trace::Core Module 175Perf::Trace::Core Module
176~~~~~~~~~~~~~~~~~~~~~~~~ 176~~~~~~~~~~~~~~~~~~~~~~~~
@@ -204,7 +204,7 @@ argument.
204Perf::Trace::Util Module 204Perf::Trace::Util Module
205~~~~~~~~~~~~~~~~~~~~~~~~ 205~~~~~~~~~~~~~~~~~~~~~~~~
206 206
207Various utility functions for use with perf trace: 207Various utility functions for use with perf script:
208 208
209 nsecs($secs, $nsecs) - returns total nsecs given secs/nsecs pair 209 nsecs($secs, $nsecs) - returns total nsecs given secs/nsecs pair
210 nsecs_secs($nsecs) - returns whole secs portion given nsecs 210 nsecs_secs($nsecs) - returns whole secs portion given nsecs
@@ -214,4 +214,4 @@ Various utility functions for use with perf trace:
214 214
215SEE ALSO 215SEE ALSO
216-------- 216--------
217linkperf:perf-trace[1] 217linkperf:perf-script[1]
diff --git a/tools/perf/Documentation/perf-trace-python.txt b/tools/perf/Documentation/perf-script-python.txt
index 693be804dd3d..36b38277422c 100644
--- a/tools/perf/Documentation/perf-trace-python.txt
+++ b/tools/perf/Documentation/perf-script-python.txt
@@ -1,19 +1,19 @@
1perf-trace-python(1) 1perf-script-python(1)
2==================== 2====================
3 3
4NAME 4NAME
5---- 5----
6perf-trace-python - Process trace data with a Python script 6perf-script-python - Process trace data with a Python script
7 7
8SYNOPSIS 8SYNOPSIS
9-------- 9--------
10[verse] 10[verse]
11'perf trace' [-s [Python]:script[.py] ] 11'perf script' [-s [Python]:script[.py] ]
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
15 15
16This perf trace option is used to process perf trace data using perf's 16This perf script option is used to process perf script data using perf's
17built-in Python interpreter. It reads and processes the input file and 17built-in Python interpreter. It reads and processes the input file and
18displays the results of the trace analysis implemented in the given 18displays the results of the trace analysis implemented in the given
19Python script, if any. 19Python script, if any.
@@ -23,15 +23,15 @@ A QUICK EXAMPLE
23 23
24This section shows the process, start to finish, of creating a working 24This section shows the process, start to finish, of creating a working
25Python script that aggregates and extracts useful information from a 25Python script that aggregates and extracts useful information from a
26raw perf trace stream. You can avoid reading the rest of this 26raw perf script stream. You can avoid reading the rest of this
27document if an example is enough for you; the rest of the document 27document if an example is enough for you; the rest of the document
28provides more details on each step and lists the library functions 28provides more details on each step and lists the library functions
29available to script writers. 29available to script writers.
30 30
31This example actually details the steps that were used to create the 31This example actually details the steps that were used to create the
32'syscall-counts' script you see when you list the available perf trace 32'syscall-counts' script you see when you list the available perf script
33scripts via 'perf trace -l'. As such, this script also shows how to 33scripts via 'perf script -l'. As such, this script also shows how to
34integrate your script into the list of general-purpose 'perf trace' 34integrate your script into the list of general-purpose 'perf script'
35scripts listed by that command. 35scripts listed by that command.
36 36
37The syscall-counts script is a simple script, but demonstrates all the 37The syscall-counts script is a simple script, but demonstrates all the
@@ -105,31 +105,31 @@ That single stream will be recorded in a file in the current directory
105called perf.data. 105called perf.data.
106 106
107Once we have a perf.data file containing our data, we can use the -g 107Once we have a perf.data file containing our data, we can use the -g
108'perf trace' option to generate a Python script that will contain a 108'perf script' option to generate a Python script that will contain a
109callback handler for each event type found in the perf.data trace 109callback handler for each event type found in the perf.data trace
110stream (for more details, see the STARTER SCRIPTS section). 110stream (for more details, see the STARTER SCRIPTS section).
111 111
112---- 112----
113# perf trace -g python 113# perf script -g python
114generated Python script: perf-trace.py 114generated Python script: perf-script.py
115 115
116The output file created also in the current directory is named 116The output file created also in the current directory is named
117perf-trace.py. Here's the file in its entirety: 117perf-script.py. Here's the file in its entirety:
118 118
119# perf trace event handlers, generated by perf trace -g python 119# perf script event handlers, generated by perf script -g python
120# Licensed under the terms of the GNU GPL License version 2 120# Licensed under the terms of the GNU GPL License version 2
121 121
122# The common_* event handler fields are the most useful fields common to 122# The common_* event handler fields are the most useful fields common to
123# all events. They don't necessarily correspond to the 'common_*' fields 123# all events. They don't necessarily correspond to the 'common_*' fields
124# in the format files. Those fields not available as handler params can 124# in the format files. Those fields not available as handler params can
125# be retrieved using Python functions of the form common_*(context). 125# be retrieved using Python functions of the form common_*(context).
126# See the perf-trace-python Documentation for the list of available functions. 126# See the perf-script-python Documentation for the list of available functions.
127 127
128import os 128import os
129import sys 129import sys
130 130
131sys.path.append(os.environ['PERF_EXEC_PATH'] + \ 131sys.path.append(os.environ['PERF_EXEC_PATH'] + \
132 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') 132 '/scripts/python/perf-script-Util/lib/Perf/Trace')
133 133
134from perf_trace_context import * 134from perf_trace_context import *
135from Core import * 135from Core import *
@@ -160,7 +160,7 @@ def print_header(event_name, cpu, secs, nsecs, pid, comm):
160---- 160----
161 161
162At the top is a comment block followed by some import statements and a 162At the top is a comment block followed by some import statements and a
163path append which every perf trace script should include. 163path append which every perf script script should include.
164 164
165Following that are a couple generated functions, trace_begin() and 165Following that are a couple generated functions, trace_begin() and
166trace_end(), which are called at the beginning and the end of the 166trace_end(), which are called at the beginning and the end of the
@@ -189,8 +189,8 @@ simply a utility function used for that purpose. Let's rename the
189script and run it to see the default output: 189script and run it to see the default output:
190 190
191---- 191----
192# mv perf-trace.py syscall-counts.py 192# mv perf-script.py syscall-counts.py
193# perf trace -s syscall-counts.py 193# perf script -s syscall-counts.py
194 194
195raw_syscalls__sys_enter 1 00840.847582083 7506 perf id=1, args= 195raw_syscalls__sys_enter 1 00840.847582083 7506 perf id=1, args=
196raw_syscalls__sys_enter 1 00840.847595764 7506 perf id=1, args= 196raw_syscalls__sys_enter 1 00840.847595764 7506 perf id=1, args=
@@ -216,7 +216,7 @@ import os
216import sys 216import sys
217 217
218sys.path.append(os.environ['PERF_EXEC_PATH'] + \ 218sys.path.append(os.environ['PERF_EXEC_PATH'] + \
219 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') 219 '/scripts/python/perf-script-Util/lib/Perf/Trace')
220 220
221from perf_trace_context import * 221from perf_trace_context import *
222from Core import * 222from Core import *
@@ -279,7 +279,7 @@ import os
279import sys 279import sys
280 280
281sys.path.append(os.environ['PERF_EXEC_PATH'] + \ 281sys.path.append(os.environ['PERF_EXEC_PATH'] + \
282 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') 282 '/scripts/python/perf-script-Util/lib/Perf/Trace')
283 283
284from perf_trace_context import * 284from perf_trace_context import *
285from Core import * 285from Core import *
@@ -315,7 +315,7 @@ def print_syscall_totals():
315 315
316The script can be run just as before: 316The script can be run just as before:
317 317
318 # perf trace -s syscall-counts.py 318 # perf script -s syscall-counts.py
319 319
320So those are the essential steps in writing and running a script. The 320So those are the essential steps in writing and running a script. The
321process can be generalized to any tracepoint or set of tracepoints 321process can be generalized to any tracepoint or set of tracepoints
@@ -324,17 +324,17 @@ interested in by looking at the list of available events shown by
324'perf list' and/or look in /sys/kernel/debug/tracing events for 324'perf list' and/or look in /sys/kernel/debug/tracing events for
325detailed event and field info, record the corresponding trace data 325detailed event and field info, record the corresponding trace data
326using 'perf record', passing it the list of interesting events, 326using 'perf record', passing it the list of interesting events,
327generate a skeleton script using 'perf trace -g python' and modify the 327generate a skeleton script using 'perf script -g python' and modify the
328code to aggregate and display it for your particular needs. 328code to aggregate and display it for your particular needs.
329 329
330After you've done that you may end up with a general-purpose script 330After you've done that you may end up with a general-purpose script
331that you want to keep around and have available for future use. By 331that you want to keep around and have available for future use. By
332writing a couple of very simple shell scripts and putting them in the 332writing a couple of very simple shell scripts and putting them in the
333right place, you can have your script listed alongside the other 333right place, you can have your script listed alongside the other
334scripts listed by the 'perf trace -l' command e.g.: 334scripts listed by the 'perf script -l' command e.g.:
335 335
336---- 336----
337root@tropicana:~# perf trace -l 337root@tropicana:~# perf script -l
338List of available trace scripts: 338List of available trace scripts:
339 workqueue-stats workqueue stats (ins/exe/create/destroy) 339 workqueue-stats workqueue stats (ins/exe/create/destroy)
340 wakeup-latency system-wide min/max/avg wakeup latency 340 wakeup-latency system-wide min/max/avg wakeup latency
@@ -365,14 +365,14 @@ perf record -a -e raw_syscalls:sys_enter
365The 'report' script is also a shell script with the same base name as 365The 'report' script is also a shell script with the same base name as
366your script, but with -report appended. It should also be located in 366your script, but with -report appended. It should also be located in
367the perf/scripts/python/bin directory. In that script, you write the 367the perf/scripts/python/bin directory. In that script, you write the
368'perf trace -s' command-line needed for running your script: 368'perf script -s' command-line needed for running your script:
369 369
370---- 370----
371# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-report 371# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-report
372 372
373#!/bin/bash 373#!/bin/bash
374# description: system-wide syscall counts 374# description: system-wide syscall counts
375perf trace -s ~/libexec/perf-core/scripts/python/syscall-counts.py 375perf script -s ~/libexec/perf-core/scripts/python/syscall-counts.py
376---- 376----
377 377
378Note that the location of the Python script given in the shell script 378Note that the location of the Python script given in the shell script
@@ -390,17 +390,17 @@ total 32
390drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 . 390drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 .
391drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 .. 391drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 ..
392drwxr-xr-x 2 trz trz 4096 2010-01-26 22:29 bin 392drwxr-xr-x 2 trz trz 4096 2010-01-26 22:29 bin
393-rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-trace.py 393-rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-script.py
394drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 Perf-Trace-Util 394drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 perf-script-Util
395-rw-r--r-- 1 trz trz 1462 2010-01-26 22:30 syscall-counts.py 395-rw-r--r-- 1 trz trz 1462 2010-01-26 22:30 syscall-counts.py
396---- 396----
397 397
398Once you've done that (don't forget to do a new 'make install', 398Once you've done that (don't forget to do a new 'make install',
399otherwise your script won't show up at run-time), 'perf trace -l' 399otherwise your script won't show up at run-time), 'perf script -l'
400should show a new entry for your script: 400should show a new entry for your script:
401 401
402---- 402----
403root@tropicana:~# perf trace -l 403root@tropicana:~# perf script -l
404List of available trace scripts: 404List of available trace scripts:
405 workqueue-stats workqueue stats (ins/exe/create/destroy) 405 workqueue-stats workqueue stats (ins/exe/create/destroy)
406 wakeup-latency system-wide min/max/avg wakeup latency 406 wakeup-latency system-wide min/max/avg wakeup latency
@@ -409,19 +409,19 @@ List of available trace scripts:
409 syscall-counts system-wide syscall counts 409 syscall-counts system-wide syscall counts
410---- 410----
411 411
412You can now perform the record step via 'perf trace record': 412You can now perform the record step via 'perf script record':
413 413
414 # perf trace record syscall-counts 414 # perf script record syscall-counts
415 415
416and display the output using 'perf trace report': 416and display the output using 'perf script report':
417 417
418 # perf trace report syscall-counts 418 # perf script report syscall-counts
419 419
420STARTER SCRIPTS 420STARTER SCRIPTS
421--------------- 421---------------
422 422
423You can quickly get started writing a script for a particular set of 423You can quickly get started writing a script for a particular set of
424trace data by generating a skeleton script using 'perf trace -g 424trace data by generating a skeleton script using 'perf script -g
425python' in the same directory as an existing perf.data trace file. 425python' in the same directory as an existing perf.data trace file.
426That will generate a starter script containing a handler for each of 426That will generate a starter script containing a handler for each of
427the event types in the trace file; it simply prints every available 427the event types in the trace file; it simply prints every available
@@ -430,13 +430,13 @@ field for each event in the trace file.
430You can also look at the existing scripts in 430You can also look at the existing scripts in
431~/libexec/perf-core/scripts/python for typical examples showing how to 431~/libexec/perf-core/scripts/python for typical examples showing how to
432do basic things like aggregate event data, print results, etc. Also, 432do basic things like aggregate event data, print results, etc. Also,
433the check-perf-trace.py script, while not interesting for its results, 433the check-perf-script.py script, while not interesting for its results,
434attempts to exercise all of the main scripting features. 434attempts to exercise all of the main scripting features.
435 435
436EVENT HANDLERS 436EVENT HANDLERS
437-------------- 437--------------
438 438
439When perf trace is invoked using a trace script, a user-defined 439When perf script is invoked using a trace script, a user-defined
440'handler function' is called for each event in the trace. If there's 440'handler function' is called for each event in the trace. If there's
441no handler function defined for a given event type, the event is 441no handler function defined for a given event type, the event is
442ignored (or passed to a 'trace_handled' function, see below) and the 442ignored (or passed to a 'trace_handled' function, see below) and the
@@ -510,7 +510,7 @@ write a useful trace script. The sections below cover the rest.
510SCRIPT LAYOUT 510SCRIPT LAYOUT
511------------- 511-------------
512 512
513Every perf trace Python script should start by setting up a Python 513Every perf script Python script should start by setting up a Python
514module search path and 'import'ing a few support modules (see module 514module search path and 'import'ing a few support modules (see module
515descriptions below): 515descriptions below):
516 516
@@ -519,7 +519,7 @@ descriptions below):
519 import sys 519 import sys
520 520
521 sys.path.append(os.environ['PERF_EXEC_PATH'] + \ 521 sys.path.append(os.environ['PERF_EXEC_PATH'] + \
522 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') 522 '/scripts/python/perf-script-Util/lib/Perf/Trace')
523 523
524 from perf_trace_context import * 524 from perf_trace_context import *
525 from Core import * 525 from Core import *
@@ -559,15 +559,15 @@ def trace_unhandled(event_name, context, common_cpu, common_secs,
559---- 559----
560 560
561The remaining sections provide descriptions of each of the available 561The remaining sections provide descriptions of each of the available
562built-in perf trace Python modules and their associated functions. 562built-in perf script Python modules and their associated functions.
563 563
564AVAILABLE MODULES AND FUNCTIONS 564AVAILABLE MODULES AND FUNCTIONS
565------------------------------- 565-------------------------------
566 566
567The following sections describe the functions and variables available 567The following sections describe the functions and variables available
568via the various perf trace Python modules. To use the functions and 568via the various perf script Python modules. To use the functions and
569variables from the given module, add the corresponding 'from XXXX 569variables from the given module, add the corresponding 'from XXXX
570import' line to your perf trace script. 570import' line to your perf script script.
571 571
572Core.py Module 572Core.py Module
573~~~~~~~~~~~~~~ 573~~~~~~~~~~~~~~
@@ -610,7 +610,7 @@ argument.
610Util.py Module 610Util.py Module
611~~~~~~~~~~~~~~ 611~~~~~~~~~~~~~~
612 612
613Various utility functions for use with perf trace: 613Various utility functions for use with perf script:
614 614
615 nsecs(secs, nsecs) - returns total nsecs given secs/nsecs pair 615 nsecs(secs, nsecs) - returns total nsecs given secs/nsecs pair
616 nsecs_secs(nsecs) - returns whole secs portion given nsecs 616 nsecs_secs(nsecs) - returns whole secs portion given nsecs
@@ -620,4 +620,4 @@ Various utility functions for use with perf trace:
620 620
621SEE ALSO 621SEE ALSO
622-------- 622--------
623linkperf:perf-trace[1] 623linkperf:perf-script[1]
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-script.txt
index 26aff6bf9e50..29ad94293cd2 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -1,71 +1,71 @@
1perf-trace(1) 1perf-script(1)
2============= 2=============
3 3
4NAME 4NAME
5---- 5----
6perf-trace - Read perf.data (created by perf record) and display trace output 6perf-script - Read perf.data (created by perf record) and display trace output
7 7
8SYNOPSIS 8SYNOPSIS
9-------- 9--------
10[verse] 10[verse]
11'perf trace' [<options>] 11'perf script' [<options>]
12'perf trace' [<options>] record <script> [<record-options>] <command> 12'perf script' [<options>] record <script> [<record-options>] <command>
13'perf trace' [<options>] report <script> [script-args] 13'perf script' [<options>] report <script> [script-args]
14'perf trace' [<options>] <script> <required-script-args> [<record-options>] <command> 14'perf script' [<options>] <script> <required-script-args> [<record-options>] <command>
15'perf trace' [<options>] <top-script> [script-args] 15'perf script' [<options>] <top-script> [script-args]
16 16
17DESCRIPTION 17DESCRIPTION
18----------- 18-----------
19This command reads the input file and displays the trace recorded. 19This command reads the input file and displays the trace recorded.
20 20
21There are several variants of perf trace: 21There are several variants of perf script:
22 22
23 'perf trace' to see a detailed trace of the workload that was 23 'perf script' to see a detailed trace of the workload that was
24 recorded. 24 recorded.
25 25
26 You can also run a set of pre-canned scripts that aggregate and 26 You can also run a set of pre-canned scripts that aggregate and
27 summarize the raw trace data in various ways (the list of scripts is 27 summarize the raw trace data in various ways (the list of scripts is
28 available via 'perf trace -l'). The following variants allow you to 28 available via 'perf script -l'). The following variants allow you to
29 record and run those scripts: 29 record and run those scripts:
30 30
31 'perf trace record <script> <command>' to record the events required 31 'perf script record <script> <command>' to record the events required
32 for 'perf trace report'. <script> is the name displayed in the 32 for 'perf script report'. <script> is the name displayed in the
33 output of 'perf trace --list' i.e. the actual script name minus any 33 output of 'perf script --list' i.e. the actual script name minus any
34 language extension. If <command> is not specified, the events are 34 language extension. If <command> is not specified, the events are
35 recorded using the -a (system-wide) 'perf record' option. 35 recorded using the -a (system-wide) 'perf record' option.
36 36
37 'perf trace report <script> [args]' to run and display the results 37 'perf script report <script> [args]' to run and display the results
38 of <script>. <script> is the name displayed in the output of 'perf 38 of <script>. <script> is the name displayed in the output of 'perf
39 trace --list' i.e. the actual script name minus any language 39 trace --list' i.e. the actual script name minus any language
40 extension. The perf.data output from a previous run of 'perf trace 40 extension. The perf.data output from a previous run of 'perf script
41 record <script>' is used and should be present for this command to 41 record <script>' is used and should be present for this command to
42 succeed. [args] refers to the (mainly optional) args expected by 42 succeed. [args] refers to the (mainly optional) args expected by
43 the script. 43 the script.
44 44
45 'perf trace <script> <required-script-args> <command>' to both 45 'perf script <script> <required-script-args> <command>' to both
46 record the events required for <script> and to run the <script> 46 record the events required for <script> and to run the <script>
47 using 'live-mode' i.e. without writing anything to disk. <script> 47 using 'live-mode' i.e. without writing anything to disk. <script>
48 is the name displayed in the output of 'perf trace --list' i.e. the 48 is the name displayed in the output of 'perf script --list' i.e. the
49 actual script name minus any language extension. If <command> is 49 actual script name minus any language extension. If <command> is
50 not specified, the events are recorded using the -a (system-wide) 50 not specified, the events are recorded using the -a (system-wide)
51 'perf record' option. If <script> has any required args, they 51 'perf record' option. If <script> has any required args, they
52 should be specified before <command>. This mode doesn't allow for 52 should be specified before <command>. This mode doesn't allow for
53 optional script args to be specified; if optional script args are 53 optional script args to be specified; if optional script args are
54 desired, they can be specified using separate 'perf trace record' 54 desired, they can be specified using separate 'perf script record'
55 and 'perf trace report' commands, with the stdout of the record step 55 and 'perf script report' commands, with the stdout of the record step
56 piped to the stdin of the report script, using the '-o -' and '-i -' 56 piped to the stdin of the report script, using the '-o -' and '-i -'
57 options of the corresponding commands. 57 options of the corresponding commands.
58 58
59 'perf trace <top-script>' to both record the events required for 59 'perf script <top-script>' to both record the events required for
60 <top-script> and to run the <top-script> using 'live-mode' 60 <top-script> and to run the <top-script> using 'live-mode'
61 i.e. without writing anything to disk. <top-script> is the name 61 i.e. without writing anything to disk. <top-script> is the name
62 displayed in the output of 'perf trace --list' i.e. the actual 62 displayed in the output of 'perf script --list' i.e. the actual
63 script name minus any language extension; a <top-script> is defined 63 script name minus any language extension; a <top-script> is defined
64 as any script name ending with the string 'top'. 64 as any script name ending with the string 'top'.
65 65
66 [<record-options>] can be passed to the record steps of 'perf trace 66 [<record-options>] can be passed to the record steps of 'perf script
67 record' and 'live-mode' variants; this isn't possible however for 67 record' and 'live-mode' variants; this isn't possible however for
68 <top-script> 'live-mode' or 'perf trace report' variants. 68 <top-script> 'live-mode' or 'perf script report' variants.
69 69
70 See the 'SEE ALSO' section for links to language-specific 70 See the 'SEE ALSO' section for links to language-specific
71 information on how to write and run your own trace scripts. 71 information on how to write and run your own trace scripts.
@@ -76,7 +76,7 @@ OPTIONS
76 Any command you can specify in a shell. 76 Any command you can specify in a shell.
77 77
78-D:: 78-D::
79--dump-raw-trace=:: 79--dump-raw-script=::
80 Display verbose dump of the trace data. 80 Display verbose dump of the trace data.
81 81
82-L:: 82-L::
@@ -95,7 +95,7 @@ OPTIONS
95 95
96-g:: 96-g::
97--gen-script=:: 97--gen-script=::
98 Generate perf-trace.[ext] starter script for given language, 98 Generate perf-script.[ext] starter script for given language,
99 using current perf.data. 99 using current perf.data.
100 100
101-a:: 101-a::
@@ -104,8 +104,15 @@ OPTIONS
104 normally don't - this option allows the latter to be run in 104 normally don't - this option allows the latter to be run in
105 system-wide mode. 105 system-wide mode.
106 106
107-i::
108--input=::
109 Input file name.
110
111-d::
112--debug-mode::
113 Do various checks like samples ordering and lost events.
107 114
108SEE ALSO 115SEE ALSO
109-------- 116--------
110linkperf:perf-record[1], linkperf:perf-trace-perl[1], 117linkperf:perf-record[1], linkperf:perf-script-perl[1],
111linkperf:perf-trace-python[1] 118linkperf:perf-script-python[1]
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 4b3a2d46b437..b6da7affbbee 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -8,8 +8,8 @@ perf-stat - Run a command and gather performance counter statistics
8SYNOPSIS 8SYNOPSIS
9-------- 9--------
10[verse] 10[verse]
11'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] <command> 11'perf stat' [-e <EVENT> | --event=EVENT] [-a] <command>
12'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] -- <command> [<options>] 12'perf stat' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>]
13 13
14DESCRIPTION 14DESCRIPTION
15----------- 15-----------
@@ -35,24 +35,54 @@ OPTIONS
35 child tasks do not inherit counters 35 child tasks do not inherit counters
36-p:: 36-p::
37--pid=<pid>:: 37--pid=<pid>::
38 stat events on existing pid 38 stat events on existing process id
39
40-t::
41--tid=<tid>::
42 stat events on existing thread id
43
39 44
40-a:: 45-a::
41 system-wide collection 46--all-cpus::
47 system-wide collection from all CPUs
42 48
43-c:: 49-c::
44 scale counter values 50--scale::
51 scale/normalize counter values
52
53-r::
54--repeat=<n>::
55 repeat command and print average + stddev (max: 100)
45 56
46-B:: 57-B::
58--big-num::
47 print large numbers with thousands' separators according to locale 59 print large numbers with thousands' separators according to locale
48 60
49-C:: 61-C::
50--cpu=:: 62--cpu=::
51Count only on the list of cpus provided. Multiple CPUs can be provided as a 63Count only on the list of CPUs provided. Multiple CPUs can be provided as a
52comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. 64comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
53In per-thread mode, this option is ignored. The -a option is still necessary 65In per-thread mode, this option is ignored. The -a option is still necessary
54to activate system-wide monitoring. Default is to count on all CPUs. 66to activate system-wide monitoring. Default is to count on all CPUs.
55 67
68-A::
69--no-aggr::
70Do not aggregate counts across all monitored CPUs in system-wide mode (-a).
71This option is only valid in system-wide mode.
72
73-n::
74--null::
75 null run - don't start any counters
76
77-v::
78--verbose::
79 be more verbose (show counter open errors, etc)
80
81-x SEP::
82--field-separator SEP::
83print counts using a CSV-style output to make it easy to import directly into
84spreadsheets. Columns are separated by the string specified in SEP.
85
56EXAMPLES 86EXAMPLES
57-------- 87--------
58 88
diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt
index 1c4b5f5b7f71..2c3b462f64b0 100644
--- a/tools/perf/Documentation/perf-test.txt
+++ b/tools/perf/Documentation/perf-test.txt
@@ -12,7 +12,7 @@ SYNOPSIS
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
15This command does assorted sanity tests, initially thru linked routines but 15This command does assorted sanity tests, initially through linked routines but
16also will look for a directory with more tests in the form of scripts. 16also will look for a directory with more tests in the form of scripts.
17 17
18OPTIONS 18OPTIONS
diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt
index 4b1788355eca..d7b79e2ba2ad 100644
--- a/tools/perf/Documentation/perf-timechart.txt
+++ b/tools/perf/Documentation/perf-timechart.txt
@@ -38,6 +38,8 @@ OPTIONS
38--process:: 38--process::
39 Select the processes to display, by name or PID 39 Select the processes to display, by name or PID
40 40
41--symfs=<directory>::
42 Look for files with symbols relative to this directory.
41 43
42SEE ALSO 44SEE ALSO
43-------- 45--------
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 1f9687663f2a..f6eb1cdafb77 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -12,7 +12,7 @@ SYNOPSIS
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
15This command generates and displays a performance counter profile in realtime. 15This command generates and displays a performance counter profile in real time.
16 16
17 17
18OPTIONS 18OPTIONS
@@ -27,8 +27,8 @@ OPTIONS
27 27
28-C <cpu-list>:: 28-C <cpu-list>::
29--cpu=<cpu>:: 29--cpu=<cpu>::
30Monitor only on the list of cpus provided. Multiple CPUs can be provided as a 30Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a
31comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. 31comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
32Default is to monitor all CPUS. 32Default is to monitor all CPUS.
33 33
34-d <seconds>:: 34-d <seconds>::
@@ -50,6 +50,10 @@ Default is to monitor all CPUS.
50--count-filter=<count>:: 50--count-filter=<count>::
51 Only display functions with more events than this. 51 Only display functions with more events than this.
52 52
53-g::
54--group::
55 Put the counters into a counter group.
56
53-F <freq>:: 57-F <freq>::
54--freq=<freq>:: 58--freq=<freq>::
55 Profile at this frequency. 59 Profile at this frequency.
@@ -68,7 +72,11 @@ Default is to monitor all CPUS.
68 72
69-p <pid>:: 73-p <pid>::
70--pid=<pid>:: 74--pid=<pid>::
71 Profile events on existing pid. 75 Profile events on existing Process ID.
76
77-t <tid>::
78--tid=<tid>::
79 Profile events on existing thread ID.
72 80
73-r <priority>:: 81-r <priority>::
74--realtime=<priority>:: 82--realtime=<priority>::
@@ -78,6 +86,18 @@ Default is to monitor all CPUS.
78--sym-annotate=<symbol>:: 86--sym-annotate=<symbol>::
79 Annotate this symbol. 87 Annotate this symbol.
80 88
89-K::
90--hide_kernel_symbols::
91 Hide kernel symbols.
92
93-U::
94--hide_user_symbols::
95 Hide user symbols.
96
97-D::
98--dump-symtab::
99 Dump the symbol table used for profiling.
100
81-v:: 101-v::
82--verbose:: 102--verbose::
83 Be more verbose (show counter open errors, etc). 103 Be more verbose (show counter open errors, etc).
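The new -g/--group flag documented above maps onto the group_fd argument of sys_perf_event_open(): the first counter opened becomes the group leader, and later counters pass its fd so the whole group is scheduled onto the PMU together. A minimal sketch, assuming attr_cycles and attr_insns are already-initialized perf_event_attr structures:

    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int open_group(struct perf_event_attr *attr_cycles,
                          struct perf_event_attr *attr_insns, pid_t pid)
    {
        int leader, member;

        /* group_fd == -1: this counter starts a new group */
        leader = syscall(__NR_perf_event_open, attr_cycles, pid, -1, -1, 0);
        if (leader < 0)
            return -1;

        /* group_fd == leader: scheduled in and out with the leader */
        member = syscall(__NR_perf_event_open, attr_insns, pid, -1, leader, 0);
        return member < 0 ? -1 : leader;
    }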
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 8c7fc0c8f0b8..c12659d8cb26 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -7,6 +7,7 @@ include/linux/stringify.h
7lib/rbtree.c 7lib/rbtree.c
8include/linux/swab.h 8include/linux/swab.h
9arch/*/include/asm/unistd*.h 9arch/*/include/asm/unistd*.h
10arch/*/lib/memcpy*.S
10include/linux/poison.h 11include/linux/poison.h
11include/linux/magic.h 12include/linux/magic.h
12include/linux/hw_breakpoint.h 13include/linux/hw_breakpoint.h
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index d1db0f676a4b..ac6692cf5508 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -185,7 +185,10 @@ ifeq ($(ARCH),i386)
185 ARCH := x86 185 ARCH := x86
186endif 186endif
187ifeq ($(ARCH),x86_64) 187ifeq ($(ARCH),x86_64)
188 RAW_ARCH := x86_64
188 ARCH := x86 189 ARCH := x86
190 ARCH_CFLAGS := -DARCH_X86_64
191 ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S
189endif 192endif
190 193
191# CFLAGS and LDFLAGS are for the users to override from the command line. 194# CFLAGS and LDFLAGS are for the users to override from the command line.
@@ -375,6 +378,7 @@ LIB_H += util/include/linux/prefetch.h
375LIB_H += util/include/linux/rbtree.h 378LIB_H += util/include/linux/rbtree.h
376LIB_H += util/include/linux/string.h 379LIB_H += util/include/linux/string.h
377LIB_H += util/include/linux/types.h 380LIB_H += util/include/linux/types.h
381LIB_H += util/include/linux/linkage.h
378LIB_H += util/include/asm/asm-offsets.h 382LIB_H += util/include/asm/asm-offsets.h
379LIB_H += util/include/asm/bug.h 383LIB_H += util/include/asm/bug.h
380LIB_H += util/include/asm/byteorder.h 384LIB_H += util/include/asm/byteorder.h
@@ -383,6 +387,8 @@ LIB_H += util/include/asm/swab.h
383LIB_H += util/include/asm/system.h 387LIB_H += util/include/asm/system.h
384LIB_H += util/include/asm/uaccess.h 388LIB_H += util/include/asm/uaccess.h
385LIB_H += util/include/dwarf-regs.h 389LIB_H += util/include/dwarf-regs.h
390LIB_H += util/include/asm/dwarf2.h
391LIB_H += util/include/asm/cpufeature.h
386LIB_H += perf.h 392LIB_H += perf.h
387LIB_H += util/cache.h 393LIB_H += util/cache.h
388LIB_H += util/callchain.h 394LIB_H += util/callchain.h
@@ -417,6 +423,7 @@ LIB_H += util/probe-finder.h
417LIB_H += util/probe-event.h 423LIB_H += util/probe-event.h
418LIB_H += util/pstack.h 424LIB_H += util/pstack.h
419LIB_H += util/cpumap.h 425LIB_H += util/cpumap.h
426LIB_H += $(ARCH_INCLUDE)
420 427
421LIB_OBJS += $(OUTPUT)util/abspath.o 428LIB_OBJS += $(OUTPUT)util/abspath.o
422LIB_OBJS += $(OUTPUT)util/alias.o 429LIB_OBJS += $(OUTPUT)util/alias.o
@@ -472,6 +479,9 @@ BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
472# Benchmark modules 479# Benchmark modules
473BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o 480BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o
474BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o 481BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o
482ifeq ($(RAW_ARCH),x86_64)
483BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
484endif
475BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o 485BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
476 486
477BUILTIN_OBJS += $(OUTPUT)builtin-diff.o 487BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
@@ -485,7 +495,7 @@ BUILTIN_OBJS += $(OUTPUT)builtin-report.o
485BUILTIN_OBJS += $(OUTPUT)builtin-stat.o 495BUILTIN_OBJS += $(OUTPUT)builtin-stat.o
486BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o 496BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o
487BUILTIN_OBJS += $(OUTPUT)builtin-top.o 497BUILTIN_OBJS += $(OUTPUT)builtin-top.o
488BUILTIN_OBJS += $(OUTPUT)builtin-trace.o 498BUILTIN_OBJS += $(OUTPUT)builtin-script.o
489BUILTIN_OBJS += $(OUTPUT)builtin-probe.o 499BUILTIN_OBJS += $(OUTPUT)builtin-probe.o
490BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o 500BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o
491BUILTIN_OBJS += $(OUTPUT)builtin-lock.o 501BUILTIN_OBJS += $(OUTPUT)builtin-lock.o
@@ -507,7 +517,7 @@ PERFLIBS = $(LIB_FILE)
507-include config.mak 517-include config.mak
508 518
509ifndef NO_DWARF 519ifndef NO_DWARF
510FLAGS_DWARF=$(ALL_CFLAGS) -I/usr/include/elfutils -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS) 520FLAGS_DWARF=$(ALL_CFLAGS) -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS)
511ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF)),y) 521ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF)),y)
512 msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev); 522 msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev);
513 NO_DWARF := 1 523 NO_DWARF := 1
@@ -554,7 +564,7 @@ ifndef NO_DWARF
554ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined) 564ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
555 msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled); 565 msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);
556else 566else
557 BASIC_CFLAGS += -I/usr/include/elfutils -DDWARF_SUPPORT 567 BASIC_CFLAGS += -DDWARF_SUPPORT
558 EXTLIBS += -lelf -ldw 568 EXTLIBS += -lelf -ldw
559 LIB_OBJS += $(OUTPUT)util/probe-finder.o 569 LIB_OBJS += $(OUTPUT)util/probe-finder.o
560endif # PERF_HAVE_DWARF_REGS 570endif # PERF_HAVE_DWARF_REGS
@@ -891,13 +901,14 @@ prefix_SQ = $(subst ','\'',$(prefix))
891SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH)) 901SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
892PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH)) 902PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH))
893 903
894LIBS = $(PERFLIBS) $(EXTLIBS) 904LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive $(EXTLIBS)
895 905
896BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \ 906BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \
897 $(COMPAT_CFLAGS) 907 $(COMPAT_CFLAGS)
898LIB_OBJS += $(COMPAT_OBJS) 908LIB_OBJS += $(COMPAT_OBJS)
899 909
900ALL_CFLAGS += $(BASIC_CFLAGS) 910ALL_CFLAGS += $(BASIC_CFLAGS)
911ALL_CFLAGS += $(ARCH_CFLAGS)
901ALL_LDFLAGS += $(BASIC_LDFLAGS) 912ALL_LDFLAGS += $(BASIC_LDFLAGS)
902 913
903export TAR INSTALL DESTDIR SHELL_PATH 914export TAR INSTALL DESTDIR SHELL_PATH
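On the LIBS change above: by default the linker only extracts archive members that resolve a symbol that is already undefined, so objects whose only entry point is runtime self-registration can be silently dropped from the final binary; -Wl,--whole-archive forces every member of $(PERFLIBS) in. A generic illustration of the failure mode (hypothetical plugin.c, not a file in this patch):

    /*
     * cc -c plugin.c && ar rcs libfoo.a plugin.o
     * cc main.c libfoo.a                          # member dropped silently
     * cc main.c -Wl,--whole-archive libfoo.a \
     *    -Wl,--no-whole-archive                   # constructor runs
     */
    #include <stdio.h>

    __attribute__((constructor))
    static void plugin_init(void)
    {
        /* Nothing references this symbol, so only --whole-archive
         * keeps the object (and this registration) in the link. */
        printf("plugin registered\n");
    }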
diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h
new file mode 100644
index 000000000000..a72e36cb5394
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy-arch.h
@@ -0,0 +1,12 @@
1
2#ifdef ARCH_X86_64
3
4#define MEMCPY_FN(fn, name, desc) \
5 extern void *fn(void *, const void *, size_t);
6
7#include "mem-memcpy-x86-64-asm-def.h"
8
9#undef MEMCPY_FN
10
11#endif
12
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
new file mode 100644
index 000000000000..d588b87696fc
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
@@ -0,0 +1,4 @@
1
2MEMCPY_FN(__memcpy,
3 "x86-64-unrolled",
4 "unrolled memcpy() in arch/x86/lib/memcpy_64.S")
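The two headers above are the halves of a classic x-macro: mem-memcpy-arch.h expands MEMCPY_FN() into extern declarations, while mem-memcpy.c (below) re-expands the same -def.h entries into table initializers. After preprocessing, the single entry above amounts to:

    /* expansion via mem-memcpy-arch.h: */
    extern void *__memcpy(void *, const void *, size_t);

    /* expansion inside the routines[] initializer in mem-memcpy.c: */
    struct routine routines[] = {
        { "default", "Default memcpy() provided by glibc", memcpy },
        { "x86-64-unrolled",
          "unrolled memcpy() in arch/x86/lib/memcpy_64.S", __memcpy },
        { NULL, NULL, NULL },
    };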
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S
new file mode 100644
index 000000000000..a57b66e853c2
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S
@@ -0,0 +1,2 @@
1
2#include "../../../arch/x86/lib/memcpy_64.S"
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index 38dae7465142..db82021f4b91 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -12,6 +12,7 @@
12#include "../util/parse-options.h" 12#include "../util/parse-options.h"
13#include "../util/header.h" 13#include "../util/header.h"
14#include "bench.h" 14#include "bench.h"
15#include "mem-memcpy-arch.h"
15 16
16#include <stdio.h> 17#include <stdio.h>
17#include <stdlib.h> 18#include <stdlib.h>
@@ -23,8 +24,10 @@
23 24
24static const char *length_str = "1MB"; 25static const char *length_str = "1MB";
25static const char *routine = "default"; 26static const char *routine = "default";
26static bool use_clock = false; 27static bool use_clock;
27static int clock_fd; 28static int clock_fd;
29static bool only_prefault;
30static bool no_prefault;
28 31
29static const struct option options[] = { 32static const struct option options[] = {
30 OPT_STRING('l', "length", &length_str, "1MB", 33 OPT_STRING('l', "length", &length_str, "1MB",
@@ -34,19 +37,33 @@ static const struct option options[] = {
34 "Specify routine to copy"), 37 "Specify routine to copy"),
35 OPT_BOOLEAN('c', "clock", &use_clock, 38 OPT_BOOLEAN('c', "clock", &use_clock,
36 "Use CPU clock for measuring"), 39 "Use CPU clock for measuring"),
40 OPT_BOOLEAN('o', "only-prefault", &only_prefault,
41 "Show only the result with page faults before memcpy()"),
42 OPT_BOOLEAN('n', "no-prefault", &no_prefault,
43 "Show only the result without page faults before memcpy()"),
37 OPT_END() 44 OPT_END()
38}; 45};
39 46
47typedef void *(*memcpy_t)(void *, const void *, size_t);
48
40struct routine { 49struct routine {
41 const char *name; 50 const char *name;
42 const char *desc; 51 const char *desc;
43 void * (*fn)(void *dst, const void *src, size_t len); 52 memcpy_t fn;
44}; 53};
45 54
46struct routine routines[] = { 55struct routine routines[] = {
47 { "default", 56 { "default",
48 "Default memcpy() provided by glibc", 57 "Default memcpy() provided by glibc",
49 memcpy }, 58 memcpy },
59#ifdef ARCH_X86_64
60
61#define MEMCPY_FN(fn, name, desc) { name, desc, fn },
62#include "mem-memcpy-x86-64-asm-def.h"
63#undef MEMCPY_FN
64
65#endif
66
50 { NULL, 67 { NULL,
51 NULL, 68 NULL,
52 NULL } 69 NULL }
@@ -89,29 +106,98 @@ static double timeval2double(struct timeval *ts)
89 (double)ts->tv_usec / (double)1000000; 106 (double)ts->tv_usec / (double)1000000;
90} 107}
91 108
109static void alloc_mem(void **dst, void **src, size_t length)
110{
111 *dst = zalloc(length);
112 if (!dst)
113 die("memory allocation failed - maybe length is too large?\n");
114
115 *src = zalloc(length);
116 if (!src)
117 die("memory allocation failed - maybe length is too large?\n");
118}
119
120static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
121{
122 u64 clock_start = 0ULL, clock_end = 0ULL;
123 void *src = NULL, *dst = NULL;
124
125 alloc_mem(&src, &dst, len);
126
127 if (prefault)
128 fn(dst, src, len);
129
130 clock_start = get_clock();
131 fn(dst, src, len);
132 clock_end = get_clock();
133
134 free(src);
135 free(dst);
136 return clock_end - clock_start;
137}
138
139static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
140{
141 struct timeval tv_start, tv_end, tv_diff;
142 void *src = NULL, *dst = NULL;
143
144 alloc_mem(&src, &dst, len);
145
146 if (prefault)
147 fn(dst, src, len);
148
149 BUG_ON(gettimeofday(&tv_start, NULL));
150 fn(dst, src, len);
151 BUG_ON(gettimeofday(&tv_end, NULL));
152
153 timersub(&tv_end, &tv_start, &tv_diff);
154
155 free(src);
156 free(dst);
157 return (double)((double)len / timeval2double(&tv_diff));
158}
159
160#define pf (no_prefault ? 0 : 1)
161
162#define print_bps(x) do { \
163 if (x < K) \
164 printf(" %14lf B/Sec", x); \
165 else if (x < K * K) \
166 printf(" %14lf KB/Sec", x / K); \
167 else if (x < K * K * K) \
168 printf(" %14lf MB/Sec", x / K / K); \
169 else \
170 printf(" %14lf GB/Sec", x / K / K / K); \
171 } while (0)
172
92int bench_mem_memcpy(int argc, const char **argv, 173int bench_mem_memcpy(int argc, const char **argv,
93 const char *prefix __used) 174 const char *prefix __used)
94{ 175{
95 int i; 176 int i;
96 void *dst, *src; 177 size_t len;
97 size_t length; 178 double result_bps[2];
98 double bps = 0.0; 179 u64 result_clock[2];
99 struct timeval tv_start, tv_end, tv_diff;
100 u64 clock_start, clock_end, clock_diff;
101 180
102 clock_start = clock_end = clock_diff = 0ULL;
103 argc = parse_options(argc, argv, options, 181 argc = parse_options(argc, argv, options,
104 bench_mem_memcpy_usage, 0); 182 bench_mem_memcpy_usage, 0);
105 183
106 tv_diff.tv_sec = 0; 184 if (use_clock)
107 tv_diff.tv_usec = 0; 185 init_clock();
108 length = (size_t)perf_atoll((char *)length_str); 186
187 len = (size_t)perf_atoll((char *)length_str);
109 188
110 if ((s64)length <= 0) { 189 result_clock[0] = result_clock[1] = 0ULL;
190 result_bps[0] = result_bps[1] = 0.0;
191
192 if ((s64)len <= 0) {
111 fprintf(stderr, "Invalid length: %s\n", length_str); 193 fprintf(stderr, "Invalid length: %s\n", length_str);
112 return 1; 194 return 1;
113 } 195 }
114 196
197 /* same as not specifying either of prefault and no-prefault */
198 if (only_prefault && no_prefault)
199 only_prefault = no_prefault = false;
200
115 for (i = 0; routines[i].name; i++) { 201 for (i = 0; routines[i].name; i++) {
116 if (!strcmp(routines[i].name, routine)) 202 if (!strcmp(routines[i].name, routine))
117 break; 203 break;
@@ -126,61 +212,80 @@ int bench_mem_memcpy(int argc, const char **argv,
126 return 1; 212 return 1;
127 } 213 }
128 214
129 dst = zalloc(length); 215 if (bench_format == BENCH_FORMAT_DEFAULT)
130 if (!dst) 216 printf("# Copying %s Bytes ...\n\n", length_str);
131 die("memory allocation failed - maybe length is too large?\n");
132
133 src = zalloc(length);
134 if (!src)
135 die("memory allocation failed - maybe length is too large?\n");
136
137 if (bench_format == BENCH_FORMAT_DEFAULT) {
138 printf("# Copying %s Bytes from %p to %p ...\n\n",
139 length_str, src, dst);
140 }
141
142 if (use_clock) {
143 init_clock();
144 clock_start = get_clock();
145 } else {
146 BUG_ON(gettimeofday(&tv_start, NULL));
147 }
148
149 routines[i].fn(dst, src, length);
150 217
151 if (use_clock) { 218 if (!only_prefault && !no_prefault) {
152 clock_end = get_clock(); 219 /* show both of results */
153 clock_diff = clock_end - clock_start; 220 if (use_clock) {
221 result_clock[0] =
222 do_memcpy_clock(routines[i].fn, len, false);
223 result_clock[1] =
224 do_memcpy_clock(routines[i].fn, len, true);
225 } else {
226 result_bps[0] =
227 do_memcpy_gettimeofday(routines[i].fn,
228 len, false);
229 result_bps[1] =
230 do_memcpy_gettimeofday(routines[i].fn,
231 len, true);
232 }
154 } else { 233 } else {
155 BUG_ON(gettimeofday(&tv_end, NULL)); 234 if (use_clock) {
156 timersub(&tv_end, &tv_start, &tv_diff); 235 result_clock[pf] =
157 bps = (double)((double)length / timeval2double(&tv_diff)); 236 do_memcpy_clock(routines[i].fn,
237 len, only_prefault);
238 } else {
239 result_bps[pf] =
240 do_memcpy_gettimeofday(routines[i].fn,
241 len, only_prefault);
242 }
158 } 243 }
159 244
160 switch (bench_format) { 245 switch (bench_format) {
161 case BENCH_FORMAT_DEFAULT: 246 case BENCH_FORMAT_DEFAULT:
162 if (use_clock) { 247 if (!only_prefault && !no_prefault) {
163 printf(" %14lf Clock/Byte\n", 248 if (use_clock) {
164 (double)clock_diff / (double)length); 249 printf(" %14lf Clock/Byte\n",
165 } else { 250 (double)result_clock[0]
166 if (bps < K) 251 / (double)len);
167 printf(" %14lf B/Sec\n", bps); 252 printf(" %14lf Clock/Byte (with prefault)\n",
168 else if (bps < K * K) 253 (double)result_clock[1]
169 printf(" %14lfd KB/Sec\n", bps / 1024); 254 / (double)len);
170 else if (bps < K * K * K) 255 } else {
171 printf(" %14lf MB/Sec\n", bps / 1024 / 1024); 256 print_bps(result_bps[0]);
172 else { 257 printf("\n");
173 printf(" %14lf GB/Sec\n", 258 print_bps(result_bps[1]);
174 bps / 1024 / 1024 / 1024); 259 printf(" (with prefault)\n");
175 } 260 }
261 } else {
262 if (use_clock) {
263 printf(" %14lf Clock/Byte",
264 (double)result_clock[pf]
265 / (double)len);
266 } else
267 print_bps(result_bps[pf]);
268
269 printf("%s\n", only_prefault ? " (with prefault)" : "");
176 } 270 }
177 break; 271 break;
178 case BENCH_FORMAT_SIMPLE: 272 case BENCH_FORMAT_SIMPLE:
179 if (use_clock) { 273 if (!only_prefault && !no_prefault) {
180 printf("%14lf\n", 274 if (use_clock) {
181 (double)clock_diff / (double)length); 275 printf("%lf %lf\n",
182 } else 276 (double)result_clock[0] / (double)len,
183 printf("%lf\n", bps); 277 (double)result_clock[1] / (double)len);
278 } else {
279 printf("%lf %lf\n",
280 result_bps[0], result_bps[1]);
281 }
282 } else {
283 if (use_clock) {
284 printf("%lf\n", (double)result_clock[pf]
285 / (double)len);
286 } else
287 printf("%lf\n", result_bps[pf]);
288 }
184 break; 289 break;
185 default: 290 default:
186 /* reaching this means there's some disaster: */ 291 /* reaching this means there's some disaster: */
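The reworked bench above times each routine twice: once on freshly allocated buffers, where first-touch page faults land inside the timed region, and once after a prefault pass that faults both buffers in beforehand. Stripped of the option handling, the measurement reduces to something like this standalone sketch (plain calloc in place of perf's zalloc):

    #include <stdlib.h>
    #include <string.h>
    #include <sys/time.h>

    /* Bytes/sec for one memcpy over len bytes; prefault moves the
     * page-fault cost out of the timed region. */
    static double copy_bps(size_t len, int prefault)
    {
        void *dst = calloc(1, len), *src = calloc(1, len);
        struct timeval a, b, d;

        if (prefault)
            memcpy(dst, src, len);  /* fault both buffers in */

        gettimeofday(&a, NULL);
        memcpy(dst, src, len);
        gettimeofday(&b, NULL);
        timersub(&b, &a, &d);

        free(dst);
        free(src);
        return (double)len / ((double)d.tv_sec + d.tv_usec / 1e6);
    }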
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 6d5604d8df95..c056cdc06912 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -58,12 +58,12 @@ static int hists__add_entry(struct hists *self, struct addr_location *al)
58 return hist_entry__inc_addr_samples(he, al->addr); 58 return hist_entry__inc_addr_samples(he, al->addr);
59} 59}
60 60
61static int process_sample_event(event_t *event, struct perf_session *session) 61static int process_sample_event(event_t *event, struct sample_data *sample,
62 struct perf_session *session)
62{ 63{
63 struct addr_location al; 64 struct addr_location al;
64 struct sample_data data;
65 65
66 if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) { 66 if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
67 pr_warning("problem processing %d event, skipping it.\n", 67 pr_warning("problem processing %d event, skipping it.\n",
68 event->header.type); 68 event->header.type);
69 return -1; 69 return -1;
@@ -375,6 +375,8 @@ static struct perf_event_ops event_ops = {
375 .mmap = event__process_mmap, 375 .mmap = event__process_mmap,
376 .comm = event__process_comm, 376 .comm = event__process_comm,
377 .fork = event__process_task, 377 .fork = event__process_task,
378 .ordered_samples = true,
379 .ordering_requires_timestamps = true,
378}; 380};
379 381
380static int __cmd_annotate(void) 382static int __cmd_annotate(void)
@@ -382,7 +384,7 @@ static int __cmd_annotate(void)
382 int ret; 384 int ret;
383 struct perf_session *session; 385 struct perf_session *session;
384 386
385 session = perf_session__new(input_name, O_RDONLY, force, false); 387 session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops);
386 if (session == NULL) 388 if (session == NULL)
387 return -ENOMEM; 389 return -ENOMEM;
388 390
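The hunk above is the first of several below that apply the same mechanical change across the builtins: sample parsing moves out of each tool and into the session layer, which now hands every handler a pre-parsed struct sample_data. Schematically:

    /* before: every tool re-parsed the raw event itself */
    static int process_sample_event(event_t *event, struct perf_session *s)
    {
        struct sample_data data;

        event__parse_sample(event, s->sample_type, &data);
        /* ... use data.pid, data.time, data.period ... */
        return 0;
    }

    /* after: the session parses once, handlers just consume it */
    static int process_sample_event(event_t *event,
                                    struct sample_data *sample,
                                    struct perf_session *s)
    {
        /* ... use sample->pid, sample->time, sample->period ... */
        return 0;
    }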
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index c49837de7d3f..5af32ae9031e 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -38,7 +38,8 @@ static int __cmd_buildid_list(void)
38{ 38{
39 struct perf_session *session; 39 struct perf_session *session;
40 40
41 session = perf_session__new(input_name, O_RDONLY, force, false); 41 session = perf_session__new(input_name, O_RDONLY, force, false,
42 &build_id__mark_dso_hit_ops);
42 if (session == NULL) 43 if (session == NULL)
43 return -1; 44 return -1;
44 45
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index fca1d4402910..3153e492dbcc 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -30,12 +30,13 @@ static int hists__add_entry(struct hists *self,
30 return -ENOMEM; 30 return -ENOMEM;
31} 31}
32 32
33static int diff__process_sample_event(event_t *event, struct perf_session *session) 33static int diff__process_sample_event(event_t *event,
34 struct sample_data *sample,
35 struct perf_session *session)
34{ 36{
35 struct addr_location al; 37 struct addr_location al;
36 struct sample_data data = { .period = 1, };
37 38
38 if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) { 39 if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
39 pr_warning("problem processing %d event, skipping it.\n", 40 pr_warning("problem processing %d event, skipping it.\n",
40 event->header.type); 41 event->header.type);
41 return -1; 42 return -1;
@@ -44,12 +45,12 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi
44 if (al.filtered || al.sym == NULL) 45 if (al.filtered || al.sym == NULL)
45 return 0; 46 return 0;
46 47
47 if (hists__add_entry(&session->hists, &al, data.period)) { 48 if (hists__add_entry(&session->hists, &al, sample->period)) {
48 pr_warning("problem incrementing symbol period, skipping event\n"); 49 pr_warning("problem incrementing symbol period, skipping event\n");
49 return -1; 50 return -1;
50 } 51 }
51 52
52 session->hists.stats.total_period += data.period; 53 session->hists.stats.total_period += sample->period;
53 return 0; 54 return 0;
54} 55}
55 56
@@ -60,6 +61,8 @@ static struct perf_event_ops event_ops = {
60 .exit = event__process_task, 61 .exit = event__process_task,
61 .fork = event__process_task, 62 .fork = event__process_task,
62 .lost = event__process_lost, 63 .lost = event__process_lost,
64 .ordered_samples = true,
65 .ordering_requires_timestamps = true,
63}; 66};
64 67
65static void perf_session__insert_hist_entry_by_name(struct rb_root *root, 68static void perf_session__insert_hist_entry_by_name(struct rb_root *root,
@@ -141,8 +144,8 @@ static int __cmd_diff(void)
141 int ret, i; 144 int ret, i;
142 struct perf_session *session[2]; 145 struct perf_session *session[2];
143 146
144 session[0] = perf_session__new(input_old, O_RDONLY, force, false); 147 session[0] = perf_session__new(input_old, O_RDONLY, force, false, &event_ops);
145 session[1] = perf_session__new(input_new, O_RDONLY, force, false); 148 session[1] = perf_session__new(input_new, O_RDONLY, force, false, &event_ops);
146 if (session[0] == NULL || session[1] == NULL) 149 if (session[0] == NULL || session[1] == NULL)
147 return -ENOMEM; 150 return -ENOMEM;
148 151
@@ -173,7 +176,7 @@ static const char * const diff_usage[] = {
173static const struct option options[] = { 176static const struct option options[] = {
174 OPT_INCR('v', "verbose", &verbose, 177 OPT_INCR('v', "verbose", &verbose,
175 "be more verbose (show symbol address, etc)"), 178 "be more verbose (show symbol address, etc)"),
176 OPT_BOOLEAN('m', "displacement", &show_displacement, 179 OPT_BOOLEAN('M', "displacement", &show_displacement,
177 "Show position displacement relative to baseline"), 180 "Show position displacement relative to baseline"),
178 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 181 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
179 "dump raw trace in ASCII"), 182 "dump raw trace in ASCII"),
@@ -191,6 +194,8 @@ static const struct option options[] = {
191 OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator", 194 OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
192 "separator for columns, no spaces will be added between " 195 "separator for columns, no spaces will be added between "
193 "columns '.' is reserved."), 196 "columns '.' is reserved."),
197 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
198 "Look for files with symbols relative to this directory"),
194 OPT_END() 199 OPT_END()
195}; 200};
196 201
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 8e3e47b064ce..0c78ffa7bf67 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -16,8 +16,8 @@
16static char const *input_name = "-"; 16static char const *input_name = "-";
17static bool inject_build_ids; 17static bool inject_build_ids;
18 18
19static int event__repipe(event_t *event __used, 19static int event__repipe_synth(event_t *event,
20 struct perf_session *session __used) 20 struct perf_session *session __used)
21{ 21{
22 uint32_t size; 22 uint32_t size;
23 void *buf = event; 23 void *buf = event;
@@ -36,22 +36,30 @@ static int event__repipe(event_t *event __used,
36 return 0; 36 return 0;
37} 37}
38 38
39static int event__repipe_mmap(event_t *self, struct perf_session *session) 39static int event__repipe(event_t *event, struct sample_data *sample __used,
40 struct perf_session *session)
41{
42 return event__repipe_synth(event, session);
43}
44
45static int event__repipe_mmap(event_t *self, struct sample_data *sample,
46 struct perf_session *session)
40{ 47{
41 int err; 48 int err;
42 49
43 err = event__process_mmap(self, session); 50 err = event__process_mmap(self, sample, session);
44 event__repipe(self, session); 51 event__repipe(self, sample, session);
45 52
46 return err; 53 return err;
47} 54}
48 55
49static int event__repipe_task(event_t *self, struct perf_session *session) 56static int event__repipe_task(event_t *self, struct sample_data *sample,
57 struct perf_session *session)
50{ 58{
51 int err; 59 int err;
52 60
53 err = event__process_task(self, session); 61 err = event__process_task(self, sample, session);
54 event__repipe(self, session); 62 event__repipe(self, sample, session);
55 63
56 return err; 64 return err;
57} 65}
@@ -61,7 +69,7 @@ static int event__repipe_tracing_data(event_t *self,
61{ 69{
62 int err; 70 int err;
63 71
64 event__repipe(self, session); 72 event__repipe_synth(self, session);
65 err = event__process_tracing_data(self, session); 73 err = event__process_tracing_data(self, session);
66 74
67 return err; 75 return err;
@@ -111,7 +119,8 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session)
111 return 0; 119 return 0;
112} 120}
113 121
114static int event__inject_buildid(event_t *event, struct perf_session *session) 122static int event__inject_buildid(event_t *event, struct sample_data *sample,
123 struct perf_session *session)
115{ 124{
116 struct addr_location al; 125 struct addr_location al;
117 struct thread *thread; 126 struct thread *thread;
@@ -146,7 +155,7 @@ static int event__inject_buildid(event_t *event, struct perf_session *session)
146 } 155 }
147 156
148repipe: 157repipe:
149 event__repipe(event, session); 158 event__repipe(event, sample, session);
150 return 0; 159 return 0;
151} 160}
152 161
@@ -160,10 +169,10 @@ struct perf_event_ops inject_ops = {
160 .read = event__repipe, 169 .read = event__repipe,
161 .throttle = event__repipe, 170 .throttle = event__repipe,
162 .unthrottle = event__repipe, 171 .unthrottle = event__repipe,
163 .attr = event__repipe, 172 .attr = event__repipe_synth,
164 .event_type = event__repipe, 173 .event_type = event__repipe_synth,
165 .tracing_data = event__repipe, 174 .tracing_data = event__repipe_synth,
166 .build_id = event__repipe, 175 .build_id = event__repipe_synth,
167}; 176};
168 177
169extern volatile int session_done; 178extern volatile int session_done;
@@ -187,7 +196,7 @@ static int __cmd_inject(void)
187 inject_ops.tracing_data = event__repipe_tracing_data; 196 inject_ops.tracing_data = event__repipe_tracing_data;
188 } 197 }
189 198
190 session = perf_session__new(input_name, O_RDONLY, false, true); 199 session = perf_session__new(input_name, O_RDONLY, false, true, &inject_ops);
191 if (session == NULL) 200 if (session == NULL)
192 return -ENOMEM; 201 return -ENOMEM;
193 202
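The repipe handlers above follow a small decorator pattern: do the real processing for its side effects, then unconditionally forward the event so the output stream stays byte-complete. Reduced to a generic, self-contained sketch (placeholder types, not perf's):

    #include <stdio.h>

    struct event { int type; };

    static int repipe(struct event *ev)
    {
        printf("forwarding event %d\n", ev->type);
        return 0;
    }

    static int process_mmap(struct event *ev)
    {
        (void)ev;   /* real work would happen here */
        return 0;
    }

    static int repipe_mmap(struct event *ev)
    {
        int err = process_mmap(ev); /* process first... */

        repipe(ev);                 /* ...then forward, even on error */
        return err;
    }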
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 31f60a2535e0..def7ddc2fd4f 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -304,22 +304,11 @@ process_raw_event(event_t *raw_event __used, void *data,
304 } 304 }
305} 305}
306 306
307static int process_sample_event(event_t *event, struct perf_session *session) 307static int process_sample_event(event_t *event, struct sample_data *sample,
308 struct perf_session *session)
308{ 309{
309 struct sample_data data; 310 struct thread *thread = perf_session__findnew(session, event->ip.pid);
310 struct thread *thread;
311 311
312 memset(&data, 0, sizeof(data));
313 data.time = -1;
314 data.cpu = -1;
315 data.period = 1;
316
317 event__parse_sample(event, session->sample_type, &data);
318
319 dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
320 data.pid, data.tid, data.ip, data.period);
321
322 thread = perf_session__findnew(session, event->ip.pid);
323 if (thread == NULL) { 312 if (thread == NULL) {
324 pr_debug("problem processing %d event, skipping it.\n", 313 pr_debug("problem processing %d event, skipping it.\n",
325 event->header.type); 314 event->header.type);
@@ -328,8 +317,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
328 317
329 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); 318 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
330 319
331 process_raw_event(event, data.raw_data, data.cpu, 320 process_raw_event(event, sample->raw_data, sample->cpu,
332 data.time, thread); 321 sample->time, thread);
333 322
334 return 0; 323 return 0;
335} 324}
@@ -492,7 +481,8 @@ static void sort_result(void)
492static int __cmd_kmem(void) 481static int __cmd_kmem(void)
493{ 482{
494 int err = -EINVAL; 483 int err = -EINVAL;
495 struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0, false); 484 struct perf_session *session = perf_session__new(input_name, O_RDONLY,
485 0, false, &event_ops);
496 if (session == NULL) 486 if (session == NULL)
497 return -ENOMEM; 487 return -ENOMEM;
498 488
@@ -747,6 +737,9 @@ static int __cmd_record(int argc, const char **argv)
747 rec_argc = ARRAY_SIZE(record_args) + argc - 1; 737 rec_argc = ARRAY_SIZE(record_args) + argc - 1;
748 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 738 rec_argv = calloc(rec_argc + 1, sizeof(char *));
749 739
740 if (rec_argv == NULL)
741 return -ENOMEM;
742
750 for (i = 0; i < ARRAY_SIZE(record_args); i++) 743 for (i = 0; i < ARRAY_SIZE(record_args); i++)
751 rec_argv[i] = strdup(record_args[i]); 744 rec_argv[i] = strdup(record_args[i]);
752 745
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 821c1586a22b..b9c6e5432971 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -834,22 +834,18 @@ static void dump_info(void)
834 die("Unknown type of information\n"); 834 die("Unknown type of information\n");
835} 835}
836 836
837static int process_sample_event(event_t *self, struct perf_session *s) 837static int process_sample_event(event_t *self, struct sample_data *sample,
838 struct perf_session *s)
838{ 839{
839 struct sample_data data; 840 struct thread *thread = perf_session__findnew(s, sample->tid);
840 struct thread *thread;
841 841
842 bzero(&data, sizeof(data));
843 event__parse_sample(self, s->sample_type, &data);
844
845 thread = perf_session__findnew(s, data.tid);
846 if (thread == NULL) { 842 if (thread == NULL) {
847 pr_debug("problem processing %d event, skipping it.\n", 843 pr_debug("problem processing %d event, skipping it.\n",
848 self->header.type); 844 self->header.type);
849 return -1; 845 return -1;
850 } 846 }
851 847
852 process_raw_event(data.raw_data, data.cpu, data.time, thread); 848 process_raw_event(sample->raw_data, sample->cpu, sample->time, thread);
853 849
854 return 0; 850 return 0;
855} 851}
@@ -862,7 +858,7 @@ static struct perf_event_ops eops = {
862 858
863static int read_events(void) 859static int read_events(void)
864{ 860{
865 session = perf_session__new(input_name, O_RDONLY, 0, false); 861 session = perf_session__new(input_name, O_RDONLY, 0, false, &eops);
866 if (!session) 862 if (!session)
867 die("Initializing perf session failed\n"); 863 die("Initializing perf session failed\n");
868 864
@@ -947,6 +943,9 @@ static int __cmd_record(int argc, const char **argv)
947 rec_argc = ARRAY_SIZE(record_args) + argc - 1; 943 rec_argc = ARRAY_SIZE(record_args) + argc - 1;
948 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 944 rec_argv = calloc(rec_argc + 1, sizeof(char *));
949 945
946 if (rec_argv == NULL)
947 return -ENOMEM;
948
950 for (i = 0; i < ARRAY_SIZE(record_args); i++) 949 for (i = 0; i < ARRAY_SIZE(record_args); i++)
951 rec_argv[i] = strdup(record_args[i]); 950 rec_argv[i] = strdup(record_args[i]);
952 951
@@ -982,9 +981,9 @@ int cmd_lock(int argc, const char **argv, const char *prefix __used)
982 usage_with_options(report_usage, report_options); 981 usage_with_options(report_usage, report_options);
983 } 982 }
984 __cmd_report(); 983 __cmd_report();
985 } else if (!strcmp(argv[0], "trace")) { 984 } else if (!strcmp(argv[0], "script")) {
986 /* Aliased to 'perf trace' */ 985 /* Aliased to 'perf script' */
987 return cmd_trace(argc, argv, prefix); 986 return cmd_script(argc, argv, prefix);
988 } else if (!strcmp(argv[0], "info")) { 987 } else if (!strcmp(argv[0], "info")) {
989 if (argc) { 988 if (argc) {
990 argc = parse_options(argc, argv, 989 argc = parse_options(argc, argv,
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 564491fa18b2..50efbd509b8f 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -36,6 +36,7 @@ static int *fd[MAX_NR_CPUS][MAX_COUNTERS];
36 36
37static u64 user_interval = ULLONG_MAX; 37static u64 user_interval = ULLONG_MAX;
38static u64 default_interval = 0; 38static u64 default_interval = 0;
39static u64 sample_type;
39 40
40static int nr_cpus = 0; 41static int nr_cpus = 0;
41static unsigned int page_size; 42static unsigned int page_size;
@@ -48,6 +49,7 @@ static const char *output_name = "perf.data";
48static int group = 0; 49static int group = 0;
49static int realtime_prio = 0; 50static int realtime_prio = 0;
50static bool raw_samples = false; 51static bool raw_samples = false;
52static bool sample_id_all_avail = true;
51static bool system_wide = false; 53static bool system_wide = false;
52static pid_t target_pid = -1; 54static pid_t target_pid = -1;
53static pid_t target_tid = -1; 55static pid_t target_tid = -1;
@@ -60,7 +62,9 @@ static bool call_graph = false;
60static bool inherit_stat = false; 62static bool inherit_stat = false;
61static bool no_samples = false; 63static bool no_samples = false;
62static bool sample_address = false; 64static bool sample_address = false;
65static bool sample_time = false;
63static bool no_buildid = false; 66static bool no_buildid = false;
67static bool no_buildid_cache = false;
64 68
65static long samples = 0; 69static long samples = 0;
66static u64 bytes_written = 0; 70static u64 bytes_written = 0;
@@ -128,6 +132,7 @@ static void write_output(void *buf, size_t size)
128} 132}
129 133
130static int process_synthesized_event(event_t *event, 134static int process_synthesized_event(event_t *event,
135 struct sample_data *sample __used,
131 struct perf_session *self __used) 136 struct perf_session *self __used)
132{ 137{
133 write_output(event, event->header.size); 138 write_output(event, event->header.size);
@@ -238,6 +243,19 @@ static void create_counter(int counter, int cpu)
238 u64 time_running; 243 u64 time_running;
239 u64 id; 244 u64 id;
240 } read_data; 245 } read_data;
246 /*
247 * Check if parse_single_tracepoint_event has already asked for
248 * PERF_SAMPLE_TIME.
249 *
250 * XXX this is kludgy but short term fix for problems introduced by
251 * eac23d1c that broke 'perf script' by having different sample_types
252 * when using multiple tracepoint events when we use a perf binary
253 * that tries to use sample_id_all on an older kernel.
254 *
255 * We need to move counter creation to perf_session, support
256 * different sample_types, etc.
257 */
258 bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
241 259
242 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 260 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
243 PERF_FORMAT_TOTAL_TIME_RUNNING | 261 PERF_FORMAT_TOTAL_TIME_RUNNING |
@@ -280,6 +298,10 @@ static void create_counter(int counter, int cpu)
280 if (system_wide) 298 if (system_wide)
281 attr->sample_type |= PERF_SAMPLE_CPU; 299 attr->sample_type |= PERF_SAMPLE_CPU;
282 300
301 if (sample_id_all_avail &&
302 (sample_time || system_wide || !no_inherit || cpu_list))
303 attr->sample_type |= PERF_SAMPLE_TIME;
304
283 if (raw_samples) { 305 if (raw_samples) {
284 attr->sample_type |= PERF_SAMPLE_TIME; 306 attr->sample_type |= PERF_SAMPLE_TIME;
285 attr->sample_type |= PERF_SAMPLE_RAW; 307 attr->sample_type |= PERF_SAMPLE_RAW;
@@ -293,6 +315,8 @@ static void create_counter(int counter, int cpu)
293 attr->disabled = 1; 315 attr->disabled = 1;
294 attr->enable_on_exec = 1; 316 attr->enable_on_exec = 1;
295 } 317 }
318retry_sample_id:
319 attr->sample_id_all = sample_id_all_avail ? 1 : 0;
296 320
297 for (thread_index = 0; thread_index < thread_num; thread_index++) { 321 for (thread_index = 0; thread_index < thread_num; thread_index++) {
298try_again: 322try_again:
@@ -309,6 +333,15 @@ try_again:
309 else if (err == ENODEV && cpu_list) { 333 else if (err == ENODEV && cpu_list) {
310 die("No such device - did you specify" 334 die("No such device - did you specify"
311 " an out-of-range profile CPU?\n"); 335 " an out-of-range profile CPU?\n");
336 } else if (err == EINVAL && sample_id_all_avail) {
337 /*
338 * Old kernel, no attr->sample_id_type_all field
339 * Old kernel, no attr->sample_id_all field
340 sample_id_all_avail = false;
341 if (!sample_time && !raw_samples && !time_needed)
342 attr->sample_type &= ~PERF_SAMPLE_TIME;
343
344 goto retry_sample_id;
312 } 345 }
313 346
314 /* 347 /*
@@ -326,7 +359,7 @@ try_again:
326 goto try_again; 359 goto try_again;
327 } 360 }
328 printf("\n"); 361 printf("\n");
329 error("perfcounter syscall returned with %d (%s)\n", 362 error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
330 fd[nr_cpu][counter][thread_index], strerror(err)); 363 fd[nr_cpu][counter][thread_index], strerror(err));
331 364
332#if defined(__i386__) || defined(__x86_64__) 365#if defined(__i386__) || defined(__x86_64__)
@@ -406,6 +439,9 @@ try_again:
406 } 439 }
407 } 440 }
408 } 441 }
442
443 if (!sample_type)
444 sample_type = attr->sample_type;
409} 445}
410 446
411static void open_counters(int cpu) 447static void open_counters(int cpu)
@@ -437,7 +473,8 @@ static void atexit_header(void)
437 if (!pipe_output) { 473 if (!pipe_output) {
438 session->header.data_size += bytes_written; 474 session->header.data_size += bytes_written;
439 475
440 process_buildids(); 476 if (!no_buildid)
477 process_buildids();
441 perf_header__write(&session->header, output, true); 478 perf_header__write(&session->header, output, true);
442 perf_session__delete(session); 479 perf_session__delete(session);
443 symbol__exit(); 480 symbol__exit();
@@ -552,12 +589,15 @@ static int __cmd_record(int argc, const char **argv)
552 } 589 }
553 590
554 session = perf_session__new(output_name, O_WRONLY, 591 session = perf_session__new(output_name, O_WRONLY,
555 write_mode == WRITE_FORCE, false); 592 write_mode == WRITE_FORCE, false, NULL);
556 if (session == NULL) { 593 if (session == NULL) {
557 pr_err("Not enough memory for reading perf file header\n"); 594 pr_err("Not enough memory for reading perf file header\n");
558 return -1; 595 return -1;
559 } 596 }
560 597
598 if (!no_buildid)
599 perf_header__set_feat(&session->header, HEADER_BUILD_ID);
600
561 if (!file_new) { 601 if (!file_new) {
562 err = perf_header__read(session, output); 602 err = perf_header__read(session, output);
563 if (err < 0) 603 if (err < 0)
@@ -639,6 +679,8 @@ static int __cmd_record(int argc, const char **argv)
639 open_counters(cpumap[i]); 679 open_counters(cpumap[i]);
640 } 680 }
641 681
682 perf_session__set_sample_type(session, sample_type);
683
642 if (pipe_output) { 684 if (pipe_output) {
643 err = perf_header__write_pipe(output); 685 err = perf_header__write_pipe(output);
644 if (err < 0) 686 if (err < 0)
@@ -651,6 +693,8 @@ static int __cmd_record(int argc, const char **argv)
651 693
652 post_processing_offset = lseek(output, 0, SEEK_CUR); 694 post_processing_offset = lseek(output, 0, SEEK_CUR);
653 695
696 perf_session__set_sample_id_all(session, sample_id_all_avail);
697
654 if (pipe_output) { 698 if (pipe_output) {
655 err = event__synthesize_attrs(&session->header, 699 err = event__synthesize_attrs(&session->header,
656 process_synthesized_event, 700 process_synthesized_event,
@@ -831,10 +875,13 @@ const struct option record_options[] = {
831 "per thread counts"), 875 "per thread counts"),
832 OPT_BOOLEAN('d', "data", &sample_address, 876 OPT_BOOLEAN('d', "data", &sample_address,
833 "Sample addresses"), 877 "Sample addresses"),
878 OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
834 OPT_BOOLEAN('n', "no-samples", &no_samples, 879 OPT_BOOLEAN('n', "no-samples", &no_samples,
835 "don't sample"), 880 "don't sample"),
836 OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid, 881 OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
837 "do not update the buildid cache"), 882 "do not update the buildid cache"),
883 OPT_BOOLEAN('B', "no-buildid", &no_buildid,
884 "do not collect buildids in perf.data"),
838 OPT_END() 885 OPT_END()
839}; 886};
840 887
@@ -859,7 +906,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
859 } 906 }
860 907
861 symbol__init(); 908 symbol__init();
862 if (no_buildid) 909
910 if (no_buildid_cache || no_buildid)
863 disable_buildid_cache(); 911 disable_buildid_cache();
864 912
865 if (!nr_counters) { 913 if (!nr_counters) {
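The retry_sample_id dance above is a common pattern for probing optional attr bits: kernels reject perf_event_attr fields they do not know about with EINVAL, so the tool sets the bit optimistically and, on EINVAL, clears it and reopens. The core of it, as a minimal sketch:

    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <errno.h>

    /* Open a counter, falling back transparently on pre-sample_id_all
     * kernels (which return EINVAL for the unknown attr bit). */
    static int open_counter(struct perf_event_attr *attr, pid_t pid, int cpu)
    {
        int fd;

        attr->sample_id_all = 1;
    retry:
        fd = syscall(__NR_perf_event_open, attr, pid, cpu, -1, 0);
        if (fd < 0 && errno == EINVAL && attr->sample_id_all) {
            attr->sample_id_all = 0;    /* old kernel: drop the bit */
            goto retry;
        }
        return fd;
    }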
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 5de405d45230..75183a4518e6 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -150,13 +150,13 @@ static int add_event_total(struct perf_session *session,
150 return 0; 150 return 0;
151} 151}
152 152
153static int process_sample_event(event_t *event, struct perf_session *session) 153static int process_sample_event(event_t *event, struct sample_data *sample,
154 struct perf_session *session)
154{ 155{
155 struct sample_data data = { .period = 1, };
156 struct addr_location al; 156 struct addr_location al;
157 struct perf_event_attr *attr; 157 struct perf_event_attr *attr;
158 158
159 if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) { 159 if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
160 fprintf(stderr, "problem processing %d event, skipping it.\n", 160 fprintf(stderr, "problem processing %d event, skipping it.\n",
161 event->header.type); 161 event->header.type);
162 return -1; 162 return -1;
@@ -165,14 +165,14 @@ static int process_sample_event(event_t *event, struct perf_session *session)
165 if (al.filtered || (hide_unresolved && al.sym == NULL)) 165 if (al.filtered || (hide_unresolved && al.sym == NULL))
166 return 0; 166 return 0;
167 167
168 if (perf_session__add_hist_entry(session, &al, &data)) { 168 if (perf_session__add_hist_entry(session, &al, sample)) {
169 pr_debug("problem incrementing symbol period, skipping event\n"); 169 pr_debug("problem incrementing symbol period, skipping event\n");
170 return -1; 170 return -1;
171 } 171 }
172 172
173 attr = perf_header__find_attr(data.id, &session->header); 173 attr = perf_header__find_attr(sample->id, &session->header);
174 174
175 if (add_event_total(session, &data, attr)) { 175 if (add_event_total(session, sample, attr)) {
176 pr_debug("problem adding event period\n"); 176 pr_debug("problem adding event period\n");
177 return -1; 177 return -1;
178 } 178 }
@@ -180,7 +180,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
180 return 0; 180 return 0;
181} 181}
182 182
183static int process_read_event(event_t *event, struct perf_session *session __used) 183static int process_read_event(event_t *event, struct sample_data *sample __used,
184 struct perf_session *session __used)
184{ 185{
185 struct perf_event_attr *attr; 186 struct perf_event_attr *attr;
186 187
@@ -243,6 +244,8 @@ static struct perf_event_ops event_ops = {
243 .event_type = event__process_event_type, 244 .event_type = event__process_event_type,
244 .tracing_data = event__process_tracing_data, 245 .tracing_data = event__process_tracing_data,
245 .build_id = event__process_build_id, 246 .build_id = event__process_build_id,
247 .ordered_samples = true,
248 .ordering_requires_timestamps = true,
246}; 249};
247 250
248extern volatile int session_done; 251extern volatile int session_done;
@@ -307,7 +310,7 @@ static int __cmd_report(void)
307 310
308 signal(SIGINT, sig_handler); 311 signal(SIGINT, sig_handler);
309 312
310 session = perf_session__new(input_name, O_RDONLY, force, false); 313 session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops);
311 if (session == NULL) 314 if (session == NULL)
312 return -ENOMEM; 315 return -ENOMEM;
313 316
@@ -442,6 +445,8 @@ static const struct option options[] = {
442 "dump raw trace in ASCII"), 445 "dump raw trace in ASCII"),
443 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 446 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
444 "file", "vmlinux pathname"), 447 "file", "vmlinux pathname"),
448 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
449 "file", "kallsyms pathname"),
445 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), 450 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
446 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, 451 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
447 "load module symbols - WARNING: use only with -k and LIVE kernel"), 452 "load module symbols - WARNING: use only with -k and LIVE kernel"),
@@ -478,6 +483,8 @@ static const struct option options[] = {
478 "columns '.' is reserved."), 483 "columns '.' is reserved."),
479 OPT_BOOLEAN('U', "hide-unresolved", &hide_unresolved, 484 OPT_BOOLEAN('U', "hide-unresolved", &hide_unresolved,
480 "Only display entries resolved to a symbol"), 485 "Only display entries resolved to a symbol"),
486 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
487 "Look for files with symbols relative to this directory"),
481 OPT_END() 488 OPT_END()
482}; 489};
483 490
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 55f3b5dcc731..7a4ebeb8b016 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1606,25 +1606,15 @@ process_raw_event(event_t *raw_event __used, struct perf_session *session,
1606 process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread); 1606 process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread);
1607} 1607}
1608 1608
1609static int process_sample_event(event_t *event, struct perf_session *session) 1609static int process_sample_event(event_t *event, struct sample_data *sample,
1610 struct perf_session *session)
1610{ 1611{
1611 struct sample_data data;
1612 struct thread *thread; 1612 struct thread *thread;
1613 1613
1614 if (!(session->sample_type & PERF_SAMPLE_RAW)) 1614 if (!(session->sample_type & PERF_SAMPLE_RAW))
1615 return 0; 1615 return 0;
1616 1616
1617 memset(&data, 0, sizeof(data)); 1617 thread = perf_session__findnew(session, sample->pid);
1618 data.time = -1;
1619 data.cpu = -1;
1620 data.period = -1;
1621
1622 event__parse_sample(event, session->sample_type, &data);
1623
1624 dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
1625 data.pid, data.tid, data.ip, data.period);
1626
1627 thread = perf_session__findnew(session, data.pid);
1628 if (thread == NULL) { 1618 if (thread == NULL) {
1629 pr_debug("problem processing %d event, skipping it.\n", 1619 pr_debug("problem processing %d event, skipping it.\n",
1630 event->header.type); 1620 event->header.type);
@@ -1633,10 +1623,11 @@ static int process_sample_event(event_t *event, struct perf_session *session)
1633 1623
1634 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); 1624 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
1635 1625
1636 if (profile_cpu != -1 && profile_cpu != (int)data.cpu) 1626 if (profile_cpu != -1 && profile_cpu != (int)sample->cpu)
1637 return 0; 1627 return 0;
1638 1628
1639 process_raw_event(event, session, data.raw_data, data.cpu, data.time, thread); 1629 process_raw_event(event, session, sample->raw_data, sample->cpu,
1630 sample->time, thread);
1640 1631
1641 return 0; 1632 return 0;
1642} 1633}
@@ -1652,7 +1643,8 @@ static struct perf_event_ops event_ops = {
1652static int read_events(void) 1643static int read_events(void)
1653{ 1644{
1654 int err = -EINVAL; 1645 int err = -EINVAL;
1655 struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0, false); 1646 struct perf_session *session = perf_session__new(input_name, O_RDONLY,
1647 0, false, &event_ops);
1656 if (session == NULL) 1648 if (session == NULL)
1657 return -ENOMEM; 1649 return -ENOMEM;
1658 1650
@@ -1869,6 +1861,9 @@ static int __cmd_record(int argc, const char **argv)
1869 rec_argc = ARRAY_SIZE(record_args) + argc - 1; 1861 rec_argc = ARRAY_SIZE(record_args) + argc - 1;
1870 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 1862 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1871 1863
1864 if (rec_argv == NULL)
1865 return -ENOMEM;
1866
1872 for (i = 0; i < ARRAY_SIZE(record_args); i++) 1867 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1873 rec_argv[i] = strdup(record_args[i]); 1868 rec_argv[i] = strdup(record_args[i]);
1874 1869
@@ -1888,10 +1883,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used)
1888 usage_with_options(sched_usage, sched_options); 1883 usage_with_options(sched_usage, sched_options);
1889 1884
1890 /* 1885 /*
1891 * Aliased to 'perf trace' for now: 1886 * Aliased to 'perf script' for now:
1892 */ 1887 */
1893 if (!strcmp(argv[0], "trace")) 1888 if (!strcmp(argv[0], "script"))
1894 return cmd_trace(argc, argv, prefix); 1889 return cmd_script(argc, argv, prefix);
1895 1890
1896 symbol__init(); 1891 symbol__init();
1897 if (!strncmp(argv[0], "rec", 3)) { 1892 if (!strncmp(argv[0], "rec", 3)) {
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-script.c
index 86cfe3800e6b..43480fd66db7 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-script.c
@@ -56,29 +56,18 @@ static void setup_scripting(void)
56 56
57static int cleanup_scripting(void) 57static int cleanup_scripting(void)
58{ 58{
59 pr_debug("\nperf trace script stopped\n"); 59 pr_debug("\nperf script stopped\n");
60 60
61 return scripting_ops->stop_script(); 61 return scripting_ops->stop_script();
62} 62}
63 63
64static char const *input_name = "perf.data"; 64static char const *input_name = "perf.data";
65 65
66static int process_sample_event(event_t *event, struct perf_session *session) 66static int process_sample_event(event_t *event, struct sample_data *sample,
67 struct perf_session *session)
67{ 68{
68 struct sample_data data; 69 struct thread *thread = perf_session__findnew(session, event->ip.pid);
69 struct thread *thread;
70 70
71 memset(&data, 0, sizeof(data));
72 data.time = -1;
73 data.cpu = -1;
74 data.period = 1;
75
76 event__parse_sample(event, session->sample_type, &data);
77
78 dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
79 data.pid, data.tid, data.ip, data.period);
80
81 thread = perf_session__findnew(session, event->ip.pid);
82 if (thread == NULL) { 71 if (thread == NULL) {
83 pr_debug("problem processing %d event, skipping it.\n", 72 pr_debug("problem processing %d event, skipping it.\n",
84 event->header.type); 73 event->header.type);
@@ -87,13 +76,13 @@ static int process_sample_event(event_t *event, struct perf_session *session)
87 76
88 if (session->sample_type & PERF_SAMPLE_RAW) { 77 if (session->sample_type & PERF_SAMPLE_RAW) {
89 if (debug_mode) { 78 if (debug_mode) {
90 if (data.time < last_timestamp) { 79 if (sample->time < last_timestamp) {
91 pr_err("Samples misordered, previous: %llu " 80 pr_err("Samples misordered, previous: %llu "
92 "this: %llu\n", last_timestamp, 81 "this: %llu\n", last_timestamp,
93 data.time); 82 sample->time);
94 nr_unordered++; 83 nr_unordered++;
95 } 84 }
96 last_timestamp = data.time; 85 last_timestamp = sample->time;
97 return 0; 86 return 0;
98 } 87 }
99 /* 88 /*
@@ -101,18 +90,19 @@ static int process_sample_event(event_t *event, struct perf_session *session)
101 * field, although it should be the same as this perf 90 * field, although it should be the same as this perf
102 * event pid 91 * event pid
103 */ 92 */
104 scripting_ops->process_event(data.cpu, data.raw_data, 93 scripting_ops->process_event(sample->cpu, sample->raw_data,
105 data.raw_size, 94 sample->raw_size,
106 data.time, thread->comm); 95 sample->time, thread->comm);
107 } 96 }
108 97
109 session->hists.stats.total_period += data.period; 98 session->hists.stats.total_period += sample->period;
110 return 0; 99 return 0;
111} 100}
112 101
113static u64 nr_lost; 102static u64 nr_lost;
114 103
115static int process_lost_event(event_t *event, struct perf_session *session __used) 104static int process_lost_event(event_t *event, struct sample_data *sample __used,
105 struct perf_session *session __used)
116{ 106{
117 nr_lost += event->lost.lost; 107 nr_lost += event->lost.lost;
118 108
@@ -127,6 +117,7 @@ static struct perf_event_ops event_ops = {
127 .tracing_data = event__process_tracing_data, 117 .tracing_data = event__process_tracing_data,
128 .build_id = event__process_build_id, 118 .build_id = event__process_build_id,
129 .lost = process_lost_event, 119 .lost = process_lost_event,
120 .ordering_requires_timestamps = true,
130 .ordered_samples = true, 121 .ordered_samples = true,
131}; 122};
132 123
@@ -137,7 +128,7 @@ static void sig_handler(int sig __unused)
137 session_done = 1; 128 session_done = 1;
138} 129}
139 130
140static int __cmd_trace(struct perf_session *session) 131static int __cmd_script(struct perf_session *session)
141{ 132{
142 int ret; 133 int ret;
143 134
@@ -247,7 +238,7 @@ static void list_available_languages(void)
247 238
248 fprintf(stderr, "\n"); 239 fprintf(stderr, "\n");
249 fprintf(stderr, "Scripting language extensions (used in " 240 fprintf(stderr, "Scripting language extensions (used in "
250 "perf trace -s [spec:]script.[spec]):\n\n"); 241 "perf script -s [spec:]script.[spec]):\n\n");
251 242
252 list_for_each_entry(s, &script_specs, node) 243 list_for_each_entry(s, &script_specs, node)
253 fprintf(stderr, " %-42s [%s]\n", s->spec, s->ops->name); 244 fprintf(stderr, " %-42s [%s]\n", s->spec, s->ops->name);
@@ -301,17 +292,34 @@ static int parse_scriptname(const struct option *opt __used,
301 return 0; 292 return 0;
302} 293}
303 294
304#define for_each_lang(scripts_dir, lang_dirent, lang_next) \ 295/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */
296static int is_directory(const char *base_path, const struct dirent *dent)
297{
298 char path[PATH_MAX];
299 struct stat st;
300
301 sprintf(path, "%s/%s", base_path, dent->d_name);
302 if (stat(path, &st))
303 return 0;
304
305 return S_ISDIR(st.st_mode);
306}
307
308#define for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next)\
305 while (!readdir_r(scripts_dir, &lang_dirent, &lang_next) && \ 309 while (!readdir_r(scripts_dir, &lang_dirent, &lang_next) && \
306 lang_next) \ 310 lang_next) \
307 if (lang_dirent.d_type == DT_DIR && \ 311 if ((lang_dirent.d_type == DT_DIR || \
312 (lang_dirent.d_type == DT_UNKNOWN && \
313 is_directory(scripts_path, &lang_dirent))) && \
308 (strcmp(lang_dirent.d_name, ".")) && \ 314 (strcmp(lang_dirent.d_name, ".")) && \
309 (strcmp(lang_dirent.d_name, ".."))) 315 (strcmp(lang_dirent.d_name, "..")))
310 316
311#define for_each_script(lang_dir, script_dirent, script_next) \ 317#define for_each_script(lang_path, lang_dir, script_dirent, script_next)\
312 while (!readdir_r(lang_dir, &script_dirent, &script_next) && \ 318 while (!readdir_r(lang_dir, &script_dirent, &script_next) && \
313 script_next) \ 319 script_next) \
314 if (script_dirent.d_type != DT_DIR) 320 if (script_dirent.d_type != DT_DIR && \
321 (script_dirent.d_type != DT_UNKNOWN || \
322 !is_directory(lang_path, &script_dirent)))
315 323
316 324
317#define RECORD_SUFFIX "-record" 325#define RECORD_SUFFIX "-record"
@@ -380,10 +388,10 @@ out_delete_desc:
380 return NULL; 388 return NULL;
381} 389}
382 390
383static char *ends_with(char *str, const char *suffix) 391static const char *ends_with(const char *str, const char *suffix)
384{ 392{
385 size_t suffix_len = strlen(suffix); 393 size_t suffix_len = strlen(suffix);
386 char *p = str; 394 const char *p = str;
387 395
388 if (strlen(str) > suffix_len) { 396 if (strlen(str) > suffix_len) {
389 p = str + strlen(str) - suffix_len; 397 p = str + strlen(str) - suffix_len;
@@ -466,16 +474,16 @@ static int list_available_scripts(const struct option *opt __used,
466 if (!scripts_dir) 474 if (!scripts_dir)
467 return -1; 475 return -1;
468 476
469 for_each_lang(scripts_dir, lang_dirent, lang_next) { 477 for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) {
470 snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, 478 snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
471 lang_dirent.d_name); 479 lang_dirent.d_name);
472 lang_dir = opendir(lang_path); 480 lang_dir = opendir(lang_path);
473 if (!lang_dir) 481 if (!lang_dir)
474 continue; 482 continue;
475 483
476 for_each_script(lang_dir, script_dirent, script_next) { 484 for_each_script(lang_path, lang_dir, script_dirent, script_next) {
477 script_root = strdup(script_dirent.d_name); 485 script_root = strdup(script_dirent.d_name);
478 str = ends_with(script_root, REPORT_SUFFIX); 486 str = (char *)ends_with(script_root, REPORT_SUFFIX);
479 if (str) { 487 if (str) {
480 *str = '\0'; 488 *str = '\0';
481 desc = script_desc__findnew(script_root); 489 desc = script_desc__findnew(script_root);
@@ -514,16 +522,16 @@ static char *get_script_path(const char *script_root, const char *suffix)
514 if (!scripts_dir) 522 if (!scripts_dir)
515 return NULL; 523 return NULL;
516 524
517 for_each_lang(scripts_dir, lang_dirent, lang_next) { 525 for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) {
518 snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, 526 snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
519 lang_dirent.d_name); 527 lang_dirent.d_name);
520 lang_dir = opendir(lang_path); 528 lang_dir = opendir(lang_path);
521 if (!lang_dir) 529 if (!lang_dir)
522 continue; 530 continue;
523 531
524 for_each_script(lang_dir, script_dirent, script_next) { 532 for_each_script(lang_path, lang_dir, script_dirent, script_next) {
525 __script_root = strdup(script_dirent.d_name); 533 __script_root = strdup(script_dirent.d_name);
526 str = ends_with(__script_root, suffix); 534 str = (char *)ends_with(__script_root, suffix);
527 if (str) { 535 if (str) {
528 *str = '\0'; 536 *str = '\0';
529 if (strcmp(__script_root, script_root)) 537 if (strcmp(__script_root, script_root))
@@ -543,7 +551,7 @@ static char *get_script_path(const char *script_root, const char *suffix)
543 551
544static bool is_top_script(const char *script_path) 552static bool is_top_script(const char *script_path)
545{ 553{
546 return ends_with((char *)script_path, "top") == NULL ? false : true; 554 return ends_with(script_path, "top") == NULL ? false : true;
547} 555}
548 556
549static int has_required_arg(char *script_path) 557static int has_required_arg(char *script_path)
@@ -569,12 +577,12 @@ out:
569 return n_args; 577 return n_args;
570} 578}
571 579
572static const char * const trace_usage[] = { 580static const char * const script_usage[] = {
573 "perf trace [<options>]", 581 "perf script [<options>]",
574 "perf trace [<options>] record <script> [<record-options>] <command>", 582 "perf script [<options>] record <script> [<record-options>] <command>",
575 "perf trace [<options>] report <script> [script-args]", 583 "perf script [<options>] report <script> [script-args]",
576 "perf trace [<options>] <script> [<record-options>] <command>", 584 "perf script [<options>] <script> [<record-options>] <command>",
577 "perf trace [<options>] <top-script> [script-args]", 585 "perf script [<options>] <top-script> [script-args]",
578 NULL 586 NULL
579}; 587};
580 588
@@ -591,7 +599,7 @@ static const struct option options[] = {
591 "script file name (lang:script name, script name, or *)", 599 "script file name (lang:script name, script name, or *)",
592 parse_scriptname), 600 parse_scriptname),
593 OPT_STRING('g', "gen-script", &generate_script_lang, "lang", 601 OPT_STRING('g', "gen-script", &generate_script_lang, "lang",
594 "generate perf-trace.xx script in specified language"), 602 "generate perf-script.xx script in specified language"),
595 OPT_STRING('i', "input", &input_name, "file", 603 OPT_STRING('i', "input", &input_name, "file",
596 "input file name"), 604 "input file name"),
597 OPT_BOOLEAN('d', "debug-mode", &debug_mode, 605 OPT_BOOLEAN('d', "debug-mode", &debug_mode,
@@ -614,7 +622,7 @@ static bool have_cmd(int argc, const char **argv)
614 return argc != 0; 622 return argc != 0;
615} 623}
616 624
617int cmd_trace(int argc, const char **argv, const char *prefix __used) 625int cmd_script(int argc, const char **argv, const char *prefix __used)
618{ 626{
619 char *rec_script_path = NULL; 627 char *rec_script_path = NULL;
620 char *rep_script_path = NULL; 628 char *rep_script_path = NULL;
@@ -626,7 +634,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
626 634
627 setup_scripting(); 635 setup_scripting();
628 636
629 argc = parse_options(argc, argv, options, trace_usage, 637 argc = parse_options(argc, argv, options, script_usage,
630 PARSE_OPT_STOP_AT_NON_OPTION); 638 PARSE_OPT_STOP_AT_NON_OPTION);
631 639
632 if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) { 640 if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) {
@@ -640,7 +648,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
640 if (!rep_script_path) { 648 if (!rep_script_path) {
641 fprintf(stderr, 649 fprintf(stderr,
642 "Please specify a valid report script" 650 "Please specify a valid report script"
643 "(see 'perf trace -l' for listing)\n"); 651 "(see 'perf script -l' for listing)\n");
644 return -1; 652 return -1;
645 } 653 }
646 } 654 }
@@ -658,8 +666,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
658 666
659 if (!rec_script_path && !rep_script_path) { 667 if (!rec_script_path && !rep_script_path) {
660 fprintf(stderr, " Couldn't find script %s\n\n See perf" 668 fprintf(stderr, " Couldn't find script %s\n\n See perf"
661 " trace -l for available scripts.\n", argv[0]); 669 " script -l for available scripts.\n", argv[0]);
662 usage_with_options(trace_usage, options); 670 usage_with_options(script_usage, options);
663 } 671 }
664 672
665 if (is_top_script(argv[0])) { 673 if (is_top_script(argv[0])) {
@@ -671,9 +679,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
671 rec_args = (argc - 1) - rep_args; 679 rec_args = (argc - 1) - rep_args;
672 if (rec_args < 0) { 680 if (rec_args < 0) {
673 fprintf(stderr, " %s script requires options." 681 fprintf(stderr, " %s script requires options."
674 "\n\n See perf trace -l for available " 682 "\n\n See perf script -l for available "
675 "scripts and options.\n", argv[0]); 683 "scripts and options.\n", argv[0]);
676 usage_with_options(trace_usage, options); 684 usage_with_options(script_usage, options);
677 } 685 }
678 } 686 }
679 687
@@ -772,7 +780,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
772 if (!script_name) 780 if (!script_name)
773 setup_pager(); 781 setup_pager();
774 782
775 session = perf_session__new(input_name, O_RDONLY, 0, false); 783 session = perf_session__new(input_name, O_RDONLY, 0, false, &event_ops);
776 if (session == NULL) 784 if (session == NULL)
777 return -ENOMEM; 785 return -ENOMEM;
778 786
@@ -806,7 +814,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
806 return -1; 814 return -1;
807 } 815 }
808 816
809 err = scripting_ops->generate_script("perf-trace"); 817 err = scripting_ops->generate_script("perf-script");
810 goto out; 818 goto out;
811 } 819 }
812 820
@@ -814,10 +822,10 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
814 err = scripting_ops->start_script(script_name, argc, argv); 822 err = scripting_ops->start_script(script_name, argc, argv);
815 if (err) 823 if (err)
816 goto out; 824 goto out;
817 pr_debug("perf trace started with script %s\n\n", script_name); 825 pr_debug("perf script started with script %s\n\n", script_name);
818 } 826 }
819 827
820 err = __cmd_trace(session); 828 err = __cmd_script(session);
821 829
822 perf_session__delete(session); 830 perf_session__delete(session);
823 cleanup_scripting(); 831 cleanup_scripting();
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a6b4d44f9502..7ff746da7e6c 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -52,6 +52,8 @@
52#include <math.h> 52#include <math.h>
53#include <locale.h> 53#include <locale.h>
54 54
55#define DEFAULT_SEPARATOR " "
56
55static struct perf_event_attr default_attrs[] = { 57static struct perf_event_attr default_attrs[] = {
56 58
57 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 59 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
@@ -75,20 +77,30 @@ static int run_idx = 0;
75static int run_count = 1; 77static int run_count = 1;
76static bool no_inherit = false; 78static bool no_inherit = false;
77static bool scale = true; 79static bool scale = true;
80static bool no_aggr = false;
78static pid_t target_pid = -1; 81static pid_t target_pid = -1;
79static pid_t target_tid = -1; 82static pid_t target_tid = -1;
80static pid_t *all_tids = NULL; 83static pid_t *all_tids = NULL;
81static int thread_num = 0; 84static int thread_num = 0;
82static pid_t child_pid = -1; 85static pid_t child_pid = -1;
83static bool null_run = false; 86static bool null_run = false;
84static bool big_num = false; 87static bool big_num = true;
88static int big_num_opt = -1;
85static const char *cpu_list; 89static const char *cpu_list;
90static const char *csv_sep = NULL;
91static bool csv_output = false;
86 92
87 93
88static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; 94static int *fd[MAX_NR_CPUS][MAX_COUNTERS];
89 95
90static int event_scaled[MAX_COUNTERS]; 96static int event_scaled[MAX_COUNTERS];
91 97
98static struct {
99 u64 val;
100 u64 ena;
101 u64 run;
102} cpu_counts[MAX_NR_CPUS][MAX_COUNTERS];
103
92static volatile int done = 0; 104static volatile int done = 0;
93 105
94struct stats 106struct stats
@@ -136,19 +148,19 @@ static double stddev_stats(struct stats *stats)
136} 148}
137 149
138struct stats event_res_stats[MAX_COUNTERS][3]; 150struct stats event_res_stats[MAX_COUNTERS][3];
139struct stats runtime_nsecs_stats; 151struct stats runtime_nsecs_stats[MAX_NR_CPUS];
152struct stats runtime_cycles_stats[MAX_NR_CPUS];
153struct stats runtime_branches_stats[MAX_NR_CPUS];
140struct stats walltime_nsecs_stats; 154struct stats walltime_nsecs_stats;
141struct stats runtime_cycles_stats;
142struct stats runtime_branches_stats;
143 155
144#define MATCH_EVENT(t, c, counter) \ 156#define MATCH_EVENT(t, c, counter) \
145 (attrs[counter].type == PERF_TYPE_##t && \ 157 (attrs[counter].type == PERF_TYPE_##t && \
146 attrs[counter].config == PERF_COUNT_##c) 158 attrs[counter].config == PERF_COUNT_##c)
147 159
148#define ERR_PERF_OPEN \ 160#define ERR_PERF_OPEN \
149"Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n" 161"counter %d, sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information."
150 162
151static int create_perf_stat_counter(int counter) 163static int create_perf_stat_counter(int counter, bool *perm_err)
152{ 164{
153 struct perf_event_attr *attr = attrs + counter; 165 struct perf_event_attr *attr = attrs + counter;
154 int thread; 166 int thread;
@@ -164,11 +176,14 @@ static int create_perf_stat_counter(int counter)
164 for (cpu = 0; cpu < nr_cpus; cpu++) { 176 for (cpu = 0; cpu < nr_cpus; cpu++) {
165 fd[cpu][counter][0] = sys_perf_event_open(attr, 177 fd[cpu][counter][0] = sys_perf_event_open(attr,
166 -1, cpumap[cpu], -1, 0); 178 -1, cpumap[cpu], -1, 0);
167 if (fd[cpu][counter][0] < 0) 179 if (fd[cpu][counter][0] < 0) {
168 pr_debug(ERR_PERF_OPEN, counter, 180 if (errno == EPERM || errno == EACCES)
181 *perm_err = true;
182 error(ERR_PERF_OPEN, counter,
169 fd[cpu][counter][0], strerror(errno)); 183 fd[cpu][counter][0], strerror(errno));
170 else 184 } else {
171 ++ncreated; 185 ++ncreated;
186 }
172 } 187 }
173 } else { 188 } else {
174 attr->inherit = !no_inherit; 189 attr->inherit = !no_inherit;
@@ -179,12 +194,15 @@ static int create_perf_stat_counter(int counter)
179 for (thread = 0; thread < thread_num; thread++) { 194 for (thread = 0; thread < thread_num; thread++) {
180 fd[0][counter][thread] = sys_perf_event_open(attr, 195 fd[0][counter][thread] = sys_perf_event_open(attr,
181 all_tids[thread], -1, -1, 0); 196 all_tids[thread], -1, -1, 0);
182 if (fd[0][counter][thread] < 0) 197 if (fd[0][counter][thread] < 0) {
183 pr_debug(ERR_PERF_OPEN, counter, 198 if (errno == EPERM || errno == EACCES)
199 *perm_err = true;
200 error(ERR_PERF_OPEN, counter,
184 fd[0][counter][thread], 201 fd[0][counter][thread],
185 strerror(errno)); 202 strerror(errno));
186 else 203 } else {
187 ++ncreated; 204 ++ncreated;
205 }
188 } 206 }
189 } 207 }
190 208
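
The EPERM/EACCES special-casing above is what turns a bare syscall failure into the perf_event_paranoid hint printed later. A self-contained sketch of the same distinction around the raw syscall, assuming Linux with glibc (illustrative, not perf's own wrapper):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr = {
		.type = PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_TASK_CLOCK,
		.size = sizeof(attr),
	};
	/* count this process (pid 0) on any cpu (-1) */
	int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);

	if (fd < 0) {
		if (errno == EPERM || errno == EACCES)
			fprintf(stderr, "no permission; check "
				"/proc/sys/kernel/perf_event_paranoid\n");
		else
			fprintf(stderr, "perf_event_open: %s\n",
				strerror(errno));
		return 1;
	}
	close(fd);
	return 0;
}
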
@@ -205,8 +223,9 @@ static inline int nsec_counter(int counter)
205 223
206/* 224/*
207 * Read out the results of a single counter: 225 * Read out the results of a single counter:
226 * aggregate counts across CPUs in system-wide mode
208 */ 227 */
209static void read_counter(int counter) 228static void read_counter_aggr(int counter)
210{ 229{
211 u64 count[3], single_count[3]; 230 u64 count[3], single_count[3];
212 int cpu; 231 int cpu;
@@ -264,11 +283,58 @@ static void read_counter(int counter)
264 * Save the full runtime - to allow normalization during printout: 283 * Save the full runtime - to allow normalization during printout:
265 */ 284 */
266 if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) 285 if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
267 update_stats(&runtime_nsecs_stats, count[0]); 286 update_stats(&runtime_nsecs_stats[0], count[0]);
268 if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) 287 if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
269 update_stats(&runtime_cycles_stats, count[0]); 288 update_stats(&runtime_cycles_stats[0], count[0]);
270 if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter)) 289 if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter))
271 update_stats(&runtime_branches_stats, count[0]); 290 update_stats(&runtime_branches_stats[0], count[0]);
291}
292
293/*
294 * Read out the results of a single counter:
295 * do not aggregate counts across CPUs in system-wide mode
296 */
297static void read_counter(int counter)
298{
299 u64 count[3];
300 int cpu;
301 size_t res, nv;
302
303 count[0] = count[1] = count[2] = 0;
304
305 nv = scale ? 3 : 1;
306
307 for (cpu = 0; cpu < nr_cpus; cpu++) {
308
309 if (fd[cpu][counter][0] < 0)
310 continue;
311
312 res = read(fd[cpu][counter][0], count, nv * sizeof(u64));
313
314 assert(res == nv * sizeof(u64));
315
316 close(fd[cpu][counter][0]);
317 fd[cpu][counter][0] = -1;
318
319 if (scale) {
320 if (count[2] == 0) {
321 count[0] = 0;
322 } else if (count[2] < count[1]) {
323 count[0] = (unsigned long long)
324 ((double)count[0] * count[1] / count[2] + 0.5);
325 }
326 }
327 cpu_counts[cpu][counter].val = count[0]; /* scaled count */
328 cpu_counts[cpu][counter].ena = count[1];
329 cpu_counts[cpu][counter].run = count[2];
330
331 if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
332 update_stats(&runtime_nsecs_stats[cpu], count[0]);
333 if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
334 update_stats(&runtime_cycles_stats[cpu], count[0]);
335 if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter))
336 update_stats(&runtime_branches_stats[cpu], count[0]);
337 }
272} 338}
273 339
274static int run_perf_stat(int argc __used, const char **argv) 340static int run_perf_stat(int argc __used, const char **argv)
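
The arithmetic in the new read_counter() is the standard multiplexing correction: when time_running (count[2]) is less than time_enabled (count[1]), the kernel had to time-share the counter, so the raw count is extrapolated by enabled/running and rounded. A worked example under assumed numbers (not perf code):

#include <stdio.h>

int main(void)
{
	unsigned long long raw = 1500, enabled = 300, running = 100;
	unsigned long long scaled;

	if (running == 0)
		scaled = 0;	/* never scheduled: reported as <not counted> */
	else
		scaled = (unsigned long long)
			((double)raw * enabled / running + 0.5);

	/* 1500 * 300 / 100 = 4500, i.e. the counter ran 33% of the time */
	printf("raw=%llu scaled=%llu (ran %.0f%% of the time)\n",
	       raw, scaled, 100.0 * running / enabled);
	return 0;
}
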
@@ -277,6 +343,7 @@ static int run_perf_stat(int argc __used, const char **argv)
277 int status = 0; 343 int status = 0;
278 int counter, ncreated = 0; 344 int counter, ncreated = 0;
279 int child_ready_pipe[2], go_pipe[2]; 345 int child_ready_pipe[2], go_pipe[2];
346 bool perm_err = false;
280 const bool forks = (argc > 0); 347 const bool forks = (argc > 0);
281 char buf; 348 char buf;
282 349
@@ -335,12 +402,15 @@ static int run_perf_stat(int argc __used, const char **argv)
335 } 402 }
336 403
337 for (counter = 0; counter < nr_counters; counter++) 404 for (counter = 0; counter < nr_counters; counter++)
338 ncreated += create_perf_stat_counter(counter); 405 ncreated += create_perf_stat_counter(counter, &perm_err);
339 406
340 if (ncreated == 0) { 407 if (ncreated < nr_counters) {
341 pr_err("No permission to collect %sstats.\n" 408 if (perm_err)
342 "Consider tweaking /proc/sys/kernel/perf_event_paranoid.\n", 409 error("You may not have permission to collect %sstats.\n"
343 system_wide ? "system-wide " : ""); 410 "\t Consider tweaking"
411 " /proc/sys/kernel/perf_event_paranoid or running as root.",
412 system_wide ? "system-wide " : "");
413 die("Not all events could be opened.\n");
344 if (child_pid != -1) 414 if (child_pid != -1)
345 kill(child_pid, SIGTERM); 415 kill(child_pid, SIGTERM);
346 return -1; 416 return -1;
@@ -362,9 +432,13 @@ static int run_perf_stat(int argc __used, const char **argv)
362 432
363 update_stats(&walltime_nsecs_stats, t1 - t0); 433 update_stats(&walltime_nsecs_stats, t1 - t0);
364 434
365 for (counter = 0; counter < nr_counters; counter++) 435 if (no_aggr) {
366 read_counter(counter); 436 for (counter = 0; counter < nr_counters; counter++)
367 437 read_counter(counter);
438 } else {
439 for (counter = 0; counter < nr_counters; counter++)
440 read_counter_aggr(counter);
441 }
368 return WEXITSTATUS(status); 442 return WEXITSTATUS(status);
369} 443}
370 444
@@ -377,11 +451,21 @@ static void print_noise(int counter, double avg)
377 100 * stddev_stats(&event_res_stats[counter][0]) / avg); 451 100 * stddev_stats(&event_res_stats[counter][0]) / avg);
378} 452}
379 453
380static void nsec_printout(int counter, double avg) 454static void nsec_printout(int cpu, int counter, double avg)
381{ 455{
382 double msecs = avg / 1e6; 456 double msecs = avg / 1e6;
457 char cpustr[16] = { '\0', };
458 const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-24s";
459
460 if (no_aggr)
461 sprintf(cpustr, "CPU%*d%s",
462 csv_output ? 0 : -4,
463 cpumap[cpu], csv_sep);
383 464
384 fprintf(stderr, " %18.6f %-24s", msecs, event_name(counter)); 465 fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(counter));
466
467 if (csv_output)
468 return;
385 469
386 if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) { 470 if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) {
387 fprintf(stderr, " # %10.3f CPUs ", 471 fprintf(stderr, " # %10.3f CPUs ",
@@ -389,33 +473,49 @@ static void nsec_printout(int counter, double avg)
389 } 473 }
390} 474}
391 475
392static void abs_printout(int counter, double avg) 476static void abs_printout(int cpu, int counter, double avg)
393{ 477{
394 double total, ratio = 0.0; 478 double total, ratio = 0.0;
479 char cpustr[16] = { '\0', };
480 const char *fmt;
481
482 if (csv_output)
483 fmt = "%s%.0f%s%s";
484 else if (big_num)
485 fmt = "%s%'18.0f%s%-24s";
486 else
487 fmt = "%s%18.0f%s%-24s";
395 488
396 if (big_num) 489 if (no_aggr)
397 fprintf(stderr, " %'18.0f %-24s", avg, event_name(counter)); 490 sprintf(cpustr, "CPU%*d%s",
491 csv_output ? 0 : -4,
492 cpumap[cpu], csv_sep);
398 else 493 else
399 fprintf(stderr, " %18.0f %-24s", avg, event_name(counter)); 494 cpu = 0;
495
496 fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(counter));
497
498 if (csv_output)
499 return;
400 500
401 if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) { 501 if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) {
402 total = avg_stats(&runtime_cycles_stats); 502 total = avg_stats(&runtime_cycles_stats[cpu]);
403 503
404 if (total) 504 if (total)
405 ratio = avg / total; 505 ratio = avg / total;
406 506
407 fprintf(stderr, " # %10.3f IPC ", ratio); 507 fprintf(stderr, " # %10.3f IPC ", ratio);
408 } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) && 508 } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) &&
409 runtime_branches_stats.n != 0) { 509 runtime_branches_stats[cpu].n != 0) {
410 total = avg_stats(&runtime_branches_stats); 510 total = avg_stats(&runtime_branches_stats[cpu]);
411 511
412 if (total) 512 if (total)
413 ratio = avg * 100 / total; 513 ratio = avg * 100 / total;
414 514
415 fprintf(stderr, " # %10.3f %% ", ratio); 515 fprintf(stderr, " # %10.3f %% ", ratio);
416 516
417 } else if (runtime_nsecs_stats.n != 0) { 517 } else if (runtime_nsecs_stats[cpu].n != 0) {
418 total = avg_stats(&runtime_nsecs_stats); 518 total = avg_stats(&runtime_nsecs_stats[cpu]);
419 519
420 if (total) 520 if (total)
421 ratio = 1000.0 * avg / total; 521 ratio = 1000.0 * avg / total;
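
The %'18.0f format selected when big_num is set relies on the POSIX ' (apostrophe) flag, which groups digits by thousands only once a locale with a separator is active, hence the <locale.h> include at the top of the file. A small sketch, assuming a glibc system with a suitable locale installed:

#include <locale.h>
#include <stdio.h>

int main(void)
{
	setlocale(LC_NUMERIC, "");	/* e.g. en_US.UTF-8 */
	printf("%'18.0f  cycles\n", 1234567890.0);	/* 1,234,567,890 */
	printf("%18.0f  cycles\n", 1234567890.0);	/* ungrouped */
	return 0;
}
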
@@ -426,22 +526,29 @@ static void abs_printout(int counter, double avg)
426 526
427/* 527/*
428 * Print out the results of a single counter: 528 * Print out the results of a single counter:
529 * aggregated counts in system-wide mode
429 */ 530 */
430static void print_counter(int counter) 531static void print_counter_aggr(int counter)
431{ 532{
432 double avg = avg_stats(&event_res_stats[counter][0]); 533 double avg = avg_stats(&event_res_stats[counter][0]);
433 int scaled = event_scaled[counter]; 534 int scaled = event_scaled[counter];
434 535
435 if (scaled == -1) { 536 if (scaled == -1) {
436 fprintf(stderr, " %18s %-24s\n", 537 fprintf(stderr, "%*s%s%-24s\n",
437 "<not counted>", event_name(counter)); 538 csv_output ? 0 : 18,
539 "<not counted>", csv_sep, event_name(counter));
438 return; 540 return;
439 } 541 }
440 542
441 if (nsec_counter(counter)) 543 if (nsec_counter(counter))
442 nsec_printout(counter, avg); 544 nsec_printout(-1, counter, avg);
443 else 545 else
444 abs_printout(counter, avg); 546 abs_printout(-1, counter, avg);
547
548 if (csv_output) {
549 fputc('\n', stderr);
550 return;
551 }
445 552
446 print_noise(counter, avg); 553 print_noise(counter, avg);
447 554
@@ -458,40 +565,91 @@ static void print_counter(int counter)
458 fprintf(stderr, "\n"); 565 fprintf(stderr, "\n");
459} 566}
460 567
568/*
569 * Print out the results of a single counter:
570 * does not use aggregated count in system-wide mode
571 */
572static void print_counter(int counter)
573{
574 u64 ena, run, val;
575 int cpu;
576
577 for (cpu = 0; cpu < nr_cpus; cpu++) {
578 val = cpu_counts[cpu][counter].val;
579 ena = cpu_counts[cpu][counter].ena;
580 run = cpu_counts[cpu][counter].run;
581 if (run == 0 || ena == 0) {
582 fprintf(stderr, "CPU%*d%s%*s%s%-24s",
583 csv_output ? 0 : -4,
584 cpumap[cpu], csv_sep,
585 csv_output ? 0 : 18,
586 "<not counted>", csv_sep,
587 event_name(counter));
588
589 fprintf(stderr, "\n");
590 continue;
591 }
592
593 if (nsec_counter(counter))
594 nsec_printout(cpu, counter, val);
595 else
596 abs_printout(cpu, counter, val);
597
598 if (!csv_output) {
599 print_noise(counter, 1.0);
600
601 if (run != ena) {
602 fprintf(stderr, " (scaled from %.2f%%)",
603 100.0 * run / ena);
604 }
605 }
606 fprintf(stderr, "\n");
607 }
608}
609
461static void print_stat(int argc, const char **argv) 610static void print_stat(int argc, const char **argv)
462{ 611{
463 int i, counter; 612 int i, counter;
464 613
465 fflush(stdout); 614 fflush(stdout);
466 615
467 fprintf(stderr, "\n"); 616 if (!csv_output) {
468 fprintf(stderr, " Performance counter stats for "); 617 fprintf(stderr, "\n");
469 if(target_pid == -1 && target_tid == -1) { 618 fprintf(stderr, " Performance counter stats for ");
470 fprintf(stderr, "\'%s", argv[0]); 619 if(target_pid == -1 && target_tid == -1) {
471 for (i = 1; i < argc; i++) 620 fprintf(stderr, "\'%s", argv[0]);
472 fprintf(stderr, " %s", argv[i]); 621 for (i = 1; i < argc; i++)
473 } else if (target_pid != -1) 622 fprintf(stderr, " %s", argv[i]);
474 fprintf(stderr, "process id \'%d", target_pid); 623 } else if (target_pid != -1)
475 else 624 fprintf(stderr, "process id \'%d", target_pid);
476 fprintf(stderr, "thread id \'%d", target_tid); 625 else
477 626 fprintf(stderr, "thread id \'%d", target_tid);
478 fprintf(stderr, "\'"); 627
479 if (run_count > 1) 628 fprintf(stderr, "\'");
480 fprintf(stderr, " (%d runs)", run_count); 629 if (run_count > 1)
481 fprintf(stderr, ":\n\n"); 630 fprintf(stderr, " (%d runs)", run_count);
631 fprintf(stderr, ":\n\n");
632 }
482 633
483 for (counter = 0; counter < nr_counters; counter++) 634 if (no_aggr) {
484 print_counter(counter); 635 for (counter = 0; counter < nr_counters; counter++)
636 print_counter(counter);
637 } else {
638 for (counter = 0; counter < nr_counters; counter++)
639 print_counter_aggr(counter);
640 }
485 641
486 fprintf(stderr, "\n"); 642 if (!csv_output) {
487 fprintf(stderr, " %18.9f seconds time elapsed", 643 fprintf(stderr, "\n");
488 avg_stats(&walltime_nsecs_stats)/1e9); 644 fprintf(stderr, " %18.9f seconds time elapsed",
489 if (run_count > 1) { 645 avg_stats(&walltime_nsecs_stats)/1e9);
490 fprintf(stderr, " ( +- %7.3f%% )", 646 if (run_count > 1) {
647 fprintf(stderr, " ( +- %7.3f%% )",
491 100*stddev_stats(&walltime_nsecs_stats) / 648 100*stddev_stats(&walltime_nsecs_stats) /
492 avg_stats(&walltime_nsecs_stats)); 649 avg_stats(&walltime_nsecs_stats));
650 }
651 fprintf(stderr, "\n\n");
493 } 652 }
494 fprintf(stderr, "\n\n");
495} 653}
496 654
497static volatile int signr = -1; 655static volatile int signr = -1;
@@ -521,6 +679,13 @@ static const char * const stat_usage[] = {
521 NULL 679 NULL
522}; 680};
523 681
682static int stat__set_big_num(const struct option *opt __used,
683 const char *s __used, int unset)
684{
685 big_num_opt = unset ? 0 : 1;
686 return 0;
687}
688
524static const struct option options[] = { 689static const struct option options[] = {
525 OPT_CALLBACK('e', "event", NULL, "event", 690 OPT_CALLBACK('e', "event", NULL, "event",
526 "event selector. use 'perf list' to list available events", 691 "event selector. use 'perf list' to list available events",
@@ -541,10 +706,15 @@ static const struct option options[] = {
541 "repeat command and print average + stddev (max: 100)"), 706 "repeat command and print average + stddev (max: 100)"),
542 OPT_BOOLEAN('n', "null", &null_run, 707 OPT_BOOLEAN('n', "null", &null_run,
543 "null run - dont start any counters"), 708 "null run - dont start any counters"),
544 OPT_BOOLEAN('B', "big-num", &big_num, 709 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
545 "print large numbers with thousands\' separators"), 710 "print large numbers with thousands\' separators",
711 stat__set_big_num),
546 OPT_STRING('C', "cpu", &cpu_list, "cpu", 712 OPT_STRING('C', "cpu", &cpu_list, "cpu",
547 "list of cpus to monitor in system-wide"), 713 "list of cpus to monitor in system-wide"),
714 OPT_BOOLEAN('A', "no-aggr", &no_aggr,
715 "disable CPU count aggregation"),
716 OPT_STRING('x', "field-separator", &csv_sep, "separator",
717 "print counts with custom separator"),
548 OPT_END() 718 OPT_END()
549}; 719};
550 720
@@ -557,11 +727,34 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
557 727
558 argc = parse_options(argc, argv, options, stat_usage, 728 argc = parse_options(argc, argv, options, stat_usage,
559 PARSE_OPT_STOP_AT_NON_OPTION); 729 PARSE_OPT_STOP_AT_NON_OPTION);
730
731 if (csv_sep)
732 csv_output = true;
733 else
734 csv_sep = DEFAULT_SEPARATOR;
735
736 /*
737 * let the spreadsheet do the pretty-printing
738 */
739 if (csv_output) {
740 /* User explicitly passed -B? */
741 if (big_num_opt == 1) {
742 fprintf(stderr, "-B option not supported with -x\n");
743 usage_with_options(stat_usage, options);
744 } else /* Nope, so disable big number formatting */
745 big_num = false;
746 } else if (big_num_opt == 0) /* User passed --no-big-num */
747 big_num = false;
748
560 if (!argc && target_pid == -1 && target_tid == -1) 749 if (!argc && target_pid == -1 && target_tid == -1)
561 usage_with_options(stat_usage, options); 750 usage_with_options(stat_usage, options);
562 if (run_count <= 0) 751 if (run_count <= 0)
563 usage_with_options(stat_usage, options); 752 usage_with_options(stat_usage, options);
564 753
754 /* no_aggr is for system-wide only */
755 if (no_aggr && !system_wide)
756 usage_with_options(stat_usage, options);
757
565 /* Set attrs and nr_counters if no event is selected and !null_run */ 758 /* Set attrs and nr_counters if no event is selected and !null_run */
566 if (!null_run && !nr_counters) { 759 if (!null_run && !nr_counters) {
567 memcpy(attrs, default_attrs, sizeof(default_attrs)); 760 memcpy(attrs, default_attrs, sizeof(default_attrs));
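
The big_num handling above is a tri-state: big_num_opt stays -1 when the user said nothing, becomes 0 for --no-big-num and 1 for -B, so -x can reject only an explicit -B while silently disabling digit grouping otherwise. A compact sketch of that decision table (illustrative names, not the patch's code):

#include <stdbool.h>
#include <stdio.h>

/* returns 1 to group digits, 0 to print plain, -1 for a usage error */
static int resolve_big_num(int big_num_opt, bool csv_output)
{
	if (csv_output)
		return big_num_opt == 1 ? -1 : 0;
	return big_num_opt == 0 ? 0 : 1;
}

int main(void)
{
	printf("%d %d %d\n",
	       resolve_big_num(-1, false),	/* default: grouped */
	       resolve_big_num(-1, true),	/* -x alone: plain */
	       resolve_big_num(1, true));	/* -B with -x: reject */
	return 0;
}
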
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 035b9fa063a9..e0c3f471f22d 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -119,10 +119,16 @@ static int test__vmlinux_matches_kallsyms(void)
119 * end addresses too. 119 * end addresses too.
120 */ 120 */
121 for (nd = rb_first(&vmlinux_map->dso->symbols[type]); nd; nd = rb_next(nd)) { 121 for (nd = rb_first(&vmlinux_map->dso->symbols[type]); nd; nd = rb_next(nd)) {
122 struct symbol *pair; 122 struct symbol *pair, *first_pair;
123 bool backwards = true;
123 124
124 sym = rb_entry(nd, struct symbol, rb_node); 125 sym = rb_entry(nd, struct symbol, rb_node);
125 pair = machine__find_kernel_symbol(&kallsyms, type, sym->start, NULL, NULL); 126
127 if (sym->start == sym->end)
128 continue;
129
130 first_pair = machine__find_kernel_symbol(&kallsyms, type, sym->start, NULL, NULL);
131 pair = first_pair;
126 132
127 if (pair && pair->start == sym->start) { 133 if (pair && pair->start == sym->start) {
128next_pair: 134next_pair:
@@ -143,8 +149,10 @@ next_pair:
143 pr_debug("%#Lx: diff end addr for %s v: %#Lx k: %#Lx\n", 149 pr_debug("%#Lx: diff end addr for %s v: %#Lx k: %#Lx\n",
144 sym->start, sym->name, sym->end, pair->end); 150 sym->start, sym->name, sym->end, pair->end);
145 } else { 151 } else {
146 struct rb_node *nnd = rb_prev(&pair->rb_node); 152 struct rb_node *nnd;
147 153detour:
154 nnd = backwards ? rb_prev(&pair->rb_node) :
155 rb_next(&pair->rb_node);
148 if (nnd) { 156 if (nnd) {
149 struct symbol *next = rb_entry(nnd, struct symbol, rb_node); 157 struct symbol *next = rb_entry(nnd, struct symbol, rb_node);
150 158
@@ -153,6 +161,13 @@ next_pair:
153 goto next_pair; 161 goto next_pair;
154 } 162 }
155 } 163 }
164
165 if (backwards) {
166 backwards = false;
167 pair = first_pair;
168 goto detour;
169 }
170
156 pr_debug("%#Lx: diff name v: %s k: %s\n", 171 pr_debug("%#Lx: diff name v: %s k: %s\n",
157 sym->start, sym->name, pair->name); 172 sym->start, sym->name, pair->name);
158 } 173 }
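
The backwards/detour logic above handles aliased symbols: several kallsyms entries can share one start address, and the entry found first by address lookup is not necessarily the one whose name matches vmlinux, so the test scans backwards from the first match and then, failing that, restarts and scans forwards before reporting a name mismatch. An illustration of the same two-pass scan over a sorted array (plain C, not the kernel rbtree API):

#include <stdio.h>
#include <string.h>

struct sym { unsigned long start; const char *name; };

static const struct sym *match(const struct sym *tab, int n, int first,
			       const char *name)
{
	int i;

	/* backwards pass over entries aliasing the same start address */
	for (i = first; i >= 0 && tab[i].start == tab[first].start; i--)
		if (!strcmp(tab[i].name, name))
			return &tab[i];
	/* detour: restart at the first match and scan forwards */
	for (i = first; i < n && tab[i].start == tab[first].start; i++)
		if (!strcmp(tab[i].name, name))
			return &tab[i];
	return NULL;
}

int main(void)
{
	const struct sym kallsyms[] = {
		{ 0x1000, "t_start" }, { 0x1000, "_stext" },
		{ 0x1000, "startup" },
	};

	printf("%s\n", match(kallsyms, 3, 1, "startup") ? "found" : "diff name");
	return 0;
}
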
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 9bcc38f0b706..d75084bccdb7 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -272,19 +272,22 @@ static int cpus_cstate_state[MAX_CPUS];
272static u64 cpus_pstate_start_times[MAX_CPUS]; 272static u64 cpus_pstate_start_times[MAX_CPUS];
273static u64 cpus_pstate_state[MAX_CPUS]; 273static u64 cpus_pstate_state[MAX_CPUS];
274 274
275static int process_comm_event(event_t *event, struct perf_session *session __used) 275static int process_comm_event(event_t *event, struct sample_data *sample __used,
276 struct perf_session *session __used)
276{ 277{
277 pid_set_comm(event->comm.tid, event->comm.comm); 278 pid_set_comm(event->comm.tid, event->comm.comm);
278 return 0; 279 return 0;
279} 280}
280 281
281static int process_fork_event(event_t *event, struct perf_session *session __used) 282static int process_fork_event(event_t *event, struct sample_data *sample __used,
283 struct perf_session *session __used)
282{ 284{
283 pid_fork(event->fork.pid, event->fork.ppid, event->fork.time); 285 pid_fork(event->fork.pid, event->fork.ppid, event->fork.time);
284 return 0; 286 return 0;
285} 287}
286 288
287static int process_exit_event(event_t *event, struct perf_session *session __used) 289static int process_exit_event(event_t *event, struct sample_data *sample __used,
290 struct perf_session *session __used)
288{ 291{
289 pid_exit(event->fork.pid, event->fork.time); 292 pid_exit(event->fork.pid, event->fork.time);
290 return 0; 293 return 0;
@@ -470,24 +473,21 @@ static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te)
470} 473}
471 474
472 475
473static int process_sample_event(event_t *event, struct perf_session *session) 476static int process_sample_event(event_t *event __used,
477 struct sample_data *sample,
478 struct perf_session *session)
474{ 479{
475 struct sample_data data;
476 struct trace_entry *te; 480 struct trace_entry *te;
477 481
478 memset(&data, 0, sizeof(data));
479
480 event__parse_sample(event, session->sample_type, &data);
481
482 if (session->sample_type & PERF_SAMPLE_TIME) { 482 if (session->sample_type & PERF_SAMPLE_TIME) {
483 if (!first_time || first_time > data.time) 483 if (!first_time || first_time > sample->time)
484 first_time = data.time; 484 first_time = sample->time;
485 if (last_time < data.time) 485 if (last_time < sample->time)
486 last_time = data.time; 486 last_time = sample->time;
487 } 487 }
488 488
489 te = (void *)data.raw_data; 489 te = (void *)sample->raw_data;
490 if (session->sample_type & PERF_SAMPLE_RAW && data.raw_size > 0) { 490 if (session->sample_type & PERF_SAMPLE_RAW && sample->raw_size > 0) {
491 char *event_str; 491 char *event_str;
492 struct power_entry *pe; 492 struct power_entry *pe;
493 493
@@ -499,19 +499,19 @@ static int process_sample_event(event_t *event, struct perf_session *session)
499 return 0; 499 return 0;
500 500
501 if (strcmp(event_str, "power:power_start") == 0) 501 if (strcmp(event_str, "power:power_start") == 0)
502 c_state_start(pe->cpu_id, data.time, pe->value); 502 c_state_start(pe->cpu_id, sample->time, pe->value);
503 503
504 if (strcmp(event_str, "power:power_end") == 0) 504 if (strcmp(event_str, "power:power_end") == 0)
505 c_state_end(pe->cpu_id, data.time); 505 c_state_end(pe->cpu_id, sample->time);
506 506
507 if (strcmp(event_str, "power:power_frequency") == 0) 507 if (strcmp(event_str, "power:power_frequency") == 0)
508 p_state_change(pe->cpu_id, data.time, pe->value); 508 p_state_change(pe->cpu_id, sample->time, pe->value);
509 509
510 if (strcmp(event_str, "sched:sched_wakeup") == 0) 510 if (strcmp(event_str, "sched:sched_wakeup") == 0)
511 sched_wakeup(data.cpu, data.time, data.pid, te); 511 sched_wakeup(sample->cpu, sample->time, sample->pid, te);
512 512
513 if (strcmp(event_str, "sched:sched_switch") == 0) 513 if (strcmp(event_str, "sched:sched_switch") == 0)
514 sched_switch(data.cpu, data.time, te); 514 sched_switch(sample->cpu, sample->time, te);
515 } 515 }
516 return 0; 516 return 0;
517} 517}
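
This hunk follows the tree-wide handler-signature change in this patch set: the session layer now calls event__parse_sample() once per record and hands every perf_event_ops callback a pre-parsed struct sample_data, so tools such as timechart stop parsing samples themselves. A sketch of the shape of the new contract, using simplified stand-in types rather than the real perf headers:

#include <stdint.h>
#include <stdio.h>

/* stand-ins for the perf types; fields trimmed to what the sketch uses */
struct sample_data { uint64_t time; uint32_t cpu, pid; void *raw_data; };
struct event_t; struct session;

typedef int (*event_op)(struct event_t *ev, struct sample_data *sample,
			struct session *s);

static int process_sample(struct event_t *ev, struct sample_data *sample,
			  struct session *s)
{
	(void)ev; (void)s;	/* handler consumes the pre-parsed sample */
	printf("sample at %llu on cpu %u\n",
	       (unsigned long long)sample->time, sample->cpu);
	return 0;
}

int main(void)
{
	struct sample_data sample = { .time = 12345, .cpu = 2 };
	event_op op = process_sample;

	return op(NULL, &sample, NULL);
}
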
@@ -937,7 +937,8 @@ static struct perf_event_ops event_ops = {
937 937
938static int __cmd_timechart(void) 938static int __cmd_timechart(void)
939{ 939{
940 struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0, false); 940 struct perf_session *session = perf_session__new(input_name, O_RDONLY,
941 0, false, &event_ops);
941 int ret = -EINVAL; 942 int ret = -EINVAL;
942 943
943 if (session == NULL) 944 if (session == NULL)
@@ -989,6 +990,9 @@ static int __cmd_record(int argc, const char **argv)
989 rec_argc = ARRAY_SIZE(record_args) + argc - 1; 990 rec_argc = ARRAY_SIZE(record_args) + argc - 1;
990 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 991 rec_argv = calloc(rec_argc + 1, sizeof(char *));
991 992
993 if (rec_argv == NULL)
994 return -ENOMEM;
995
992 for (i = 0; i < ARRAY_SIZE(record_args); i++) 996 for (i = 0; i < ARRAY_SIZE(record_args); i++)
993 rec_argv[i] = strdup(record_args[i]); 997 rec_argv[i] = strdup(record_args[i]);
994 998
@@ -1018,6 +1022,8 @@ static const struct option options[] = {
1018 OPT_CALLBACK('p', "process", NULL, "process", 1022 OPT_CALLBACK('p', "process", NULL, "process",
1019 "process selector. Pass a pid or process name.", 1023 "process selector. Pass a pid or process name.",
1020 parse_process), 1024 parse_process),
1025 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
1026 "Look for files with symbols relative to this directory"),
1021 OPT_END() 1027 OPT_END()
1022}; 1028};
1023 1029
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index dd625808c2a5..ae15f046c405 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -977,12 +977,12 @@ static int symbol_filter(struct map *map, struct symbol *sym)
977} 977}
978 978
979static void event__process_sample(const event_t *self, 979static void event__process_sample(const event_t *self,
980 struct perf_session *session, int counter) 980 struct sample_data *sample,
981 struct perf_session *session, int counter)
981{ 982{
982 u64 ip = self->ip.ip; 983 u64 ip = self->ip.ip;
983 struct sym_entry *syme; 984 struct sym_entry *syme;
984 struct addr_location al; 985 struct addr_location al;
985 struct sample_data data;
986 struct machine *machine; 986 struct machine *machine;
987 u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 987 u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
988 988
@@ -1025,7 +1025,7 @@ static void event__process_sample(const event_t *self,
1025 if (self->header.misc & PERF_RECORD_MISC_EXACT_IP) 1025 if (self->header.misc & PERF_RECORD_MISC_EXACT_IP)
1026 exact_samples++; 1026 exact_samples++;
1027 1027
1028 if (event__preprocess_sample(self, session, &al, &data, 1028 if (event__preprocess_sample(self, session, &al, sample,
1029 symbol_filter) < 0 || 1029 symbol_filter) < 0 ||
1030 al.filtered) 1030 al.filtered)
1031 return; 1031 return;
@@ -1105,6 +1105,7 @@ static void perf_session__mmap_read_counter(struct perf_session *self,
1105 unsigned int head = mmap_read_head(md); 1105 unsigned int head = mmap_read_head(md);
1106 unsigned int old = md->prev; 1106 unsigned int old = md->prev;
1107 unsigned char *data = md->base + page_size; 1107 unsigned char *data = md->base + page_size;
1108 struct sample_data sample;
1108 int diff; 1109 int diff;
1109 1110
1110 /* 1111 /*
@@ -1152,10 +1153,11 @@ static void perf_session__mmap_read_counter(struct perf_session *self,
1152 event = &event_copy; 1153 event = &event_copy;
1153 } 1154 }
1154 1155
1156 event__parse_sample(event, self, &sample);
1155 if (event->header.type == PERF_RECORD_SAMPLE) 1157 if (event->header.type == PERF_RECORD_SAMPLE)
1156 event__process_sample(event, self, md->counter); 1158 event__process_sample(event, &sample, self, md->counter);
1157 else 1159 else
1158 event__process(event, self); 1160 event__process(event, &sample, self);
1159 old += size; 1161 old += size;
1160 } 1162 }
1161 1163
@@ -1214,7 +1216,9 @@ try_again:
1214 int err = errno; 1216 int err = errno;
1215 1217
1216 if (err == EPERM || err == EACCES) 1218 if (err == EPERM || err == EACCES)
1217 die("No permission - are you root?\n"); 1219 die("Permission error - are you root?\n"
1220 "\t Consider tweaking"
1221 " /proc/sys/kernel/perf_event_paranoid.\n");
1218 /* 1222 /*
1219 * If it's cycles then fall back to hrtimer 1223 * If it's cycles then fall back to hrtimer
1220 * based cpu-clock-tick sw counter, which 1224 * based cpu-clock-tick sw counter, which
@@ -1231,7 +1235,7 @@ try_again:
1231 goto try_again; 1235 goto try_again;
1232 } 1236 }
1233 printf("\n"); 1237 printf("\n");
1234 error("perfcounter syscall returned with %d (%s)\n", 1238 error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
1235 fd[i][counter][thread_index], strerror(err)); 1239 fd[i][counter][thread_index], strerror(err));
1236 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 1240 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
1237 exit(-1); 1241 exit(-1);
@@ -1268,7 +1272,7 @@ static int __cmd_top(void)
1268 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this 1272 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
1269 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now. 1273 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
1270 */ 1274 */
1271 struct perf_session *session = perf_session__new(NULL, O_WRONLY, false, false); 1275 struct perf_session *session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
1272 if (session == NULL) 1276 if (session == NULL)
1273 return -ENOMEM; 1277 return -ENOMEM;
1274 1278
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 921245b28583..c7798c7f24ed 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -27,7 +27,7 @@ extern int cmd_report(int argc, const char **argv, const char *prefix);
27extern int cmd_stat(int argc, const char **argv, const char *prefix); 27extern int cmd_stat(int argc, const char **argv, const char *prefix);
28extern int cmd_timechart(int argc, const char **argv, const char *prefix); 28extern int cmd_timechart(int argc, const char **argv, const char *prefix);
29extern int cmd_top(int argc, const char **argv, const char *prefix); 29extern int cmd_top(int argc, const char **argv, const char *prefix);
30extern int cmd_trace(int argc, const char **argv, const char *prefix); 30extern int cmd_script(int argc, const char **argv, const char *prefix);
31extern int cmd_version(int argc, const char **argv, const char *prefix); 31extern int cmd_version(int argc, const char **argv, const char *prefix);
32extern int cmd_probe(int argc, const char **argv, const char *prefix); 32extern int cmd_probe(int argc, const char **argv, const char *prefix);
33extern int cmd_kmem(int argc, const char **argv, const char *prefix); 33extern int cmd_kmem(int argc, const char **argv, const char *prefix);
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index 949d77fc0b97..16b5088cf8f4 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -16,7 +16,7 @@ perf-report mainporcelain common
16perf-stat mainporcelain common 16perf-stat mainporcelain common
17perf-timechart mainporcelain common 17perf-timechart mainporcelain common
18perf-top mainporcelain common 18perf-top mainporcelain common
19perf-trace mainporcelain common 19perf-script mainporcelain common
20perf-probe mainporcelain common 20perf-probe mainporcelain common
21perf-kmem mainporcelain common 21perf-kmem mainporcelain common
22perf-lock mainporcelain common 22perf-lock mainporcelain common
diff --git a/tools/perf/feature-tests.mak b/tools/perf/feature-tests.mak
index b253db634f04..b041ca67a2cb 100644
--- a/tools/perf/feature-tests.mak
+++ b/tools/perf/feature-tests.mak
@@ -9,8 +9,8 @@ endef
9ifndef NO_DWARF 9ifndef NO_DWARF
10define SOURCE_DWARF 10define SOURCE_DWARF
11#include <dwarf.h> 11#include <dwarf.h>
12#include <libdw.h> 12#include <elfutils/libdw.h>
13#include <version.h> 13#include <elfutils/version.h>
14#ifndef _ELFUTILS_PREREQ 14#ifndef _ELFUTILS_PREREQ
15#error 15#error
16#endif 16#endif
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index cdd6c03f1e14..595d0f4a7103 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -323,7 +323,7 @@ static void handle_internal_command(int argc, const char **argv)
323 { "top", cmd_top, 0 }, 323 { "top", cmd_top, 0 },
324 { "annotate", cmd_annotate, 0 }, 324 { "annotate", cmd_annotate, 0 },
325 { "version", cmd_version, 0 }, 325 { "version", cmd_version, 0 },
326 { "trace", cmd_trace, 0 }, 326 { "script", cmd_script, 0 },
327 { "sched", cmd_sched, 0 }, 327 { "sched", cmd_sched, 0 },
328 { "probe", cmd_probe, 0 }, 328 { "probe", cmd_probe, 0 },
329 { "kmem", cmd_kmem, 0 }, 329 { "kmem", cmd_kmem, 0 },
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c
index 01a64ad693f2..790ceba6ad3f 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c
@@ -8,7 +8,7 @@
8 8
9#line 1 "Context.xs" 9#line 1 "Context.xs"
10/* 10/*
11 * Context.xs. XS interfaces for perf trace. 11 * Context.xs. XS interfaces for perf script.
12 * 12 *
13 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> 13 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
14 * 14 *
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs
index 549cf0467d30..c1e2ed1ed34e 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs
@@ -1,5 +1,5 @@
1/* 1/*
2 * Context.xs. XS interfaces for perf trace. 2 * Context.xs. XS interfaces for perf script.
3 * 3 *
4 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> 4 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
5 * 5 *
@@ -23,7 +23,7 @@
23#include "perl.h" 23#include "perl.h"
24#include "XSUB.h" 24#include "XSUB.h"
25#include "../../../perf.h" 25#include "../../../perf.h"
26#include "../../../util/trace-event.h" 26#include "../../../util/trace-event.h"
27 27
28MODULE = Perf::Trace::Context PACKAGE = Perf::Trace::Context 28MODULE = Perf::Trace::Context PACKAGE = Perf::Trace::Context
29PROTOTYPES: ENABLE 29PROTOTYPES: ENABLE
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/README b/tools/perf/scripts/perl/Perf-Trace-Util/README
index 9a9707630791..2f0c7f3043ee 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/README
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/README
@@ -1,7 +1,7 @@
1Perf-Trace-Util version 0.01 1Perf-Trace-Util version 0.01
2============================ 2============================
3 3
4This module contains utility functions for use with perf trace. 4This module contains utility functions for use with perf script.
5 5
6Core.pm and Util.pm are pure Perl modules; Core.pm contains routines 6Core.pm and Util.pm are pure Perl modules; Core.pm contains routines
7that the core perf support for Perl calls on and should always be 7that the core perf support for Perl calls on and should always be
@@ -33,7 +33,7 @@ After you do that:
33 33
34INSTALLATION 34INSTALLATION
35 35
36Building perf with perf trace Perl scripting should install this 36Building perf with perf script Perl scripting should install this
37module in the right place. 37module in the right place.
38 38
39You should make sure libperl and ExtUtils/Embed.pm are installed first 39You should make sure libperl and ExtUtils/Embed.pm are installed first
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm
index 6c7f3659cb17..4e2f6039ac92 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm
@@ -34,7 +34,7 @@ Perf::Trace::Context - Perl extension for accessing functions in perf.
34 34
35=head1 SEE ALSO 35=head1 SEE ALSO
36 36
37Perf (trace) documentation 37Perf (script) documentation
38 38
39=head1 AUTHOR 39=head1 AUTHOR
40 40
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm
index 9df376a9f629..9158458d3eeb 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm
@@ -163,7 +163,7 @@ sub dump_symbolic_fields
163__END__ 163__END__
164=head1 NAME 164=head1 NAME
165 165
166Perf::Trace::Core - Perl extension for perf trace 166Perf::Trace::Core - Perl extension for perf script
167 167
168=head1 SYNOPSIS 168=head1 SYNOPSIS
169 169
@@ -171,7 +171,7 @@ Perf::Trace::Core - Perl extension for perf trace
171 171
172=head1 SEE ALSO 172=head1 SEE ALSO
173 173
174Perf (trace) documentation 174Perf (script) documentation
175 175
176=head1 AUTHOR 176=head1 AUTHOR
177 177
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
index d94b40c8ac85..053500114625 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
@@ -65,7 +65,7 @@ sub clear_term
65__END__ 65__END__
66=head1 NAME 66=head1 NAME
67 67
68Perf::Trace::Util - Perl extension for perf trace 68Perf::Trace::Util - Perl extension for perf script
69 69
70=head1 SYNOPSIS 70=head1 SYNOPSIS
71 71
@@ -73,7 +73,7 @@ Perf::Trace::Util - Perl extension for perf trace
73 73
74=head1 SEE ALSO 74=head1 SEE ALSO
75 75
76Perf (trace) documentation 76Perf (script) documentation
77 77
78=head1 AUTHOR 78=head1 AUTHOR
79 79
diff --git a/tools/perf/scripts/perl/bin/failed-syscalls-report b/tools/perf/scripts/perl/bin/failed-syscalls-report
index 4028d92dc4ae..9f83cc1ad8ba 100644
--- a/tools/perf/scripts/perl/bin/failed-syscalls-report
+++ b/tools/perf/scripts/perl/bin/failed-syscalls-report
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
7 shift 7 shift
8 fi 8 fi
9fi 9fi
10perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/failed-syscalls.pl $comm 10perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/failed-syscalls.pl $comm
diff --git a/tools/perf/scripts/perl/bin/rw-by-file-report b/tools/perf/scripts/perl/bin/rw-by-file-report
index ba25f4d41fb0..77200b3f3100 100644
--- a/tools/perf/scripts/perl/bin/rw-by-file-report
+++ b/tools/perf/scripts/perl/bin/rw-by-file-report
@@ -7,7 +7,4 @@ if [ $# -lt 1 ] ; then
7fi 7fi
8comm=$1 8comm=$1
9shift 9shift
10perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-file.pl $comm 10perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-file.pl $comm
11
12
13
diff --git a/tools/perf/scripts/perl/bin/rw-by-pid-report b/tools/perf/scripts/perl/bin/rw-by-pid-report
index 641a3f5d085c..a27b9f311f95 100644
--- a/tools/perf/scripts/perl/bin/rw-by-pid-report
+++ b/tools/perf/scripts/perl/bin/rw-by-pid-report
@@ -1,6 +1,3 @@
1#!/bin/bash 1#!/bin/bash
2# description: system-wide r/w activity 2# description: system-wide r/w activity
3perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-pid.pl 3perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-pid.pl
4
5
6
diff --git a/tools/perf/scripts/perl/bin/rwtop-report b/tools/perf/scripts/perl/bin/rwtop-report
index 4918dba77021..83e11ec2e190 100644
--- a/tools/perf/scripts/perl/bin/rwtop-report
+++ b/tools/perf/scripts/perl/bin/rwtop-report
@@ -17,7 +17,4 @@ if [ "$n_args" -gt 0 ] ; then
17 interval=$1 17 interval=$1
18 shift 18 shift
19fi 19fi
20perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rwtop.pl $interval 20perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/rwtop.pl $interval
21
22
23
diff --git a/tools/perf/scripts/perl/bin/wakeup-latency-report b/tools/perf/scripts/perl/bin/wakeup-latency-report
index 49052ebcb632..889e8130cca5 100644
--- a/tools/perf/scripts/perl/bin/wakeup-latency-report
+++ b/tools/perf/scripts/perl/bin/wakeup-latency-report
@@ -1,6 +1,3 @@
1#!/bin/bash 1#!/bin/bash
2# description: system-wide min/max/avg wakeup latency 2# description: system-wide min/max/avg wakeup latency
3perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/wakeup-latency.pl 3perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/wakeup-latency.pl
4
5
6
diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-report b/tools/perf/scripts/perl/bin/workqueue-stats-report
index df0c65f4ca93..6d91411d248c 100644
--- a/tools/perf/scripts/perl/bin/workqueue-stats-report
+++ b/tools/perf/scripts/perl/bin/workqueue-stats-report
@@ -1,7 +1,3 @@
1#!/bin/bash 1#!/bin/bash
2# description: workqueue stats (ins/exe/create/destroy) 2# description: workqueue stats (ins/exe/create/destroy)
3perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/workqueue-stats.pl 3perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/workqueue-stats.pl
4
5
6
7
diff --git a/tools/perf/scripts/perl/check-perf-trace.pl b/tools/perf/scripts/perl/check-perf-trace.pl
index 4e7dc0a407a5..4e7076c20616 100644
--- a/tools/perf/scripts/perl/check-perf-trace.pl
+++ b/tools/perf/scripts/perl/check-perf-trace.pl
@@ -1,4 +1,4 @@
1# perf trace event handlers, generated by perf trace -g perl 1# perf script event handlers, generated by perf script -g perl
2# (c) 2009, Tom Zanussi <tzanussi@gmail.com> 2# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
3# Licensed under the terms of the GNU GPL License version 2 3# Licensed under the terms of the GNU GPL License version 2
4 4
diff --git a/tools/perf/scripts/perl/rw-by-file.pl b/tools/perf/scripts/perl/rw-by-file.pl
index 2a39097687b9..74844ee2be3e 100644
--- a/tools/perf/scripts/perl/rw-by-file.pl
+++ b/tools/perf/scripts/perl/rw-by-file.pl
@@ -18,7 +18,7 @@ use lib "./Perf-Trace-Util/lib";
18use Perf::Trace::Core; 18use Perf::Trace::Core;
19use Perf::Trace::Util; 19use Perf::Trace::Util;
20 20
21my $usage = "perf trace -s rw-by-file.pl <comm>\n"; 21my $usage = "perf script -s rw-by-file.pl <comm>\n";
22 22
23my $for_comm = shift or die $usage; 23my $for_comm = shift or die $usage;
24 24
diff --git a/tools/perf/scripts/perl/workqueue-stats.pl b/tools/perf/scripts/perl/workqueue-stats.pl
index b84b12699b70..a8eaff5119e0 100644
--- a/tools/perf/scripts/perl/workqueue-stats.pl
+++ b/tools/perf/scripts/perl/workqueue-stats.pl
@@ -10,7 +10,7 @@
10# workqueue:workqueue_destruction -e workqueue:workqueue_execution 10# workqueue:workqueue_destruction -e workqueue:workqueue_execution
11# -e workqueue:workqueue_insertion 11# -e workqueue:workqueue_insertion
12# 12#
13# perf trace -p -s tools/perf/scripts/perl/workqueue-stats.pl 13# perf script -p -s tools/perf/scripts/perl/workqueue-stats.pl
14 14
15use 5.010000; 15use 5.010000;
16use strict; 16use strict;
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Context.c b/tools/perf/scripts/python/Perf-Trace-Util/Context.c
index 957085dd5d8d..315067b8f552 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/Context.c
+++ b/tools/perf/scripts/python/Perf-Trace-Util/Context.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Context.c. Python interfaces for perf trace. 2 * Context.c. Python interfaces for perf script.
3 * 3 *
4 * Copyright (C) 2010 Tom Zanussi <tzanussi@gmail.com> 4 * Copyright (C) 2010 Tom Zanussi <tzanussi@gmail.com>
5 * 5 *
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
index aad7525bca1d..de7211e4fa47 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
@@ -1,4 +1,4 @@
1# Core.py - Python extension for perf trace, core functions 1# Core.py - Python extension for perf script, core functions
2# 2#
3# Copyright (C) 2010 by Tom Zanussi <tzanussi@gmail.com> 3# Copyright (C) 2010 by Tom Zanussi <tzanussi@gmail.com>
4# 4#
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
index ae9a56e43e05..fdd92f699055 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
@@ -1,4 +1,4 @@
1# SchedGui.py - Python extension for perf trace, basic GUI code for 1# SchedGui.py - Python extension for perf script, basic GUI code for
2# traces drawing and overview. 2# traces drawing and overview.
3# 3#
4# Copyright (C) 2010 by Frederic Weisbecker <fweisbec@gmail.com> 4# Copyright (C) 2010 by Frederic Weisbecker <fweisbec@gmail.com>
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
index 13cc02b5893a..15c8400240fd 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
@@ -1,4 +1,4 @@
1# Util.py - Python extension for perf trace, miscellaneous utility code 1# Util.py - Python extension for perf script, miscellaneous utility code
2# 2#
3# Copyright (C) 2010 by Tom Zanussi <tzanussi@gmail.com> 3# Copyright (C) 2010 by Tom Zanussi <tzanussi@gmail.com>
4# 4#
diff --git a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
index 03587021463d..fda5096d0cbf 100644
--- a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
+++ b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
7 shift 7 shift
8 fi 8 fi
9fi 9fi
10perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/failed-syscalls-by-pid.py $comm 10perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/failed-syscalls-by-pid.py $comm
diff --git a/tools/perf/scripts/python/bin/futex-contention-report b/tools/perf/scripts/python/bin/futex-contention-report
index c8268138fb7e..6c44271091ab 100644
--- a/tools/perf/scripts/python/bin/futex-contention-report
+++ b/tools/perf/scripts/python/bin/futex-contention-report
@@ -1,4 +1,4 @@
1#!/bin/bash 1#!/bin/bash
2# description: futex contention measurement 2# description: futex contention measurement
3 3
4perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/futex-contention.py 4perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/futex-contention.py
diff --git a/tools/perf/scripts/python/bin/netdev-times-report b/tools/perf/scripts/python/bin/netdev-times-report
index 4ad361b31249..8f759291da86 100644
--- a/tools/perf/scripts/python/bin/netdev-times-report
+++ b/tools/perf/scripts/python/bin/netdev-times-report
@@ -2,4 +2,4 @@
2# description: display a process of packet and processing time 2# description: display a process of packet and processing time
3# args: [tx] [rx] [dev=] [debug] 3# args: [tx] [rx] [dev=] [debug]
4 4
5perf trace -s "$PERF_EXEC_PATH"/scripts/python/netdev-times.py $@ 5perf script -s "$PERF_EXEC_PATH"/scripts/python/netdev-times.py $@
diff --git a/tools/perf/scripts/python/bin/sched-migration-report b/tools/perf/scripts/python/bin/sched-migration-report
index df1791f07c24..68b037a1849b 100644
--- a/tools/perf/scripts/python/bin/sched-migration-report
+++ b/tools/perf/scripts/python/bin/sched-migration-report
@@ -1,3 +1,3 @@
1#!/bin/bash 1#!/bin/bash
2# description: sched migration overview 2# description: sched migration overview
3perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/sched-migration.py 3perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/sched-migration.py
diff --git a/tools/perf/scripts/python/bin/sctop-report b/tools/perf/scripts/python/bin/sctop-report
index 36b409c05e50..c32db294124d 100644
--- a/tools/perf/scripts/python/bin/sctop-report
+++ b/tools/perf/scripts/python/bin/sctop-report
@@ -21,4 +21,4 @@ elif [ "$n_args" -gt 0 ] ; then
21 interval=$1 21 interval=$1
22 shift 22 shift
23fi 23fi
24perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/sctop.py $comm $interval 24perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/sctop.py $comm $interval
diff --git a/tools/perf/scripts/python/bin/syscall-counts-by-pid-report b/tools/perf/scripts/python/bin/syscall-counts-by-pid-report
index 4eb88c9fc83c..16eb8d65c543 100644
--- a/tools/perf/scripts/python/bin/syscall-counts-by-pid-report
+++ b/tools/perf/scripts/python/bin/syscall-counts-by-pid-report
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
7 shift 7 shift
8 fi 8 fi
9fi 9fi
10perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts-by-pid.py $comm 10perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts-by-pid.py $comm
diff --git a/tools/perf/scripts/python/bin/syscall-counts-report b/tools/perf/scripts/python/bin/syscall-counts-report
index cb2f9c5cf17e..0f0e9d453bb4 100644
--- a/tools/perf/scripts/python/bin/syscall-counts-report
+++ b/tools/perf/scripts/python/bin/syscall-counts-report
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
7 shift 7 shift
8 fi 8 fi
9fi 9fi
10perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts.py $comm 10perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts.py $comm
diff --git a/tools/perf/scripts/python/check-perf-trace.py b/tools/perf/scripts/python/check-perf-trace.py
index d9f7893e315c..4647a7694cf6 100644
--- a/tools/perf/scripts/python/check-perf-trace.py
+++ b/tools/perf/scripts/python/check-perf-trace.py
@@ -1,4 +1,4 @@
1# perf trace event handlers, generated by perf trace -g python 1# perf script event handlers, generated by perf script -g python
2# (c) 2010, Tom Zanussi <tzanussi@gmail.com> 2# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
3# Licensed under the terms of the GNU GPL License version 2 3# Licensed under the terms of the GNU GPL License version 2
4# 4#
diff --git a/tools/perf/scripts/python/failed-syscalls-by-pid.py b/tools/perf/scripts/python/failed-syscalls-by-pid.py
index acd7848717b3..85805fac4116 100644
--- a/tools/perf/scripts/python/failed-syscalls-by-pid.py
+++ b/tools/perf/scripts/python/failed-syscalls-by-pid.py
@@ -15,7 +15,7 @@ from perf_trace_context import *
15from Core import * 15from Core import *
16from Util import * 16from Util import *
17 17
18usage = "perf trace -s failed-syscalls-by-pid.py [comm|pid]\n"; 18usage = "perf script -s failed-syscalls-by-pid.py [comm|pid]\n";
19 19
20for_comm = None 20for_comm = None
21for_pid = None 21for_pid = None
diff --git a/tools/perf/scripts/python/sched-migration.py b/tools/perf/scripts/python/sched-migration.py
index b934383c3364..74d55ec08aed 100644
--- a/tools/perf/scripts/python/sched-migration.py
+++ b/tools/perf/scripts/python/sched-migration.py
@@ -4,7 +4,7 @@
4# 4#
5# Copyright (C) 2010 Frederic Weisbecker <fweisbec@gmail.com> 5# Copyright (C) 2010 Frederic Weisbecker <fweisbec@gmail.com>
6# 6#
7# perf trace event handlers have been generated by perf trace -g python 7# perf script event handlers have been generated by perf script -g python
8# 8#
9# This software is distributed under the terms of the GNU General 9# This software is distributed under the terms of the GNU General
10# Public License ("GPL") version 2 as published by the Free Software 10# Public License ("GPL") version 2 as published by the Free Software
diff --git a/tools/perf/scripts/python/sctop.py b/tools/perf/scripts/python/sctop.py
index 7a6ec2c7d8ab..42c267e292fa 100644
--- a/tools/perf/scripts/python/sctop.py
+++ b/tools/perf/scripts/python/sctop.py
@@ -17,7 +17,7 @@ from perf_trace_context import *
17from Core import * 17from Core import *
18from Util import * 18from Util import *
19 19
20usage = "perf trace -s sctop.py [comm] [interval]\n"; 20usage = "perf script -s sctop.py [comm] [interval]\n";
21 21
22for_comm = None 22for_comm = None
23default_interval = 3 23default_interval = 3
diff --git a/tools/perf/scripts/python/syscall-counts-by-pid.py b/tools/perf/scripts/python/syscall-counts-by-pid.py
index d1ee3ec10cf2..c64d1c55d745 100644
--- a/tools/perf/scripts/python/syscall-counts-by-pid.py
+++ b/tools/perf/scripts/python/syscall-counts-by-pid.py
@@ -14,7 +14,7 @@ from perf_trace_context import *
14from Core import * 14from Core import *
15from Util import syscall_name 15from Util import syscall_name
16 16
17usage = "perf trace -s syscall-counts-by-pid.py [comm]\n"; 17usage = "perf script -s syscall-counts-by-pid.py [comm]\n";
18 18
19for_comm = None 19for_comm = None
20for_pid = None 20for_pid = None
diff --git a/tools/perf/scripts/python/syscall-counts.py b/tools/perf/scripts/python/syscall-counts.py
index ea183dc82d29..b435d3f188e8 100644
--- a/tools/perf/scripts/python/syscall-counts.py
+++ b/tools/perf/scripts/python/syscall-counts.py
@@ -15,7 +15,7 @@ from perf_trace_context import *
15from Core import * 15from Core import *
16from Util import syscall_name 16from Util import syscall_name
17 17
18usage = "perf trace -s syscall-counts.py [comm]\n"; 18usage = "perf script -s syscall-counts.py [comm]\n";
19 19
20for_comm = None 20for_comm = None
21 21
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index e437edb72417..deffb8c96071 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -14,7 +14,9 @@
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include "debug.h" 15#include "debug.h"
16 16
17static int build_id__mark_dso_hit(event_t *event, struct perf_session *session) 17static int build_id__mark_dso_hit(event_t *event,
18 struct sample_data *sample __used,
19 struct perf_session *session)
18{ 20{
19 struct addr_location al; 21 struct addr_location al;
20 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 22 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
@@ -35,7 +37,8 @@ static int build_id__mark_dso_hit(event_t *event, struct perf_session *session)
35 return 0; 37 return 0;
36} 38}
37 39
38static int event__exit_del_thread(event_t *self, struct perf_session *session) 40static int event__exit_del_thread(event_t *self, struct sample_data *sample __used,
41 struct perf_session *session)
39{ 42{
40 struct thread *thread = perf_session__findnew(session, self->fork.tid); 43 struct thread *thread = perf_session__findnew(session, self->fork.tid);
41 44
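
Annotation: the interesting change in build-id.c is the widened callback signature. Every event handler now receives the struct sample_data that the session layer has already parsed once, instead of each handler re-parsing the record; handlers that do not need it, like the two above, simply mark the parameter __used. A minimal sketch of a handler written against the new signature (the function name and body are illustrative, not part of the patch):

	static int count_comm_events(event_t *event, struct sample_data *sample,
				     struct perf_session *session __used)
	{
		/* sample->time was filled in once by the session layer */
		if (event->header.type == PERF_RECORD_COMM)
			pr_debug("COMM for pid %u at time %Lu\n",
				 event->comm.pid, sample->time);
		return 0;
	}
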
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index c8d81b00089d..01bbe8ecec3f 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -46,20 +46,16 @@ int dump_printf(const char *fmt, ...)
46 return ret; 46 return ret;
47} 47}
48 48
49static int dump_printf_color(const char *fmt, const char *color, ...) 49#ifdef NO_NEWT_SUPPORT
50void ui__warning(const char *format, ...)
50{ 51{
51 va_list args; 52 va_list args;
52 int ret = 0;
53 53
54 if (dump_trace) { 54 va_start(args, format);
55 va_start(args, color); 55 vfprintf(stderr, format, args);
56 ret = color_vfprintf(stdout, color, fmt, args); 56 va_end(args);
57 va_end(args);
58 }
59
60 return ret;
61} 57}
62 58#endif
63 59
64void trace_event(event_t *event) 60void trace_event(event_t *event)
65{ 61{
@@ -70,29 +66,29 @@ void trace_event(event_t *event)
70 if (!dump_trace) 66 if (!dump_trace)
71 return; 67 return;
72 68
73 dump_printf("."); 69 printf(".");
74 dump_printf_color("\n. ... raw event: size %d bytes\n", color, 70 color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n",
75 event->header.size); 71 event->header.size);
76 72
77 for (i = 0; i < event->header.size; i++) { 73 for (i = 0; i < event->header.size; i++) {
78 if ((i & 15) == 0) { 74 if ((i & 15) == 0) {
79 dump_printf("."); 75 printf(".");
80 dump_printf_color(" %04x: ", color, i); 76 color_fprintf(stdout, color, " %04x: ", i);
81 } 77 }
82 78
83 dump_printf_color(" %02x", color, raw_event[i]); 79 color_fprintf(stdout, color, " %02x", raw_event[i]);
84 80
85 if (((i & 15) == 15) || i == event->header.size-1) { 81 if (((i & 15) == 15) || i == event->header.size-1) {
86 dump_printf_color(" ", color); 82 color_fprintf(stdout, color, " ");
87 for (j = 0; j < 15-(i & 15); j++) 83 for (j = 0; j < 15-(i & 15); j++)
88 dump_printf_color(" ", color); 84 color_fprintf(stdout, color, " ");
89 for (j = i & ~15; j <= i; j++) { 85 for (j = i & ~15; j <= i; j++) {
90 dump_printf_color("%c", color, 86 color_fprintf(stdout, color, "%c",
91 isprint(raw_event[j]) ? 87 isprint(raw_event[j]) ?
92 raw_event[j] : '.'); 88 raw_event[j] : '.');
93 } 89 }
94 dump_printf_color("\n", color); 90 color_fprintf(stdout, color, "\n");
95 } 91 }
96 } 92 }
97 dump_printf(".\n"); 93 printf(".\n");
98} 94}
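
Annotation: the dropped dump_printf_color() wrapper only added a dump_trace check that trace_event() already performs on entry, so the hex dump now calls color_fprintf() directly, and ui__warning() gains a plain-stderr fallback for builds without newt. The fallback is the standard stdarg forwarding idiom; a self-contained sketch of the same shape:

	#include <stdarg.h>
	#include <stdio.h>

	/* Forward the variadic arguments to a v*printf variant.  The printf
	 * format attribute (see the debug.h hunk below) lets the compiler
	 * type-check callers' format strings. */
	static void warn_to(FILE *out, const char *format, ...)
		__attribute__((format(printf, 2, 3)));

	static void warn_to(FILE *out, const char *format, ...)
	{
		va_list args;

		va_start(args, format);
		vfprintf(out, format, args);
		va_end(args);
	}
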
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index 7b514082bbaf..ca35fd66b5df 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -35,4 +35,6 @@ int ui_helpline__show_help(const char *format, va_list ap);
35#include "ui/progress.h" 35#include "ui/progress.h"
36#endif 36#endif
37 37
38void ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2)));
39
38#endif /* __PERF_DEBUG_H */ 40#endif /* __PERF_DEBUG_H */
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index dab9e754a281..2302ec051bb4 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -7,7 +7,7 @@
7#include "strlist.h" 7#include "strlist.h"
8#include "thread.h" 8#include "thread.h"
9 9
10const char *event__name[] = { 10static const char *event__name[] = {
11 [0] = "TOTAL", 11 [0] = "TOTAL",
12 [PERF_RECORD_MMAP] = "MMAP", 12 [PERF_RECORD_MMAP] = "MMAP",
13 [PERF_RECORD_LOST] = "LOST", 13 [PERF_RECORD_LOST] = "LOST",
@@ -22,13 +22,31 @@ const char *event__name[] = {
22 [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", 22 [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
23 [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", 23 [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
24 [PERF_RECORD_HEADER_BUILD_ID] = "BUILD_ID", 24 [PERF_RECORD_HEADER_BUILD_ID] = "BUILD_ID",
25 [PERF_RECORD_FINISHED_ROUND] = "FINISHED_ROUND",
25}; 26};
26 27
27static pid_t event__synthesize_comm(pid_t pid, int full, 28const char *event__get_event_name(unsigned int id)
29{
30 if (id >= ARRAY_SIZE(event__name))
31 return "INVALID";
32 if (!event__name[id])
33 return "UNKNOWN";
34 return event__name[id];
35}
36
37static struct sample_data synth_sample = {
38 .pid = -1,
39 .tid = -1,
40 .time = -1,
41 .stream_id = -1,
42 .cpu = -1,
43 .period = 1,
44};
45
46static pid_t event__synthesize_comm(event_t *event, pid_t pid, int full,
28 event__handler_t process, 47 event__handler_t process,
29 struct perf_session *session) 48 struct perf_session *session)
30{ 49{
31 event_t ev;
32 char filename[PATH_MAX]; 50 char filename[PATH_MAX];
33 char bf[BUFSIZ]; 51 char bf[BUFSIZ];
34 FILE *fp; 52 FILE *fp;
@@ -49,34 +67,39 @@ out_race:
49 return 0; 67 return 0;
50 } 68 }
51 69
52 memset(&ev.comm, 0, sizeof(ev.comm)); 70 memset(&event->comm, 0, sizeof(event->comm));
53 while (!ev.comm.comm[0] || !ev.comm.pid) { 71
54 if (fgets(bf, sizeof(bf), fp) == NULL) 72 while (!event->comm.comm[0] || !event->comm.pid) {
55 goto out_failure; 73 if (fgets(bf, sizeof(bf), fp) == NULL) {
74 pr_warning("couldn't get COMM and pgid, malformed %s\n", filename);
75 goto out;
76 }
56 77
57 if (memcmp(bf, "Name:", 5) == 0) { 78 if (memcmp(bf, "Name:", 5) == 0) {
58 char *name = bf + 5; 79 char *name = bf + 5;
59 while (*name && isspace(*name)) 80 while (*name && isspace(*name))
60 ++name; 81 ++name;
61 size = strlen(name) - 1; 82 size = strlen(name) - 1;
62 memcpy(ev.comm.comm, name, size++); 83 memcpy(event->comm.comm, name, size++);
63 } else if (memcmp(bf, "Tgid:", 5) == 0) { 84 } else if (memcmp(bf, "Tgid:", 5) == 0) {
64 char *tgids = bf + 5; 85 char *tgids = bf + 5;
65 while (*tgids && isspace(*tgids)) 86 while (*tgids && isspace(*tgids))
66 ++tgids; 87 ++tgids;
67 tgid = ev.comm.pid = atoi(tgids); 88 tgid = event->comm.pid = atoi(tgids);
68 } 89 }
69 } 90 }
70 91
71 ev.comm.header.type = PERF_RECORD_COMM; 92 event->comm.header.type = PERF_RECORD_COMM;
72 size = ALIGN(size, sizeof(u64)); 93 size = ALIGN(size, sizeof(u64));
73 ev.comm.header.size = sizeof(ev.comm) - (sizeof(ev.comm.comm) - size); 94 memset(event->comm.comm + size, 0, session->id_hdr_size);
74 95 event->comm.header.size = (sizeof(event->comm) -
96 (sizeof(event->comm.comm) - size) +
97 session->id_hdr_size);
75 if (!full) { 98 if (!full) {
76 ev.comm.tid = pid; 99 event->comm.tid = pid;
77 100
78 process(&ev, session); 101 process(event, &synth_sample, session);
79 goto out_fclose; 102 goto out;
80 } 103 }
81 104
82 snprintf(filename, sizeof(filename), "/proc/%d/task", pid); 105 snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
@@ -91,22 +114,19 @@ out_race:
91 if (*end) 114 if (*end)
92 continue; 115 continue;
93 116
94 ev.comm.tid = pid; 117 event->comm.tid = pid;
95 118
96 process(&ev, session); 119 process(event, &synth_sample, session);
97 } 120 }
98 closedir(tasks);
99 121
100out_fclose: 122 closedir(tasks);
123out:
101 fclose(fp); 124 fclose(fp);
102 return tgid;
103 125
104out_failure: 126 return tgid;
105 pr_warning("couldn't get COMM and pgid, malformed %s\n", filename);
106 return -1;
107} 127}
108 128
109static int event__synthesize_mmap_events(pid_t pid, pid_t tgid, 129static int event__synthesize_mmap_events(event_t *event, pid_t pid, pid_t tgid,
110 event__handler_t process, 130 event__handler_t process,
111 struct perf_session *session) 131 struct perf_session *session)
112{ 132{
@@ -124,29 +144,25 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
124 return -1; 144 return -1;
125 } 145 }
126 146
147 event->header.type = PERF_RECORD_MMAP;
148 /*
149 * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c
150 */
151 event->header.misc = PERF_RECORD_MISC_USER;
152
127 while (1) { 153 while (1) {
128 char bf[BUFSIZ], *pbf = bf; 154 char bf[BUFSIZ], *pbf = bf;
129 event_t ev = {
130 .header = {
131 .type = PERF_RECORD_MMAP,
132 /*
133 * Just like the kernel, see __perf_event_mmap
134 * in kernel/perf_event.c
135 */
136 .misc = PERF_RECORD_MISC_USER,
137 },
138 };
139 int n; 155 int n;
140 size_t size; 156 size_t size;
141 if (fgets(bf, sizeof(bf), fp) == NULL) 157 if (fgets(bf, sizeof(bf), fp) == NULL)
142 break; 158 break;
143 159
144 /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ 160 /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
145 n = hex2u64(pbf, &ev.mmap.start); 161 n = hex2u64(pbf, &event->mmap.start);
146 if (n < 0) 162 if (n < 0)
147 continue; 163 continue;
148 pbf += n + 1; 164 pbf += n + 1;
149 n = hex2u64(pbf, &ev.mmap.len); 165 n = hex2u64(pbf, &event->mmap.len);
150 if (n < 0) 166 if (n < 0)
151 continue; 167 continue;
152 pbf += n + 3; 168 pbf += n + 3;
@@ -161,19 +177,21 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
161 continue; 177 continue;
162 178
163 pbf += 3; 179 pbf += 3;
164 n = hex2u64(pbf, &ev.mmap.pgoff); 180 n = hex2u64(pbf, &event->mmap.pgoff);
165 181
166 size = strlen(execname); 182 size = strlen(execname);
167 execname[size - 1] = '\0'; /* Remove \n */ 183 execname[size - 1] = '\0'; /* Remove \n */
168 memcpy(ev.mmap.filename, execname, size); 184 memcpy(event->mmap.filename, execname, size);
169 size = ALIGN(size, sizeof(u64)); 185 size = ALIGN(size, sizeof(u64));
170 ev.mmap.len -= ev.mmap.start; 186 event->mmap.len -= event->mmap.start;
171 ev.mmap.header.size = (sizeof(ev.mmap) - 187 event->mmap.header.size = (sizeof(event->mmap) -
172 (sizeof(ev.mmap.filename) - size)); 188 (sizeof(event->mmap.filename) - size));
173 ev.mmap.pid = tgid; 189 memset(event->mmap.filename + size, 0, session->id_hdr_size);
174 ev.mmap.tid = pid; 190 event->mmap.header.size += session->id_hdr_size;
175 191 event->mmap.pid = tgid;
176 process(&ev, session); 192 event->mmap.tid = pid;
193
194 process(event, &synth_sample, session);
177 } 195 }
178 } 196 }
179 197
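
Annotation: the mmap synthesis above walks /proc/<pid>/maps; hex2u64() returns the number of characters it consumed, which is why the parser bumps pbf past the separators by n + 1 and n + 3. A standalone sscanf-based sketch of the same line format (an illustration, not perf's parser; a real caller should bound the final %s):

	#include <inttypes.h>
	#include <limits.h>
	#include <stdio.h>

	/* Parse one line such as:
	 *   00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat
	 * Returns 0 for an executable mapping with a path, -1 otherwise. */
	static int parse_maps_line(const char *line, uint64_t *start,
				   uint64_t *len, uint64_t *pgoff,
				   char path[PATH_MAX])
	{
		uint64_t end;
		char perms[5];

		if (sscanf(line, "%" SCNx64 "-%" SCNx64 " %4s %" SCNx64
			   " %*s %*s %s", start, &end, perms, pgoff, path) < 5)
			return -1;
		*len = end - *start;		 /* ev.mmap.len is end - start */
		return perms[2] == 'x' ? 0 : -1; /* keep only executable maps */
	}
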
@@ -187,20 +205,27 @@ int event__synthesize_modules(event__handler_t process,
187{ 205{
188 struct rb_node *nd; 206 struct rb_node *nd;
189 struct map_groups *kmaps = &machine->kmaps; 207 struct map_groups *kmaps = &machine->kmaps;
190 u16 misc; 208 event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size);
209
210 if (event == NULL) {
211 pr_debug("Not enough memory synthesizing mmap event "
212 "for kernel modules\n");
213 return -1;
214 }
215
216 event->header.type = PERF_RECORD_MMAP;
191 217
192 /* 218 /*
193 * kernel uses 0 for user space maps, see kernel/perf_event.c 219 * kernel uses 0 for user space maps, see kernel/perf_event.c
194 * __perf_event_mmap 220 * __perf_event_mmap
195 */ 221 */
196 if (machine__is_host(machine)) 222 if (machine__is_host(machine))
197 misc = PERF_RECORD_MISC_KERNEL; 223 event->header.misc = PERF_RECORD_MISC_KERNEL;
198 else 224 else
199 misc = PERF_RECORD_MISC_GUEST_KERNEL; 225 event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
200 226
201 for (nd = rb_first(&kmaps->maps[MAP__FUNCTION]); 227 for (nd = rb_first(&kmaps->maps[MAP__FUNCTION]);
202 nd; nd = rb_next(nd)) { 228 nd; nd = rb_next(nd)) {
203 event_t ev;
204 size_t size; 229 size_t size;
205 struct map *pos = rb_entry(nd, struct map, rb_node); 230 struct map *pos = rb_entry(nd, struct map, rb_node);
206 231
@@ -208,39 +233,78 @@ int event__synthesize_modules(event__handler_t process,
208 continue; 233 continue;
209 234
210 size = ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); 235 size = ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
211 memset(&ev, 0, sizeof(ev)); 236 event->mmap.header.type = PERF_RECORD_MMAP;
212 ev.mmap.header.misc = misc; 237 event->mmap.header.size = (sizeof(event->mmap) -
213 ev.mmap.header.type = PERF_RECORD_MMAP; 238 (sizeof(event->mmap.filename) - size));
214 ev.mmap.header.size = (sizeof(ev.mmap) - 239 memset(event->mmap.filename + size, 0, session->id_hdr_size);
215 (sizeof(ev.mmap.filename) - size)); 240 event->mmap.header.size += session->id_hdr_size;
216 ev.mmap.start = pos->start; 241 event->mmap.start = pos->start;
217 ev.mmap.len = pos->end - pos->start; 242 event->mmap.len = pos->end - pos->start;
218 ev.mmap.pid = machine->pid; 243 event->mmap.pid = machine->pid;
219 244
220 memcpy(ev.mmap.filename, pos->dso->long_name, 245 memcpy(event->mmap.filename, pos->dso->long_name,
221 pos->dso->long_name_len + 1); 246 pos->dso->long_name_len + 1);
222 process(&ev, session); 247 process(event, &synth_sample, session);
223 } 248 }
224 249
250 free(event);
225 return 0; 251 return 0;
226} 252}
227 253
228int event__synthesize_thread(pid_t pid, event__handler_t process, 254static int __event__synthesize_thread(event_t *comm_event, event_t *mmap_event,
229 struct perf_session *session) 255 pid_t pid, event__handler_t process,
256 struct perf_session *session)
230{ 257{
231 pid_t tgid = event__synthesize_comm(pid, 1, process, session); 258 pid_t tgid = event__synthesize_comm(comm_event, pid, 1, process,
259 session);
232 if (tgid == -1) 260 if (tgid == -1)
233 return -1; 261 return -1;
234 return event__synthesize_mmap_events(pid, tgid, process, session); 262 return event__synthesize_mmap_events(mmap_event, pid, tgid,
263 process, session);
264}
265
266int event__synthesize_thread(pid_t pid, event__handler_t process,
267 struct perf_session *session)
268{
269 event_t *comm_event, *mmap_event;
270 int err = -1;
271
272 comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size);
273 if (comm_event == NULL)
274 goto out;
275
276 mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size);
277 if (mmap_event == NULL)
278 goto out_free_comm;
279
280 err = __event__synthesize_thread(comm_event, mmap_event, pid,
281 process, session);
282 free(mmap_event);
283out_free_comm:
284 free(comm_event);
285out:
286 return err;
235} 287}
236 288
237void event__synthesize_threads(event__handler_t process, 289int event__synthesize_threads(event__handler_t process,
238 struct perf_session *session) 290 struct perf_session *session)
239{ 291{
240 DIR *proc; 292 DIR *proc;
241 struct dirent dirent, *next; 293 struct dirent dirent, *next;
294 event_t *comm_event, *mmap_event;
295 int err = -1;
296
297 comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size);
298 if (comm_event == NULL)
299 goto out;
300
301 mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size);
302 if (mmap_event == NULL)
303 goto out_free_comm;
242 304
243 proc = opendir("/proc"); 305 proc = opendir("/proc");
306 if (proc == NULL)
307 goto out_free_mmap;
244 308
245 while (!readdir_r(proc, &dirent, &next) && next) { 309 while (!readdir_r(proc, &dirent, &next) && next) {
246 char *end; 310 char *end;
@@ -249,10 +313,18 @@ void event__synthesize_threads(event__handler_t process,
249 if (*end) /* only interested in proper numerical dirents */ 313 if (*end) /* only interested in proper numerical dirents */
250 continue; 314 continue;
251 315
252 event__synthesize_thread(pid, process, session); 316 __event__synthesize_thread(comm_event, mmap_event, pid,
317 process, session);
253 } 318 }
254 319
255 closedir(proc); 320 closedir(proc);
321 err = 0;
322out_free_mmap:
323 free(mmap_event);
324out_free_comm:
325 free(comm_event);
326out:
327 return err;
256} 328}
257 329
258struct process_symbol_args { 330struct process_symbol_args {
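
Annotation: event__synthesize_threads() now allocates the comm and mmap event buffers once, sized for the extra sample-id trailer, reuses them for every /proc entry, and returns an error code instead of void. The cleanup follows the usual goto-unwind ladder; reduced to its essentials:

	#include <stdlib.h>

	/* Illustrative reduction of the error-unwind pattern used above:
	 * each resource adds one label, failures free in reverse order. */
	static int with_two_buffers(size_t comm_sz, size_t mmap_sz)
	{
		void *comm_buf, *mmap_buf;
		int err = -1;

		comm_buf = malloc(comm_sz);
		if (comm_buf == NULL)
			goto out;
		mmap_buf = malloc(mmap_sz);
		if (mmap_buf == NULL)
			goto out_free_comm;

		err = 0;			/* real work happens here */

		free(mmap_buf);
	out_free_comm:
		free(comm_buf);
	out:
		return err;
	}
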
@@ -260,7 +332,8 @@ struct process_symbol_args {
260 u64 start; 332 u64 start;
261}; 333};
262 334
263static int find_symbol_cb(void *arg, const char *name, char type, u64 start) 335static int find_symbol_cb(void *arg, const char *name, char type,
336 u64 start, u64 end __used)
264{ 337{
265 struct process_symbol_args *args = arg; 338 struct process_symbol_args *args = arg;
266 339
@@ -286,18 +359,20 @@ int event__synthesize_kernel_mmap(event__handler_t process,
286 char path[PATH_MAX]; 359 char path[PATH_MAX];
287 char name_buff[PATH_MAX]; 360 char name_buff[PATH_MAX];
288 struct map *map; 361 struct map *map;
289 362 int err;
290 event_t ev = {
291 .header = {
292 .type = PERF_RECORD_MMAP,
293 },
294 };
295 /* 363 /*
296 * We should get this from /sys/kernel/sections/.text, but till that is 364 * We should get this from /sys/kernel/sections/.text, but till that is
297 * available use this, and after it is use this as a fallback for older 365 * available use this, and after it is use this as a fallback for older
298 * kernels. 366 * kernels.
299 */ 367 */
300 struct process_symbol_args args = { .name = symbol_name, }; 368 struct process_symbol_args args = { .name = symbol_name, };
369 event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size);
370
371 if (event == NULL) {
372 pr_debug("Not enough memory synthesizing mmap event "
373 "for kernel modules\n");
374 return -1;
375 }
301 376
302 mmap_name = machine__mmap_name(machine, name_buff, sizeof(name_buff)); 377 mmap_name = machine__mmap_name(machine, name_buff, sizeof(name_buff));
303 if (machine__is_host(machine)) { 378 if (machine__is_host(machine)) {
@@ -305,10 +380,10 @@ int event__synthesize_kernel_mmap(event__handler_t process,
305 * kernel uses PERF_RECORD_MISC_USER for user space maps, 380 * kernel uses PERF_RECORD_MISC_USER for user space maps,
306 * see kernel/perf_event.c __perf_event_mmap 381 * see kernel/perf_event.c __perf_event_mmap
307 */ 382 */
308 ev.header.misc = PERF_RECORD_MISC_KERNEL; 383 event->header.misc = PERF_RECORD_MISC_KERNEL;
309 filename = "/proc/kallsyms"; 384 filename = "/proc/kallsyms";
310 } else { 385 } else {
311 ev.header.misc = PERF_RECORD_MISC_GUEST_KERNEL; 386 event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
312 if (machine__is_default_guest(machine)) 387 if (machine__is_default_guest(machine))
313 filename = (char *) symbol_conf.default_guest_kallsyms; 388 filename = (char *) symbol_conf.default_guest_kallsyms;
314 else { 389 else {
@@ -321,17 +396,21 @@ int event__synthesize_kernel_mmap(event__handler_t process,
321 return -ENOENT; 396 return -ENOENT;
322 397
323 map = machine->vmlinux_maps[MAP__FUNCTION]; 398 map = machine->vmlinux_maps[MAP__FUNCTION];
324 size = snprintf(ev.mmap.filename, sizeof(ev.mmap.filename), 399 size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
325 "%s%s", mmap_name, symbol_name) + 1; 400 "%s%s", mmap_name, symbol_name) + 1;
326 size = ALIGN(size, sizeof(u64)); 401 size = ALIGN(size, sizeof(u64));
327 ev.mmap.header.size = (sizeof(ev.mmap) - 402 event->mmap.header.type = PERF_RECORD_MMAP;
328 (sizeof(ev.mmap.filename) - size)); 403 event->mmap.header.size = (sizeof(event->mmap) -
329 ev.mmap.pgoff = args.start; 404 (sizeof(event->mmap.filename) - size) + session->id_hdr_size);
330 ev.mmap.start = map->start; 405 event->mmap.pgoff = args.start;
331 ev.mmap.len = map->end - ev.mmap.start; 406 event->mmap.start = map->start;
332 ev.mmap.pid = machine->pid; 407 event->mmap.len = map->end - event->mmap.start;
333 408 event->mmap.pid = machine->pid;
334 return process(&ev, session); 409
410 err = process(event, &synth_sample, session);
411 free(event);
412
413 return err;
335} 414}
336 415
337static void thread__comm_adjust(struct thread *self, struct hists *hists) 416static void thread__comm_adjust(struct thread *self, struct hists *hists)
@@ -361,7 +440,8 @@ static int thread__set_comm_adjust(struct thread *self, const char *comm,
361 return 0; 440 return 0;
362} 441}
363 442
364int event__process_comm(event_t *self, struct perf_session *session) 443int event__process_comm(event_t *self, struct sample_data *sample __used,
444 struct perf_session *session)
365{ 445{
366 struct thread *thread = perf_session__findnew(session, self->comm.tid); 446 struct thread *thread = perf_session__findnew(session, self->comm.tid);
367 447
@@ -376,7 +456,8 @@ int event__process_comm(event_t *self, struct perf_session *session)
376 return 0; 456 return 0;
377} 457}
378 458
379int event__process_lost(event_t *self, struct perf_session *session) 459int event__process_lost(event_t *self, struct sample_data *sample __used,
460 struct perf_session *session)
380{ 461{
381 dump_printf(": id:%Ld: lost:%Ld\n", self->lost.id, self->lost.lost); 462 dump_printf(": id:%Ld: lost:%Ld\n", self->lost.id, self->lost.lost);
382 session->hists.stats.total_lost += self->lost.lost; 463 session->hists.stats.total_lost += self->lost.lost;
@@ -392,7 +473,7 @@ static void event_set_kernel_mmap_len(struct map **maps, event_t *self)
392 * a zero sized synthesized MMAP event for the kernel. 473 * a zero sized synthesized MMAP event for the kernel.
393 */ 474 */
394 if (maps[MAP__FUNCTION]->end == 0) 475 if (maps[MAP__FUNCTION]->end == 0)
395 maps[MAP__FUNCTION]->end = ~0UL; 476 maps[MAP__FUNCTION]->end = ~0ULL;
396} 477}
397 478
398static int event__process_kernel_mmap(event_t *self, 479static int event__process_kernel_mmap(event_t *self,
@@ -485,7 +566,8 @@ out_problem:
485 return -1; 566 return -1;
486} 567}
487 568
488int event__process_mmap(event_t *self, struct perf_session *session) 569int event__process_mmap(event_t *self, struct sample_data *sample __used,
570 struct perf_session *session)
489{ 571{
490 struct machine *machine; 572 struct machine *machine;
491 struct thread *thread; 573 struct thread *thread;
@@ -526,7 +608,8 @@ out_problem:
526 return 0; 608 return 0;
527} 609}
528 610
529int event__process_task(event_t *self, struct perf_session *session) 611int event__process_task(event_t *self, struct sample_data *sample __used,
612 struct perf_session *session)
530{ 613{
531 struct thread *thread = perf_session__findnew(session, self->fork.tid); 614 struct thread *thread = perf_session__findnew(session, self->fork.tid);
532 struct thread *parent = perf_session__findnew(session, self->fork.ptid); 615 struct thread *parent = perf_session__findnew(session, self->fork.ptid);
@@ -548,18 +631,19 @@ int event__process_task(event_t *self, struct perf_session *session)
548 return 0; 631 return 0;
549} 632}
550 633
551int event__process(event_t *event, struct perf_session *session) 634int event__process(event_t *event, struct sample_data *sample,
635 struct perf_session *session)
552{ 636{
553 switch (event->header.type) { 637 switch (event->header.type) {
554 case PERF_RECORD_COMM: 638 case PERF_RECORD_COMM:
555 event__process_comm(event, session); 639 event__process_comm(event, sample, session);
556 break; 640 break;
557 case PERF_RECORD_MMAP: 641 case PERF_RECORD_MMAP:
558 event__process_mmap(event, session); 642 event__process_mmap(event, sample, session);
559 break; 643 break;
560 case PERF_RECORD_FORK: 644 case PERF_RECORD_FORK:
561 case PERF_RECORD_EXIT: 645 case PERF_RECORD_EXIT:
562 event__process_task(event, session); 646 event__process_task(event, sample, session);
563 break; 647 break;
564 default: 648 default:
565 break; 649 break;
@@ -674,32 +758,8 @@ int event__preprocess_sample(const event_t *self, struct perf_session *session,
674 symbol_filter_t filter) 758 symbol_filter_t filter)
675{ 759{
676 u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 760 u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
677 struct thread *thread; 761 struct thread *thread = perf_session__findnew(session, self->ip.pid);
678
679 event__parse_sample(self, session->sample_type, data);
680
681 dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld cpu:%d\n",
682 self->header.misc, data->pid, data->tid, data->ip,
683 data->period, data->cpu);
684
685 if (session->sample_type & PERF_SAMPLE_CALLCHAIN) {
686 unsigned int i;
687
688 dump_printf("... chain: nr:%Lu\n", data->callchain->nr);
689 762
690 if (!ip_callchain__valid(data->callchain, self)) {
691 pr_debug("call-chain problem with event, "
692 "skipping it.\n");
693 goto out_filtered;
694 }
695
696 if (dump_trace) {
697 for (i = 0; i < data->callchain->nr; i++)
698 dump_printf("..... %2d: %016Lx\n",
699 i, data->callchain->ips[i]);
700 }
701 }
702 thread = perf_session__findnew(session, self->ip.pid);
703 if (thread == NULL) 763 if (thread == NULL)
704 return -1; 764 return -1;
705 765
@@ -766,9 +826,65 @@ out_filtered:
766 return 0; 826 return 0;
767} 827}
768 828
769int event__parse_sample(const event_t *event, u64 type, struct sample_data *data) 829static int event__parse_id_sample(const event_t *event,
830 struct perf_session *session,
831 struct sample_data *sample)
770{ 832{
771 const u64 *array = event->sample.array; 833 const u64 *array;
834 u64 type;
835
836 sample->cpu = sample->pid = sample->tid = -1;
837 sample->stream_id = sample->id = sample->time = -1ULL;
838
839 if (!session->sample_id_all)
840 return 0;
841
842 array = event->sample.array;
843 array += ((event->header.size -
844 sizeof(event->header)) / sizeof(u64)) - 1;
845 type = session->sample_type;
846
847 if (type & PERF_SAMPLE_CPU) {
848 u32 *p = (u32 *)array;
849 sample->cpu = *p;
850 array--;
851 }
852
853 if (type & PERF_SAMPLE_STREAM_ID) {
854 sample->stream_id = *array;
855 array--;
856 }
857
858 if (type & PERF_SAMPLE_ID) {
859 sample->id = *array;
860 array--;
861 }
862
863 if (type & PERF_SAMPLE_TIME) {
864 sample->time = *array;
865 array--;
866 }
867
868 if (type & PERF_SAMPLE_TID) {
869 u32 *p = (u32 *)array;
870 sample->pid = p[0];
871 sample->tid = p[1];
872 }
873
874 return 0;
875}
876
877int event__parse_sample(const event_t *event, struct perf_session *session,
878 struct sample_data *data)
879{
880 const u64 *array;
881 u64 type;
882
883 if (event->header.type != PERF_RECORD_SAMPLE)
884 return event__parse_id_sample(event, session, data);
885
886 array = event->sample.array;
887 type = session->sample_type;
772 888
773 if (type & PERF_SAMPLE_IP) { 889 if (type & PERF_SAMPLE_IP) {
774 data->ip = event->ip.ip; 890 data->ip = event->ip.ip;
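
Annotation: event__parse_id_sample() recovers the optional sample-id trailer that the kernel appends to non-sample events when attr.sample_id_all is set. The fields are written in sample order, so reading from the last u64 of the record yields them in reverse. Unrolled as a fragment for the common sample_type of PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_CPU (a worked example, not code from the patch):

	/* Trailer layout in file order:
	 *   { u32 pid, tid; u64 time; u32 cpu, res; }
	 * Walk backwards from the final u64 of the event. */
	const u64 *array = event->sample.array;
	const u32 *p;

	array += ((event->header.size - sizeof(event->header)) / sizeof(u64)) - 1;

	p = (const u32 *)array;		/* PERF_SAMPLE_CPU */
	sample->cpu = *p;
	array--;
	sample->time = *array;		/* PERF_SAMPLE_TIME */
	array--;
	p = (const u32 *)array;		/* PERF_SAMPLE_TID */
	sample->pid = p[0];
	sample->tid = p[1];
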
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 8e790dae7026..2b7e91902f10 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -85,6 +85,7 @@ struct build_id_event {
85}; 85};
86 86
87enum perf_user_event_type { /* above any possible kernel type */ 87enum perf_user_event_type { /* above any possible kernel type */
88 PERF_RECORD_USER_TYPE_START = 64,
88 PERF_RECORD_HEADER_ATTR = 64, 89 PERF_RECORD_HEADER_ATTR = 64,
89 PERF_RECORD_HEADER_EVENT_TYPE = 65, 90 PERF_RECORD_HEADER_EVENT_TYPE = 65,
90 PERF_RECORD_HEADER_TRACING_DATA = 66, 91 PERF_RECORD_HEADER_TRACING_DATA = 66,
@@ -135,12 +136,15 @@ void event__print_totals(void);
135 136
136struct perf_session; 137struct perf_session;
137 138
138typedef int (*event__handler_t)(event_t *event, struct perf_session *session); 139typedef int (*event__handler_synth_t)(event_t *event,
140 struct perf_session *session);
141typedef int (*event__handler_t)(event_t *event, struct sample_data *sample,
142 struct perf_session *session);
139 143
140int event__synthesize_thread(pid_t pid, event__handler_t process, 144int event__synthesize_thread(pid_t pid, event__handler_t process,
141 struct perf_session *session); 145 struct perf_session *session);
142void event__synthesize_threads(event__handler_t process, 146int event__synthesize_threads(event__handler_t process,
143 struct perf_session *session); 147 struct perf_session *session);
144int event__synthesize_kernel_mmap(event__handler_t process, 148int event__synthesize_kernel_mmap(event__handler_t process,
145 struct perf_session *session, 149 struct perf_session *session,
146 struct machine *machine, 150 struct machine *machine,
@@ -150,18 +154,24 @@ int event__synthesize_modules(event__handler_t process,
150 struct perf_session *session, 154 struct perf_session *session,
151 struct machine *machine); 155 struct machine *machine);
152 156
153int event__process_comm(event_t *self, struct perf_session *session); 157int event__process_comm(event_t *self, struct sample_data *sample,
154int event__process_lost(event_t *self, struct perf_session *session); 158 struct perf_session *session);
155int event__process_mmap(event_t *self, struct perf_session *session); 159int event__process_lost(event_t *self, struct sample_data *sample,
156int event__process_task(event_t *self, struct perf_session *session); 160 struct perf_session *session);
157int event__process(event_t *event, struct perf_session *session); 161int event__process_mmap(event_t *self, struct sample_data *sample,
162 struct perf_session *session);
163int event__process_task(event_t *self, struct sample_data *sample,
164 struct perf_session *session);
165int event__process(event_t *event, struct sample_data *sample,
166 struct perf_session *session);
158 167
159struct addr_location; 168struct addr_location;
160int event__preprocess_sample(const event_t *self, struct perf_session *session, 169int event__preprocess_sample(const event_t *self, struct perf_session *session,
161 struct addr_location *al, struct sample_data *data, 170 struct addr_location *al, struct sample_data *data,
162 symbol_filter_t filter); 171 symbol_filter_t filter);
163int event__parse_sample(const event_t *event, u64 type, struct sample_data *data); 172int event__parse_sample(const event_t *event, struct perf_session *session,
173 struct sample_data *sample);
164 174
165extern const char *event__name[]; 175const char *event__get_event_name(unsigned int id);
166 176
167#endif /* __PERF_RECORD_H */ 177#endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 7cba0551a565..4b8c8397a947 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -152,6 +152,11 @@ void perf_header__set_feat(struct perf_header *self, int feat)
152 set_bit(feat, self->adds_features); 152 set_bit(feat, self->adds_features);
153} 153}
154 154
155void perf_header__clear_feat(struct perf_header *self, int feat)
156{
157 clear_bit(feat, self->adds_features);
158}
159
155bool perf_header__has_feat(const struct perf_header *self, int feat) 160bool perf_header__has_feat(const struct perf_header *self, int feat)
156{ 161{
157 return test_bit(feat, self->adds_features); 162 return test_bit(feat, self->adds_features);
@@ -433,8 +438,10 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
433 int idx = 0, err; 438 int idx = 0, err;
434 439
435 session = container_of(self, struct perf_session, header); 440 session = container_of(self, struct perf_session, header);
436 if (perf_session__read_build_ids(session, true)) 441
437 perf_header__set_feat(self, HEADER_BUILD_ID); 442 if (perf_header__has_feat(self, HEADER_BUILD_ID) &&
443 !perf_session__read_build_ids(session, true))
444 perf_header__clear_feat(self, HEADER_BUILD_ID);
438 445
439 nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); 446 nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS);
440 if (!nr_sections) 447 if (!nr_sections)
@@ -941,6 +948,24 @@ u64 perf_header__sample_type(struct perf_header *header)
941 return type; 948 return type;
942} 949}
943 950
951bool perf_header__sample_id_all(const struct perf_header *header)
952{
953 bool value = false, first = true;
954 int i;
955
956 for (i = 0; i < header->attrs; i++) {
957 struct perf_header_attr *attr = header->attr[i];
958
959 if (first) {
960 value = attr->attr.sample_id_all;
961 first = false;
962 } else if (value != attr->attr.sample_id_all)
963 die("non matching sample_id_all");
964 }
965
966 return value;
967}
968
944struct perf_event_attr * 969struct perf_event_attr *
945perf_header__find_attr(u64 id, struct perf_header *header) 970perf_header__find_attr(u64 id, struct perf_header *header)
946{ 971{
@@ -987,21 +1012,23 @@ int event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
987 1012
988 ev = malloc(size); 1013 ev = malloc(size);
989 1014
1015 if (ev == NULL)
1016 return -ENOMEM;
1017
990 ev->attr.attr = *attr; 1018 ev->attr.attr = *attr;
991 memcpy(ev->attr.id, id, ids * sizeof(u64)); 1019 memcpy(ev->attr.id, id, ids * sizeof(u64));
992 1020
993 ev->attr.header.type = PERF_RECORD_HEADER_ATTR; 1021 ev->attr.header.type = PERF_RECORD_HEADER_ATTR;
994 ev->attr.header.size = size; 1022 ev->attr.header.size = size;
995 1023
996 err = process(ev, session); 1024 err = process(ev, NULL, session);
997 1025
998 free(ev); 1026 free(ev);
999 1027
1000 return err; 1028 return err;
1001} 1029}
1002 1030
1003int event__synthesize_attrs(struct perf_header *self, 1031int event__synthesize_attrs(struct perf_header *self, event__handler_t process,
1004 event__handler_t process,
1005 struct perf_session *session) 1032 struct perf_session *session)
1006{ 1033{
1007 struct perf_header_attr *attr; 1034 struct perf_header_attr *attr;
@@ -1071,7 +1098,7 @@ int event__synthesize_event_type(u64 event_id, char *name,
1071 ev.event_type.header.size = sizeof(ev.event_type) - 1098 ev.event_type.header.size = sizeof(ev.event_type) -
1072 (sizeof(ev.event_type.event_type.name) - size); 1099 (sizeof(ev.event_type.event_type.name) - size);
1073 1100
1074 err = process(&ev, session); 1101 err = process(&ev, NULL, session);
1075 1102
1076 return err; 1103 return err;
1077} 1104}
@@ -1126,7 +1153,7 @@ int event__synthesize_tracing_data(int fd, struct perf_event_attr *pattrs,
1126 ev.tracing_data.header.size = sizeof(ev.tracing_data); 1153 ev.tracing_data.header.size = sizeof(ev.tracing_data);
1127 ev.tracing_data.size = aligned_size; 1154 ev.tracing_data.size = aligned_size;
1128 1155
1129 process(&ev, session); 1156 process(&ev, NULL, session);
1130 1157
1131 err = read_tracing_data(fd, pattrs, nb_events); 1158 err = read_tracing_data(fd, pattrs, nb_events);
1132 write_padded(fd, NULL, 0, padding); 1159 write_padded(fd, NULL, 0, padding);
@@ -1186,7 +1213,7 @@ int event__synthesize_build_id(struct dso *pos, u16 misc,
1186 ev.build_id.header.size = sizeof(ev.build_id) + len; 1213 ev.build_id.header.size = sizeof(ev.build_id) + len;
1187 memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len); 1214 memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len);
1188 1215
1189 err = process(&ev, session); 1216 err = process(&ev, NULL, session);
1190 1217
1191 return err; 1218 return err;
1192} 1219}
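
Annotation: perf_header__sample_id_all() refuses to guess when counters disagree. sample_id_all must be uniform across all attrs, or the backwards trailer walk shown in event.c would mis-parse some events, hence the die() on a mismatch. The check is the generic all-must-agree scan; a standalone sketch:

	#include <stdbool.h>
	#include <stddef.h>

	/* Returns the shared value (0 or 1) when all flags agree, -1 on a
	 * mismatch (perf_header__sample_id_all() die()s instead). */
	static int flags_agree(const bool *flags, size_t n)
	{
		size_t i;

		for (i = 1; i < n; i++)
			if (flags[i] != flags[0])
				return -1;
		return n ? (int)flags[0] : 0;
	}
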
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 402ac2454cf8..6335965e1f93 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -81,9 +81,11 @@ void perf_header_attr__delete(struct perf_header_attr *self);
81int perf_header_attr__add_id(struct perf_header_attr *self, u64 id); 81int perf_header_attr__add_id(struct perf_header_attr *self, u64 id);
82 82
83u64 perf_header__sample_type(struct perf_header *header); 83u64 perf_header__sample_type(struct perf_header *header);
84bool perf_header__sample_id_all(const struct perf_header *header);
84struct perf_event_attr * 85struct perf_event_attr *
85perf_header__find_attr(u64 id, struct perf_header *header); 86perf_header__find_attr(u64 id, struct perf_header *header);
86void perf_header__set_feat(struct perf_header *self, int feat); 87void perf_header__set_feat(struct perf_header *self, int feat);
88void perf_header__clear_feat(struct perf_header *self, int feat);
87bool perf_header__has_feat(const struct perf_header *self, int feat); 89bool perf_header__has_feat(const struct perf_header *self, int feat);
88 90
89int perf_header__process_sections(struct perf_header *self, int fd, 91int perf_header__process_sections(struct perf_header *self, int fd,
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 2022e8740994..d5036700a435 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1092,6 +1092,12 @@ int hist_entry__annotate(struct hist_entry *self, struct list_head *head,
1092 FILE *file; 1092 FILE *file;
1093 int err = 0; 1093 int err = 0;
1094 u64 len; 1094 u64 len;
1095 char symfs_filename[PATH_MAX];
1096
1097 if (filename) {
1098 snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
1099 symbol_conf.symfs, filename);
1100 }
1095 1101
1096 if (filename == NULL) { 1102 if (filename == NULL) {
1097 if (dso->has_build_id) { 1103 if (dso->has_build_id) {
@@ -1100,9 +1106,9 @@ int hist_entry__annotate(struct hist_entry *self, struct list_head *head,
1100 return -ENOMEM; 1106 return -ENOMEM;
1101 } 1107 }
1102 goto fallback; 1108 goto fallback;
1103 } else if (readlink(filename, command, sizeof(command)) < 0 || 1109 } else if (readlink(symfs_filename, command, sizeof(command)) < 0 ||
1104 strstr(command, "[kernel.kallsyms]") || 1110 strstr(command, "[kernel.kallsyms]") ||
1105 access(filename, R_OK)) { 1111 access(symfs_filename, R_OK)) {
1106 free(filename); 1112 free(filename);
1107fallback: 1113fallback:
1108 /* 1114 /*
@@ -1111,6 +1117,8 @@ fallback:
1111 * DSO is the same as when 'perf record' ran. 1117 * DSO is the same as when 'perf record' ran.
1112 */ 1118 */
1113 filename = dso->long_name; 1119 filename = dso->long_name;
1120 snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
1121 symbol_conf.symfs, filename);
1114 free_filename = false; 1122 free_filename = false;
1115 } 1123 }
1116 1124
@@ -1137,7 +1145,7 @@ fallback:
1137 "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS -C %s|grep -v %s|expand", 1145 "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS -C %s|grep -v %s|expand",
1138 map__rip_2objdump(map, sym->start), 1146 map__rip_2objdump(map, sym->start),
1139 map__rip_2objdump(map, sym->end), 1147 map__rip_2objdump(map, sym->end),
1140 filename, filename); 1148 symfs_filename, filename);
1141 1149
1142 pr_debug("Executing: %s\n", command); 1150 pr_debug("Executing: %s\n", command);
1143 1151
@@ -1168,10 +1176,13 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp)
1168 size_t ret = 0; 1176 size_t ret = 0;
1169 1177
1170 for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) { 1178 for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) {
1171 if (!event__name[i]) 1179 const char *name = event__get_event_name(i);
1180
1181 if (!strcmp(name, "UNKNOWN"))
1172 continue; 1182 continue;
1173 ret += fprintf(fp, "%10s events: %10d\n", 1183
1174 event__name[i], self->stats.nr_events[i]); 1184 ret += fprintf(fp, "%16s events: %10d\n", name,
1185 self->stats.nr_events[i]);
1175 } 1186 }
1176 1187
1177 return ret; 1188 return ret;
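
Annotation: hist_entry__annotate() now resolves every on-disk DSO path through symbol_conf.symfs before the readlink(), access() and objdump steps, so annotation can run against a copied sysroot; with the default empty symfs the prefix degrades to the original path and existing setups are unaffected. A minimal sketch of the join:

	#include <stdio.h>

	/* Sketch of the symfs prefixing used above; symfs defaults to "". */
	static void symfs_path(char *buf, size_t sz, const char *symfs,
			       const char *filename)
	{
		snprintf(buf, sz, "%s%s", symfs, filename);
	}
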
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 587d375d3430..ee789856a8c9 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -52,8 +52,10 @@ struct sym_priv {
52struct events_stats { 52struct events_stats {
53 u64 total_period; 53 u64 total_period;
54 u64 total_lost; 54 u64 total_lost;
55 u64 total_invalid_chains;
55 u32 nr_events[PERF_RECORD_HEADER_MAX]; 56 u32 nr_events[PERF_RECORD_HEADER_MAX];
56 u32 nr_unknown_events; 57 u32 nr_unknown_events;
58 u32 nr_invalid_chains;
57}; 59};
58 60
59enum hist_column { 61enum hist_column {
diff --git a/tools/perf/util/include/asm/cpufeature.h b/tools/perf/util/include/asm/cpufeature.h
new file mode 100644
index 000000000000..acffd5e4d1d4
--- /dev/null
+++ b/tools/perf/util/include/asm/cpufeature.h
@@ -0,0 +1,9 @@
1
2#ifndef PERF_CPUFEATURE_H
3#define PERF_CPUFEATURE_H
4
5/* cpufeature.h ... dummy header file for including arch/x86/lib/memcpy_64.S */
6
7#define X86_FEATURE_REP_GOOD 0
8
9#endif /* PERF_CPUFEATURE_H */
diff --git a/tools/perf/util/include/asm/dwarf2.h b/tools/perf/util/include/asm/dwarf2.h
new file mode 100644
index 000000000000..bb4198e7837a
--- /dev/null
+++ b/tools/perf/util/include/asm/dwarf2.h
@@ -0,0 +1,11 @@
1
2#ifndef PERF_DWARF2_H
3#define PERF_DWARF2_H
4
5/* dwarf2.h ... dummy header file for including arch/x86/lib/memcpy_64.S */
6
7#define CFI_STARTPROC
8#define CFI_ENDPROC
9
10#endif /* PERF_DWARF2_H */
11
diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h
index bb4ac2e05385..8be0b968ca0b 100644
--- a/tools/perf/util/include/linux/bitops.h
+++ b/tools/perf/util/include/linux/bitops.h
@@ -13,6 +13,11 @@ static inline void set_bit(int nr, unsigned long *addr)
13 addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG); 13 addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
14} 14}
15 15
16static inline void clear_bit(int nr, unsigned long *addr)
17{
18 addr[nr / BITS_PER_LONG] &= ~(1UL << (nr % BITS_PER_LONG));
19}
20
16static __always_inline int test_bit(unsigned int nr, const unsigned long *addr) 21static __always_inline int test_bit(unsigned int nr, const unsigned long *addr)
17{ 22{
18 return ((1UL << (nr % BITS_PER_LONG)) & 23 return ((1UL << (nr % BITS_PER_LONG)) &
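
Annotation: clear_bit() is the exact complement of set_bit() over the same index math (word nr / BITS_PER_LONG, mask 1UL << (nr % BITS_PER_LONG)); it exists here to back the new perf_header__clear_feat() in header.c. A small usage sketch, assuming a 64-bit BITS_PER_LONG:

	#include <assert.h>

	static void feat_bits_demo(void)
	{
		unsigned long bits[2] = { 0, 0 };

		set_bit(3, bits);	/* bits[0] |=   1UL << 3          */
		set_bit(65, bits);	/* bits[1] |=   1UL << 1 (64-bit) */
		clear_bit(3, bits);	/* bits[0] &= ~(1UL << 3)         */

		assert(test_bit(65, bits) && !test_bit(3, bits));
	}
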
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h
new file mode 100644
index 000000000000..06387cffe125
--- /dev/null
+++ b/tools/perf/util/include/linux/linkage.h
@@ -0,0 +1,13 @@
1
2#ifndef PERF_LINUX_LINKAGE_H_
3#define PERF_LINUX_LINKAGE_H_
4
5/* linkage.h ... for including arch/x86/lib/memcpy_64.S */
6
7#define ENTRY(name) \
8 .globl name; \
9 name:
10
11#define ENDPROC(name)
12
13#endif /* PERF_LINUX_LINKAGE_H_ */
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4af5bd59cfd1..c305305a3884 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -434,7 +434,7 @@ parse_single_tracepoint_event(char *sys_name,
434 id = atoll(id_buf); 434 id = atoll(id_buf);
435 attr->config = id; 435 attr->config = id;
436 attr->type = PERF_TYPE_TRACEPOINT; 436 attr->type = PERF_TYPE_TRACEPOINT;
437 *strp = evt_name + evt_length; 437 *strp += strlen(sys_name) + evt_length + 1; /* + 1 for the ':' */
438 438
439 attr->sample_type |= PERF_SAMPLE_RAW; 439 attr->sample_type |= PERF_SAMPLE_RAW;
440 attr->sample_type |= PERF_SAMPLE_TIME; 440 attr->sample_type |= PERF_SAMPLE_TIME;
@@ -495,7 +495,7 @@ static enum event_result parse_tracepoint_event(const char **strp,
495 struct perf_event_attr *attr) 495 struct perf_event_attr *attr)
496{ 496{
497 const char *evt_name; 497 const char *evt_name;
498 char *flags; 498 char *flags = NULL, *comma_loc;
499 char sys_name[MAX_EVENT_LENGTH]; 499 char sys_name[MAX_EVENT_LENGTH];
500 unsigned int sys_length, evt_length; 500 unsigned int sys_length, evt_length;
501 501
@@ -514,6 +514,11 @@ static enum event_result parse_tracepoint_event(const char **strp,
514 sys_name[sys_length] = '\0'; 514 sys_name[sys_length] = '\0';
515 evt_name = evt_name + 1; 515 evt_name = evt_name + 1;
516 516
517 comma_loc = strchr(evt_name, ',');
518 if (comma_loc) {
519 /* take the event name up to the comma */
520 evt_name = strndup(evt_name, comma_loc - evt_name);
521 }
517 flags = strchr(evt_name, ':'); 522 flags = strchr(evt_name, ':');
518 if (flags) { 523 if (flags) {
519 /* split it out: */ 524 /* split it out: */
@@ -524,9 +529,8 @@ static enum event_result parse_tracepoint_event(const char **strp,
524 evt_length = strlen(evt_name); 529 evt_length = strlen(evt_name);
525 if (evt_length >= MAX_EVENT_LENGTH) 530 if (evt_length >= MAX_EVENT_LENGTH)
526 return EVT_FAILED; 531 return EVT_FAILED;
527
528 if (strpbrk(evt_name, "*?")) { 532 if (strpbrk(evt_name, "*?")) {
529 *strp = evt_name + evt_length; 533 *strp += strlen(sys_name) + evt_length;
530 return parse_multiple_tracepoint_event(sys_name, evt_name, 534 return parse_multiple_tracepoint_event(sys_name, evt_name,
531 flags); 535 flags);
532 } else 536 } else
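
Annotation: the *strp updates are the subtle part of this hunk. With comma-separated event lists, evt_name can point into a strndup()ed copy, so advancing the caller's cursor must be computed from the lengths consumed in the original string, never from evt_name itself. A worked example of the arithmetic (the input string is illustrative):

	#include <assert.h>
	#include <string.h>

	static void advance_demo(void)
	{
		const char *strp = "sched:sched_switch,minor-faults";
		size_t sys_length = strlen("sched");	    /* 5 */
		size_t evt_length = strlen("sched_switch"); /* 12 */

		strp += sys_length + evt_length + 1;	/* + 1 for the ':' */
		assert(*strp == ',');	/* cursor sits on the list separator */
	}
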
diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h
index c7d72dce54b2..abc31a1dac1a 100644
--- a/tools/perf/util/parse-options.h
+++ b/tools/perf/util/parse-options.h
@@ -119,6 +119,10 @@ struct option {
119 { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG } 119 { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG }
120#define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \ 120#define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \
121 { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d, .flags = PARSE_OPT_LASTARG_DEFAULT } 121 { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d, .flags = PARSE_OPT_LASTARG_DEFAULT }
122#define OPT_CALLBACK_DEFAULT_NOOPT(s, l, v, a, h, f, d) \
123 { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l),\
124 .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d,\
125 .flags = PARSE_OPT_LASTARG_DEFAULT | PARSE_OPT_NOARG}
122 126
123/* parse_options() will filter out the processed options and leave the 127/* parse_options() will filter out the processed options and leave the
124 * non-option arguments in argv[]. 128 * non-option arguments in argv[].
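
Annotation: PARSE_OPT_NOARG combined with PARSE_OPT_LASTARG_DEFAULT gives an option that consumes no argv entry yet still hands its callback a default value. A hypothetical use of the new macro (option name, callback and default are illustrative only, not from this patch):

	#include <string.h>

	static int parse_mode(const struct option *opt __used,
			      const char *arg, int unset __used)
	{
		/* arg is the defval ("fp"): the option takes no argument */
		return strcmp(arg, "fp") == 0 ? 0 : -1;
	}

	static const struct option demo_options[] = {
		OPT_CALLBACK_DEFAULT_NOOPT('g', "call-graph", NULL, "mode",
					   "enable call-graph recording",
					   parse_mode, "fp"),
		OPT_END()
	};
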
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 61191c6cbe7a..128aaab0aeda 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -95,7 +95,7 @@ static int init_vmlinux(void)
95 goto out; 95 goto out;
96 96
97 if (machine__create_kernel_maps(&machine) < 0) { 97 if (machine__create_kernel_maps(&machine) < 0) {
98 pr_debug("machine__create_kernel_maps "); 98 pr_debug("machine__create_kernel_maps() failed.\n");
99 goto out; 99 goto out;
100 } 100 }
101out: 101out:
@@ -149,7 +149,8 @@ static int open_vmlinux(const char *module)
149{ 149{
150 const char *path = kernel_get_module_path(module); 150 const char *path = kernel_get_module_path(module);
151 if (!path) { 151 if (!path) {
152 pr_err("Failed to find path of %s module", module ?: "kernel"); 152 pr_err("Failed to find path of %s module.\n",
153 module ?: "kernel");
153 return -ENOENT; 154 return -ENOENT;
154 } 155 }
155 pr_debug("Try to open %s\n", path); 156 pr_debug("Try to open %s\n", path);
@@ -226,7 +227,7 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
226 pr_warning("Warning: No dwarf info found in the vmlinux - " 227 pr_warning("Warning: No dwarf info found in the vmlinux - "
227 "please rebuild kernel with CONFIG_DEBUG_INFO=y.\n"); 228 "please rebuild kernel with CONFIG_DEBUG_INFO=y.\n");
228 if (!need_dwarf) { 229 if (!need_dwarf) {
229 pr_debug("Trying to use symbols.\nn"); 230 pr_debug("Trying to use symbols.\n");
230 return 0; 231 return 0;
231 } 232 }
232 } 233 }
@@ -295,42 +296,49 @@ static int get_real_path(const char *raw_path, const char *comp_dir,
295#define LINEBUF_SIZE 256 296#define LINEBUF_SIZE 256
296#define NR_ADDITIONAL_LINES 2 297#define NR_ADDITIONAL_LINES 2
297 298
298static int show_one_line(FILE *fp, int l, bool skip, bool show_num) 299static int __show_one_line(FILE *fp, int l, bool skip, bool show_num)
299{ 300{
300 char buf[LINEBUF_SIZE]; 301 char buf[LINEBUF_SIZE];
301 const char *color = PERF_COLOR_BLUE; 302 const char *color = show_num ? "" : PERF_COLOR_BLUE;
303 const char *prefix = NULL;
302 304
303 if (fgets(buf, LINEBUF_SIZE, fp) == NULL) 305 do {
304 goto error;
305 if (!skip) {
306 if (show_num)
307 fprintf(stdout, "%7d %s", l, buf);
308 else
309 color_fprintf(stdout, color, " %s", buf);
310 }
311
312 while (strlen(buf) == LINEBUF_SIZE - 1 &&
313 buf[LINEBUF_SIZE - 2] != '\n') {
314 if (fgets(buf, LINEBUF_SIZE, fp) == NULL) 306 if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
315 goto error; 307 goto error;
316 if (!skip) { 308 if (skip)
317 if (show_num) 309 continue;
318 fprintf(stdout, "%s", buf); 310 if (!prefix) {
319 else 311 prefix = show_num ? "%7d " : " ";
320 color_fprintf(stdout, color, "%s", buf); 312 color_fprintf(stdout, color, prefix, l);
321 } 313 }
322 } 314 color_fprintf(stdout, color, "%s", buf);
323 315
324 return 0; 316 } while (strchr(buf, '\n') == NULL);
317
318 return 1;
325error: 319error:
326 if (feof(fp)) 320 if (ferror(fp)) {
327 pr_warning("Source file is shorter than expected.\n");
328 else
329 pr_warning("File read error: %s\n", strerror(errno)); 321 pr_warning("File read error: %s\n", strerror(errno));
322 return -1;
323 }
324 return 0;
325}
330 326
331 return -1; 327static int _show_one_line(FILE *fp, int l, bool skip, bool show_num)
328{
329 int rv = __show_one_line(fp, l, skip, show_num);
330 if (rv == 0) {
331 pr_warning("Source file is shorter than expected.\n");
332 rv = -1;
333 }
334 return rv;
332} 335}
333 336
337#define show_one_line_with_num(f,l) _show_one_line(f,l,false,true)
338#define show_one_line(f,l) _show_one_line(f,l,false,false)
339#define skip_one_line(f,l) _show_one_line(f,l,true,false)
340#define show_one_line_or_eof(f,l) __show_one_line(f,l,false,false)
341
334/* 342/*
335 * Show line-range always requires debuginfo to find source file and 343 * Show line-range always requires debuginfo to find source file and
336 * line number. 344 * line number.
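
Annotation: the split makes EOF policy explicit: __show_one_line() returns 1 on success, 0 on clean EOF and -1 on read error, while _show_one_line() promotes EOF to an error; the four macros then name the skip/show_num combinations so show_line_range() below reads as intent. A sketch of a caller honoring the same contract (the function is illustrative):

	/* Print lines [from, to], tolerating EOF only in the tail, in the
	 * spirit of show_line_range() below. */
	static int print_lines(FILE *fp, int from, int to)
	{
		int l = 1, ret;

		while (l < from)
			if (skip_one_line(fp, l++) < 0)
				return -1;
		while (l <= to) {
			ret = show_one_line_or_eof(fp, l++);
			if (ret <= 0)
				return ret;	/* 0: clean EOF, -1: error */
		}
		return 0;
	}
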
@@ -379,7 +387,7 @@ int show_line_range(struct line_range *lr, const char *module)
379 fprintf(stdout, "<%s:%d>\n", lr->function, 387 fprintf(stdout, "<%s:%d>\n", lr->function,
380 lr->start - lr->offset); 388 lr->start - lr->offset);
381 else 389 else
382 fprintf(stdout, "<%s:%d>\n", lr->file, lr->start); 390 fprintf(stdout, "<%s:%d>\n", lr->path, lr->start);
383 391
384 fp = fopen(lr->path, "r"); 392 fp = fopen(lr->path, "r");
385 if (fp == NULL) { 393 if (fp == NULL) {
@@ -388,26 +396,30 @@ int show_line_range(struct line_range *lr, const char *module)
388 return -errno; 396 return -errno;
389 } 397 }
390 /* Skip to starting line number */ 398 /* Skip to starting line number */
391 while (l < lr->start && ret >= 0) 399 while (l < lr->start) {
392 ret = show_one_line(fp, l++, true, false); 400 ret = skip_one_line(fp, l++);
393 if (ret < 0) 401 if (ret < 0)
394 goto end; 402 goto end;
403 }
395 404
396 list_for_each_entry(ln, &lr->line_list, list) { 405 list_for_each_entry(ln, &lr->line_list, list) {
397 while (ln->line > l && ret >= 0) 406 for (; ln->line > l; l++) {
398 ret = show_one_line(fp, (l++) - lr->offset, 407 ret = show_one_line(fp, l - lr->offset);
399 false, false); 408 if (ret < 0)
400 if (ret >= 0) 409 goto end;
401 ret = show_one_line(fp, (l++) - lr->offset, 410 }
402 false, true); 411 ret = show_one_line_with_num(fp, l++ - lr->offset);
403 if (ret < 0) 412 if (ret < 0)
404 goto end; 413 goto end;
405 } 414 }
406 415
407 if (lr->end == INT_MAX) 416 if (lr->end == INT_MAX)
408 lr->end = l + NR_ADDITIONAL_LINES; 417 lr->end = l + NR_ADDITIONAL_LINES;
409 while (l <= lr->end && !feof(fp) && ret >= 0) 418 while (l <= lr->end) {
410 ret = show_one_line(fp, (l++) - lr->offset, false, false); 419 ret = show_one_line_or_eof(fp, l++ - lr->offset);
420 if (ret <= 0)
421 break;
422 }
411end: 423end:
412 fclose(fp); 424 fclose(fp);
413 return ret; 425 return ret;
@@ -466,7 +478,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs,
466 478
467 fd = open_vmlinux(module); 479 fd = open_vmlinux(module);
468 if (fd < 0) { 480 if (fd < 0) {
469 pr_warning("Failed to open debuginfo file.\n"); 481 pr_warning("Failed to open debug information file.\n");
470 return fd; 482 return fd;
471 } 483 }
472 484
@@ -526,56 +538,87 @@ int show_available_vars(struct perf_probe_event *pevs __unused,
526} 538}
527#endif 539#endif
528 540
541static int parse_line_num(char **ptr, int *val, const char *what)
542{
543 const char *start = *ptr;
544
545 errno = 0;
546 *val = strtol(*ptr, ptr, 0);
547 if (errno || *ptr == start) {
548 semantic_error("'%s' is not a valid number.\n", what);
549 return -EINVAL;
550 }
551 return 0;
552}
553
554/*
555 * Stuff 'lr' according to the line range described by 'arg'.
556 * The line range syntax is described by:
557 *
558 * SRC[:SLN[+NUM|-ELN]]
559 * FNC[:SLN[+NUM|-ELN]]
560 */
529int parse_line_range_desc(const char *arg, struct line_range *lr) 561int parse_line_range_desc(const char *arg, struct line_range *lr)
530{ 562{
531 const char *ptr; 563 char *range, *name = strdup(arg);
532 char *tmp; 564 int err;
533 /* 565
534 * <Syntax> 566 if (!name)
535 * SRC:SLN[+NUM|-ELN] 567 return -ENOMEM;
536 * FUNC[:SLN[+NUM|-ELN]] 568
537 */ 569 lr->start = 0;
538 ptr = strchr(arg, ':'); 570 lr->end = INT_MAX;
539 if (ptr) { 571
540 lr->start = (int)strtoul(ptr + 1, &tmp, 0); 572 range = strchr(name, ':');
541 if (*tmp == '+') { 573 if (range) {
542 lr->end = lr->start + (int)strtoul(tmp + 1, &tmp, 0); 574 *range++ = '\0';
543 lr->end--; /* 575
544 * Adjust the number of lines here. 576 err = parse_line_num(&range, &lr->start, "start line");
545 * If the number of lines == 1, the 577 if (err)
546 * the end of line should be equal to 578 goto err;
547 * the start of line. 579
548 */ 580 if (*range == '+' || *range == '-') {
549 } else if (*tmp == '-') 581 const char c = *range++;
550 lr->end = (int)strtoul(tmp + 1, &tmp, 0); 582
551 else 583 err = parse_line_num(&range, &lr->end, "end line");
552 lr->end = INT_MAX; 584 if (err)
585 goto err;
586
587 if (c == '+') {
588 lr->end += lr->start;
589 /*
590 * Adjust the number of lines here.
591 * If the number of lines == 1, the
592 * the end of line should be equal to
593 * the start of line.
594 */
595 lr->end--;
596 }
597 }
598
553 pr_debug("Line range is %d to %d\n", lr->start, lr->end); 599 pr_debug("Line range is %d to %d\n", lr->start, lr->end);
600
601 err = -EINVAL;
554 if (lr->start > lr->end) { 602 if (lr->start > lr->end) {
555 semantic_error("Start line must be smaller" 603 semantic_error("Start line must be smaller"
556 " than end line.\n"); 604 " than end line.\n");
557 return -EINVAL; 605 goto err;
558 } 606 }
559 if (*tmp != '\0') { 607 if (*range != '\0') {
560 semantic_error("Tailing with invalid character '%d'.\n", 608 semantic_error("Tailing with invalid str '%s'.\n", range);
561 *tmp); 609 goto err;
562 return -EINVAL;
563 } 610 }
564 tmp = strndup(arg, (ptr - arg));
565 } else {
566 tmp = strdup(arg);
567 lr->end = INT_MAX;
568 } 611 }
569 612
570 if (tmp == NULL) 613 if (strchr(name, '.'))
571 return -ENOMEM; 614 lr->file = name;
572
573 if (strchr(tmp, '.'))
574 lr->file = tmp;
575 else 615 else
576 lr->function = tmp; 616 lr->function = name;
577 617
578 return 0; 618 return 0;
619err:
620 free(name);
621 return err;
579} 622}
580 623
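
The accepted grammar is SRC[:SLN[+NUM|-ELN]] / FNC[:SLN[+NUM|-ELN]]: a '+' suffix is a line count, so end becomes start + num - 1, while '-' names an absolute end line and no suffix leaves the range open (INT_MAX). A reduced standalone sketch of that arithmetic, assuming well-formed single-character separators as above:

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Parse "name:start+num" or "name:start-end"; returns 0 on success. */
static int parse_range(const char *arg, int *start, int *end)
{
	char *p;
	const char *colon = strchr(arg, ':');

	*start = 0;
	*end = INT_MAX;		/* open-ended when no range is given */
	if (!colon)
		return 0;

	errno = 0;
	*start = (int)strtol(colon + 1, &p, 0);
	if (errno || p == colon + 1)
		return -EINVAL;	/* no digits after ':' */

	if (*p == '+')		/* count: 1 line means end == start */
		*end = *start + (int)strtol(p + 1, &p, 0) - 1;
	else if (*p == '-')	/* absolute end line */
		*end = (int)strtol(p + 1, &p, 0);

	return (*p == '\0' && *start <= *end) ? 0 : -EINVAL;
}

int main(void)
{
	int s, e;

	if (parse_range("probe-event.c:100+10", &s, &e) == 0)
		printf("lines %d..%d\n", s, e);	/* -> lines 100..109 */
	return 0;
}
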
581/* Check the name is good for event/group */ 624/* Check the name is good for event/group */
@@ -699,39 +742,40 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
699 742
700 /* Exclusion check */ 743 /* Exclusion check */
701 if (pp->lazy_line && pp->line) { 744 if (pp->lazy_line && pp->line) {
702 semantic_error("Lazy pattern can't be used with line number."); 745 semantic_error("Lazy pattern can't be used with"
746 " line number.\n");
703 return -EINVAL; 747 return -EINVAL;
704 } 748 }
705 749
706 if (pp->lazy_line && pp->offset) { 750 if (pp->lazy_line && pp->offset) {
707 semantic_error("Lazy pattern can't be used with offset."); 751 semantic_error("Lazy pattern can't be used with offset.\n");
708 return -EINVAL; 752 return -EINVAL;
709 } 753 }
710 754
711 if (pp->line && pp->offset) { 755 if (pp->line && pp->offset) {
712 semantic_error("Offset can't be used with line number."); 756 semantic_error("Offset can't be used with line number.\n");
713 return -EINVAL; 757 return -EINVAL;
714 } 758 }
715 759
716 if (!pp->line && !pp->lazy_line && pp->file && !pp->function) { 760 if (!pp->line && !pp->lazy_line && pp->file && !pp->function) {
717 semantic_error("File always requires line number or " 761 semantic_error("File always requires line number or "
718 "lazy pattern."); 762 "lazy pattern.\n");
719 return -EINVAL; 763 return -EINVAL;
720 } 764 }
721 765
722 if (pp->offset && !pp->function) { 766 if (pp->offset && !pp->function) {
723 semantic_error("Offset requires an entry function."); 767 semantic_error("Offset requires an entry function.\n");
724 return -EINVAL; 768 return -EINVAL;
725 } 769 }
726 770
727 if (pp->retprobe && !pp->function) { 771 if (pp->retprobe && !pp->function) {
728 semantic_error("Return probe requires an entry function."); 772 semantic_error("Return probe requires an entry function.\n");
729 return -EINVAL; 773 return -EINVAL;
730 } 774 }
731 775
732 if ((pp->offset || pp->line || pp->lazy_line) && pp->retprobe) { 776 if ((pp->offset || pp->line || pp->lazy_line) && pp->retprobe) {
733 semantic_error("Offset/Line/Lazy pattern can't be used with " 777 semantic_error("Offset/Line/Lazy pattern can't be used with "
734 "return probe."); 778 "return probe.\n");
735 return -EINVAL; 779 return -EINVAL;
736 } 780 }
737 781
@@ -1005,7 +1049,7 @@ int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, size_t len)
1005 1049
1006 return tmp - buf; 1050 return tmp - buf;
1007error: 1051error:
1008 pr_debug("Failed to synthesize perf probe argument: %s", 1052 pr_debug("Failed to synthesize perf probe argument: %s\n",
1009 strerror(-ret)); 1053 strerror(-ret));
1010 return ret; 1054 return ret;
1011} 1055}
@@ -1033,13 +1077,13 @@ static char *synthesize_perf_probe_point(struct perf_probe_point *pp)
1033 goto error; 1077 goto error;
1034 } 1078 }
1035 if (pp->file) { 1079 if (pp->file) {
1036 len = strlen(pp->file) - 31; 1080 tmp = pp->file;
1037 if (len < 0) 1081 len = strlen(tmp);
1038 len = 0; 1082 if (len > 30) {
1039 tmp = strchr(pp->file + len, '/'); 1083 tmp = strchr(pp->file + len - 30, '/');
1040 if (!tmp) 1084 tmp = tmp ? tmp + 1 : pp->file + len - 30;
1041 tmp = pp->file + len; 1085 }
1042 ret = e_snprintf(file, 32, "@%s", tmp + 1); 1086 ret = e_snprintf(file, 32, "@%s", tmp);
1043 if (ret <= 0) 1087 if (ret <= 0)
1044 goto error; 1088 goto error;
1045 } 1089 }
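
The rewritten truncation keeps at most the last 30 characters of the path but, where possible, restarts just after a '/' inside that tail so a whole path component is printed. A standalone sketch of the rule (helper name is illustrative):

#include <stdio.h>
#include <string.h>

/* Return a suffix of 'file' at most 30 chars long, preferring to start
 * right after a '/' so a whole path component is shown. */
static const char *trim_file(const char *file)
{
	size_t len = strlen(file);
	const char *tmp = file;

	if (len > 30) {
		tmp = strchr(file + len - 30, '/');
		tmp = tmp ? tmp + 1 : file + len - 30;
	}
	return tmp;
}

int main(void)
{
	/* -> "@arch/x86/kernel/perf_event.c" */
	printf("@%s\n",
	       trim_file("/very/long/prefix/arch/x86/kernel/perf_event.c"));
	return 0;
}
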
@@ -1055,7 +1099,7 @@ static char *synthesize_perf_probe_point(struct perf_probe_point *pp)
1055 1099
1056 return buf; 1100 return buf;
1057error: 1101error:
1058 pr_debug("Failed to synthesize perf probe point: %s", 1102 pr_debug("Failed to synthesize perf probe point: %s\n",
1059 strerror(-ret)); 1103 strerror(-ret));
1060 if (buf) 1104 if (buf)
1061 free(buf); 1105 free(buf);
@@ -1796,7 +1840,7 @@ static int del_trace_probe_event(int fd, const char *group,
1796 1840
1797 ret = e_snprintf(buf, 128, "%s:%s", group, event); 1841 ret = e_snprintf(buf, 128, "%s:%s", group, event);
1798 if (ret < 0) { 1842 if (ret < 0) {
1799 pr_err("Failed to copy event."); 1843 pr_err("Failed to copy event.\n");
1800 return ret; 1844 return ret;
1801 } 1845 }
1802 1846
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index ddf4d4556321..ab83b6ac5d65 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -652,8 +652,8 @@ static_var:
652 regs = get_arch_regstr(regn); 652 regs = get_arch_regstr(regn);
653 if (!regs) { 653 if (!regs) {
654 /* This should be a bug in DWARF or this tool */ 654 /* This should be a bug in DWARF or this tool */
655 pr_warning("Mapping for DWARF register number %u " 655 pr_warning("Mapping for the register number %u "
656 "missing on this architecture.", regn); 656 "missing on this architecture.\n", regn);
657 return -ERANGE; 657 return -ERANGE;
658 } 658 }
659 659
@@ -699,13 +699,14 @@ static int convert_variable_type(Dwarf_Die *vr_die,
699 if (ret != DW_TAG_pointer_type && 699 if (ret != DW_TAG_pointer_type &&
700 ret != DW_TAG_array_type) { 700 ret != DW_TAG_array_type) {
701 pr_warning("Failed to cast into string: " 701 pr_warning("Failed to cast into string: "
702 "%s(%s) is not a pointer nor array.", 702 "%s(%s) is not a pointer nor array.\n",
703 dwarf_diename(vr_die), dwarf_diename(&type)); 703 dwarf_diename(vr_die), dwarf_diename(&type));
704 return -EINVAL; 704 return -EINVAL;
705 } 705 }
706 if (ret == DW_TAG_pointer_type) { 706 if (ret == DW_TAG_pointer_type) {
707 if (die_get_real_type(&type, &type) == NULL) { 707 if (die_get_real_type(&type, &type) == NULL) {
708 pr_warning("Failed to get a type information."); 708 pr_warning("Failed to get a type"
709 " information.\n");
709 return -ENOENT; 710 return -ENOENT;
710 } 711 }
711 while (*ref_ptr) 712 while (*ref_ptr)
@@ -720,7 +721,7 @@ static int convert_variable_type(Dwarf_Die *vr_die,
720 if (!die_compare_name(&type, "char") && 721 if (!die_compare_name(&type, "char") &&
721 !die_compare_name(&type, "unsigned char")) { 722 !die_compare_name(&type, "unsigned char")) {
722 pr_warning("Failed to cast into string: " 723 pr_warning("Failed to cast into string: "
723 "%s is not (unsigned) char *.", 724 "%s is not (unsigned) char *.\n",
724 dwarf_diename(vr_die)); 725 dwarf_diename(vr_die));
725 return -EINVAL; 726 return -EINVAL;
726 } 727 }
@@ -830,8 +831,8 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
830 return -EINVAL; 831 return -EINVAL;
831 } 832 }
832 if (field->name[0] == '[') { 833 if (field->name[0] == '[') {
833 pr_err("Semantic error: %s is not a pointor nor array.", 834 pr_err("Semantic error: %s is not a pointor"
834 varname); 835 " nor array.\n", varname);
835 return -EINVAL; 836 return -EINVAL;
836 } 837 }
837 if (field->ref) { 838 if (field->ref) {
@@ -978,7 +979,7 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwarf_Addr paddr,
978 name = dwarf_diename(sp_die); 979 name = dwarf_diename(sp_die);
979 if (name) { 980 if (name) {
980 if (dwarf_entrypc(sp_die, &eaddr) != 0) { 981 if (dwarf_entrypc(sp_die, &eaddr) != 0) {
981 pr_warning("Failed to get entry pc of %s\n", 982 pr_warning("Failed to get entry address of %s\n",
982 dwarf_diename(sp_die)); 983 dwarf_diename(sp_die));
983 return -ENOENT; 984 return -ENOENT;
984 } 985 }
@@ -994,7 +995,7 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwarf_Addr paddr,
994 if (retprobe) { 995 if (retprobe) {
995 if (eaddr != paddr) { 996 if (eaddr != paddr) {
996 pr_warning("Return probe must be on the head of" 997 pr_warning("Return probe must be on the head of"
997 " a real function\n"); 998 " a real function.\n");
998 return -EINVAL; 999 return -EINVAL;
999 } 1000 }
1000 tp->retprobe = true; 1001 tp->retprobe = true;
@@ -1033,7 +1034,7 @@ static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf)
1033 Dwarf_Frame *frame; 1034 Dwarf_Frame *frame;
1034 if (dwarf_cfi_addrframe(pf->cfi, pf->addr, &frame) != 0 || 1035 if (dwarf_cfi_addrframe(pf->cfi, pf->addr, &frame) != 0 ||
1035 dwarf_frame_cfa(frame, &pf->fb_ops, &nops) != 0) { 1036 dwarf_frame_cfa(frame, &pf->fb_ops, &nops) != 0) {
1036 pr_warning("Failed to get CFA on 0x%jx\n", 1037 pr_warning("Failed to get call frame on 0x%jx\n",
1037 (uintmax_t)pf->addr); 1038 (uintmax_t)pf->addr);
1038 return -ENOENT; 1039 return -ENOENT;
1039 } 1040 }
@@ -1060,7 +1061,7 @@ static int find_probe_point_by_line(struct probe_finder *pf)
1060 int ret = 0; 1061 int ret = 0;
1061 1062
1062 if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) { 1063 if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) {
1063 pr_warning("No source lines found in this CU.\n"); 1064 pr_warning("No source lines found.\n");
1064 return -ENOENT; 1065 return -ENOENT;
1065 } 1066 }
1066 1067
@@ -1162,7 +1163,7 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
1162 } 1163 }
1163 1164
1164 if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) { 1165 if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) {
1165 pr_warning("No source lines found in this CU.\n"); 1166 pr_warning("No source lines found.\n");
1166 return -ENOENT; 1167 return -ENOENT;
1167 } 1168 }
1168 1169
@@ -1220,7 +1221,7 @@ static int probe_point_inline_cb(Dwarf_Die *in_die, void *data)
1220 else { 1221 else {
1221 /* Get probe address */ 1222 /* Get probe address */
1222 if (dwarf_entrypc(in_die, &addr) != 0) { 1223 if (dwarf_entrypc(in_die, &addr) != 0) {
1223 pr_warning("Failed to get entry pc of %s.\n", 1224 pr_warning("Failed to get entry address of %s.\n",
1224 dwarf_diename(in_die)); 1225 dwarf_diename(in_die));
1225 param->retval = -ENOENT; 1226 param->retval = -ENOENT;
1226 return DWARF_CB_ABORT; 1227 return DWARF_CB_ABORT;
@@ -1261,8 +1262,8 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
1261 param->retval = find_probe_point_lazy(sp_die, pf); 1262 param->retval = find_probe_point_lazy(sp_die, pf);
1262 else { 1263 else {
1263 if (dwarf_entrypc(sp_die, &pf->addr) != 0) { 1264 if (dwarf_entrypc(sp_die, &pf->addr) != 0) {
1264 pr_warning("Failed to get entry pc of %s.\n", 1265 pr_warning("Failed to get entry address of "
1265 dwarf_diename(sp_die)); 1266 "%s.\n", dwarf_diename(sp_die));
1266 param->retval = -ENOENT; 1267 param->retval = -ENOENT;
1267 return DWARF_CB_ABORT; 1268 return DWARF_CB_ABORT;
1268 } 1269 }
@@ -1304,7 +1305,7 @@ static int find_probes(int fd, struct probe_finder *pf)
1304 1305
1305 dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias); 1306 dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias);
1306 if (!dbg) { 1307 if (!dbg) {
1307 pr_warning("No dwarf info found in the vmlinux - " 1308 pr_warning("No debug information found in the vmlinux - "
1308 "please rebuild with CONFIG_DEBUG_INFO=y.\n"); 1309 "please rebuild with CONFIG_DEBUG_INFO=y.\n");
1309 return -EBADF; 1310 return -EBADF;
1310 } 1311 }
@@ -1549,7 +1550,7 @@ int find_perf_probe_point(unsigned long addr, struct perf_probe_point *ppt)
1549 /* Open the live linux kernel */ 1550 /* Open the live linux kernel */
1550 dbg = dwfl_init_live_kernel_dwarf(addr, &dwfl, &bias); 1551 dbg = dwfl_init_live_kernel_dwarf(addr, &dwfl, &bias);
1551 if (!dbg) { 1552 if (!dbg) {
1552 pr_warning("No dwarf info found in the vmlinux - " 1553 pr_warning("No debug information found in the vmlinux - "
1553 "please rebuild with CONFIG_DEBUG_INFO=y.\n"); 1554 "please rebuild with CONFIG_DEBUG_INFO=y.\n");
1554 ret = -EINVAL; 1555 ret = -EINVAL;
1555 goto end; 1556 goto end;
@@ -1559,7 +1560,8 @@ int find_perf_probe_point(unsigned long addr, struct perf_probe_point *ppt)
1559 addr += bias; 1560 addr += bias;
1560 /* Find cu die */ 1561 /* Find cu die */
1561 if (!dwarf_addrdie(dbg, (Dwarf_Addr)addr - bias, &cudie)) { 1562 if (!dwarf_addrdie(dbg, (Dwarf_Addr)addr - bias, &cudie)) {
1562 pr_warning("No CU DIE is found at %lx\n", addr); 1563 pr_warning("Failed to find debug information for address %lx\n",
1564 addr);
1563 ret = -EINVAL; 1565 ret = -EINVAL;
1564 goto end; 1566 goto end;
1565 } 1567 }
@@ -1684,7 +1686,7 @@ static int find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf)
1684 1686
1685 line_list__init(&lf->lr->line_list); 1687 line_list__init(&lf->lr->line_list);
1686 if (dwarf_getsrclines(&lf->cu_die, &lines, &nlines) != 0) { 1688 if (dwarf_getsrclines(&lf->cu_die, &lines, &nlines) != 0) {
1687 pr_warning("No source lines found in this CU.\n"); 1689 pr_warning("No source lines found.\n");
1688 return -ENOENT; 1690 return -ENOENT;
1689 } 1691 }
1690 1692
@@ -1809,7 +1811,7 @@ int find_line_range(int fd, struct line_range *lr)
1809 1811
1810 dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias); 1812 dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias);
1811 if (!dbg) { 1813 if (!dbg) {
1812 pr_warning("No dwarf info found in the vmlinux - " 1814 pr_warning("No debug information found in the vmlinux - "
1813 "please rebuild with CONFIG_DEBUG_INFO=y.\n"); 1815 "please rebuild with CONFIG_DEBUG_INFO=y.\n");
1814 return -EBADF; 1816 return -EBADF;
1815 } 1817 }
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index bba69d455699..beaefc3c1223 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -34,9 +34,9 @@ extern int find_available_vars_at(int fd, struct perf_probe_event *pev,
34 bool externs); 34 bool externs);
35 35
36#include <dwarf.h> 36#include <dwarf.h>
37#include <libdw.h> 37#include <elfutils/libdw.h>
38#include <libdwfl.h> 38#include <elfutils/libdwfl.h>
39#include <version.h> 39#include <elfutils/version.h>
40 40
41struct probe_finder { 41struct probe_finder {
42 struct perf_probe_event *pev; /* Target probe event */ 42 struct perf_probe_event *pev; /* Target probe event */
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index b059dc50cc2d..93680818e244 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * trace-event-perl. Feed perf trace events to an embedded Perl interpreter. 2 * trace-event-perl. Feed perf script events to an embedded Perl interpreter.
3 * 3 *
4 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> 4 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
5 * 5 *
@@ -411,8 +411,8 @@ static int perl_generate_script(const char *outfile)
411 return -1; 411 return -1;
412 } 412 }
413 413
414 fprintf(ofp, "# perf trace event handlers, " 414 fprintf(ofp, "# perf script event handlers, "
415 "generated by perf trace -g perl\n"); 415 "generated by perf script -g perl\n");
416 416
417 fprintf(ofp, "# Licensed under the terms of the GNU GPL" 417 fprintf(ofp, "# Licensed under the terms of the GNU GPL"
418 " License version 2\n\n"); 418 " License version 2\n\n");
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 33a632523743..c6d99334bdfa 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -442,8 +442,8 @@ static int python_generate_script(const char *outfile)
442 fprintf(stderr, "couldn't open %s\n", fname); 442 fprintf(stderr, "couldn't open %s\n", fname);
443 return -1; 443 return -1;
444 } 444 }
445 fprintf(ofp, "# perf trace event handlers, " 445 fprintf(ofp, "# perf script event handlers, "
446 "generated by perf trace -g python\n"); 446 "generated by perf script -g python\n");
447 447
448 fprintf(ofp, "# Licensed under the terms of the GNU GPL" 448 fprintf(ofp, "# Licensed under the terms of the GNU GPL"
449 " License version 2\n\n"); 449 " License version 2\n\n");
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index fa9d652c2dc3..0f7e544544f5 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -65,9 +65,49 @@ out_close:
65 return -1; 65 return -1;
66} 66}
67 67
68static void perf_session__id_header_size(struct perf_session *session)
69{
70 struct sample_data *data;
71 u64 sample_type = session->sample_type;
72 u16 size = 0;
73
74 if (!session->sample_id_all)
75 goto out;
76
77 if (sample_type & PERF_SAMPLE_TID)
78 size += sizeof(data->tid) * 2;
79
80 if (sample_type & PERF_SAMPLE_TIME)
81 size += sizeof(data->time);
82
83 if (sample_type & PERF_SAMPLE_ID)
84 size += sizeof(data->id);
85
86 if (sample_type & PERF_SAMPLE_STREAM_ID)
87 size += sizeof(data->stream_id);
88
89 if (sample_type & PERF_SAMPLE_CPU)
90 size += sizeof(data->cpu) * 2;
91out:
92 session->id_hdr_size = size;
93}
94
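
The accumulated size mirrors struct sample_data: PERF_SAMPLE_TID contributes pid+tid (two u32), TIME, ID and STREAM_ID one u64 each, and CPU cpu+res (two u32). A standalone tally under those layout assumptions, with locally defined bit values matching linux/perf_event.h:

#include <stdint.h>
#include <stdio.h>

enum {					/* bit values as in linux/perf_event.h */
	SAMPLE_TID	 = 1U << 1,
	SAMPLE_TIME	 = 1U << 2,
	SAMPLE_ID	 = 1U << 6,
	SAMPLE_CPU	 = 1U << 7,
	SAMPLE_STREAM_ID = 1U << 9,
};

static uint16_t id_hdr_size(uint64_t sample_type)
{
	uint16_t size = 0;

	if (sample_type & SAMPLE_TID)
		size += 2 * sizeof(uint32_t);	/* pid, tid */
	if (sample_type & SAMPLE_TIME)
		size += sizeof(uint64_t);
	if (sample_type & SAMPLE_ID)
		size += sizeof(uint64_t);
	if (sample_type & SAMPLE_STREAM_ID)
		size += sizeof(uint64_t);
	if (sample_type & SAMPLE_CPU)
		size += 2 * sizeof(uint32_t);	/* cpu, res */
	return size;
}

int main(void)
{
	/* TID|TIME|CPU -> 8 + 8 + 8 = 24 trailing bytes per event */
	printf("%u\n", id_hdr_size(SAMPLE_TID | SAMPLE_TIME | SAMPLE_CPU));
	return 0;
}
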
95void perf_session__set_sample_id_all(struct perf_session *session, bool value)
96{
97 session->sample_id_all = value;
98 perf_session__id_header_size(session);
99}
100
101void perf_session__set_sample_type(struct perf_session *session, u64 type)
102{
103 session->sample_type = type;
104}
105
68void perf_session__update_sample_type(struct perf_session *self) 106void perf_session__update_sample_type(struct perf_session *self)
69{ 107{
70 self->sample_type = perf_header__sample_type(&self->header); 108 self->sample_type = perf_header__sample_type(&self->header);
109 self->sample_id_all = perf_header__sample_id_all(&self->header);
110 perf_session__id_header_size(self);
71} 111}
72 112
73int perf_session__create_kernel_maps(struct perf_session *self) 113int perf_session__create_kernel_maps(struct perf_session *self)
@@ -85,7 +125,9 @@ static void perf_session__destroy_kernel_maps(struct perf_session *self)
85 machines__destroy_guest_kernel_maps(&self->machines); 125 machines__destroy_guest_kernel_maps(&self->machines);
86} 126}
87 127
88struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe) 128struct perf_session *perf_session__new(const char *filename, int mode,
129 bool force, bool repipe,
130 struct perf_event_ops *ops)
89{ 131{
90 size_t len = filename ? strlen(filename) + 1 : 0; 132 size_t len = filename ? strlen(filename) + 1 : 0;
91 struct perf_session *self = zalloc(sizeof(*self) + len); 133 struct perf_session *self = zalloc(sizeof(*self) + len);
@@ -101,10 +143,20 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
101 INIT_LIST_HEAD(&self->dead_threads); 143 INIT_LIST_HEAD(&self->dead_threads);
102 self->hists_tree = RB_ROOT; 144 self->hists_tree = RB_ROOT;
103 self->last_match = NULL; 145 self->last_match = NULL;
104 self->mmap_window = 32; 146 /*
147 * On 64bit we can mmap the data file in one go. No need for tiny mmap
148 * slices. On 32bit we use 32MB.
149 */
150#if BITS_PER_LONG == 64
151 self->mmap_window = ULLONG_MAX;
152#else
153 self->mmap_window = 32 * 1024 * 1024ULL;
154#endif
105 self->machines = RB_ROOT; 155 self->machines = RB_ROOT;
106 self->repipe = repipe; 156 self->repipe = repipe;
107 INIT_LIST_HEAD(&self->ordered_samples.samples_head); 157 INIT_LIST_HEAD(&self->ordered_samples.samples);
158 INIT_LIST_HEAD(&self->ordered_samples.sample_cache);
159 INIT_LIST_HEAD(&self->ordered_samples.to_free);
108 machine__init(&self->host_machine, "", HOST_KERNEL_ID); 160 machine__init(&self->host_machine, "", HOST_KERNEL_ID);
109 161
110 if (mode == O_RDONLY) { 162 if (mode == O_RDONLY) {
@@ -120,6 +172,13 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
120 } 172 }
121 173
122 perf_session__update_sample_type(self); 174 perf_session__update_sample_type(self);
175
176 if (ops && ops->ordering_requires_timestamps &&
177 ops->ordered_samples && !self->sample_id_all) {
178 dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
179 ops->ordered_samples = false;
180 }
181
123out: 182out:
124 return self; 183 return self;
125out_free: 184out_free:
@@ -230,7 +289,15 @@ struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
230 return syms; 289 return syms;
231} 290}
232 291
292static int process_event_synth_stub(event_t *event __used,
293 struct perf_session *session __used)
294{
295 dump_printf(": unhandled!\n");
296 return 0;
297}
298
233static int process_event_stub(event_t *event __used, 299static int process_event_stub(event_t *event __used,
300 struct sample_data *sample __used,
234 struct perf_session *session __used) 301 struct perf_session *session __used)
235{ 302{
236 dump_printf(": unhandled!\n"); 303 dump_printf(": unhandled!\n");
@@ -262,7 +329,7 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
262 if (handler->exit == NULL) 329 if (handler->exit == NULL)
263 handler->exit = process_event_stub; 330 handler->exit = process_event_stub;
264 if (handler->lost == NULL) 331 if (handler->lost == NULL)
265 handler->lost = process_event_stub; 332 handler->lost = event__process_lost;
266 if (handler->read == NULL) 333 if (handler->read == NULL)
267 handler->read = process_event_stub; 334 handler->read = process_event_stub;
268 if (handler->throttle == NULL) 335 if (handler->throttle == NULL)
@@ -270,13 +337,13 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
270 if (handler->unthrottle == NULL) 337 if (handler->unthrottle == NULL)
271 handler->unthrottle = process_event_stub; 338 handler->unthrottle = process_event_stub;
272 if (handler->attr == NULL) 339 if (handler->attr == NULL)
273 handler->attr = process_event_stub; 340 handler->attr = process_event_synth_stub;
274 if (handler->event_type == NULL) 341 if (handler->event_type == NULL)
275 handler->event_type = process_event_stub; 342 handler->event_type = process_event_synth_stub;
276 if (handler->tracing_data == NULL) 343 if (handler->tracing_data == NULL)
277 handler->tracing_data = process_event_stub; 344 handler->tracing_data = process_event_synth_stub;
278 if (handler->build_id == NULL) 345 if (handler->build_id == NULL)
279 handler->build_id = process_event_stub; 346 handler->build_id = process_event_synth_stub;
280 if (handler->finished_round == NULL) { 347 if (handler->finished_round == NULL) {
281 if (handler->ordered_samples) 348 if (handler->ordered_samples)
282 handler->finished_round = process_finished_round; 349 handler->finished_round = process_finished_round;
@@ -386,33 +453,61 @@ static event__swap_op event__swap_ops[] = {
386 453
387struct sample_queue { 454struct sample_queue {
388 u64 timestamp; 455 u64 timestamp;
389 struct sample_event *event; 456 u64 file_offset;
457 event_t *event;
390 struct list_head list; 458 struct list_head list;
391}; 459};
392 460
461static void perf_session_free_sample_buffers(struct perf_session *session)
462{
463 struct ordered_samples *os = &session->ordered_samples;
464
465 while (!list_empty(&os->to_free)) {
466 struct sample_queue *sq;
467
468 sq = list_entry(os->to_free.next, struct sample_queue, list);
469 list_del(&sq->list);
470 free(sq);
471 }
472}
473
474static int perf_session_deliver_event(struct perf_session *session,
475 event_t *event,
476 struct sample_data *sample,
477 struct perf_event_ops *ops,
478 u64 file_offset);
479
393static void flush_sample_queue(struct perf_session *s, 480static void flush_sample_queue(struct perf_session *s,
394 struct perf_event_ops *ops) 481 struct perf_event_ops *ops)
395{ 482{
396 struct list_head *head = &s->ordered_samples.samples_head; 483 struct ordered_samples *os = &s->ordered_samples;
397 u64 limit = s->ordered_samples.next_flush; 484 struct list_head *head = &os->samples;
398 struct sample_queue *tmp, *iter; 485 struct sample_queue *tmp, *iter;
486 struct sample_data sample;
487 u64 limit = os->next_flush;
488 u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
399 489
400 if (!ops->ordered_samples || !limit) 490 if (!ops->ordered_samples || !limit)
401 return; 491 return;
402 492
403 list_for_each_entry_safe(iter, tmp, head, list) { 493 list_for_each_entry_safe(iter, tmp, head, list) {
404 if (iter->timestamp > limit) 494 if (iter->timestamp > limit)
405 return; 495 break;
406
407 if (iter == s->ordered_samples.last_inserted)
408 s->ordered_samples.last_inserted = NULL;
409 496
410 ops->sample((event_t *)iter->event, s); 497 event__parse_sample(iter->event, s, &sample);
498 perf_session_deliver_event(s, iter->event, &sample, ops,
499 iter->file_offset);
411 500
412 s->ordered_samples.last_flush = iter->timestamp; 501 os->last_flush = iter->timestamp;
413 list_del(&iter->list); 502 list_del(&iter->list);
414 free(iter->event); 503 list_add(&iter->list, &os->sample_cache);
415 free(iter); 504 }
505
506 if (list_empty(head)) {
507 os->last_sample = NULL;
508 } else if (last_ts <= limit) {
509 os->last_sample =
510 list_entry(head->prev, struct sample_queue, list);
416 } 511 }
417} 512}
418 513
@@ -465,176 +560,263 @@ static int process_finished_round(event_t *event __used,
465 return 0; 560 return 0;
466} 561}
467 562
468static void __queue_sample_end(struct sample_queue *new, struct list_head *head)
469{
470 struct sample_queue *iter;
471
472 list_for_each_entry_reverse(iter, head, list) {
473 if (iter->timestamp < new->timestamp) {
474 list_add(&new->list, &iter->list);
475 return;
476 }
477 }
478
479 list_add(&new->list, head);
480}
481
482static void __queue_sample_before(struct sample_queue *new,
483 struct sample_queue *iter,
484 struct list_head *head)
485{
486 list_for_each_entry_continue_reverse(iter, head, list) {
487 if (iter->timestamp < new->timestamp) {
488 list_add(&new->list, &iter->list);
489 return;
490 }
491 }
492
493 list_add(&new->list, head);
494}
495
496static void __queue_sample_after(struct sample_queue *new,
497 struct sample_queue *iter,
498 struct list_head *head)
499{
500 list_for_each_entry_continue(iter, head, list) {
501 if (iter->timestamp > new->timestamp) {
502 list_add_tail(&new->list, &iter->list);
503 return;
504 }
505 }
506 list_add_tail(&new->list, head);
507}
508
509/* The queue is ordered by time */ 563/* The queue is ordered by time */
510static void __queue_sample_event(struct sample_queue *new, 564static void __queue_event(struct sample_queue *new, struct perf_session *s)
511 struct perf_session *s)
512{ 565{
513 struct sample_queue *last_inserted = s->ordered_samples.last_inserted; 566 struct ordered_samples *os = &s->ordered_samples;
514 struct list_head *head = &s->ordered_samples.samples_head; 567 struct sample_queue *sample = os->last_sample;
568 u64 timestamp = new->timestamp;
569 struct list_head *p;
515 570
571 os->last_sample = new;
516 572
517 if (!last_inserted) { 573 if (!sample) {
518 __queue_sample_end(new, head); 574 list_add(&new->list, &os->samples);
575 os->max_timestamp = timestamp;
519 return; 576 return;
520 } 577 }
521 578
522 /* 579 /*
523 * Most of the time the current event has a timestamp 580 * last_sample might point to some random place in the list as it's
524 * very close to the last event inserted, unless we just switched 581 * the last queued event. We expect that the new event is close to
525 * to another event buffer. Having a sorting based on a list and 582 * this.
526 * on the last inserted event that is close to the current one is
527 * probably more efficient than an rbtree based sorting.
528 */ 583 */
529 if (last_inserted->timestamp >= new->timestamp) 584 if (sample->timestamp <= timestamp) {
530 __queue_sample_before(new, last_inserted, head); 585 while (sample->timestamp <= timestamp) {
531 else 586 p = sample->list.next;
532 __queue_sample_after(new, last_inserted, head); 587 if (p == &os->samples) {
588 list_add_tail(&new->list, &os->samples);
589 os->max_timestamp = timestamp;
590 return;
591 }
592 sample = list_entry(p, struct sample_queue, list);
593 }
594 list_add_tail(&new->list, &sample->list);
595 } else {
596 while (sample->timestamp > timestamp) {
597 p = sample->list.prev;
598 if (p == &os->samples) {
599 list_add(&new->list, &os->samples);
600 return;
601 }
602 sample = list_entry(p, struct sample_queue, list);
603 }
604 list_add(&new->list, &sample->list);
605 }
533} 606}
534 607
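
__queue_event() replaces the three insertion helpers with one bidirectional walk from last_sample, the most recently queued node: consecutive events usually carry nearly monotonic timestamps, so the walk is short and the list stays sorted without an rbtree. The same idea on a plain doubly linked list (a sketch; the kernel list macros are replaced by explicit pointers):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	uint64_t ts;
	struct node *prev, *next;
};

static struct node head = { 0, &head, &head };	/* circular sentinel */
static struct node *last;			/* insertion hint */

static void link_after(struct node *n, struct node *pos)
{
	n->prev = pos;
	n->next = pos->next;
	pos->next->prev = n;
	pos->next = n;
}

/* Insert keeping ascending ts order, walking from the hint. */
static void queue(struct node *n)
{
	struct node *pos = last;

	last = n;
	if (!pos) {
		link_after(n, &head);
		return;
	}
	if (pos->ts <= n->ts) {
		while (pos != &head && pos->ts <= n->ts)
			pos = pos->next;
		link_after(n, pos->prev);	/* before first larger */
	} else {
		while (pos != &head && pos->ts > n->ts)
			pos = pos->prev;
		link_after(n, pos);	/* after last smaller-or-equal */
	}
}

int main(void)
{
	uint64_t in[] = { 5, 7, 6, 9, 2 };
	struct node *it;

	for (size_t i = 0; i < sizeof(in) / sizeof(in[0]); i++) {
		struct node *n = malloc(sizeof(*n));
		if (!n)
			return 1;
		n->ts = in[i];
		queue(n);
	}
	for (it = head.next; it != &head; it = it->next)
		printf("%llu ", (unsigned long long)it->ts);	/* 2 5 6 7 9 */
	printf("\n");
	return 0;
}
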
535static int queue_sample_event(event_t *event, struct sample_data *data, 608#define MAX_SAMPLE_BUFFER (64 * 1024 / sizeof(struct sample_queue))
536 struct perf_session *s) 609
610static int perf_session_queue_event(struct perf_session *s, event_t *event,
611 struct sample_data *data, u64 file_offset)
537{ 612{
613 struct ordered_samples *os = &s->ordered_samples;
614 struct list_head *sc = &os->sample_cache;
538 u64 timestamp = data->time; 615 u64 timestamp = data->time;
539 struct sample_queue *new; 616 struct sample_queue *new;
540 617
618 if (!timestamp || timestamp == ~0ULL)
619 return -ETIME;
541 620
542 if (timestamp < s->ordered_samples.last_flush) { 621 if (timestamp < s->ordered_samples.last_flush) {
543 printf("Warning: Timestamp below last timeslice flush\n"); 622 printf("Warning: Timestamp below last timeslice flush\n");
544 return -EINVAL; 623 return -EINVAL;
545 } 624 }
546 625
547 new = malloc(sizeof(*new)); 626 if (!list_empty(sc)) {
548 if (!new) 627 new = list_entry(sc->next, struct sample_queue, list);
549 return -ENOMEM; 628 list_del(&new->list);
629 } else if (os->sample_buffer) {
630 new = os->sample_buffer + os->sample_buffer_idx;
631 if (++os->sample_buffer_idx == MAX_SAMPLE_BUFFER)
632 os->sample_buffer = NULL;
633 } else {
634 os->sample_buffer = malloc(MAX_SAMPLE_BUFFER * sizeof(*new));
635 if (!os->sample_buffer)
636 return -ENOMEM;
637 list_add(&os->sample_buffer->list, &os->to_free);
638 os->sample_buffer_idx = 2;
639 new = os->sample_buffer + 1;
640 }
550 641
551 new->timestamp = timestamp; 642 new->timestamp = timestamp;
643 new->file_offset = file_offset;
644 new->event = event;
552 645
553 new->event = malloc(event->header.size); 646 __queue_event(new, s);
554 if (!new->event) {
555 free(new);
556 return -ENOMEM;
557 }
558 647
559 memcpy(new->event, event, event->header.size); 648 return 0;
649}
560 650
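
Allocation now happens in two tiers: flushed nodes are recycled through sample_cache, and otherwise nodes are carved index-by-index out of 64KB slabs, each slab chained onto to_free so teardown is one free() per slab; the event itself is no longer copied, only pointed to. A reduced model of that allocator, with the list_heads simplified to singly linked chains:

#include <stdio.h>
#include <stdlib.h>

#define SLAB_NODES (64 * 1024 / sizeof(struct qnode))

struct qnode {
	struct qnode *next;	/* free-list / slab-teardown chaining */
	/* payload (timestamp, event pointer, ...) would live here */
};

static struct qnode *cache;	/* recycled nodes */
static struct qnode *slab;	/* current slab, carved sequentially */
static size_t slab_idx;
static struct qnode *slabs;	/* every slab, for one free() each at exit */

static struct qnode *node_get(void)
{
	struct qnode *n;

	if (cache) {			/* 1st choice: a recycled node */
		n = cache;
		cache = n->next;
		return n;
	}
	if (!slab) {			/* need a fresh 64KB slab */
		slab = malloc(SLAB_NODES * sizeof(*slab));
		if (!slab)
			return NULL;
		slab->next = slabs;	/* node 0 only chains the slab */
		slabs = slab;
		slab_idx = 1;
	}
	n = &slab[slab_idx++];		/* 2nd choice: carve from the slab */
	if (slab_idx == SLAB_NODES)
		slab = NULL;
	return n;
}

static void node_put(struct qnode *n)	/* what flushing does */
{
	n->next = cache;
	cache = n;
}

static void free_all(void)		/* cf. perf_session_free_sample_buffers */
{
	while (slabs) {
		struct qnode *s = slabs;
		slabs = s->next;
		free(s);
	}
}

int main(void)
{
	struct qnode *a = node_get(), *b = node_get();

	node_put(a);
	printf("recycled: %d\n", node_get() == a);	/* -> 1 */
	(void)b;
	free_all();
	return 0;
}
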
561 __queue_sample_event(new, s); 651static void callchain__printf(struct sample_data *sample)
562 s->ordered_samples.last_inserted = new; 652{
653 unsigned int i;
563 654
564 if (new->timestamp > s->ordered_samples.max_timestamp) 655 printf("... chain: nr:%Lu\n", sample->callchain->nr);
565 s->ordered_samples.max_timestamp = new->timestamp;
566 656
567 return 0; 657 for (i = 0; i < sample->callchain->nr; i++)
658 printf("..... %2d: %016Lx\n", i, sample->callchain->ips[i]);
568} 659}
569 660
570static int perf_session__process_sample(event_t *event, struct perf_session *s, 661static void perf_session__print_tstamp(struct perf_session *session,
571 struct perf_event_ops *ops) 662 event_t *event,
663 struct sample_data *sample)
572{ 664{
573 struct sample_data data; 665 if (event->header.type != PERF_RECORD_SAMPLE &&
666 !session->sample_id_all) {
667 fputs("-1 -1 ", stdout);
668 return;
669 }
574 670
575 if (!ops->ordered_samples) 671 if ((session->sample_type & PERF_SAMPLE_CPU))
576 return ops->sample(event, s); 672 printf("%u ", sample->cpu);
577 673
578 bzero(&data, sizeof(struct sample_data)); 674 if (session->sample_type & PERF_SAMPLE_TIME)
579 event__parse_sample(event, s->sample_type, &data); 675 printf("%Lu ", sample->time);
676}
580 677
581 queue_sample_event(event, &data, s); 678static void dump_event(struct perf_session *session, event_t *event,
679 u64 file_offset, struct sample_data *sample)
680{
681 if (!dump_trace)
682 return;
582 683
583 return 0; 684 printf("\n%#Lx [%#x]: event: %d\n", file_offset, event->header.size,
685 event->header.type);
686
687 trace_event(event);
688
689 if (sample)
690 perf_session__print_tstamp(session, event, sample);
691
692 printf("%#Lx [%#x]: PERF_RECORD_%s", file_offset, event->header.size,
693 event__get_event_name(event->header.type));
584} 694}
585 695
586static int perf_session__process_event(struct perf_session *self, 696static void dump_sample(struct perf_session *session, event_t *event,
587 event_t *event, 697 struct sample_data *sample)
588 struct perf_event_ops *ops,
589 u64 offset, u64 head)
590{ 698{
591 trace_event(event); 699 if (!dump_trace)
700 return;
592 701
593 if (event->header.type < PERF_RECORD_HEADER_MAX) { 702 printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
594 dump_printf("%#Lx [%#x]: PERF_RECORD_%s", 703 sample->pid, sample->tid, sample->ip, sample->period);
595 offset + head, event->header.size,
596 event__name[event->header.type]);
597 hists__inc_nr_events(&self->hists, event->header.type);
598 }
599 704
600 if (self->header.needs_swap && event__swap_ops[event->header.type]) 705 if (session->sample_type & PERF_SAMPLE_CALLCHAIN)
601 event__swap_ops[event->header.type](event); 706 callchain__printf(sample);
707}
708
709static int perf_session_deliver_event(struct perf_session *session,
710 event_t *event,
711 struct sample_data *sample,
712 struct perf_event_ops *ops,
713 u64 file_offset)
714{
715 dump_event(session, event, file_offset, sample);
602 716
603 switch (event->header.type) { 717 switch (event->header.type) {
604 case PERF_RECORD_SAMPLE: 718 case PERF_RECORD_SAMPLE:
605 return perf_session__process_sample(event, self, ops); 719 dump_sample(session, event, sample);
720 return ops->sample(event, sample, session);
606 case PERF_RECORD_MMAP: 721 case PERF_RECORD_MMAP:
607 return ops->mmap(event, self); 722 return ops->mmap(event, sample, session);
608 case PERF_RECORD_COMM: 723 case PERF_RECORD_COMM:
609 return ops->comm(event, self); 724 return ops->comm(event, sample, session);
610 case PERF_RECORD_FORK: 725 case PERF_RECORD_FORK:
611 return ops->fork(event, self); 726 return ops->fork(event, sample, session);
612 case PERF_RECORD_EXIT: 727 case PERF_RECORD_EXIT:
613 return ops->exit(event, self); 728 return ops->exit(event, sample, session);
614 case PERF_RECORD_LOST: 729 case PERF_RECORD_LOST:
615 return ops->lost(event, self); 730 return ops->lost(event, sample, session);
616 case PERF_RECORD_READ: 731 case PERF_RECORD_READ:
617 return ops->read(event, self); 732 return ops->read(event, sample, session);
618 case PERF_RECORD_THROTTLE: 733 case PERF_RECORD_THROTTLE:
619 return ops->throttle(event, self); 734 return ops->throttle(event, sample, session);
620 case PERF_RECORD_UNTHROTTLE: 735 case PERF_RECORD_UNTHROTTLE:
621 return ops->unthrottle(event, self); 736 return ops->unthrottle(event, sample, session);
737 default:
738 ++session->hists.stats.nr_unknown_events;
739 return -1;
740 }
741}
742
743static int perf_session__preprocess_sample(struct perf_session *session,
744 event_t *event, struct sample_data *sample)
745{
746 if (event->header.type != PERF_RECORD_SAMPLE ||
747 !(session->sample_type & PERF_SAMPLE_CALLCHAIN))
748 return 0;
749
750 if (!ip_callchain__valid(sample->callchain, event)) {
751 pr_debug("call-chain problem with event, skipping it.\n");
752 ++session->hists.stats.nr_invalid_chains;
753 session->hists.stats.total_invalid_chains += sample->period;
754 return -EINVAL;
755 }
756 return 0;
757}
758
759static int perf_session__process_user_event(struct perf_session *session, event_t *event,
760 struct perf_event_ops *ops, u64 file_offset)
761{
762 dump_event(session, event, file_offset, NULL);
763
764 /* These events are processed right away */
765 switch (event->header.type) {
622 case PERF_RECORD_HEADER_ATTR: 766 case PERF_RECORD_HEADER_ATTR:
623 return ops->attr(event, self); 767 return ops->attr(event, session);
624 case PERF_RECORD_HEADER_EVENT_TYPE: 768 case PERF_RECORD_HEADER_EVENT_TYPE:
625 return ops->event_type(event, self); 769 return ops->event_type(event, session);
626 case PERF_RECORD_HEADER_TRACING_DATA: 770 case PERF_RECORD_HEADER_TRACING_DATA:
627 /* setup for reading amidst mmap */ 771 /* setup for reading amidst mmap */
628 lseek(self->fd, offset + head, SEEK_SET); 772 lseek(session->fd, file_offset, SEEK_SET);
629 return ops->tracing_data(event, self); 773 return ops->tracing_data(event, session);
630 case PERF_RECORD_HEADER_BUILD_ID: 774 case PERF_RECORD_HEADER_BUILD_ID:
631 return ops->build_id(event, self); 775 return ops->build_id(event, session);
632 case PERF_RECORD_FINISHED_ROUND: 776 case PERF_RECORD_FINISHED_ROUND:
633 return ops->finished_round(event, self, ops); 777 return ops->finished_round(event, session, ops);
634 default: 778 default:
635 ++self->hists.stats.nr_unknown_events; 779 return -EINVAL;
636 return -1; 780 }
781}
782
783static int perf_session__process_event(struct perf_session *session,
784 event_t *event,
785 struct perf_event_ops *ops,
786 u64 file_offset)
787{
788 struct sample_data sample;
789 int ret;
790
791 if (session->header.needs_swap && event__swap_ops[event->header.type])
792 event__swap_ops[event->header.type](event);
793
794 if (event->header.type >= PERF_RECORD_HEADER_MAX)
795 return -EINVAL;
796
797 hists__inc_nr_events(&session->hists, event->header.type);
798
799 if (event->header.type >= PERF_RECORD_USER_TYPE_START)
800 return perf_session__process_user_event(session, event, ops, file_offset);
801
802 /*
803 * For all kernel events we get the sample data
804 */
805 event__parse_sample(event, session, &sample);
806
807 /* Preprocess sample records - precheck callchains */
808 if (perf_session__preprocess_sample(session, event, &sample))
809 return 0;
810
811 if (ops->ordered_samples) {
812 ret = perf_session_queue_event(session, event, &sample,
813 file_offset);
814 if (ret != -ETIME)
815 return ret;
637 } 816 }
817
818 return perf_session_deliver_event(session, event, &sample, ops,
819 file_offset);
638} 820}
639 821
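
Note the tail of perf_session__process_event(): queueing is attempted first, and only -ETIME (no usable timestamp, i.e. time is 0 or ~0) falls through to immediate delivery; success and every other error return as-is. In miniature, under those assumptions:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

static int queue_event(uint64_t ts)
{
	if (!ts || ts == ~0ULL)
		return -ETIME;		/* cannot order this event */
	printf("queued @%llu\n", (unsigned long long)ts);
	return 0;
}

static int deliver_event(uint64_t ts)
{
	printf("delivered now (ts=%llu)\n", (unsigned long long)ts);
	return 0;
}

/* Mirror of the patch's tail: try to queue; fall back to direct
 * delivery only when ordering is impossible (-ETIME). */
static int process_event(uint64_t ts, int ordered)
{
	if (ordered) {
		int ret = queue_event(ts);
		if (ret != -ETIME)
			return ret;
	}
	return deliver_event(ts);
}

int main(void)
{
	process_event(12345, 1);	/* -> queued */
	process_event(0, 1);		/* -> delivered now */
	return 0;
}
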
640void perf_event_header__bswap(struct perf_event_header *self) 822void perf_event_header__bswap(struct perf_event_header *self)
@@ -724,8 +906,7 @@ more:
724 } 906 }
725 907
726 if (size == 0 || 908 if (size == 0 ||
727 (skip = perf_session__process_event(self, &event, ops, 909 (skip = perf_session__process_event(self, &event, ops, head)) < 0) {
728 0, head)) < 0) {
729 dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n", 910 dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
730 head, event.header.size, event.header.type); 911 head, event.header.size, event.header.type);
731 /* 912 /*
@@ -740,9 +921,6 @@ more:
740 921
741 head += size; 922 head += size;
742 923
743 dump_printf("\n%#Lx [%#x]: event: %d\n",
744 head, event.header.size, event.header.type);
745
746 if (skip > 0) 924 if (skip > 0)
747 head += skip; 925 head += skip;
748 926
@@ -751,82 +929,90 @@ more:
751done: 929done:
752 err = 0; 930 err = 0;
753out_err: 931out_err:
932 perf_session_free_sample_buffers(self);
754 return err; 933 return err;
755} 934}
756 935
757int __perf_session__process_events(struct perf_session *self, 936int __perf_session__process_events(struct perf_session *session,
758 u64 data_offset, u64 data_size, 937 u64 data_offset, u64 data_size,
759 u64 file_size, struct perf_event_ops *ops) 938 u64 file_size, struct perf_event_ops *ops)
760{ 939{
761 int err, mmap_prot, mmap_flags; 940 u64 head, page_offset, file_offset, file_pos, progress_next;
762 u64 head, shift; 941 int err, mmap_prot, mmap_flags, map_idx = 0;
763 u64 offset = 0; 942 struct ui_progress *progress;
764 size_t page_size; 943 size_t page_size, mmap_size;
944 char *buf, *mmaps[8];
765 event_t *event; 945 event_t *event;
766 uint32_t size; 946 uint32_t size;
767 char *buf;
768 struct ui_progress *progress = ui_progress__new("Processing events...",
769 self->size);
770 if (progress == NULL)
771 return -1;
772 947
773 perf_event_ops__fill_defaults(ops); 948 perf_event_ops__fill_defaults(ops);
774 949
775 page_size = sysconf(_SC_PAGESIZE); 950 page_size = sysconf(_SC_PAGESIZE);
776 951
777 head = data_offset; 952 page_offset = page_size * (data_offset / page_size);
778 shift = page_size * (head / page_size); 953 file_offset = page_offset;
779 offset += shift; 954 head = data_offset - page_offset;
780 head -= shift; 955
956 if (data_offset + data_size < file_size)
957 file_size = data_offset + data_size;
958
959 progress_next = file_size / 16;
960 progress = ui_progress__new("Processing events...", file_size);
961 if (progress == NULL)
962 return -1;
963
964 mmap_size = session->mmap_window;
965 if (mmap_size > file_size)
966 mmap_size = file_size;
967
968 memset(mmaps, 0, sizeof(mmaps));
781 969
782 mmap_prot = PROT_READ; 970 mmap_prot = PROT_READ;
783 mmap_flags = MAP_SHARED; 971 mmap_flags = MAP_SHARED;
784 972
785 if (self->header.needs_swap) { 973 if (session->header.needs_swap) {
786 mmap_prot |= PROT_WRITE; 974 mmap_prot |= PROT_WRITE;
787 mmap_flags = MAP_PRIVATE; 975 mmap_flags = MAP_PRIVATE;
788 } 976 }
789remap: 977remap:
790 buf = mmap(NULL, page_size * self->mmap_window, mmap_prot, 978 buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, session->fd,
791 mmap_flags, self->fd, offset); 979 file_offset);
792 if (buf == MAP_FAILED) { 980 if (buf == MAP_FAILED) {
793 pr_err("failed to mmap file\n"); 981 pr_err("failed to mmap file\n");
794 err = -errno; 982 err = -errno;
795 goto out_err; 983 goto out_err;
796 } 984 }
985 mmaps[map_idx] = buf;
986 map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
987 file_pos = file_offset + head;
797 988
798more: 989more:
799 event = (event_t *)(buf + head); 990 event = (event_t *)(buf + head);
800 ui_progress__update(progress, offset);
801 991
802 if (self->header.needs_swap) 992 if (session->header.needs_swap)
803 perf_event_header__bswap(&event->header); 993 perf_event_header__bswap(&event->header);
804 size = event->header.size; 994 size = event->header.size;
805 if (size == 0) 995 if (size == 0)
806 size = 8; 996 size = 8;
807 997
808 if (head + event->header.size >= page_size * self->mmap_window) { 998 if (head + event->header.size >= mmap_size) {
809 int munmap_ret; 999 if (mmaps[map_idx]) {
810 1000 munmap(mmaps[map_idx], mmap_size);
811 shift = page_size * (head / page_size); 1001 mmaps[map_idx] = NULL;
812 1002 }
813 munmap_ret = munmap(buf, page_size * self->mmap_window);
814 assert(munmap_ret == 0);
815 1003
816 offset += shift; 1004 page_offset = page_size * (head / page_size);
817 head -= shift; 1005 file_offset += page_offset;
1006 head -= page_offset;
818 goto remap; 1007 goto remap;
819 } 1008 }
820 1009
821 size = event->header.size; 1010 size = event->header.size;
822 1011
823 dump_printf("\n%#Lx [%#x]: event: %d\n",
824 offset + head, event->header.size, event->header.type);
825
826 if (size == 0 || 1012 if (size == 0 ||
827 perf_session__process_event(self, event, ops, offset, head) < 0) { 1013 perf_session__process_event(session, event, ops, file_pos) < 0) {
828 dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n", 1014 dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
829 offset + head, event->header.size, 1015 file_offset + head, event->header.size,
830 event->header.type); 1016 event->header.type);
831 /* 1017 /*
832 * assume we lost track of the stream, check alignment, and 1018 * assume we lost track of the stream, check alignment, and
@@ -839,19 +1025,49 @@ more:
839 } 1025 }
840 1026
841 head += size; 1027 head += size;
1028 file_pos += size;
842 1029
843 if (offset + head >= data_offset + data_size) 1030 if (file_pos >= progress_next) {
844 goto done; 1031 progress_next += file_size / 16;
1032 ui_progress__update(progress, file_pos);
1033 }
845 1034
846 if (offset + head < file_size) 1035 if (file_pos < file_size)
847 goto more; 1036 goto more;
848done: 1037
849 err = 0; 1038 err = 0;
850 /* do the final flush for ordered samples */ 1039 /* do the final flush for ordered samples */
851 self->ordered_samples.next_flush = ULLONG_MAX; 1040 session->ordered_samples.next_flush = ULLONG_MAX;
852 flush_sample_queue(self, ops); 1041 flush_sample_queue(session, ops);
853out_err: 1042out_err:
854 ui_progress__delete(progress); 1043 ui_progress__delete(progress);
1044
1045 if (ops->lost == event__process_lost &&
1046 session->hists.stats.total_lost != 0) {
1047 ui__warning("Processed %Lu events and LOST %Lu!\n\n"
1048 "Check IO/CPU overload!\n\n",
1049 session->hists.stats.total_period,
1050 session->hists.stats.total_lost);
1051 }
1052
1053 if (session->hists.stats.nr_unknown_events != 0) {
1054 ui__warning("Found %u unknown events!\n\n"
1055 "Is this an older tool processing a perf.data "
1056 "file generated by a more recent tool?\n\n"
1057 "If that is not the case, consider "
1058 "reporting to linux-kernel@vger.kernel.org.\n\n",
1059 session->hists.stats.nr_unknown_events);
1060 }
1061
1062 if (session->hists.stats.nr_invalid_chains != 0) {
1063 ui__warning("Found invalid callchains!\n\n"
1064 "%u out of %u events were discarded for this reason.\n\n"
1065 "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
1066 session->hists.stats.nr_invalid_chains,
1067 session->hists.stats.nr_events[PERF_RECORD_SAMPLE]);
1068 }
1069
1070 perf_session_free_sample_buffers(session);
855 return err; 1071 return err;
856} 1072}
857 1073
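
The reader now mmaps the file in mmap_window-sized slices and keeps the last eight mappings alive (map_idx wraps with a power-of-two mask) so an event straddling a window edge stays addressable while the next window is processed. The remap step moves the window only by whole pages, splitting head into a page-aligned advance and a residue. A standalone check of that invariant:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t page_size = 4096;
	uint64_t file_offset = 0;
	uint64_t head = 3 * 4096 + 123;	/* event offset inside the window */

	/* as in the patch: shift the window forward by whole pages */
	uint64_t page_offset = page_size * (head / page_size);
	file_offset += page_offset;
	head -= page_offset;

	/* mmap offsets must be page aligned; absolute position unchanged */
	assert(file_offset % page_size == 0);
	assert(file_offset + head == 3 * 4096 + 123);
	printf("file_offset=%llu head=%llu\n",
	       (unsigned long long)file_offset, (unsigned long long)head);
	return 0;
}
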
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 9fa0fc2a863f..ffe4b98db8f0 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -17,8 +17,12 @@ struct ordered_samples {
17 u64 last_flush; 17 u64 last_flush;
18 u64 next_flush; 18 u64 next_flush;
19 u64 max_timestamp; 19 u64 max_timestamp;
20 struct list_head samples_head; 20 struct list_head samples;
21 struct sample_queue *last_inserted; 21 struct list_head sample_cache;
22 struct list_head to_free;
23 struct sample_queue *sample_buffer;
24 struct sample_queue *last_sample;
25 int sample_buffer_idx;
22}; 26};
23 27
24struct perf_session { 28struct perf_session {
@@ -42,6 +46,8 @@ struct perf_session {
42 int fd; 46 int fd;
43 bool fd_pipe; 47 bool fd_pipe;
44 bool repipe; 48 bool repipe;
49 bool sample_id_all;
50 u16 id_hdr_size;
45 int cwdlen; 51 int cwdlen;
46 char *cwd; 52 char *cwd;
47 struct ordered_samples ordered_samples; 53 struct ordered_samples ordered_samples;
@@ -50,7 +56,9 @@ struct perf_session {
50 56
51struct perf_event_ops; 57struct perf_event_ops;
52 58
53typedef int (*event_op)(event_t *self, struct perf_session *session); 59typedef int (*event_op)(event_t *self, struct sample_data *sample,
60 struct perf_session *session);
61typedef int (*event_synth_op)(event_t *self, struct perf_session *session);
54typedef int (*event_op2)(event_t *self, struct perf_session *session, 62typedef int (*event_op2)(event_t *self, struct perf_session *session,
55 struct perf_event_ops *ops); 63 struct perf_event_ops *ops);
56 64
@@ -63,16 +71,19 @@ struct perf_event_ops {
63 lost, 71 lost,
64 read, 72 read,
65 throttle, 73 throttle,
66 unthrottle, 74 unthrottle;
67 attr, 75 event_synth_op attr,
68 event_type, 76 event_type,
69 tracing_data, 77 tracing_data,
70 build_id; 78 build_id;
71 event_op2 finished_round; 79 event_op2 finished_round;
72 bool ordered_samples; 80 bool ordered_samples;
81 bool ordering_requires_timestamps;
73}; 82};
74 83
75struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe); 84struct perf_session *perf_session__new(const char *filename, int mode,
85 bool force, bool repipe,
86 struct perf_event_ops *ops);
76void perf_session__delete(struct perf_session *self); 87void perf_session__delete(struct perf_session *self);
77 88
78void perf_event_header__bswap(struct perf_event_header *self); 89void perf_event_header__bswap(struct perf_event_header *self);
@@ -100,6 +111,8 @@ int perf_session__create_kernel_maps(struct perf_session *self);
100 111
101int do_read(int fd, void *buf, size_t size); 112int do_read(int fd, void *buf, size_t size);
102void perf_session__update_sample_type(struct perf_session *self); 113void perf_session__update_sample_type(struct perf_session *self);
114void perf_session__set_sample_id_all(struct perf_session *session, bool value);
115void perf_session__set_sample_type(struct perf_session *session, u64 type);
103void perf_session__remove_thread(struct perf_session *self, struct thread *th); 116void perf_session__remove_thread(struct perf_session *self, struct thread *th);
104 117
105static inline 118static inline
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index b62a553cc67d..f44fa541d56e 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -170,7 +170,7 @@ static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf,
170 return repsep_snprintf(bf, size, "%-*s", width, dso_name); 170 return repsep_snprintf(bf, size, "%-*s", width, dso_name);
171 } 171 }
172 172
173 return repsep_snprintf(bf, size, "%*Lx", width, self->ip); 173 return repsep_snprintf(bf, size, "%-*s", width, "[unknown]");
174} 174}
175 175
176/* --sort symbol */ 176/* --sort symbol */
@@ -196,7 +196,7 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
196 196
197 if (verbose) { 197 if (verbose) {
198 char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!'; 198 char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!';
199 ret += repsep_snprintf(bf, size, "%*Lx %c ", 199 ret += repsep_snprintf(bf, size, "%-#*llx %c ",
200 BITS_PER_LONG / 4, self->ip, o); 200 BITS_PER_LONG / 4, self->ip, o);
201 } 201 }
202 202
@@ -205,7 +205,7 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
205 ret += repsep_snprintf(bf + ret, size - ret, "%s", 205 ret += repsep_snprintf(bf + ret, size - ret, "%s",
206 self->ms.sym->name); 206 self->ms.sym->name);
207 else 207 else
208 ret += repsep_snprintf(bf + ret, size - ret, "%*Lx", 208 ret += repsep_snprintf(bf + ret, size - ret, "%-#*llx",
209 BITS_PER_LONG / 4, self->ip); 209 BITS_PER_LONG / 4, self->ip);
210 210
211 return ret; 211 return ret;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 439ab947daf4..15ccfba8cdf8 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -22,6 +22,10 @@
22#include <limits.h> 22#include <limits.h>
23#include <sys/utsname.h> 23#include <sys/utsname.h>
24 24
25#ifndef KSYM_NAME_LEN
26#define KSYM_NAME_LEN 128
27#endif
28
25#ifndef NT_GNU_BUILD_ID 29#ifndef NT_GNU_BUILD_ID
26#define NT_GNU_BUILD_ID 3 30#define NT_GNU_BUILD_ID 3
27#endif 31#endif
@@ -41,6 +45,7 @@ struct symbol_conf symbol_conf = {
41 .exclude_other = true, 45 .exclude_other = true,
42 .use_modules = true, 46 .use_modules = true,
43 .try_vmlinux_path = true, 47 .try_vmlinux_path = true,
48 .symfs = "",
44}; 49};
45 50
46int dso__name_len(const struct dso *self) 51int dso__name_len(const struct dso *self)
@@ -92,7 +97,7 @@ static void symbols__fixup_end(struct rb_root *self)
92 prev = curr; 97 prev = curr;
93 curr = rb_entry(nd, struct symbol, rb_node); 98 curr = rb_entry(nd, struct symbol, rb_node);
94 99
95 if (prev->end == prev->start) 100 if (prev->end == prev->start && prev->end != curr->start)
96 prev->end = curr->start - 1; 101 prev->end = curr->start - 1;
97 } 102 }
98 103
@@ -121,7 +126,7 @@ static void __map_groups__fixup_end(struct map_groups *self, enum map_type type)
121 * We still haven't the actual symbols, so guess the 126 * We still haven't the actual symbols, so guess the
122 * last map final address. 127 * last map final address.
123 */ 128 */
124 curr->end = ~0UL; 129 curr->end = ~0ULL;
125} 130}
126 131
127static void map_groups__fixup_end(struct map_groups *self) 132static void map_groups__fixup_end(struct map_groups *self)
@@ -425,16 +430,25 @@ size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp)
425 430
426int kallsyms__parse(const char *filename, void *arg, 431int kallsyms__parse(const char *filename, void *arg,
427 int (*process_symbol)(void *arg, const char *name, 432 int (*process_symbol)(void *arg, const char *name,
428 char type, u64 start)) 433 char type, u64 start, u64 end))
429{ 434{
430 char *line = NULL; 435 char *line = NULL;
431 size_t n; 436 size_t n;
432 int err = 0; 437 int err = -1;
438 u64 prev_start = 0;
439 char prev_symbol_type = 0;
440 char *prev_symbol_name;
433 FILE *file = fopen(filename, "r"); 441 FILE *file = fopen(filename, "r");
434 442
435 if (file == NULL) 443 if (file == NULL)
436 goto out_failure; 444 goto out_failure;
437 445
446 prev_symbol_name = malloc(KSYM_NAME_LEN);
447 if (prev_symbol_name == NULL)
448 goto out_close;
449
450 err = 0;
451
438 while (!feof(file)) { 452 while (!feof(file)) {
439 u64 start; 453 u64 start;
440 int line_len, len; 454 int line_len, len;
@@ -454,14 +468,33 @@ int kallsyms__parse(const char *filename, void *arg,
454 continue; 468 continue;
455 469
456 symbol_type = toupper(line[len]); 470 symbol_type = toupper(line[len]);
457 symbol_name = line + len + 2; 471 len += 2;
472 symbol_name = line + len;
473 len = line_len - len;
458 474
459 err = process_symbol(arg, symbol_name, symbol_type, start); 475 if (len >= KSYM_NAME_LEN) {
460 if (err) 476 err = -1;
461 break; 477 break;
478 }
479
480 if (prev_symbol_type) {
481 u64 end = start;
482 if (end != prev_start)
483 --end;
484 err = process_symbol(arg, prev_symbol_name,
485 prev_symbol_type, prev_start, end);
486 if (err)
487 break;
488 }
489
490 memcpy(prev_symbol_name, symbol_name, len + 1);
491 prev_symbol_type = symbol_type;
492 prev_start = start;
462 } 493 }
463 494
495 free(prev_symbol_name);
464 free(line); 496 free(line);
497out_close:
465 fclose(file); 498 fclose(file);
466 return err; 499 return err;
467 500
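
kallsyms__parse() now computes symbol extents while streaming: it holds one entry back and, when the next line arrives, emits the previous symbol with end = next_start - 1, except for zero-length aliases at the same address, which keep end == start. The same delay-by-one pattern over a sorted (address, name) table:

#include <stdint.h>
#include <stdio.h>

struct sym { uint64_t addr; const char *name; };

int main(void)
{
	/* sorted by address, as /proc/kallsyms is */
	struct sym tab[] = {
		{ 0x1000, "a" }, { 0x1000, "a_alias" }, { 0x1100, "b" },
	};
	size_t n = sizeof(tab) / sizeof(tab[0]);
	size_t i;

	/* emit entry i-1 once entry i gives us its upper bound */
	for (i = 1; i < n; i++) {
		uint64_t end = tab[i].addr;

		if (end != tab[i - 1].addr)	/* not an alias: stop 1 before */
			--end;
		printf("%s: [%#llx, %#llx]\n", tab[i - 1].name,
		       (unsigned long long)tab[i - 1].addr,
		       (unsigned long long)end);
	}
	/* -> a: [0x1000, 0x1000]   a_alias: [0x1000, 0x10ff] */
	return 0;
}
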
@@ -483,7 +516,7 @@ static u8 kallsyms2elf_type(char type)
483} 516}
484 517
485static int map__process_kallsym_symbol(void *arg, const char *name, 518static int map__process_kallsym_symbol(void *arg, const char *name,
486 char type, u64 start) 519 char type, u64 start, u64 end)
487{ 520{
488 struct symbol *sym; 521 struct symbol *sym;
489 struct process_kallsyms_args *a = arg; 522 struct process_kallsyms_args *a = arg;
@@ -492,11 +525,8 @@ static int map__process_kallsym_symbol(void *arg, const char *name,
492 if (!symbol_type__is_a(type, a->map->type)) 525 if (!symbol_type__is_a(type, a->map->type))
493 return 0; 526 return 0;
494 527
495 /* 528 sym = symbol__new(start, end - start + 1,
496 * Will fix up the end later, when we have all symbols sorted. 529 kallsyms2elf_type(type), name);
497 */
498 sym = symbol__new(start, 0, kallsyms2elf_type(type), name);
499
500 if (sym == NULL) 530 if (sym == NULL)
501 return -ENOMEM; 531 return -ENOMEM;
502 /* 532 /*
@@ -649,7 +679,6 @@ int dso__load_kallsyms(struct dso *self, const char *filename,
649 if (dso__load_all_kallsyms(self, filename, map) < 0) 679 if (dso__load_all_kallsyms(self, filename, map) < 0)
650 return -1; 680 return -1;
651 681
652 symbols__fixup_end(&self->symbols[map->type]);
653 if (self->kernel == DSO_TYPE_GUEST_KERNEL) 682 if (self->kernel == DSO_TYPE_GUEST_KERNEL)
654 self->origin = DSO__ORIG_GUEST_KERNEL; 683 self->origin = DSO__ORIG_GUEST_KERNEL;
655 else 684 else
@@ -839,8 +868,11 @@ static int dso__synthesize_plt_symbols(struct dso *self, struct map *map,
839 char sympltname[1024]; 868 char sympltname[1024];
840 Elf *elf; 869 Elf *elf;
841 int nr = 0, symidx, fd, err = 0; 870 int nr = 0, symidx, fd, err = 0;
871 char name[PATH_MAX];
842 872
843 fd = open(self->long_name, O_RDONLY); 873 snprintf(name, sizeof(name), "%s%s",
874 symbol_conf.symfs, self->long_name);
875 fd = open(name, O_RDONLY);
844 if (fd < 0) 876 if (fd < 0)
845 goto out; 877 goto out;
846 878
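
Every open() of a DSO now formats the name through the symfs prefix first, the
same "%s%s" composition as in the hunk above; the .symfs = "" default added at
the top of this patch makes the prefix a no-op in the common case. A tiny
sketch of the pattern (the helper name symfs__path and the library path are
ours, not perf's):

    #include <stdio.h>

    /* hypothetical helper: build "<symfs><path>" into buf and return it */
    static const char *symfs__path(char *buf, size_t sz,
                                   const char *symfs, const char *path)
    {
        snprintf(buf, sz, "%s%s", symfs, path);
        return buf;
    }

    int main(void)
    {
        char name[4096];

        /* with a symfs of "", this degenerates to the original path */
        puts(symfs__path(name, sizeof(name), "", "/lib/ld-2.12.so"));
        /* with --symfs, the same path rerooted under the target tree */
        puts(symfs__path(name, sizeof(name), "/mnt/target",
                         "/lib/ld-2.12.so"));
        return 0;
    }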
@@ -1452,16 +1484,19 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
1452 self->origin++) { 1484 self->origin++) {
1453 switch (self->origin) { 1485 switch (self->origin) {
1454 case DSO__ORIG_BUILD_ID_CACHE: 1486 case DSO__ORIG_BUILD_ID_CACHE:
1455 if (dso__build_id_filename(self, name, size) == NULL) 1487 /* skip the locally configured cache if a symfs is given */
1488 if (symbol_conf.symfs[0] ||
1489 (dso__build_id_filename(self, name, size) == NULL)) {
1456 continue; 1490 continue;
1491 }
1457 break; 1492 break;
1458 case DSO__ORIG_FEDORA: 1493 case DSO__ORIG_FEDORA:
1459 snprintf(name, size, "/usr/lib/debug%s.debug", 1494 snprintf(name, size, "%s/usr/lib/debug%s.debug",
1460 self->long_name); 1495 symbol_conf.symfs, self->long_name);
1461 break; 1496 break;
1462 case DSO__ORIG_UBUNTU: 1497 case DSO__ORIG_UBUNTU:
1463 snprintf(name, size, "/usr/lib/debug%s", 1498 snprintf(name, size, "%s/usr/lib/debug%s",
1464 self->long_name); 1499 symbol_conf.symfs, self->long_name);
1465 break; 1500 break;
1466 case DSO__ORIG_BUILDID: { 1501 case DSO__ORIG_BUILDID: {
1467 char build_id_hex[BUILD_ID_SIZE * 2 + 1]; 1502 char build_id_hex[BUILD_ID_SIZE * 2 + 1];
@@ -1473,19 +1508,26 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
1473 sizeof(self->build_id), 1508 sizeof(self->build_id),
1474 build_id_hex); 1509 build_id_hex);
1475 snprintf(name, size, 1510 snprintf(name, size,
1476 "/usr/lib/debug/.build-id/%.2s/%s.debug", 1511 "%s/usr/lib/debug/.build-id/%.2s/%s.debug",
1477 build_id_hex, build_id_hex + 2); 1512 symbol_conf.symfs, build_id_hex, build_id_hex + 2);
1478 } 1513 }
1479 break; 1514 break;
1480 case DSO__ORIG_DSO: 1515 case DSO__ORIG_DSO:
1481 snprintf(name, size, "%s", self->long_name); 1516 snprintf(name, size, "%s%s",
1517 symbol_conf.symfs, self->long_name);
1482 break; 1518 break;
1483 case DSO__ORIG_GUEST_KMODULE: 1519 case DSO__ORIG_GUEST_KMODULE:
1484 if (map->groups && map->groups->machine) 1520 if (map->groups && map->groups->machine)
1485 root_dir = map->groups->machine->root_dir; 1521 root_dir = map->groups->machine->root_dir;
1486 else 1522 else
1487 root_dir = ""; 1523 root_dir = "";
1488 snprintf(name, size, "%s%s", root_dir, self->long_name); 1524 snprintf(name, size, "%s%s%s", symbol_conf.symfs,
1525 root_dir, self->long_name);
1526 break;
1527
1528 case DSO__ORIG_KMODULE:
1529 snprintf(name, size, "%s%s", symbol_conf.symfs,
1530 self->long_name);
1489 break; 1531 break;
1490 1532
1491 default: 1533 default:
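
With the prefix applied to every origin, the probe order for debug info becomes
reproducible for a given DSO. A hedged sketch that prints the candidate names
the loop above would try, in order (the build-id bytes and paths are invented;
BUILD_ID_SIZE is 20 in perf, the size of an SHA-1):

    #include <stdio.h>

    #define BUILD_ID_SIZE 20

    int main(void)
    {
        const char *symfs = "/mnt/target";        /* assumed --symfs  */
        const char *long_name = "/lib/libfoo.so"; /* hypothetical DSO */
        unsigned char build_id[BUILD_ID_SIZE] = { 0xab, 0xcd }; /* fake */
        char hex[BUILD_ID_SIZE * 2 + 1];
        char name[4096];
        int i;

        for (i = 0; i < BUILD_ID_SIZE; i++)
            sprintf(hex + i * 2, "%02x", build_id[i]);

        /* DSO__ORIG_FEDORA */
        snprintf(name, sizeof(name), "%s/usr/lib/debug%s.debug",
                 symfs, long_name);
        puts(name);
        /* DSO__ORIG_UBUNTU */
        snprintf(name, sizeof(name), "%s/usr/lib/debug%s", symfs, long_name);
        puts(name);
        /* DSO__ORIG_BUILDID: first two hex digits pick the subdirectory */
        snprintf(name, sizeof(name),
                 "%s/usr/lib/debug/.build-id/%.2s/%s.debug",
                 symfs, hex, hex + 2);
        puts(name);
        /* DSO__ORIG_DSO and DSO__ORIG_KMODULE */
        snprintf(name, sizeof(name), "%s%s", symfs, long_name);
        puts(name);
        return 0;
    }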
@@ -1784,17 +1826,20 @@ int dso__load_vmlinux(struct dso *self, struct map *map,
1784 const char *vmlinux, symbol_filter_t filter) 1826 const char *vmlinux, symbol_filter_t filter)
1785{ 1827{
1786 int err = -1, fd; 1828 int err = -1, fd;
1829 char symfs_vmlinux[PATH_MAX];
1787 1830
1788 fd = open(vmlinux, O_RDONLY); 1831 snprintf(symfs_vmlinux, sizeof(symfs_vmlinux), "%s/%s",
1832 symbol_conf.symfs, vmlinux);
1833 fd = open(symfs_vmlinux, O_RDONLY);
1789 if (fd < 0) 1834 if (fd < 0)
1790 return -1; 1835 return -1;
1791 1836
1792 dso__set_loaded(self, map->type); 1837 dso__set_loaded(self, map->type);
1793 err = dso__load_sym(self, map, vmlinux, fd, filter, 0, 0); 1838 err = dso__load_sym(self, map, symfs_vmlinux, fd, filter, 0, 0);
1794 close(fd); 1839 close(fd);
1795 1840
1796 if (err > 0) 1841 if (err > 0)
1797 pr_debug("Using %s for symbols\n", vmlinux); 1842 pr_debug("Using %s for symbols\n", symfs_vmlinux);
1798 1843
1799 return err; 1844 return err;
1800} 1845}
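
One subtlety in the "%s/%s" format above: with the default empty symfs and an
absolute vmlinux name, the result carries a doubled slash ("//boot/vmlinux-...")
which Linux path resolution collapses, so open() still finds the file. Note
also the return convention: dso__load_sym returns the number of symbols it
added, so err > 0 is the success case for the debug message. A quick check of
the doubled-slash point (assuming a Linux host with /bin/sh):

    #include <stdio.h>
    #include <sys/stat.h>

    int main(void)
    {
        struct stat st;
        char path[64];

        /* "" + "/" + "/bin/sh" -> "//bin/sh" */
        snprintf(path, sizeof(path), "%s/%s", "", "/bin/sh");
        printf("%s -> stat %s\n", path,
               stat(path, &st) == 0 ? "ok" : "failed");
        return 0;
    }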
@@ -1836,8 +1881,8 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
1836 const char *kallsyms_filename = NULL; 1881 const char *kallsyms_filename = NULL;
1837 char *kallsyms_allocated_filename = NULL; 1882 char *kallsyms_allocated_filename = NULL;
1838 /* 1883 /*
1839 * Step 1: if the user specified a vmlinux filename, use it and only 1884 * Step 1: if the user specified a kallsyms or vmlinux filename, use
1840 * it, reporting errors to the user if it cannot be used. 1885 * it and only it, reporting errors to the user if it cannot be used.
1841 * 1886 *
1842 * For instance, try to analyse an ARM perf.data file _without_ a 1887 * For instance, try to analyse an ARM perf.data file _without_ a
1843 * build-id, or if the user specifies the wrong path to the right 1888 * build-id, or if the user specifies the wrong path to the right
@@ -1850,6 +1895,11 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
1850 * validation in dso__load_vmlinux and will bail out if they don't 1895 * validation in dso__load_vmlinux and will bail out if they don't
1851 * match. 1896 * match.
1852 */ 1897 */
1898 if (symbol_conf.kallsyms_name != NULL) {
1899 kallsyms_filename = symbol_conf.kallsyms_name;
1900 goto do_kallsyms;
1901 }
1902
1853 if (symbol_conf.vmlinux_name != NULL) { 1903 if (symbol_conf.vmlinux_name != NULL) {
1854 err = dso__load_vmlinux(self, map, 1904 err = dso__load_vmlinux(self, map,
1855 symbol_conf.vmlinux_name, filter); 1905 symbol_conf.vmlinux_name, filter);
@@ -1867,6 +1917,10 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
1867 goto out_fixup; 1917 goto out_fixup;
1868 } 1918 }
1869 1919
1920 /* do not try local files if a symfs was given */
1921 if (symbol_conf.symfs[0] != 0)
1922 return -1;
1923
1870 /* 1924 /*
1871 * Say the kernel DSO was created when processing the build-id header table, 1925 * Say the kernel DSO was created when processing the build-id header table,
1872 * we have a build-id, so check if it is the same as the running kernel, 1926 * we have a build-id, so check if it is the same as the running kernel,
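
Taken together, kernel symbol lookup now resolves in a fixed order: an explicit
kallsyms file, then an explicit vmlinux, and only when no symfs reroot is in
effect the build-id and running-kernel fallbacks further down. A simplified
sketch of that decision ladder (plain strings stand in for the real loaders and
for the symbol_conf fields):

    #include <stdio.h>

    static const char *kallsyms_name;  /* --kallsyms */
    static const char *vmlinux_name;   /* --vmlinux  */
    static const char *symfs = "";     /* --symfs    */

    static const char *kernel_sym_source(void)
    {
        if (kallsyms_name)
            return "user-supplied kallsyms, and only that";
        if (vmlinux_name)
            return "user-supplied vmlinux, and only that";
        if (symfs[0])
            return "stop: local files are off-limits under a symfs";
        return "build-id cache, vmlinux path, /proc/kallsyms fallbacks";
    }

    int main(void)
    {
        symfs = "/mnt/target";
        printf("%s\n", kernel_sym_source());
        return 0;
    }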
@@ -2136,7 +2190,7 @@ struct process_args {
2136}; 2190};
2137 2191
2138static int symbol__in_kernel(void *arg, const char *name, 2192static int symbol__in_kernel(void *arg, const char *name,
2139 char type __used, u64 start) 2193 char type __used, u64 start, u64 end __used)
2140{ 2194{
2141 struct process_args *args = arg; 2195 struct process_args *args = arg;
2142 2196
@@ -2257,9 +2311,6 @@ static int vmlinux_path__init(void)
2257 struct utsname uts; 2311 struct utsname uts;
2258 char bf[PATH_MAX]; 2312 char bf[PATH_MAX];
2259 2313
2260 if (uname(&uts) < 0)
2261 return -1;
2262
2263 vmlinux_path = malloc(sizeof(char *) * 5); 2314 vmlinux_path = malloc(sizeof(char *) * 5);
2264 if (vmlinux_path == NULL) 2315 if (vmlinux_path == NULL)
2265 return -1; 2316 return -1;
@@ -2272,6 +2323,14 @@ static int vmlinux_path__init(void)
2272 if (vmlinux_path[vmlinux_path__nr_entries] == NULL) 2323 if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
2273 goto out_fail; 2324 goto out_fail;
2274 ++vmlinux_path__nr_entries; 2325 ++vmlinux_path__nr_entries;
2326
2327 /* only try running kernel version if no symfs was given */
2328 if (symbol_conf.symfs[0] != 0)
2329 return 0;
2330
2331 if (uname(&uts) < 0)
2332 return -1;
2333
2275 snprintf(bf, sizeof(bf), "/boot/vmlinux-%s", uts.release); 2334 snprintf(bf, sizeof(bf), "/boot/vmlinux-%s", uts.release);
2276 vmlinux_path[vmlinux_path__nr_entries] = strdup(bf); 2335 vmlinux_path[vmlinux_path__nr_entries] = strdup(bf);
2277 if (vmlinux_path[vmlinux_path__nr_entries] == NULL) 2336 if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
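
Moving the uname() call below the new symfs check keeps uts.release-derived
names, built from the running kernel's version, out of a cross-target symfs
tree; only the fixed candidates added earlier in the function are probed there.
A standalone sketch of the gated tail (only the path shown in the hunk; error
handling trimmed):

    #include <stdio.h>
    #include <sys/utsname.h>

    int main(void)
    {
        struct utsname uts;
        char bf[512];
        const char *symfs = "";  /* assumed symbol_conf.symfs */

        /* only try running-kernel names when no symfs was given */
        if (symfs[0] != 0)
            return 0;

        if (uname(&uts) < 0)
            return 1;

        snprintf(bf, sizeof(bf), "/boot/vmlinux-%s", uts.release);
        puts(bf);  /* e.g. /boot/vmlinux-2.6.37 */
        return 0;
    }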
@@ -2331,6 +2390,8 @@ static int setup_list(struct strlist **list, const char *list_str,
2331 2390
2332int symbol__init(void) 2391int symbol__init(void)
2333{ 2392{
2393 const char *symfs;
2394
2334 if (symbol_conf.initialized) 2395 if (symbol_conf.initialized)
2335 return 0; 2396 return 0;
2336 2397
@@ -2359,6 +2420,18 @@ int symbol__init(void)
2359 symbol_conf.sym_list_str, "symbol") < 0) 2420 symbol_conf.sym_list_str, "symbol") < 0)
2360 goto out_free_comm_list; 2421 goto out_free_comm_list;
2361 2422
2423 /*
2424 * A path to symbols of "/" is identical to "";
2425 * reset it here for simplicity.
2426 */
2427 symfs = realpath(symbol_conf.symfs, NULL);
2428 if (symfs == NULL)
2429 symfs = symbol_conf.symfs;
2430 if (strcmp(symfs, "/") == 0)
2431 symbol_conf.symfs = "";
2432 if (symfs != symbol_conf.symfs)
2433 free((void *)symfs);
2434
2362 symbol_conf.initialized = true; 2435 symbol_conf.initialized = true;
2363 return 0; 2436 return 0;
2364 2437
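
realpath() with a NULL second argument (POSIX.1-2008, long supported by glibc)
returns a freshly allocated canonical path, which is how "--symfs /",
"--symfs //" or a symlink to the root all collapse to "/" and are then reset to
the cheap empty-string form. A quick standalone check (assuming a glibc-style
realpath):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int main(void)
    {
        const char *inputs[] = { "/", "//", "/./" };
        size_t i;

        for (i = 0; i < sizeof(inputs) / sizeof(inputs[0]); i++) {
            char *res = realpath(inputs[i], NULL); /* glibc allocates */

            if (res == NULL)
                continue;
            /* all of these canonicalize to "/", i.e. "no prefix" */
            printf("%-3s -> %s%s\n", inputs[i], res,
                   strcmp(res, "/") == 0 ? " (reset to \"\")" : "");
            free(res);
        }
        return 0;
    }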
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 6c6eafdb932d..670cd1c88f54 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -72,6 +72,7 @@ struct symbol_conf {
72 show_cpu_utilization, 72 show_cpu_utilization,
73 initialized; 73 initialized;
74 const char *vmlinux_name, 74 const char *vmlinux_name,
75 *kallsyms_name,
75 *source_prefix, 76 *source_prefix,
76 *field_sep; 77 *field_sep;
77 const char *default_guest_vmlinux_name, 78 const char *default_guest_vmlinux_name,
@@ -85,6 +86,7 @@ struct symbol_conf {
85 struct strlist *dso_list, 86 struct strlist *dso_list,
86 *comm_list, 87 *comm_list,
87 *sym_list; 88 *sym_list;
89 const char *symfs;
88}; 90};
89 91
90extern struct symbol_conf symbol_conf; 92extern struct symbol_conf symbol_conf;
@@ -215,7 +217,7 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits);
215int build_id__sprintf(const u8 *self, int len, char *bf); 217int build_id__sprintf(const u8 *self, int len, char *bf);
216int kallsyms__parse(const char *filename, void *arg, 218int kallsyms__parse(const char *filename, void *arg,
217 int (*process_symbol)(void *arg, const char *name, 219 int (*process_symbol)(void *arg, const char *name,
218 char type, u64 start)); 220 char type, u64 start, u64 end));
219 221
220void machine__destroy_kernel_maps(struct machine *self); 222void machine__destroy_kernel_maps(struct machine *self);
221int __machine__create_kernel_maps(struct machine *self, struct dso *kernel); 223int __machine__create_kernel_maps(struct machine *self, struct dso *kernel);
diff --git a/tools/perf/util/ui/util.c b/tools/perf/util/ui/util.c
index 056c69521a38..7b5a8926624e 100644
--- a/tools/perf/util/ui/util.c
+++ b/tools/perf/util/ui/util.c
@@ -104,10 +104,24 @@ out_destroy_form:
104 return rc; 104 return rc;
105} 105}
106 106
107static const char yes[] = "Yes", no[] = "No"; 107static const char yes[] = "Yes", no[] = "No",
108 warning_str[] = "Warning!", ok[] = "Ok";
108 109
109bool ui__dialog_yesno(const char *msg) 110bool ui__dialog_yesno(const char *msg)
110{ 111{
111 /* newtWinChoice should really be accepting const char pointers... */ 112 /* newtWinChoice should really be accepting const char pointers... */
112 return newtWinChoice(NULL, (char *)yes, (char *)no, (char *)msg) == 1; 113 return newtWinChoice(NULL, (char *)yes, (char *)no, (char *)msg) == 1;
113} 114}
115
116void ui__warning(const char *format, ...)
117{
118 va_list args;
119
120 va_start(args, format);
121 if (use_browser > 0)
122 newtWinMessagev((char *)warning_str, (char *)ok,
123 (char *)format, args);
124 else
125 vfprintf(stderr, format, args);
126 va_end(args);
127}
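
ui__warning() routes a single printf-style call site to whichever frontend is
active: a newt message box under the TUI, plain stderr otherwise, with the
va_list consumed exactly once by whichever branch runs. A minimal stderr-only
sketch of the same shape plus a caller (the name warn_user and the message are
ours; newt is left out so the sketch stays self-contained):

    #include <stdarg.h>
    #include <stdio.h>

    static int use_browser;  /* stand-in for perf's global */

    static void warn_user(const char *format, ...)
    {
        va_list args;

        va_start(args, format);
        if (use_browser > 0) {
            /* the real code hands 'args' to newtWinMessagev() here */
        } else {
            vfprintf(stderr, format, args); /* plain-terminal fallback */
        }
        va_end(args);
    }

    int main(void)
    {
        warn_user("cannot open %s: %s\n", "/tmp/perf.data",
                  "Permission denied");
        return 0;
    }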