aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--CREDITS2
-rw-r--r--Documentation/kernel-parameters.txt10
-rw-r--r--MAINTAINERS2
-rw-r--r--arch/alpha/include/asm/perf_event.h6
-rw-r--r--arch/alpha/kernel/irq_alpha.c2
-rw-r--r--arch/alpha/kernel/perf_event.c11
-rw-r--r--arch/arm/kernel/perf_event.c4
-rw-r--r--arch/mips/kernel/perf_event_mipsxx.c2
-rw-r--r--arch/powerpc/kernel/e500-pmu.c2
-rw-r--r--arch/powerpc/kernel/mpc7450-pmu.c2
-rw-r--r--arch/powerpc/kernel/perf_event.c2
-rw-r--r--arch/powerpc/kernel/perf_event_fsl_emb.c2
-rw-r--r--arch/powerpc/kernel/power4-pmu.c2
-rw-r--r--arch/powerpc/kernel/power5+-pmu.c2
-rw-r--r--arch/powerpc/kernel/power5-pmu.c2
-rw-r--r--arch/powerpc/kernel/power6-pmu.c2
-rw-r--r--arch/powerpc/kernel/power7-pmu.c2
-rw-r--r--arch/powerpc/kernel/ppc970-pmu.c2
-rw-r--r--arch/sh/kernel/cpu/sh4/perf_event.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/perf_event.c2
-rw-r--r--arch/sh/kernel/perf_event.c2
-rw-r--r--arch/sparc/include/asm/perf_event.h4
-rw-r--r--arch/sparc/kernel/nmi.c2
-rw-r--r--arch/sparc/kernel/perf_event.c9
-rw-r--r--arch/x86/include/asm/alternative.h7
-rw-r--r--arch/x86/include/asm/kdebug.h2
-rw-r--r--arch/x86/include/asm/nmi.h51
-rw-r--r--arch/x86/include/asm/perf_event.h2
-rw-r--r--arch/x86/include/asm/perf_event_p4.h63
-rw-r--r--arch/x86/include/asm/smpboot_hooks.h1
-rw-r--r--arch/x86/include/asm/stacktrace.h33
-rw-r--r--arch/x86/include/asm/timer.h6
-rw-r--r--arch/x86/kernel/alternative.c49
-rw-r--r--arch/x86/kernel/apic/Makefile5
-rw-r--r--arch/x86/kernel/apic/apic.c15
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c28
-rw-r--r--arch/x86/kernel/apic/io_apic.c46
-rw-r--r--arch/x86/kernel/apic/nmi.c567
-rw-r--r--arch/x86/kernel/cpu/common.c1
-rw-r--r--arch/x86/kernel/cpu/perf_event.c74
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c16
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c26
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c642
-rw-r--r--arch/x86/kernel/dumpstack.c12
-rw-r--r--arch/x86/kernel/dumpstack_32.c25
-rw-r--r--arch/x86/kernel/dumpstack_64.c24
-rw-r--r--arch/x86/kernel/kprobes.c113
-rw-r--r--arch/x86/kernel/process.c3
-rw-r--r--arch/x86/kernel/smpboot.c22
-rw-r--r--arch/x86/kernel/stacktrace.c8
-rw-r--r--arch/x86/kernel/time.c18
-rw-r--r--arch/x86/kernel/traps.c27
-rw-r--r--arch/x86/mm/kmemcheck/error.c2
-rw-r--r--arch/x86/oprofile/backtrace.c2
-rw-r--r--arch/x86/oprofile/nmi_timer_int.c3
-rw-r--r--drivers/acpi/acpica/nsinit.c2
-rw-r--r--drivers/watchdog/hpwdt.c7
-rw-r--r--include/linux/ftrace_event.h10
-rw-r--r--include/linux/kprobes.h4
-rw-r--r--include/linux/nmi.h4
-rw-r--r--include/linux/perf_event.h26
-rw-r--r--include/linux/sched.h4
-rw-r--r--include/linux/stacktrace.h4
-rw-r--r--include/linux/syscalls.h10
-rw-r--r--include/linux/tracepoint.h4
-rw-r--r--include/trace/events/syscalls.h4
-rw-r--r--include/trace/ftrace.h7
-rw-r--r--init/main.c5
-rw-r--r--kernel/hw_breakpoint.c2
-rw-r--r--kernel/kprobes.c565
-rw-r--r--kernel/perf_event.c573
-rw-r--r--kernel/sched.c2
-rw-r--r--kernel/sysctl.c16
-rw-r--r--kernel/sysctl_binary.c1
-rw-r--r--kernel/trace/trace_event_perf.c31
-rw-r--r--kernel/watchdog.c9
-rw-r--r--tools/perf/Documentation/perf-annotate.txt37
-rw-r--r--tools/perf/Documentation/perf-buildid-list.txt3
-rw-r--r--tools/perf/Documentation/perf-diff.txt19
-rw-r--r--tools/perf/Documentation/perf-kvm.txt8
-rw-r--r--tools/perf/Documentation/perf-lock.txt15
-rw-r--r--tools/perf/Documentation/perf-probe.txt2
-rw-r--r--tools/perf/Documentation/perf-record.txt22
-rw-r--r--tools/perf/Documentation/perf-report.txt52
-rw-r--r--tools/perf/Documentation/perf-sched.txt18
-rw-r--r--tools/perf/Documentation/perf-script-perl.txt (renamed from tools/perf/Documentation/perf-trace-perl.txt)28
-rw-r--r--tools/perf/Documentation/perf-script-python.txt (renamed from tools/perf/Documentation/perf-trace-python.txt)88
-rw-r--r--tools/perf/Documentation/perf-script.txt (renamed from tools/perf/Documentation/perf-trace.txt)61
-rw-r--r--tools/perf/Documentation/perf-stat.txt44
-rw-r--r--tools/perf/Documentation/perf-test.txt2
-rw-r--r--tools/perf/Documentation/perf-top.txt28
-rw-r--r--tools/perf/MANIFEST1
-rw-r--r--tools/perf/Makefile19
-rw-r--r--tools/perf/bench/mem-memcpy-arch.h12
-rw-r--r--tools/perf/bench/mem-memcpy-x86-64-asm-def.h4
-rw-r--r--tools/perf/bench/mem-memcpy-x86-64-asm.S2
-rw-r--r--tools/perf/bench/mem-memcpy.c219
-rw-r--r--tools/perf/builtin-annotate.c6
-rw-r--r--tools/perf/builtin-diff.c13
-rw-r--r--tools/perf/builtin-inject.c39
-rw-r--r--tools/perf/builtin-kmem.c24
-rw-r--r--tools/perf/builtin-lock.c21
-rw-r--r--tools/perf/builtin-record.c39
-rw-r--r--tools/perf/builtin-report.c17
-rw-r--r--tools/perf/builtin-sched.c30
-rw-r--r--tools/perf/builtin-script.c (renamed from tools/perf/builtin-trace.c)117
-rw-r--r--tools/perf/builtin-stat.c325
-rw-r--r--tools/perf/builtin-timechart.c43
-rw-r--r--tools/perf/builtin-top.c18
-rw-r--r--tools/perf/builtin.h2
-rw-r--r--tools/perf/command-list.txt2
-rw-r--r--tools/perf/feature-tests.mak4
-rw-r--r--tools/perf/perf.c2
-rw-r--r--tools/perf/scripts/python/Perf-Trace-Util/Context.c2
-rw-r--r--tools/perf/util/build-id.c7
-rw-r--r--tools/perf/util/debug.c42
-rw-r--r--tools/perf/util/debug.h2
-rw-r--r--tools/perf/util/event.c355
-rw-r--r--tools/perf/util/event.h30
-rw-r--r--tools/perf/util/header.c43
-rw-r--r--tools/perf/util/header.h2
-rw-r--r--tools/perf/util/hist.c9
-rw-r--r--tools/perf/util/hist.h2
-rw-r--r--tools/perf/util/include/asm/cpufeature.h9
-rw-r--r--tools/perf/util/include/asm/dwarf2.h11
-rw-r--r--tools/perf/util/include/linux/bitops.h5
-rw-r--r--tools/perf/util/include/linux/linkage.h13
-rw-r--r--tools/perf/util/parse-events.c12
-rw-r--r--tools/perf/util/parse-options.h4
-rw-r--r--tools/perf/util/probe-finder.h6
-rw-r--r--tools/perf/util/scripting-engines/trace-event-perl.c6
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c4
-rw-r--r--tools/perf/util/session.c553
-rw-r--r--tools/perf/util/session.h20
-rw-r--r--tools/perf/util/sort.c6
-rw-r--r--tools/perf/util/symbol.c11
-rw-r--r--tools/perf/util/symbol.h1
-rw-r--r--tools/perf/util/ui/util.c16
138 files changed, 3121 insertions, 2699 deletions
diff --git a/CREDITS b/CREDITS
index 41d8e63d5165..494b6e4746d7 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2365,8 +2365,6 @@ E: acme@redhat.com
2365W: http://oops.ghostprotocols.net:81/blog/ 2365W: http://oops.ghostprotocols.net:81/blog/
2366P: 1024D/9224DF01 D5DF E3BB E3C8 BCBB F8AD 841A B6AB 4681 9224 DF01 2366P: 1024D/9224DF01 D5DF E3BB E3C8 BCBB F8AD 841A B6AB 4681 9224 DF01
2367D: IPX, LLC, DCCP, cyc2x, wl3501_cs, net/ hacks 2367D: IPX, LLC, DCCP, cyc2x, wl3501_cs, net/ hacks
2368S: R. Brasílio Itiberê, 4270/1010 - Água Verde
2369S: 80240-060 - Curitiba - Paraná
2370S: Brazil 2368S: Brazil
2371 2369
2372N: Karsten Merker 2370N: Karsten Merker
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 8b61c9360999..316c723a950c 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1579,20 +1579,12 @@ and is between 256 and 4096 characters. It is defined in the file
1579 1579
1580 nmi_watchdog= [KNL,BUGS=X86] Debugging features for SMP kernels 1580 nmi_watchdog= [KNL,BUGS=X86] Debugging features for SMP kernels
1581 Format: [panic,][num] 1581 Format: [panic,][num]
1582 Valid num: 0,1,2 1582 Valid num: 0
1583 0 - turn nmi_watchdog off 1583 0 - turn nmi_watchdog off
1584 1 - use the IO-APIC timer for the NMI watchdog
1585 2 - use the local APIC for the NMI watchdog using
1586 a performance counter. Note: This will use one
1587 performance counter and the local APIC's performance
1588 vector.
1589 When panic is specified, panic when an NMI watchdog 1584 When panic is specified, panic when an NMI watchdog
1590 timeout occurs. 1585 timeout occurs.
1591 This is useful when you use a panic=... timeout and 1586 This is useful when you use a panic=... timeout and
1592 need the box quickly up again. 1587 need the box quickly up again.
1593 Instead of 1 and 2 it is possible to use the following
1594 symbolic names: lapic and ioapic
1595 Example: nmi_watchdog=2 or nmi_watchdog=panic,lapic
1596 1588
1597 netpoll.carrier_timeout= 1589 netpoll.carrier_timeout=
1598 [NET] Specifies amount of time (in seconds) that 1590 [NET] Specifies amount of time (in seconds) that
diff --git a/MAINTAINERS b/MAINTAINERS
index 6a588873cf8d..f1f803c6674a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4612,7 +4612,7 @@ PERFORMANCE EVENTS SUBSYSTEM
4612M: Peter Zijlstra <a.p.zijlstra@chello.nl> 4612M: Peter Zijlstra <a.p.zijlstra@chello.nl>
4613M: Paul Mackerras <paulus@samba.org> 4613M: Paul Mackerras <paulus@samba.org>
4614M: Ingo Molnar <mingo@elte.hu> 4614M: Ingo Molnar <mingo@elte.hu>
4615M: Arnaldo Carvalho de Melo <acme@redhat.com> 4615M: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
4616S: Supported 4616S: Supported
4617F: kernel/perf_event*.c 4617F: kernel/perf_event*.c
4618F: include/linux/perf_event.h 4618F: include/linux/perf_event.h
diff --git a/arch/alpha/include/asm/perf_event.h b/arch/alpha/include/asm/perf_event.h
index fe792ca818f6..5996e7a6757e 100644
--- a/arch/alpha/include/asm/perf_event.h
+++ b/arch/alpha/include/asm/perf_event.h
@@ -1,10 +1,4 @@
1#ifndef __ASM_ALPHA_PERF_EVENT_H 1#ifndef __ASM_ALPHA_PERF_EVENT_H
2#define __ASM_ALPHA_PERF_EVENT_H 2#define __ASM_ALPHA_PERF_EVENT_H
3 3
4#ifdef CONFIG_PERF_EVENTS
5extern void init_hw_perf_events(void);
6#else
7static inline void init_hw_perf_events(void) { }
8#endif
9
10#endif /* __ASM_ALPHA_PERF_EVENT_H */ 4#endif /* __ASM_ALPHA_PERF_EVENT_H */
diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c
index 5f77afb88e89..4c8bb374eb0a 100644
--- a/arch/alpha/kernel/irq_alpha.c
+++ b/arch/alpha/kernel/irq_alpha.c
@@ -112,8 +112,6 @@ init_IRQ(void)
112 wrent(entInt, 0); 112 wrent(entInt, 0);
113 113
114 alpha_mv.init_irq(); 114 alpha_mv.init_irq();
115
116 init_hw_perf_events();
117} 115}
118 116
119/* 117/*
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 1cc49683fb69..90561c45e7d8 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -14,6 +14,7 @@
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include <linux/kdebug.h> 15#include <linux/kdebug.h>
16#include <linux/mutex.h> 16#include <linux/mutex.h>
17#include <linux/init.h>
17 18
18#include <asm/hwrpb.h> 19#include <asm/hwrpb.h>
19#include <asm/atomic.h> 20#include <asm/atomic.h>
@@ -863,13 +864,13 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
863/* 864/*
864 * Init call to initialise performance events at kernel startup. 865 * Init call to initialise performance events at kernel startup.
865 */ 866 */
866void __init init_hw_perf_events(void) 867int __init init_hw_perf_events(void)
867{ 868{
868 pr_info("Performance events: "); 869 pr_info("Performance events: ");
869 870
870 if (!supported_cpu()) { 871 if (!supported_cpu()) {
871 pr_cont("No support for your CPU.\n"); 872 pr_cont("No support for your CPU.\n");
872 return; 873 return 0;
873 } 874 }
874 875
875 pr_cont("Supported CPU type!\n"); 876 pr_cont("Supported CPU type!\n");
@@ -881,6 +882,8 @@ void __init init_hw_perf_events(void)
881 /* And set up PMU specification */ 882 /* And set up PMU specification */
882 alpha_pmu = &ev67_pmu; 883 alpha_pmu = &ev67_pmu;
883 884
884 perf_pmu_register(&pmu); 885 perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
885}
886 886
887 return 0;
888}
889early_initcall(init_hw_perf_events);
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 07a50357492a..fdfa4976b0bf 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -3034,11 +3034,11 @@ init_hw_perf_events(void)
3034 pr_info("no hardware support available\n"); 3034 pr_info("no hardware support available\n");
3035 } 3035 }
3036 3036
3037 perf_pmu_register(&pmu); 3037 perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
3038 3038
3039 return 0; 3039 return 0;
3040} 3040}
3041arch_initcall(init_hw_perf_events); 3041early_initcall(init_hw_perf_events);
3042 3042
3043/* 3043/*
3044 * Callchain handling code. 3044 * Callchain handling code.
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 5c7c6fc07565..183e0d226669 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -1047,6 +1047,6 @@ init_hw_perf_events(void)
1047 1047
1048 return 0; 1048 return 0;
1049} 1049}
1050arch_initcall(init_hw_perf_events); 1050early_initcall(init_hw_perf_events);
1051 1051
1052#endif /* defined(CONFIG_CPU_MIPS32)... */ 1052#endif /* defined(CONFIG_CPU_MIPS32)... */
diff --git a/arch/powerpc/kernel/e500-pmu.c b/arch/powerpc/kernel/e500-pmu.c
index 7c07de0d8943..b150b510510f 100644
--- a/arch/powerpc/kernel/e500-pmu.c
+++ b/arch/powerpc/kernel/e500-pmu.c
@@ -126,4 +126,4 @@ static int init_e500_pmu(void)
126 return register_fsl_emb_pmu(&e500_pmu); 126 return register_fsl_emb_pmu(&e500_pmu);
127} 127}
128 128
129arch_initcall(init_e500_pmu); 129early_initcall(init_e500_pmu);
diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c
index 09d72028f317..2cc5e0301d0b 100644
--- a/arch/powerpc/kernel/mpc7450-pmu.c
+++ b/arch/powerpc/kernel/mpc7450-pmu.c
@@ -414,4 +414,4 @@ static int init_mpc7450_pmu(void)
414 return register_power_pmu(&mpc7450_pmu); 414 return register_power_pmu(&mpc7450_pmu);
415} 415}
416 416
417arch_initcall(init_mpc7450_pmu); 417early_initcall(init_mpc7450_pmu);
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 3129c855933c..567480705789 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -1379,7 +1379,7 @@ int register_power_pmu(struct power_pmu *pmu)
1379 freeze_events_kernel = MMCR0_FCHV; 1379 freeze_events_kernel = MMCR0_FCHV;
1380#endif /* CONFIG_PPC64 */ 1380#endif /* CONFIG_PPC64 */
1381 1381
1382 perf_pmu_register(&power_pmu); 1382 perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW);
1383 perf_cpu_notifier(power_pmu_notifier); 1383 perf_cpu_notifier(power_pmu_notifier);
1384 1384
1385 return 0; 1385 return 0;
diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c
index 7ecca59ddf77..4dcf5f831e9d 100644
--- a/arch/powerpc/kernel/perf_event_fsl_emb.c
+++ b/arch/powerpc/kernel/perf_event_fsl_emb.c
@@ -681,7 +681,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
681 pr_info("%s performance monitor hardware support registered\n", 681 pr_info("%s performance monitor hardware support registered\n",
682 pmu->name); 682 pmu->name);
683 683
684 perf_pmu_register(&fsl_emb_pmu); 684 perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW);
685 685
686 return 0; 686 return 0;
687} 687}
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
index 2a361cdda635..ead8b3c2649e 100644
--- a/arch/powerpc/kernel/power4-pmu.c
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -613,4 +613,4 @@ static int init_power4_pmu(void)
613 return register_power_pmu(&power4_pmu); 613 return register_power_pmu(&power4_pmu);
614} 614}
615 615
616arch_initcall(init_power4_pmu); 616early_initcall(init_power4_pmu);
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
index 199de527d411..eca0ac595cb6 100644
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -682,4 +682,4 @@ static int init_power5p_pmu(void)
682 return register_power_pmu(&power5p_pmu); 682 return register_power_pmu(&power5p_pmu);
683} 683}
684 684
685arch_initcall(init_power5p_pmu); 685early_initcall(init_power5p_pmu);
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
index 98b6a729a9dd..d5ff0f64a5e6 100644
--- a/arch/powerpc/kernel/power5-pmu.c
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -621,4 +621,4 @@ static int init_power5_pmu(void)
621 return register_power_pmu(&power5_pmu); 621 return register_power_pmu(&power5_pmu);
622} 622}
623 623
624arch_initcall(init_power5_pmu); 624early_initcall(init_power5_pmu);
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
index 84a607bda8fb..31603927e376 100644
--- a/arch/powerpc/kernel/power6-pmu.c
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -544,4 +544,4 @@ static int init_power6_pmu(void)
544 return register_power_pmu(&power6_pmu); 544 return register_power_pmu(&power6_pmu);
545} 545}
546 546
547arch_initcall(init_power6_pmu); 547early_initcall(init_power6_pmu);
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
index 852f7b7f6b40..593740fcb799 100644
--- a/arch/powerpc/kernel/power7-pmu.c
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -369,4 +369,4 @@ static int init_power7_pmu(void)
369 return register_power_pmu(&power7_pmu); 369 return register_power_pmu(&power7_pmu);
370} 370}
371 371
372arch_initcall(init_power7_pmu); 372early_initcall(init_power7_pmu);
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
index 3fee685de4df..9a6e093858fe 100644
--- a/arch/powerpc/kernel/ppc970-pmu.c
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -494,4 +494,4 @@ static int init_ppc970_pmu(void)
494 return register_power_pmu(&ppc970_pmu); 494 return register_power_pmu(&ppc970_pmu);
495} 495}
496 496
497arch_initcall(init_ppc970_pmu); 497early_initcall(init_ppc970_pmu);
diff --git a/arch/sh/kernel/cpu/sh4/perf_event.c b/arch/sh/kernel/cpu/sh4/perf_event.c
index dbf3b4bb71fe..748955df018d 100644
--- a/arch/sh/kernel/cpu/sh4/perf_event.c
+++ b/arch/sh/kernel/cpu/sh4/perf_event.c
@@ -250,4 +250,4 @@ static int __init sh7750_pmu_init(void)
250 250
251 return register_sh_pmu(&sh7750_pmu); 251 return register_sh_pmu(&sh7750_pmu);
252} 252}
253arch_initcall(sh7750_pmu_init); 253early_initcall(sh7750_pmu_init);
diff --git a/arch/sh/kernel/cpu/sh4a/perf_event.c b/arch/sh/kernel/cpu/sh4a/perf_event.c
index 580276525731..17e6bebfede0 100644
--- a/arch/sh/kernel/cpu/sh4a/perf_event.c
+++ b/arch/sh/kernel/cpu/sh4a/perf_event.c
@@ -284,4 +284,4 @@ static int __init sh4a_pmu_init(void)
284 284
285 return register_sh_pmu(&sh4a_pmu); 285 return register_sh_pmu(&sh4a_pmu);
286} 286}
287arch_initcall(sh4a_pmu_init); 287early_initcall(sh4a_pmu_init);
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 5a4b33435650..2ee21a47b5af 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -389,7 +389,7 @@ int __cpuinit register_sh_pmu(struct sh_pmu *_pmu)
389 389
390 WARN_ON(_pmu->num_events > MAX_HWEVENTS); 390 WARN_ON(_pmu->num_events > MAX_HWEVENTS);
391 391
392 perf_pmu_register(&pmu); 392 perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
393 perf_cpu_notifier(sh_pmu_notifier); 393 perf_cpu_notifier(sh_pmu_notifier);
394 return 0; 394 return 0;
395} 395}
diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h
index 6e8bfa1786da..4d3dbe3703e9 100644
--- a/arch/sparc/include/asm/perf_event.h
+++ b/arch/sparc/include/asm/perf_event.h
@@ -4,8 +4,6 @@
4#ifdef CONFIG_PERF_EVENTS 4#ifdef CONFIG_PERF_EVENTS
5#include <asm/ptrace.h> 5#include <asm/ptrace.h>
6 6
7extern void init_hw_perf_events(void);
8
9#define perf_arch_fetch_caller_regs(regs, ip) \ 7#define perf_arch_fetch_caller_regs(regs, ip) \
10do { \ 8do { \
11 unsigned long _pstate, _asi, _pil, _i7, _fp; \ 9 unsigned long _pstate, _asi, _pil, _i7, _fp; \
@@ -26,8 +24,6 @@ do { \
26 (regs)->u_regs[UREG_I6] = _fp; \ 24 (regs)->u_regs[UREG_I6] = _fp; \
27 (regs)->u_regs[UREG_I7] = _i7; \ 25 (regs)->u_regs[UREG_I7] = _i7; \
28} while (0) 26} while (0)
29#else
30static inline void init_hw_perf_events(void) { }
31#endif 27#endif
32 28
33#endif 29#endif
diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c
index a4bd7ba74c89..300f810142f5 100644
--- a/arch/sparc/kernel/nmi.c
+++ b/arch/sparc/kernel/nmi.c
@@ -270,8 +270,6 @@ int __init nmi_init(void)
270 atomic_set(&nmi_active, -1); 270 atomic_set(&nmi_active, -1);
271 } 271 }
272 } 272 }
273 if (!err)
274 init_hw_perf_events();
275 273
276 return err; 274 return err;
277} 275}
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 0d6deb55a2ae..760578687e7c 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1307,20 +1307,23 @@ static bool __init supported_pmu(void)
1307 return false; 1307 return false;
1308} 1308}
1309 1309
1310void __init init_hw_perf_events(void) 1310int __init init_hw_perf_events(void)
1311{ 1311{
1312 pr_info("Performance events: "); 1312 pr_info("Performance events: ");
1313 1313
1314 if (!supported_pmu()) { 1314 if (!supported_pmu()) {
1315 pr_cont("No support for PMU type '%s'\n", sparc_pmu_type); 1315 pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
1316 return; 1316 return 0;
1317 } 1317 }
1318 1318
1319 pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type); 1319 pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);
1320 1320
1321 perf_pmu_register(&pmu); 1321 perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
1322 register_die_notifier(&perf_event_nmi_notifier); 1322 register_die_notifier(&perf_event_nmi_notifier);
1323
1324 return 0;
1323} 1325}
1326early_initcall(init_hw_perf_events);
1324 1327
1325void perf_callchain_kernel(struct perf_callchain_entry *entry, 1328void perf_callchain_kernel(struct perf_callchain_entry *entry,
1326 struct pt_regs *regs) 1329 struct pt_regs *regs)
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 76561d20ea2f..4a2adaa9aefc 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -180,8 +180,15 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len);
180 * On the local CPU you need to be protected again NMI or MCE handlers seeing an 180 * On the local CPU you need to be protected again NMI or MCE handlers seeing an
181 * inconsistent instruction while you patch. 181 * inconsistent instruction while you patch.
182 */ 182 */
183struct text_poke_param {
184 void *addr;
185 const void *opcode;
186 size_t len;
187};
188
183extern void *text_poke(void *addr, const void *opcode, size_t len); 189extern void *text_poke(void *addr, const void *opcode, size_t len);
184extern void *text_poke_smp(void *addr, const void *opcode, size_t len); 190extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
191extern void text_poke_smp_batch(struct text_poke_param *params, int n);
185 192
186#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) 193#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
187#define IDEAL_NOP_SIZE_5 5 194#define IDEAL_NOP_SIZE_5 5
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 5bdfca86581b..f23eb2528464 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -28,7 +28,7 @@ extern void die(const char *, struct pt_regs *,long);
28extern int __must_check __die(const char *, struct pt_regs *, long); 28extern int __must_check __die(const char *, struct pt_regs *, long);
29extern void show_registers(struct pt_regs *regs); 29extern void show_registers(struct pt_regs *regs);
30extern void show_trace(struct task_struct *t, struct pt_regs *regs, 30extern void show_trace(struct task_struct *t, struct pt_regs *regs,
31 unsigned long *sp, unsigned long bp); 31 unsigned long *sp);
32extern void __show_regs(struct pt_regs *regs, int all); 32extern void __show_regs(struct pt_regs *regs, int all);
33extern void show_regs(struct pt_regs *regs); 33extern void show_regs(struct pt_regs *regs);
34extern unsigned long oops_begin(void); 34extern unsigned long oops_begin(void);
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 932f0f86b4b7..3545838cddeb 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -7,39 +7,13 @@
7 7
8#ifdef ARCH_HAS_NMI_WATCHDOG 8#ifdef ARCH_HAS_NMI_WATCHDOG
9 9
10/**
11 * do_nmi_callback
12 *
13 * Check to see if a callback exists and execute it. Return 1
14 * if the handler exists and was handled successfully.
15 */
16int do_nmi_callback(struct pt_regs *regs, int cpu);
17
18extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); 10extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
19extern int check_nmi_watchdog(void);
20#if !defined(CONFIG_LOCKUP_DETECTOR)
21extern int nmi_watchdog_enabled;
22#endif
23extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); 11extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
24extern int reserve_perfctr_nmi(unsigned int); 12extern int reserve_perfctr_nmi(unsigned int);
25extern void release_perfctr_nmi(unsigned int); 13extern void release_perfctr_nmi(unsigned int);
26extern int reserve_evntsel_nmi(unsigned int); 14extern int reserve_evntsel_nmi(unsigned int);
27extern void release_evntsel_nmi(unsigned int); 15extern void release_evntsel_nmi(unsigned int);
28 16
29extern void setup_apic_nmi_watchdog(void *);
30extern void stop_apic_nmi_watchdog(void *);
31extern void disable_timer_nmi_watchdog(void);
32extern void enable_timer_nmi_watchdog(void);
33extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason);
34extern void cpu_nmi_set_wd_enabled(void);
35
36extern atomic_t nmi_active;
37extern unsigned int nmi_watchdog;
38#define NMI_NONE 0
39#define NMI_IO_APIC 1
40#define NMI_LOCAL_APIC 2
41#define NMI_INVALID 3
42
43struct ctl_table; 17struct ctl_table;
44extern int proc_nmi_enabled(struct ctl_table *, int , 18extern int proc_nmi_enabled(struct ctl_table *, int ,
45 void __user *, size_t *, loff_t *); 19 void __user *, size_t *, loff_t *);
@@ -47,33 +21,8 @@ extern int unknown_nmi_panic;
47 21
48void arch_trigger_all_cpu_backtrace(void); 22void arch_trigger_all_cpu_backtrace(void);
49#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace 23#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
50
51static inline void localise_nmi_watchdog(void)
52{
53 if (nmi_watchdog == NMI_IO_APIC)
54 nmi_watchdog = NMI_LOCAL_APIC;
55}
56
57/* check if nmi_watchdog is active (ie was specified at boot) */
58static inline int nmi_watchdog_active(void)
59{
60 /*
61 * actually it should be:
62 * return (nmi_watchdog == NMI_LOCAL_APIC ||
63 * nmi_watchdog == NMI_IO_APIC)
64 * but since they are power of two we could use a
65 * cheaper way --cvg
66 */
67 return nmi_watchdog & (NMI_LOCAL_APIC | NMI_IO_APIC);
68}
69#endif 24#endif
70 25
71void lapic_watchdog_stop(void);
72int lapic_watchdog_init(unsigned nmi_hz);
73int lapic_wd_event(unsigned nmi_hz);
74unsigned lapic_adjust_nmi_hz(unsigned hz);
75void disable_lapic_nmi_watchdog(void);
76void enable_lapic_nmi_watchdog(void);
77void stop_nmi(void); 26void stop_nmi(void);
78void restart_nmi(void); 27void restart_nmi(void);
79 28
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 550e26b1dbb3..d9d4dae305f6 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -125,7 +125,6 @@ union cpuid10_edx {
125#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ 125#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
126 126
127#ifdef CONFIG_PERF_EVENTS 127#ifdef CONFIG_PERF_EVENTS
128extern void init_hw_perf_events(void);
129extern void perf_events_lapic_init(void); 128extern void perf_events_lapic_init(void);
130 129
131#define PERF_EVENT_INDEX_OFFSET 0 130#define PERF_EVENT_INDEX_OFFSET 0
@@ -156,7 +155,6 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
156} 155}
157 156
158#else 157#else
159static inline void init_hw_perf_events(void) { }
160static inline void perf_events_lapic_init(void) { } 158static inline void perf_events_lapic_init(void) { }
161#endif 159#endif
162 160
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index a70cd216be5d..295e2ff18a6a 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -744,14 +744,6 @@ enum P4_ESCR_EMASKS {
744}; 744};
745 745
746/* 746/*
747 * P4 PEBS specifics (Replay Event only)
748 *
749 * Format (bits):
750 * 0-6: metric from P4_PEBS_METRIC enum
751 * 7 : reserved
752 * 8 : reserved
753 * 9-11 : reserved
754 *
755 * Note we have UOP and PEBS bits reserved for now 747 * Note we have UOP and PEBS bits reserved for now
756 * just in case if we will need them once 748 * just in case if we will need them once
757 */ 749 */
@@ -788,5 +780,60 @@ enum P4_PEBS_METRIC {
788 P4_PEBS_METRIC__max 780 P4_PEBS_METRIC__max
789}; 781};
790 782
783/*
784 * Notes on internal configuration of ESCR+CCCR tuples
785 *
786 * Since P4 has quite the different architecture of
787 * performance registers in compare with "architectural"
788 * once and we have on 64 bits to keep configuration
789 * of performance event, the following trick is used.
790 *
791 * 1) Since both ESCR and CCCR registers have only low
792 * 32 bits valuable, we pack them into a single 64 bit
793 * configuration. Low 32 bits of such config correspond
794 * to low 32 bits of CCCR register and high 32 bits
795 * correspond to low 32 bits of ESCR register.
796 *
797 * 2) The meaning of every bit of such config field can
798 * be found in Intel SDM but it should be noted that
799 * we "borrow" some reserved bits for own usage and
800 * clean them or set to a proper value when we do
801 * a real write to hardware registers.
802 *
803 * 3) The format of bits of config is the following
804 * and should be either 0 or set to some predefined
805 * values:
806 *
807 * Low 32 bits
808 * -----------
809 * 0-6: P4_PEBS_METRIC enum
810 * 7-11: reserved
811 * 12: reserved (Enable)
812 * 13-15: reserved (ESCR select)
813 * 16-17: Active Thread
814 * 18: Compare
815 * 19: Complement
816 * 20-23: Threshold
817 * 24: Edge
818 * 25: reserved (FORCE_OVF)
819 * 26: reserved (OVF_PMI_T0)
820 * 27: reserved (OVF_PMI_T1)
821 * 28-29: reserved
822 * 30: reserved (Cascade)
823 * 31: reserved (OVF)
824 *
825 * High 32 bits
826 * ------------
827 * 0: reserved (T1_USR)
828 * 1: reserved (T1_OS)
829 * 2: reserved (T0_USR)
830 * 3: reserved (T0_OS)
831 * 4: Tag Enable
832 * 5-8: Tag Value
833 * 9-24: Event Mask (may use P4_ESCR_EMASK_BIT helper)
834 * 25-30: enum P4_EVENTS
835 * 31: reserved (HT thread)
836 */
837
791#endif /* PERF_EVENT_P4_H */ 838#endif /* PERF_EVENT_P4_H */
792 839
diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h
index 1def60114906..6c22bf353f26 100644
--- a/arch/x86/include/asm/smpboot_hooks.h
+++ b/arch/x86/include/asm/smpboot_hooks.h
@@ -48,7 +48,6 @@ static inline void __init smpboot_setup_io_apic(void)
48 setup_IO_APIC(); 48 setup_IO_APIC();
49 else { 49 else {
50 nr_ioapics = 0; 50 nr_ioapics = 0;
51 localise_nmi_watchdog();
52 } 51 }
53#endif 52#endif
54} 53}
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 2b16a2ad23dc..52b5c7ed3608 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -7,6 +7,7 @@
7#define _ASM_X86_STACKTRACE_H 7#define _ASM_X86_STACKTRACE_H
8 8
9#include <linux/uaccess.h> 9#include <linux/uaccess.h>
10#include <linux/ptrace.h>
10 11
11extern int kstack_depth_to_print; 12extern int kstack_depth_to_print;
12 13
@@ -46,7 +47,7 @@ struct stacktrace_ops {
46}; 47};
47 48
48void dump_trace(struct task_struct *tsk, struct pt_regs *regs, 49void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
49 unsigned long *stack, unsigned long bp, 50 unsigned long *stack,
50 const struct stacktrace_ops *ops, void *data); 51 const struct stacktrace_ops *ops, void *data);
51 52
52#ifdef CONFIG_X86_32 53#ifdef CONFIG_X86_32
@@ -57,13 +58,39 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
57#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) 58#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
58#endif 59#endif
59 60
61#ifdef CONFIG_FRAME_POINTER
62static inline unsigned long
63stack_frame(struct task_struct *task, struct pt_regs *regs)
64{
65 unsigned long bp;
66
67 if (regs)
68 return regs->bp;
69
70 if (task == current) {
71 /* Grab bp right from our regs */
72 get_bp(bp);
73 return bp;
74 }
75
76 /* bp is the last reg pushed by switch_to */
77 return *(unsigned long *)task->thread.sp;
78}
79#else
80static inline unsigned long
81stack_frame(struct task_struct *task, struct pt_regs *regs)
82{
83 return 0;
84}
85#endif
86
60extern void 87extern void
61show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, 88show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
62 unsigned long *stack, unsigned long bp, char *log_lvl); 89 unsigned long *stack, char *log_lvl);
63 90
64extern void 91extern void
65show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 92show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
66 unsigned long *sp, unsigned long bp, char *log_lvl); 93 unsigned long *sp, char *log_lvl);
67 94
68extern unsigned int code_bytes; 95extern unsigned int code_bytes;
69 96
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 5469630b27f5..fa7b9176b76c 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -10,12 +10,6 @@
10unsigned long long native_sched_clock(void); 10unsigned long long native_sched_clock(void);
11extern int recalibrate_cpu_khz(void); 11extern int recalibrate_cpu_khz(void);
12 12
13#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
14extern int timer_ack;
15#else
16# define timer_ack (0)
17#endif
18
19extern int no_timer_check; 13extern int no_timer_check;
20 14
21/* Accelerators for sched_clock() 15/* Accelerators for sched_clock()
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5079f24c955a..553d0b0d639b 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -591,17 +591,21 @@ static atomic_t stop_machine_first;
591static int wrote_text; 591static int wrote_text;
592 592
593struct text_poke_params { 593struct text_poke_params {
594 void *addr; 594 struct text_poke_param *params;
595 const void *opcode; 595 int nparams;
596 size_t len;
597}; 596};
598 597
599static int __kprobes stop_machine_text_poke(void *data) 598static int __kprobes stop_machine_text_poke(void *data)
600{ 599{
601 struct text_poke_params *tpp = data; 600 struct text_poke_params *tpp = data;
601 struct text_poke_param *p;
602 int i;
602 603
603 if (atomic_dec_and_test(&stop_machine_first)) { 604 if (atomic_dec_and_test(&stop_machine_first)) {
604 text_poke(tpp->addr, tpp->opcode, tpp->len); 605 for (i = 0; i < tpp->nparams; i++) {
606 p = &tpp->params[i];
607 text_poke(p->addr, p->opcode, p->len);
608 }
605 smp_wmb(); /* Make sure other cpus see that this has run */ 609 smp_wmb(); /* Make sure other cpus see that this has run */
606 wrote_text = 1; 610 wrote_text = 1;
607 } else { 611 } else {
@@ -610,8 +614,12 @@ static int __kprobes stop_machine_text_poke(void *data)
610 smp_mb(); /* Load wrote_text before following execution */ 614 smp_mb(); /* Load wrote_text before following execution */
611 } 615 }
612 616
613 flush_icache_range((unsigned long)tpp->addr, 617 for (i = 0; i < tpp->nparams; i++) {
614 (unsigned long)tpp->addr + tpp->len); 618 p = &tpp->params[i];
619 flush_icache_range((unsigned long)p->addr,
620 (unsigned long)p->addr + p->len);
621 }
622
615 return 0; 623 return 0;
616} 624}
617 625
@@ -631,10 +639,13 @@ static int __kprobes stop_machine_text_poke(void *data)
631void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) 639void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
632{ 640{
633 struct text_poke_params tpp; 641 struct text_poke_params tpp;
642 struct text_poke_param p;
634 643
635 tpp.addr = addr; 644 p.addr = addr;
636 tpp.opcode = opcode; 645 p.opcode = opcode;
637 tpp.len = len; 646 p.len = len;
647 tpp.params = &p;
648 tpp.nparams = 1;
638 atomic_set(&stop_machine_first, 1); 649 atomic_set(&stop_machine_first, 1);
639 wrote_text = 0; 650 wrote_text = 0;
640 /* Use __stop_machine() because the caller already got online_cpus. */ 651 /* Use __stop_machine() because the caller already got online_cpus. */
@@ -642,6 +653,26 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
642 return addr; 653 return addr;
643} 654}
644 655
656/**
657 * text_poke_smp_batch - Update instructions on a live kernel on SMP
658 * @params: an array of text_poke parameters
659 * @n: the number of elements in params.
660 *
661 * Modify multi-byte instruction by using stop_machine() on SMP. Since the
662 * stop_machine() is heavy task, it is better to aggregate text_poke requests
663 * and do it once if possible.
664 *
665 * Note: Must be called under get_online_cpus() and text_mutex.
666 */
667void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
668{
669 struct text_poke_params tpp = {.params = params, .nparams = n};
670
671 atomic_set(&stop_machine_first, 1);
672 wrote_text = 0;
673 stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
674}
675
645#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) 676#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
646 677
647#ifdef CONFIG_X86_64 678#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 910f20b457c4..3966b564ea47 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -3,10 +3,7 @@
3# 3#
4 4
5obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o 5obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o
6ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y) 6obj-y += hw_nmi.o
7obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
8endif
9obj-$(CONFIG_HARDLOCKUP_DETECTOR) += hw_nmi.o
10 7
11obj-$(CONFIG_X86_IO_APIC) += io_apic.o 8obj-$(CONFIG_X86_IO_APIC) += io_apic.o
12obj-$(CONFIG_SMP) += ipi.o 9obj-$(CONFIG_SMP) += ipi.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 78218135b48e..fb7657822aad 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -31,7 +31,6 @@
31#include <linux/init.h> 31#include <linux/init.h>
32#include <linux/cpu.h> 32#include <linux/cpu.h>
33#include <linux/dmi.h> 33#include <linux/dmi.h>
34#include <linux/nmi.h>
35#include <linux/smp.h> 34#include <linux/smp.h>
36#include <linux/mm.h> 35#include <linux/mm.h>
37 36
@@ -799,11 +798,7 @@ void __init setup_boot_APIC_clock(void)
799 * PIT/HPET going. Otherwise register lapic as a dummy 798 * PIT/HPET going. Otherwise register lapic as a dummy
800 * device. 799 * device.
801 */ 800 */
802 if (nmi_watchdog != NMI_IO_APIC) 801 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
803 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
804 else
805 pr_warning("APIC timer registered as dummy,"
806 " due to nmi_watchdog=%d!\n", nmi_watchdog);
807 802
808 /* Setup the lapic or request the broadcast */ 803 /* Setup the lapic or request the broadcast */
809 setup_APIC_timer(); 804 setup_APIC_timer();
@@ -1387,7 +1382,6 @@ void __cpuinit end_local_APIC_setup(void)
1387 } 1382 }
1388#endif 1383#endif
1389 1384
1390 setup_apic_nmi_watchdog(NULL);
1391 apic_pm_activate(); 1385 apic_pm_activate();
1392 1386
1393 /* 1387 /*
@@ -1758,17 +1752,10 @@ int __init APIC_init_uniprocessor(void)
1758 setup_IO_APIC(); 1752 setup_IO_APIC();
1759 else { 1753 else {
1760 nr_ioapics = 0; 1754 nr_ioapics = 0;
1761 localise_nmi_watchdog();
1762 } 1755 }
1763#else
1764 localise_nmi_watchdog();
1765#endif 1756#endif
1766 1757
1767 x86_init.timers.setup_percpu_clockev(); 1758 x86_init.timers.setup_percpu_clockev();
1768#ifdef CONFIG_X86_64
1769 check_nmi_watchdog();
1770#endif
1771
1772 return 0; 1759 return 0;
1773} 1760}
1774 1761
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 62f6e1e55b90..93da91df5b38 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -17,13 +17,24 @@
17#include <linux/nmi.h> 17#include <linux/nmi.h>
18#include <linux/module.h> 18#include <linux/module.h>
19 19
20#ifdef ARCH_HAS_NMI_WATCHDOG
21#ifdef CONFIG_HARDLOCKUP_DETECTOR
20u64 hw_nmi_get_sample_period(void) 22u64 hw_nmi_get_sample_period(void)
21{ 23{
22 return (u64)(cpu_khz) * 1000 * 60; 24 return (u64)(cpu_khz) * 1000 * 60;
23} 25}
26#endif
24 27
25#ifdef ARCH_HAS_NMI_WATCHDOG 28#ifndef CONFIG_HARDLOCKUP_DETECTOR
29void touch_nmi_watchdog(void)
30{
31 touch_softlockup_watchdog();
32}
33EXPORT_SYMBOL(touch_nmi_watchdog);
34#endif
35#endif
26 36
37#ifdef arch_trigger_all_cpu_backtrace
27/* For reliability, we're prepared to waste bits here. */ 38/* For reliability, we're prepared to waste bits here. */
28static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; 39static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
29 40
@@ -91,18 +102,3 @@ static int __init register_trigger_all_cpu_backtrace(void)
91} 102}
92early_initcall(register_trigger_all_cpu_backtrace); 103early_initcall(register_trigger_all_cpu_backtrace);
93#endif 104#endif
94
95/* STUB calls to mimic old nmi_watchdog behaviour */
96#if defined(CONFIG_X86_LOCAL_APIC)
97unsigned int nmi_watchdog = NMI_NONE;
98EXPORT_SYMBOL(nmi_watchdog);
99void acpi_nmi_enable(void) { return; }
100void acpi_nmi_disable(void) { return; }
101#endif
102atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
103EXPORT_SYMBOL(nmi_active);
104int unknown_nmi_panic;
105void cpu_nmi_set_wd_enabled(void) { return; }
106void stop_apic_nmi_watchdog(void *unused) { return; }
107void setup_apic_nmi_watchdog(void *unused) { return; }
108int __init check_nmi_watchdog(void) { return 0; }
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index fadcd743a74f..16c2db8750a2 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -54,7 +54,6 @@
54#include <asm/dma.h> 54#include <asm/dma.h>
55#include <asm/timer.h> 55#include <asm/timer.h>
56#include <asm/i8259.h> 56#include <asm/i8259.h>
57#include <asm/nmi.h>
58#include <asm/msidef.h> 57#include <asm/msidef.h>
59#include <asm/hypertransport.h> 58#include <asm/hypertransport.h>
60#include <asm/setup.h> 59#include <asm/setup.h>
@@ -2642,24 +2641,6 @@ static void lapic_register_intr(int irq)
2642 "edge"); 2641 "edge");
2643} 2642}
2644 2643
2645static void __init setup_nmi(void)
2646{
2647 /*
2648 * Dirty trick to enable the NMI watchdog ...
2649 * We put the 8259A master into AEOI mode and
2650 * unmask on all local APICs LVT0 as NMI.
2651 *
2652 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
2653 * is from Maciej W. Rozycki - so we do not have to EOI from
2654 * the NMI handler or the timer interrupt.
2655 */
2656 apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
2657
2658 enable_NMI_through_LVT0();
2659
2660 apic_printk(APIC_VERBOSE, " done.\n");
2661}
2662
2663/* 2644/*
2664 * This looks a bit hackish but it's about the only one way of sending 2645 * This looks a bit hackish but it's about the only one way of sending
2665 * a few INTA cycles to 8259As and any associated glue logic. ICR does 2646 * a few INTA cycles to 8259As and any associated glue logic. ICR does
@@ -2765,15 +2746,6 @@ static inline void __init check_timer(void)
2765 */ 2746 */
2766 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 2747 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2767 legacy_pic->init(1); 2748 legacy_pic->init(1);
2768#ifdef CONFIG_X86_32
2769 {
2770 unsigned int ver;
2771
2772 ver = apic_read(APIC_LVR);
2773 ver = GET_APIC_VERSION(ver);
2774 timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
2775 }
2776#endif
2777 2749
2778 pin1 = find_isa_irq_pin(0, mp_INT); 2750 pin1 = find_isa_irq_pin(0, mp_INT);
2779 apic1 = find_isa_irq_apic(0, mp_INT); 2751 apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2821,10 +2793,6 @@ static inline void __init check_timer(void)
2821 unmask_ioapic(cfg); 2793 unmask_ioapic(cfg);
2822 } 2794 }
2823 if (timer_irq_works()) { 2795 if (timer_irq_works()) {
2824 if (nmi_watchdog == NMI_IO_APIC) {
2825 setup_nmi();
2826 legacy_pic->unmask(0);
2827 }
2828 if (disable_timer_pin_1 > 0) 2796 if (disable_timer_pin_1 > 0)
2829 clear_IO_APIC_pin(0, pin1); 2797 clear_IO_APIC_pin(0, pin1);
2830 goto out; 2798 goto out;
@@ -2850,11 +2818,6 @@ static inline void __init check_timer(void)
2850 if (timer_irq_works()) { 2818 if (timer_irq_works()) {
2851 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); 2819 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
2852 timer_through_8259 = 1; 2820 timer_through_8259 = 1;
2853 if (nmi_watchdog == NMI_IO_APIC) {
2854 legacy_pic->mask(0);
2855 setup_nmi();
2856 legacy_pic->unmask(0);
2857 }
2858 goto out; 2821 goto out;
2859 } 2822 }
2860 /* 2823 /*
@@ -2866,15 +2829,6 @@ static inline void __init check_timer(void)
2866 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); 2829 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
2867 } 2830 }
2868 2831
2869 if (nmi_watchdog == NMI_IO_APIC) {
2870 apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
2871 "through the IO-APIC - disabling NMI Watchdog!\n");
2872 nmi_watchdog = NMI_NONE;
2873 }
2874#ifdef CONFIG_X86_32
2875 timer_ack = 0;
2876#endif
2877
2878 apic_printk(APIC_QUIET, KERN_INFO 2832 apic_printk(APIC_QUIET, KERN_INFO
2879 "...trying to set up timer as Virtual Wire IRQ...\n"); 2833 "...trying to set up timer as Virtual Wire IRQ...\n");
2880 2834
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
deleted file mode 100644
index c90041ccb742..000000000000
--- a/arch/x86/kernel/apic/nmi.c
+++ /dev/null
@@ -1,567 +0,0 @@
1/*
2 * NMI watchdog support on APIC systems
3 *
4 * Started by Ingo Molnar <mingo@redhat.com>
5 *
6 * Fixes:
7 * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
8 * Mikael Pettersson : Power Management for local APIC NMI watchdog.
9 * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
10 * Pavel Machek and
11 * Mikael Pettersson : PM converted to driver model. Disable/enable API.
12 */
13
14#include <asm/apic.h>
15
16#include <linux/nmi.h>
17#include <linux/mm.h>
18#include <linux/delay.h>
19#include <linux/interrupt.h>
20#include <linux/module.h>
21#include <linux/slab.h>
22#include <linux/sysdev.h>
23#include <linux/sysctl.h>
24#include <linux/percpu.h>
25#include <linux/kprobes.h>
26#include <linux/cpumask.h>
27#include <linux/kernel_stat.h>
28#include <linux/kdebug.h>
29#include <linux/smp.h>
30
31#include <asm/i8259.h>
32#include <asm/io_apic.h>
33#include <asm/proto.h>
34#include <asm/timer.h>
35
36#include <asm/mce.h>
37
38#include <asm/mach_traps.h>
39
40int unknown_nmi_panic;
41int nmi_watchdog_enabled;
42
43/* For reliability, we're prepared to waste bits here. */
44static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
45
46/* nmi_active:
47 * >0: the lapic NMI watchdog is active, but can be disabled
48 * <0: the lapic NMI watchdog has not been set up, and cannot
49 * be enabled
50 * 0: the lapic NMI watchdog is disabled, but can be enabled
51 */
52atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
53EXPORT_SYMBOL(nmi_active);
54
55unsigned int nmi_watchdog = NMI_NONE;
56EXPORT_SYMBOL(nmi_watchdog);
57
58static int panic_on_timeout;
59
60static unsigned int nmi_hz = HZ;
61static DEFINE_PER_CPU(short, wd_enabled);
62static int endflag __initdata;
63
64static inline unsigned int get_nmi_count(int cpu)
65{
66 return per_cpu(irq_stat, cpu).__nmi_count;
67}
68
69static inline int mce_in_progress(void)
70{
71#if defined(CONFIG_X86_MCE)
72 return atomic_read(&mce_entry) > 0;
73#endif
74 return 0;
75}
76
77/*
78 * Take the local apic timer and PIT/HPET into account. We don't
79 * know which one is active, when we have highres/dyntick on
80 */
81static inline unsigned int get_timer_irqs(int cpu)
82{
83 return per_cpu(irq_stat, cpu).apic_timer_irqs +
84 per_cpu(irq_stat, cpu).irq0_irqs;
85}
86
87#ifdef CONFIG_SMP
88/*
89 * The performance counters used by NMI_LOCAL_APIC don't trigger when
90 * the CPU is idle. To make sure the NMI watchdog really ticks on all
91 * CPUs during the test make them busy.
92 */
93static __init void nmi_cpu_busy(void *data)
94{
95 local_irq_enable_in_hardirq();
96 /*
97 * Intentionally don't use cpu_relax here. This is
98 * to make sure that the performance counter really ticks,
99 * even if there is a simulator or similar that catches the
100 * pause instruction. On a real HT machine this is fine because
101 * all other CPUs are busy with "useless" delay loops and don't
102 * care if they get somewhat less cycles.
103 */
104 while (endflag == 0)
105 mb();
106}
107#endif
108
109static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count)
110{
111 printk(KERN_CONT "\n");
112
113 printk(KERN_WARNING
114 "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
115 cpu, prev_nmi_count[cpu], get_nmi_count(cpu));
116
117 printk(KERN_WARNING
118 "Please report this to bugzilla.kernel.org,\n");
119 printk(KERN_WARNING
120 "and attach the output of the 'dmesg' command.\n");
121
122 per_cpu(wd_enabled, cpu) = 0;
123 atomic_dec(&nmi_active);
124}
125
126static void __acpi_nmi_disable(void *__unused)
127{
128 apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
129}
130
131int __init check_nmi_watchdog(void)
132{
133 unsigned int *prev_nmi_count;
134 int cpu;
135
136 if (!nmi_watchdog_active() || !atomic_read(&nmi_active))
137 return 0;
138
139 prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
140 if (!prev_nmi_count)
141 goto error;
142
143 printk(KERN_INFO "Testing NMI watchdog ... ");
144
145#ifdef CONFIG_SMP
146 if (nmi_watchdog == NMI_LOCAL_APIC)
147 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
148#endif
149
150 for_each_possible_cpu(cpu)
151 prev_nmi_count[cpu] = get_nmi_count(cpu);
152 local_irq_enable();
153 mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */
154
155 for_each_online_cpu(cpu) {
156 if (!per_cpu(wd_enabled, cpu))
157 continue;
158 if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
159 report_broken_nmi(cpu, prev_nmi_count);
160 }
161 endflag = 1;
162 if (!atomic_read(&nmi_active)) {
163 kfree(prev_nmi_count);
164 atomic_set(&nmi_active, -1);
165 goto error;
166 }
167 printk("OK.\n");
168
169 /*
170 * now that we know it works we can reduce NMI frequency to
171 * something more reasonable; makes a difference in some configs
172 */
173 if (nmi_watchdog == NMI_LOCAL_APIC)
174 nmi_hz = lapic_adjust_nmi_hz(1);
175
176 kfree(prev_nmi_count);
177 return 0;
178error:
179 if (nmi_watchdog == NMI_IO_APIC) {
180 if (!timer_through_8259)
181 legacy_pic->mask(0);
182 on_each_cpu(__acpi_nmi_disable, NULL, 1);
183 }
184
185#ifdef CONFIG_X86_32
186 timer_ack = 0;
187#endif
188 return -1;
189}
190
191static int __init setup_nmi_watchdog(char *str)
192{
193 unsigned int nmi;
194
195 if (!strncmp(str, "panic", 5)) {
196 panic_on_timeout = 1;
197 str = strchr(str, ',');
198 if (!str)
199 return 1;
200 ++str;
201 }
202
203 if (!strncmp(str, "lapic", 5))
204 nmi_watchdog = NMI_LOCAL_APIC;
205 else if (!strncmp(str, "ioapic", 6))
206 nmi_watchdog = NMI_IO_APIC;
207 else {
208 get_option(&str, &nmi);
209 if (nmi >= NMI_INVALID)
210 return 0;
211 nmi_watchdog = nmi;
212 }
213
214 return 1;
215}
216__setup("nmi_watchdog=", setup_nmi_watchdog);
217
218/*
219 * Suspend/resume support
220 */
221#ifdef CONFIG_PM
222
223static int nmi_pm_active; /* nmi_active before suspend */
224
225static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
226{
227 /* only CPU0 goes here, other CPUs should be offline */
228 nmi_pm_active = atomic_read(&nmi_active);
229 stop_apic_nmi_watchdog(NULL);
230 BUG_ON(atomic_read(&nmi_active) != 0);
231 return 0;
232}
233
234static int lapic_nmi_resume(struct sys_device *dev)
235{
236 /* only CPU0 goes here, other CPUs should be offline */
237 if (nmi_pm_active > 0) {
238 setup_apic_nmi_watchdog(NULL);
239 touch_nmi_watchdog();
240 }
241 return 0;
242}
243
244static struct sysdev_class nmi_sysclass = {
245 .name = "lapic_nmi",
246 .resume = lapic_nmi_resume,
247 .suspend = lapic_nmi_suspend,
248};
249
250static struct sys_device device_lapic_nmi = {
251 .id = 0,
252 .cls = &nmi_sysclass,
253};
254
255static int __init init_lapic_nmi_sysfs(void)
256{
257 int error;
258
259 /*
260 * should really be a BUG_ON but b/c this is an
261 * init call, it just doesn't work. -dcz
262 */
263 if (nmi_watchdog != NMI_LOCAL_APIC)
264 return 0;
265
266 if (atomic_read(&nmi_active) < 0)
267 return 0;
268
269 error = sysdev_class_register(&nmi_sysclass);
270 if (!error)
271 error = sysdev_register(&device_lapic_nmi);
272 return error;
273}
274
275/* must come after the local APIC's device_initcall() */
276late_initcall(init_lapic_nmi_sysfs);
277
278#endif /* CONFIG_PM */
279
280static void __acpi_nmi_enable(void *__unused)
281{
282 apic_write(APIC_LVT0, APIC_DM_NMI);
283}
284
285/*
286 * Enable timer based NMIs on all CPUs:
287 */
288void acpi_nmi_enable(void)
289{
290 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
291 on_each_cpu(__acpi_nmi_enable, NULL, 1);
292}
293
294/*
295 * Disable timer based NMIs on all CPUs:
296 */
297void acpi_nmi_disable(void)
298{
299 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
300 on_each_cpu(__acpi_nmi_disable, NULL, 1);
301}
302
303/*
304 * This function is called as soon the LAPIC NMI watchdog driver has everything
305 * in place and it's ready to check if the NMIs belong to the NMI watchdog
306 */
307void cpu_nmi_set_wd_enabled(void)
308{
309 __get_cpu_var(wd_enabled) = 1;
310}
311
312void setup_apic_nmi_watchdog(void *unused)
313{
314 if (__get_cpu_var(wd_enabled))
315 return;
316
317 /* cheap hack to support suspend/resume */
318 /* if cpu0 is not active neither should the other cpus */
319 if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0)
320 return;
321
322 switch (nmi_watchdog) {
323 case NMI_LOCAL_APIC:
324 if (lapic_watchdog_init(nmi_hz) < 0) {
325 __get_cpu_var(wd_enabled) = 0;
326 return;
327 }
328 /* FALL THROUGH */
329 case NMI_IO_APIC:
330 __get_cpu_var(wd_enabled) = 1;
331 atomic_inc(&nmi_active);
332 }
333}
334
335void stop_apic_nmi_watchdog(void *unused)
336{
337 /* only support LOCAL and IO APICs for now */
338 if (!nmi_watchdog_active())
339 return;
340 if (__get_cpu_var(wd_enabled) == 0)
341 return;
342 if (nmi_watchdog == NMI_LOCAL_APIC)
343 lapic_watchdog_stop();
344 else
345 __acpi_nmi_disable(NULL);
346 __get_cpu_var(wd_enabled) = 0;
347 atomic_dec(&nmi_active);
348}
349
350/*
351 * the best way to detect whether a CPU has a 'hard lockup' problem
352 * is to check it's local APIC timer IRQ counts. If they are not
353 * changing then that CPU has some problem.
354 *
355 * as these watchdog NMI IRQs are generated on every CPU, we only
356 * have to check the current processor.
357 *
358 * since NMIs don't listen to _any_ locks, we have to be extremely
359 * careful not to rely on unsafe variables. The printk might lock
360 * up though, so we have to break up any console locks first ...
361 * [when there will be more tty-related locks, break them up here too!]
362 */
363
364static DEFINE_PER_CPU(unsigned, last_irq_sum);
365static DEFINE_PER_CPU(long, alert_counter);
366static DEFINE_PER_CPU(int, nmi_touch);
367
368void touch_nmi_watchdog(void)
369{
370 if (nmi_watchdog_active()) {
371 unsigned cpu;
372
373 /*
374 * Tell other CPUs to reset their alert counters. We cannot
375 * do it ourselves because the alert count increase is not
376 * atomic.
377 */
378 for_each_present_cpu(cpu) {
379 if (per_cpu(nmi_touch, cpu) != 1)
380 per_cpu(nmi_touch, cpu) = 1;
381 }
382 }
383
384 /*
385 * Tickle the softlockup detector too:
386 */
387 touch_softlockup_watchdog();
388}
389EXPORT_SYMBOL(touch_nmi_watchdog);
390
391notrace __kprobes int
392nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
393{
394 /*
395 * Since current_thread_info()-> is always on the stack, and we
396 * always switch the stack NMI-atomically, it's safe to use
397 * smp_processor_id().
398 */
399 unsigned int sum;
400 int touched = 0;
401 int cpu = smp_processor_id();
402 int rc = 0;
403
404 sum = get_timer_irqs(cpu);
405
406 if (__get_cpu_var(nmi_touch)) {
407 __get_cpu_var(nmi_touch) = 0;
408 touched = 1;
409 }
410
411 /* We can be called before check_nmi_watchdog, hence NULL check. */
412 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
413 static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */
414
415 raw_spin_lock(&lock);
416 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
417 show_regs(regs);
418 dump_stack();
419 raw_spin_unlock(&lock);
420 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
421
422 rc = 1;
423 }
424
425 /* Could check oops_in_progress here too, but it's safer not to */
426 if (mce_in_progress())
427 touched = 1;
428
429 /* if the none of the timers isn't firing, this cpu isn't doing much */
430 if (!touched && __get_cpu_var(last_irq_sum) == sum) {
431 /*
432 * Ayiee, looks like this CPU is stuck ...
433 * wait a few IRQs (5 seconds) before doing the oops ...
434 */
435 __this_cpu_inc(alert_counter);
436 if (__this_cpu_read(alert_counter) == 5 * nmi_hz)
437 /*
438 * die_nmi will return ONLY if NOTIFY_STOP happens..
439 */
440 die_nmi("BUG: NMI Watchdog detected LOCKUP",
441 regs, panic_on_timeout);
442 } else {
443 __get_cpu_var(last_irq_sum) = sum;
444 __this_cpu_write(alert_counter, 0);
445 }
446
447 /* see if the nmi watchdog went off */
448 if (!__get_cpu_var(wd_enabled))
449 return rc;
450 switch (nmi_watchdog) {
451 case NMI_LOCAL_APIC:
452 rc |= lapic_wd_event(nmi_hz);
453 break;
454 case NMI_IO_APIC:
455 /*
456 * don't know how to accurately check for this.
457 * just assume it was a watchdog timer interrupt
458 * This matches the old behaviour.
459 */
460 rc = 1;
461 break;
462 }
463 return rc;
464}
465
466#ifdef CONFIG_SYSCTL
467
468static void enable_ioapic_nmi_watchdog_single(void *unused)
469{
470 __get_cpu_var(wd_enabled) = 1;
471 atomic_inc(&nmi_active);
472 __acpi_nmi_enable(NULL);
473}
474
475static void enable_ioapic_nmi_watchdog(void)
476{
477 on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1);
478 touch_nmi_watchdog();
479}
480
481static void disable_ioapic_nmi_watchdog(void)
482{
483 on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
484}
485
486static int __init setup_unknown_nmi_panic(char *str)
487{
488 unknown_nmi_panic = 1;
489 return 1;
490}
491__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
492
493static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
494{
495 unsigned char reason = get_nmi_reason();
496 char buf[64];
497
498 sprintf(buf, "NMI received for unknown reason %02x\n", reason);
499 die_nmi(buf, regs, 1); /* Always panic here */
500 return 0;
501}
502
503/*
504 * proc handler for /proc/sys/kernel/nmi
505 */
506int proc_nmi_enabled(struct ctl_table *table, int write,
507 void __user *buffer, size_t *length, loff_t *ppos)
508{
509 int old_state;
510
511 nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
512 old_state = nmi_watchdog_enabled;
513 proc_dointvec(table, write, buffer, length, ppos);
514 if (!!old_state == !!nmi_watchdog_enabled)
515 return 0;
516
517 if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) {
518 printk(KERN_WARNING
519 "NMI watchdog is permanently disabled\n");
520 return -EIO;
521 }
522
523 if (nmi_watchdog == NMI_LOCAL_APIC) {
524 if (nmi_watchdog_enabled)
525 enable_lapic_nmi_watchdog();
526 else
527 disable_lapic_nmi_watchdog();
528 } else if (nmi_watchdog == NMI_IO_APIC) {
529 if (nmi_watchdog_enabled)
530 enable_ioapic_nmi_watchdog();
531 else
532 disable_ioapic_nmi_watchdog();
533 } else {
534 printk(KERN_WARNING
535 "NMI watchdog doesn't know what hardware to touch\n");
536 return -EIO;
537 }
538 return 0;
539}
540
541#endif /* CONFIG_SYSCTL */
542
543int do_nmi_callback(struct pt_regs *regs, int cpu)
544{
545#ifdef CONFIG_SYSCTL
546 if (unknown_nmi_panic)
547 return unknown_nmi_panic_callback(regs, cpu);
548#endif
549 return 0;
550}
551
552void arch_trigger_all_cpu_backtrace(void)
553{
554 int i;
555
556 cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
557
558 printk(KERN_INFO "sending NMI to all CPUs:\n");
559 apic->send_IPI_all(NMI_VECTOR);
560
561 /* Wait for up to 10 seconds for all CPUs to do the backtrace */
562 for (i = 0; i < 10 * 1000; i++) {
563 if (cpumask_empty(to_cpumask(backtrace_mask)))
564 break;
565 mdelay(1);
566 }
567}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4b68bda30938..1d59834396bd 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -894,7 +894,6 @@ void __init identify_boot_cpu(void)
894#else 894#else
895 vgetcpu_set_mode(); 895 vgetcpu_set_mode();
896#endif 896#endif
897 init_hw_perf_events();
898} 897}
899 898
900void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) 899void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6d75b9145b13..0a360d146596 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -330,9 +330,6 @@ static bool reserve_pmc_hardware(void)
330{ 330{
331 int i; 331 int i;
332 332
333 if (nmi_watchdog == NMI_LOCAL_APIC)
334 disable_lapic_nmi_watchdog();
335
336 for (i = 0; i < x86_pmu.num_counters; i++) { 333 for (i = 0; i < x86_pmu.num_counters; i++) {
337 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) 334 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
338 goto perfctr_fail; 335 goto perfctr_fail;
@@ -355,9 +352,6 @@ perfctr_fail:
355 for (i--; i >= 0; i--) 352 for (i--; i >= 0; i--)
356 release_perfctr_nmi(x86_pmu.perfctr + i); 353 release_perfctr_nmi(x86_pmu.perfctr + i);
357 354
358 if (nmi_watchdog == NMI_LOCAL_APIC)
359 enable_lapic_nmi_watchdog();
360
361 return false; 355 return false;
362} 356}
363 357
@@ -369,9 +363,6 @@ static void release_pmc_hardware(void)
369 release_perfctr_nmi(x86_pmu.perfctr + i); 363 release_perfctr_nmi(x86_pmu.perfctr + i);
370 release_evntsel_nmi(x86_pmu.eventsel + i); 364 release_evntsel_nmi(x86_pmu.eventsel + i);
371 } 365 }
372
373 if (nmi_watchdog == NMI_LOCAL_APIC)
374 enable_lapic_nmi_watchdog();
375} 366}
376 367
377#else 368#else
@@ -384,15 +375,53 @@ static void release_pmc_hardware(void) {}
384static bool check_hw_exists(void) 375static bool check_hw_exists(void)
385{ 376{
386 u64 val, val_new = 0; 377 u64 val, val_new = 0;
387 int ret = 0; 378 int i, reg, ret = 0;
379
380 /*
381 * Check to see if the BIOS enabled any of the counters, if so
382 * complain and bail.
383 */
384 for (i = 0; i < x86_pmu.num_counters; i++) {
385 reg = x86_pmu.eventsel + i;
386 ret = rdmsrl_safe(reg, &val);
387 if (ret)
388 goto msr_fail;
389 if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
390 goto bios_fail;
391 }
388 392
393 if (x86_pmu.num_counters_fixed) {
394 reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
395 ret = rdmsrl_safe(reg, &val);
396 if (ret)
397 goto msr_fail;
398 for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
399 if (val & (0x03 << i*4))
400 goto bios_fail;
401 }
402 }
403
404 /*
405 * Now write a value and read it back to see if it matches,
406 * this is needed to detect certain hardware emulators (qemu/kvm)
407 * that don't trap on the MSR access and always return 0s.
408 */
389 val = 0xabcdUL; 409 val = 0xabcdUL;
390 ret |= checking_wrmsrl(x86_pmu.perfctr, val); 410 ret = checking_wrmsrl(x86_pmu.perfctr, val);
391 ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new); 411 ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
392 if (ret || val != val_new) 412 if (ret || val != val_new)
393 return false; 413 goto msr_fail;
394 414
395 return true; 415 return true;
416
417bios_fail:
418 printk(KERN_CONT "Broken BIOS detected, using software events only.\n");
419 printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val);
420 return false;
421
422msr_fail:
423 printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
424 return false;
396} 425}
397 426
398static void reserve_ds_buffers(void); 427static void reserve_ds_buffers(void);
@@ -451,7 +480,7 @@ static int x86_setup_perfctr(struct perf_event *event)
451 struct hw_perf_event *hwc = &event->hw; 480 struct hw_perf_event *hwc = &event->hw;
452 u64 config; 481 u64 config;
453 482
454 if (!hwc->sample_period) { 483 if (!is_sampling_event(event)) {
455 hwc->sample_period = x86_pmu.max_period; 484 hwc->sample_period = x86_pmu.max_period;
456 hwc->last_period = hwc->sample_period; 485 hwc->last_period = hwc->sample_period;
457 local64_set(&hwc->period_left, hwc->sample_period); 486 local64_set(&hwc->period_left, hwc->sample_period);
@@ -1362,7 +1391,7 @@ static void __init pmu_check_apic(void)
1362 pr_info("no hardware sampling interrupt available.\n"); 1391 pr_info("no hardware sampling interrupt available.\n");
1363} 1392}
1364 1393
1365void __init init_hw_perf_events(void) 1394int __init init_hw_perf_events(void)
1366{ 1395{
1367 struct event_constraint *c; 1396 struct event_constraint *c;
1368 int err; 1397 int err;
@@ -1377,20 +1406,18 @@ void __init init_hw_perf_events(void)
1377 err = amd_pmu_init(); 1406 err = amd_pmu_init();
1378 break; 1407 break;
1379 default: 1408 default:
1380 return; 1409 return 0;
1381 } 1410 }
1382 if (err != 0) { 1411 if (err != 0) {
1383 pr_cont("no PMU driver, software events only.\n"); 1412 pr_cont("no PMU driver, software events only.\n");
1384 return; 1413 return 0;
1385 } 1414 }
1386 1415
1387 pmu_check_apic(); 1416 pmu_check_apic();
1388 1417
1389 /* sanity check that the hardware exists or is emulated */ 1418 /* sanity check that the hardware exists or is emulated */
1390 if (!check_hw_exists()) { 1419 if (!check_hw_exists())
1391 pr_cont("Broken PMU hardware detected, software events only.\n"); 1420 return 0;
1392 return;
1393 }
1394 1421
1395 pr_cont("%s PMU driver.\n", x86_pmu.name); 1422 pr_cont("%s PMU driver.\n", x86_pmu.name);
1396 1423
@@ -1438,9 +1465,12 @@ void __init init_hw_perf_events(void)
1438 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); 1465 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
1439 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); 1466 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
1440 1467
1441 perf_pmu_register(&pmu); 1468 perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
1442 perf_cpu_notifier(x86_pmu_notifier); 1469 perf_cpu_notifier(x86_pmu_notifier);
1470
1471 return 0;
1443} 1472}
1473early_initcall(init_hw_perf_events);
1444 1474
1445static inline void x86_pmu_read(struct perf_event *event) 1475static inline void x86_pmu_read(struct perf_event *event)
1446{ 1476{
@@ -1686,7 +1716,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
1686 1716
1687 perf_callchain_store(entry, regs->ip); 1717 perf_callchain_store(entry, regs->ip);
1688 1718
1689 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); 1719 dump_trace(NULL, regs, NULL, &backtrace_ops, entry);
1690} 1720}
1691 1721
1692#ifdef CONFIG_COMPAT 1722#ifdef CONFIG_COMPAT
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index e421b8cd6944..67e2202a6039 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -1,7 +1,5 @@
1#ifdef CONFIG_CPU_SUP_AMD 1#ifdef CONFIG_CPU_SUP_AMD
2 2
3static DEFINE_RAW_SPINLOCK(amd_nb_lock);
4
5static __initconst const u64 amd_hw_cache_event_ids 3static __initconst const u64 amd_hw_cache_event_ids
6 [PERF_COUNT_HW_CACHE_MAX] 4 [PERF_COUNT_HW_CACHE_MAX]
7 [PERF_COUNT_HW_CACHE_OP_MAX] 5 [PERF_COUNT_HW_CACHE_OP_MAX]
@@ -275,7 +273,7 @@ done:
275 return &emptyconstraint; 273 return &emptyconstraint;
276} 274}
277 275
278static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) 276static struct amd_nb *amd_alloc_nb(int cpu)
279{ 277{
280 struct amd_nb *nb; 278 struct amd_nb *nb;
281 int i; 279 int i;
@@ -285,7 +283,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
285 if (!nb) 283 if (!nb)
286 return NULL; 284 return NULL;
287 285
288 nb->nb_id = nb_id; 286 nb->nb_id = -1;
289 287
290 /* 288 /*
291 * initialize all possible NB constraints 289 * initialize all possible NB constraints
@@ -306,7 +304,7 @@ static int amd_pmu_cpu_prepare(int cpu)
306 if (boot_cpu_data.x86_max_cores < 2) 304 if (boot_cpu_data.x86_max_cores < 2)
307 return NOTIFY_OK; 305 return NOTIFY_OK;
308 306
309 cpuc->amd_nb = amd_alloc_nb(cpu, -1); 307 cpuc->amd_nb = amd_alloc_nb(cpu);
310 if (!cpuc->amd_nb) 308 if (!cpuc->amd_nb)
311 return NOTIFY_BAD; 309 return NOTIFY_BAD;
312 310
@@ -325,8 +323,6 @@ static void amd_pmu_cpu_starting(int cpu)
325 nb_id = amd_get_nb_id(cpu); 323 nb_id = amd_get_nb_id(cpu);
326 WARN_ON_ONCE(nb_id == BAD_APICID); 324 WARN_ON_ONCE(nb_id == BAD_APICID);
327 325
328 raw_spin_lock(&amd_nb_lock);
329
330 for_each_online_cpu(i) { 326 for_each_online_cpu(i) {
331 nb = per_cpu(cpu_hw_events, i).amd_nb; 327 nb = per_cpu(cpu_hw_events, i).amd_nb;
332 if (WARN_ON_ONCE(!nb)) 328 if (WARN_ON_ONCE(!nb))
@@ -341,8 +337,6 @@ static void amd_pmu_cpu_starting(int cpu)
341 337
342 cpuc->amd_nb->nb_id = nb_id; 338 cpuc->amd_nb->nb_id = nb_id;
343 cpuc->amd_nb->refcnt++; 339 cpuc->amd_nb->refcnt++;
344
345 raw_spin_unlock(&amd_nb_lock);
346} 340}
347 341
348static void amd_pmu_cpu_dead(int cpu) 342static void amd_pmu_cpu_dead(int cpu)
@@ -354,8 +348,6 @@ static void amd_pmu_cpu_dead(int cpu)
354 348
355 cpuhw = &per_cpu(cpu_hw_events, cpu); 349 cpuhw = &per_cpu(cpu_hw_events, cpu);
356 350
357 raw_spin_lock(&amd_nb_lock);
358
359 if (cpuhw->amd_nb) { 351 if (cpuhw->amd_nb) {
360 struct amd_nb *nb = cpuhw->amd_nb; 352 struct amd_nb *nb = cpuhw->amd_nb;
361 353
@@ -364,8 +356,6 @@ static void amd_pmu_cpu_dead(int cpu)
364 356
365 cpuhw->amd_nb = NULL; 357 cpuhw->amd_nb = NULL;
366 } 358 }
367
368 raw_spin_unlock(&amd_nb_lock);
369} 359}
370 360
371static __initconst const struct x86_pmu amd_pmu = { 361static __initconst const struct x86_pmu amd_pmu = {
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index c8f5c088cad1..24e390e40f2e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -816,6 +816,32 @@ static int intel_pmu_hw_config(struct perf_event *event)
816 if (ret) 816 if (ret)
817 return ret; 817 return ret;
818 818
819 if (event->attr.precise_ip &&
820 (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
821 /*
822 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
823 * (0x003c) so that we can use it with PEBS.
824 *
825 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
826 * PEBS capable. However we can use INST_RETIRED.ANY_P
827 * (0x00c0), which is a PEBS capable event, to get the same
828 * count.
829 *
830 * INST_RETIRED.ANY_P counts the number of cycles that retires
831 * CNTMASK instructions. By setting CNTMASK to a value (16)
832 * larger than the maximum number of instructions that can be
833 * retired per cycle (4) and then inverting the condition, we
834 * count all cycles that retire 16 or less instructions, which
835 * is every cycle.
836 *
837 * Thereby we gain a PEBS capable cycle counter.
838 */
839 u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */
840
841 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
842 event->hw.config = alt_config;
843 }
844
819 if (event->attr.type != PERF_TYPE_RAW) 845 if (event->attr.type != PERF_TYPE_RAW)
820 return 0; 846 return 0;
821 847
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index d9f4ff8fcd69..14d45928c282 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -22,26 +22,6 @@
22#include <asm/apic.h> 22#include <asm/apic.h>
23#include <asm/perf_event.h> 23#include <asm/perf_event.h>
24 24
25struct nmi_watchdog_ctlblk {
26 unsigned int cccr_msr;
27 unsigned int perfctr_msr; /* the MSR to reset in NMI handler */
28 unsigned int evntsel_msr; /* the MSR to select the events to handle */
29};
30
31/* Interface defining a CPU specific perfctr watchdog */
32struct wd_ops {
33 int (*reserve)(void);
34 void (*unreserve)(void);
35 int (*setup)(unsigned nmi_hz);
36 void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
37 void (*stop)(void);
38 unsigned perfctr;
39 unsigned evntsel;
40 u64 checkbit;
41};
42
43static const struct wd_ops *wd_ops;
44
45/* 25/*
46 * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's 26 * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
47 * offset from MSR_P4_BSU_ESCR0. 27 * offset from MSR_P4_BSU_ESCR0.
@@ -60,8 +40,6 @@ static const struct wd_ops *wd_ops;
60static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS); 40static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
61static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS); 41static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);
62 42
63static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
64
65/* converts an msr to an appropriate reservation bit */ 43/* converts an msr to an appropriate reservation bit */
66static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) 44static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
67{ 45{
@@ -172,623 +150,3 @@ void release_evntsel_nmi(unsigned int msr)
172 clear_bit(counter, evntsel_nmi_owner); 150 clear_bit(counter, evntsel_nmi_owner);
173} 151}
174EXPORT_SYMBOL(release_evntsel_nmi); 152EXPORT_SYMBOL(release_evntsel_nmi);
175
176void disable_lapic_nmi_watchdog(void)
177{
178 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
179
180 if (atomic_read(&nmi_active) <= 0)
181 return;
182
183 on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
184
185 if (wd_ops)
186 wd_ops->unreserve();
187
188 BUG_ON(atomic_read(&nmi_active) != 0);
189}
190
191void enable_lapic_nmi_watchdog(void)
192{
193 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
194
195 /* are we already enabled */
196 if (atomic_read(&nmi_active) != 0)
197 return;
198
199 /* are we lapic aware */
200 if (!wd_ops)
201 return;
202 if (!wd_ops->reserve()) {
203 printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
204 return;
205 }
206
207 on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
208 touch_nmi_watchdog();
209}
210
211/*
212 * Activate the NMI watchdog via the local APIC.
213 */
214
215static unsigned int adjust_for_32bit_ctr(unsigned int hz)
216{
217 u64 counter_val;
218 unsigned int retval = hz;
219
220 /*
221 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
222 * are writable, with higher bits sign extending from bit 31.
223 * So, we can only program the counter with 31 bit values and
224 * 32nd bit should be 1, for 33.. to be 1.
225 * Find the appropriate nmi_hz
226 */
227 counter_val = (u64)cpu_khz * 1000;
228 do_div(counter_val, retval);
229 if (counter_val > 0x7fffffffULL) {
230 u64 count = (u64)cpu_khz * 1000;
231 do_div(count, 0x7fffffffUL);
232 retval = count + 1;
233 }
234 return retval;
235}
236
237static void write_watchdog_counter(unsigned int perfctr_msr,
238 const char *descr, unsigned nmi_hz)
239{
240 u64 count = (u64)cpu_khz * 1000;
241
242 do_div(count, nmi_hz);
243 if (descr)
244 pr_debug("setting %s to -0x%08Lx\n", descr, count);
245 wrmsrl(perfctr_msr, 0 - count);
246}
247
248static void write_watchdog_counter32(unsigned int perfctr_msr,
249 const char *descr, unsigned nmi_hz)
250{
251 u64 count = (u64)cpu_khz * 1000;
252
253 do_div(count, nmi_hz);
254 if (descr)
255 pr_debug("setting %s to -0x%08Lx\n", descr, count);
256 wrmsr(perfctr_msr, (u32)(-count), 0);
257}
258
259/*
260 * AMD K7/K8/Family10h/Family11h support.
261 * AMD keeps this interface nicely stable so there is not much variety
262 */
263#define K7_EVNTSEL_ENABLE (1 << 22)
264#define K7_EVNTSEL_INT (1 << 20)
265#define K7_EVNTSEL_OS (1 << 17)
266#define K7_EVNTSEL_USR (1 << 16)
267#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
268#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
269
270static int setup_k7_watchdog(unsigned nmi_hz)
271{
272 unsigned int perfctr_msr, evntsel_msr;
273 unsigned int evntsel;
274 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
275
276 perfctr_msr = wd_ops->perfctr;
277 evntsel_msr = wd_ops->evntsel;
278
279 wrmsrl(perfctr_msr, 0UL);
280
281 evntsel = K7_EVNTSEL_INT
282 | K7_EVNTSEL_OS
283 | K7_EVNTSEL_USR
284 | K7_NMI_EVENT;
285
286 /* setup the timer */
287 wrmsr(evntsel_msr, evntsel, 0);
288 write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);
289
290 /* initialize the wd struct before enabling */
291 wd->perfctr_msr = perfctr_msr;
292 wd->evntsel_msr = evntsel_msr;
293 wd->cccr_msr = 0; /* unused */
294
295 /* ok, everything is initialized, announce that we're set */
296 cpu_nmi_set_wd_enabled();
297
298 apic_write(APIC_LVTPC, APIC_DM_NMI);
299 evntsel |= K7_EVNTSEL_ENABLE;
300 wrmsr(evntsel_msr, evntsel, 0);
301
302 return 1;
303}
304
305static void single_msr_stop_watchdog(void)
306{
307 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
308
309 wrmsr(wd->evntsel_msr, 0, 0);
310}
311
312static int single_msr_reserve(void)
313{
314 if (!reserve_perfctr_nmi(wd_ops->perfctr))
315 return 0;
316
317 if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
318 release_perfctr_nmi(wd_ops->perfctr);
319 return 0;
320 }
321 return 1;
322}
323
324static void single_msr_unreserve(void)
325{
326 release_evntsel_nmi(wd_ops->evntsel);
327 release_perfctr_nmi(wd_ops->perfctr);
328}
329
330static void __kprobes
331single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
332{
333 /* start the cycle over again */
334 write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
335}
336
337static const struct wd_ops k7_wd_ops = {
338 .reserve = single_msr_reserve,
339 .unreserve = single_msr_unreserve,
340 .setup = setup_k7_watchdog,
341 .rearm = single_msr_rearm,
342 .stop = single_msr_stop_watchdog,
343 .perfctr = MSR_K7_PERFCTR0,
344 .evntsel = MSR_K7_EVNTSEL0,
345 .checkbit = 1ULL << 47,
346};
347
348/*
349 * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
350 */
351#define P6_EVNTSEL0_ENABLE (1 << 22)
352#define P6_EVNTSEL_INT (1 << 20)
353#define P6_EVNTSEL_OS (1 << 17)
354#define P6_EVNTSEL_USR (1 << 16)
355#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
356#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
357
358static int setup_p6_watchdog(unsigned nmi_hz)
359{
360 unsigned int perfctr_msr, evntsel_msr;
361 unsigned int evntsel;
362 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
363
364 perfctr_msr = wd_ops->perfctr;
365 evntsel_msr = wd_ops->evntsel;
366
367 /* KVM doesn't implement this MSR */
368 if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
369 return 0;
370
371 evntsel = P6_EVNTSEL_INT
372 | P6_EVNTSEL_OS
373 | P6_EVNTSEL_USR
374 | P6_NMI_EVENT;
375
376 /* setup the timer */
377 wrmsr(evntsel_msr, evntsel, 0);
378 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
379 write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);
380
381 /* initialize the wd struct before enabling */
382 wd->perfctr_msr = perfctr_msr;
383 wd->evntsel_msr = evntsel_msr;
384 wd->cccr_msr = 0; /* unused */
385
386 /* ok, everything is initialized, announce that we're set */
387 cpu_nmi_set_wd_enabled();
388
389 apic_write(APIC_LVTPC, APIC_DM_NMI);
390 evntsel |= P6_EVNTSEL0_ENABLE;
391 wrmsr(evntsel_msr, evntsel, 0);
392
393 return 1;
394}
395
396static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
397{
398 /*
399 * P6 based Pentium M need to re-unmask
400 * the apic vector but it doesn't hurt
401 * other P6 variant.
402 * ArchPerfom/Core Duo also needs this
403 */
404 apic_write(APIC_LVTPC, APIC_DM_NMI);
405
406 /* P6/ARCH_PERFMON has 32 bit counter write */
407 write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
408}
409
410static const struct wd_ops p6_wd_ops = {
411 .reserve = single_msr_reserve,
412 .unreserve = single_msr_unreserve,
413 .setup = setup_p6_watchdog,
414 .rearm = p6_rearm,
415 .stop = single_msr_stop_watchdog,
416 .perfctr = MSR_P6_PERFCTR0,
417 .evntsel = MSR_P6_EVNTSEL0,
418 .checkbit = 1ULL << 39,
419};
420
421/*
422 * Intel P4 performance counters.
423 * By far the most complicated of all.
424 */
425#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1 << 7)
426#define P4_ESCR_EVENT_SELECT(N) ((N) << 25)
427#define P4_ESCR_OS (1 << 3)
428#define P4_ESCR_USR (1 << 2)
429#define P4_CCCR_OVF_PMI0 (1 << 26)
430#define P4_CCCR_OVF_PMI1 (1 << 27)
431#define P4_CCCR_THRESHOLD(N) ((N) << 20)
432#define P4_CCCR_COMPLEMENT (1 << 19)
433#define P4_CCCR_COMPARE (1 << 18)
434#define P4_CCCR_REQUIRED (3 << 16)
435#define P4_CCCR_ESCR_SELECT(N) ((N) << 13)
436#define P4_CCCR_ENABLE (1 << 12)
437#define P4_CCCR_OVF (1 << 31)
438
439#define P4_CONTROLS 18
440static unsigned int p4_controls[18] = {
441 MSR_P4_BPU_CCCR0,
442 MSR_P4_BPU_CCCR1,
443 MSR_P4_BPU_CCCR2,
444 MSR_P4_BPU_CCCR3,
445 MSR_P4_MS_CCCR0,
446 MSR_P4_MS_CCCR1,
447 MSR_P4_MS_CCCR2,
448 MSR_P4_MS_CCCR3,
449 MSR_P4_FLAME_CCCR0,
450 MSR_P4_FLAME_CCCR1,
451 MSR_P4_FLAME_CCCR2,
452 MSR_P4_FLAME_CCCR3,
453 MSR_P4_IQ_CCCR0,
454 MSR_P4_IQ_CCCR1,
455 MSR_P4_IQ_CCCR2,
456 MSR_P4_IQ_CCCR3,
457 MSR_P4_IQ_CCCR4,
458 MSR_P4_IQ_CCCR5,
459};
460/*
461 * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
462 * CRU_ESCR0 (with any non-null event selector) through a complemented
463 * max threshold. [IA32-Vol3, Section 14.9.9]
464 */
465static int setup_p4_watchdog(unsigned nmi_hz)
466{
467 unsigned int perfctr_msr, evntsel_msr, cccr_msr;
468 unsigned int evntsel, cccr_val;
469 unsigned int misc_enable, dummy;
470 unsigned int ht_num;
471 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
472
473 rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
474 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
475 return 0;
476
477#ifdef CONFIG_SMP
478 /* detect which hyperthread we are on */
479 if (smp_num_siblings == 2) {
480 unsigned int ebx, apicid;
481
482 ebx = cpuid_ebx(1);
483 apicid = (ebx >> 24) & 0xff;
484 ht_num = apicid & 1;
485 } else
486#endif
487 ht_num = 0;
488
489 /*
490 * performance counters are shared resources
491 * assign each hyperthread its own set
492 * (re-use the ESCR0 register, seems safe
493 * and keeps the cccr_val the same)
494 */
495 if (!ht_num) {
496 /* logical cpu 0 */
497 perfctr_msr = MSR_P4_IQ_PERFCTR0;
498 evntsel_msr = MSR_P4_CRU_ESCR0;
499 cccr_msr = MSR_P4_IQ_CCCR0;
500 cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
501
502 /*
503 * If we're on the kdump kernel or other situation, we may
504 * still have other performance counter registers set to
505 * interrupt and they'll keep interrupting forever because
506 * of the P4_CCCR_OVF quirk. So we need to ACK all the
507 * pending interrupts and disable all the registers here,
508 * before reenabling the NMI delivery. Refer to p4_rearm()
509 * about the P4_CCCR_OVF quirk.
510 */
511 if (reset_devices) {
512 unsigned int low, high;
513 int i;
514
515 for (i = 0; i < P4_CONTROLS; i++) {
516 rdmsr(p4_controls[i], low, high);
517 low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
518 wrmsr(p4_controls[i], low, high);
519 }
520 }
521 } else {
522 /* logical cpu 1 */
523 perfctr_msr = MSR_P4_IQ_PERFCTR1;
524 evntsel_msr = MSR_P4_CRU_ESCR0;
525 cccr_msr = MSR_P4_IQ_CCCR1;
526
527 /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
528 if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
529 cccr_val = P4_CCCR_OVF_PMI0;
530 else
531 cccr_val = P4_CCCR_OVF_PMI1;
532 cccr_val |= P4_CCCR_ESCR_SELECT(4);
533 }
534
535 evntsel = P4_ESCR_EVENT_SELECT(0x3F)
536 | P4_ESCR_OS
537 | P4_ESCR_USR;
538
539 cccr_val |= P4_CCCR_THRESHOLD(15)
540 | P4_CCCR_COMPLEMENT
541 | P4_CCCR_COMPARE
542 | P4_CCCR_REQUIRED;
543
544 wrmsr(evntsel_msr, evntsel, 0);
545 wrmsr(cccr_msr, cccr_val, 0);
546 write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
547
548 wd->perfctr_msr = perfctr_msr;
549 wd->evntsel_msr = evntsel_msr;
550 wd->cccr_msr = cccr_msr;
551
552 /* ok, everything is initialized, announce that we're set */
553 cpu_nmi_set_wd_enabled();
554
555 apic_write(APIC_LVTPC, APIC_DM_NMI);
556 cccr_val |= P4_CCCR_ENABLE;
557 wrmsr(cccr_msr, cccr_val, 0);
558 return 1;
559}
560
561static void stop_p4_watchdog(void)
562{
563 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
564 wrmsr(wd->cccr_msr, 0, 0);
565 wrmsr(wd->evntsel_msr, 0, 0);
566}
567
568static int p4_reserve(void)
569{
570 if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
571 return 0;
572#ifdef CONFIG_SMP
573 if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
574 goto fail1;
575#endif
576 if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
577 goto fail2;
578 /* RED-PEN why is ESCR1 not reserved here? */
579 return 1;
580 fail2:
581#ifdef CONFIG_SMP
582 if (smp_num_siblings > 1)
583 release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
584 fail1:
585#endif
586 release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
587 return 0;
588}
589
590static void p4_unreserve(void)
591{
592#ifdef CONFIG_SMP
593 if (smp_num_siblings > 1)
594 release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
595#endif
596 release_evntsel_nmi(MSR_P4_CRU_ESCR0);
597 release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
598}
599
600static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
601{
602 unsigned dummy;
603 /*
604 * P4 quirks:
605 * - An overflown perfctr will assert its interrupt
606 * until the OVF flag in its CCCR is cleared.
607 * - LVTPC is masked on interrupt and must be
608 * unmasked by the LVTPC handler.
609 */
610 rdmsrl(wd->cccr_msr, dummy);
611 dummy &= ~P4_CCCR_OVF;
612 wrmsrl(wd->cccr_msr, dummy);
613 apic_write(APIC_LVTPC, APIC_DM_NMI);
614 /* start the cycle over again */
615 write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
616}
617
618static const struct wd_ops p4_wd_ops = {
619 .reserve = p4_reserve,
620 .unreserve = p4_unreserve,
621 .setup = setup_p4_watchdog,
622 .rearm = p4_rearm,
623 .stop = stop_p4_watchdog,
624 /* RED-PEN this is wrong for the other sibling */
625 .perfctr = MSR_P4_BPU_PERFCTR0,
626 .evntsel = MSR_P4_BSU_ESCR0,
627 .checkbit = 1ULL << 39,
628};
629
630/*
631 * Watchdog using the Intel architected PerfMon.
632 * Used for Core2 and hopefully all future Intel CPUs.
633 */
634#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
635#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
636
637static struct wd_ops intel_arch_wd_ops;
638
639static int setup_intel_arch_watchdog(unsigned nmi_hz)
640{
641 unsigned int ebx;
642 union cpuid10_eax eax;
643 unsigned int unused;
644 unsigned int perfctr_msr, evntsel_msr;
645 unsigned int evntsel;
646 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
647
648 /*
649 * Check whether the Architectural PerfMon supports
650 * Unhalted Core Cycles Event or not.
651 * NOTE: Corresponding bit = 0 in ebx indicates event present.
652 */
653 cpuid(10, &(eax.full), &ebx, &unused, &unused);
654 if ((eax.split.mask_length <
655 (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
656 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
657 return 0;
658
659 perfctr_msr = wd_ops->perfctr;
660 evntsel_msr = wd_ops->evntsel;
661
662 wrmsrl(perfctr_msr, 0UL);
663
664 evntsel = ARCH_PERFMON_EVENTSEL_INT
665 | ARCH_PERFMON_EVENTSEL_OS
666 | ARCH_PERFMON_EVENTSEL_USR
667 | ARCH_PERFMON_NMI_EVENT_SEL
668 | ARCH_PERFMON_NMI_EVENT_UMASK;
669
670 /* setup the timer */
671 wrmsr(evntsel_msr, evntsel, 0);
672 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
673 write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
674
675 wd->perfctr_msr = perfctr_msr;
676 wd->evntsel_msr = evntsel_msr;
677 wd->cccr_msr = 0; /* unused */
678
679 /* ok, everything is initialized, announce that we're set */
680 cpu_nmi_set_wd_enabled();
681
682 apic_write(APIC_LVTPC, APIC_DM_NMI);
683 evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE;
684 wrmsr(evntsel_msr, evntsel, 0);
685 intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
686 return 1;
687}
688
689static struct wd_ops intel_arch_wd_ops __read_mostly = {
690 .reserve = single_msr_reserve,
691 .unreserve = single_msr_unreserve,
692 .setup = setup_intel_arch_watchdog,
693 .rearm = p6_rearm,
694 .stop = single_msr_stop_watchdog,
695 .perfctr = MSR_ARCH_PERFMON_PERFCTR1,
696 .evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
697};
698
699static void probe_nmi_watchdog(void)
700{
701 switch (boot_cpu_data.x86_vendor) {
702 case X86_VENDOR_AMD:
703 if (boot_cpu_data.x86 == 6 ||
704 (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15))
705 wd_ops = &k7_wd_ops;
706 return;
707 case X86_VENDOR_INTEL:
708 /* Work around where perfctr1 doesn't have a working enable
709 * bit as described in the following errata:
710 * AE49 Core Duo and Intel Core Solo 65 nm
711 * AN49 Intel Pentium Dual-Core
712 * AF49 Dual-Core Intel Xeon Processor LV
713 */
714 if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) ||
715 ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 &&
716 boot_cpu_data.x86_mask == 4))) {
717 intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
718 intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
719 }
720 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
721 wd_ops = &intel_arch_wd_ops;
722 break;
723 }
724 switch (boot_cpu_data.x86) {
725 case 6:
726 if (boot_cpu_data.x86_model > 13)
727 return;
728
729 wd_ops = &p6_wd_ops;
730 break;
731 case 15:
732 wd_ops = &p4_wd_ops;
733 break;
734 default:
735 return;
736 }
737 break;
738 }
739}
740
741/* Interface to nmi.c */
742
743int lapic_watchdog_init(unsigned nmi_hz)
744{
745 if (!wd_ops) {
746 probe_nmi_watchdog();
747 if (!wd_ops) {
748 printk(KERN_INFO "NMI watchdog: CPU not supported\n");
749 return -1;
750 }
751
752 if (!wd_ops->reserve()) {
753 printk(KERN_ERR
754 "NMI watchdog: cannot reserve perfctrs\n");
755 return -1;
756 }
757 }
758
759 if (!(wd_ops->setup(nmi_hz))) {
760 printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
761 raw_smp_processor_id());
762 return -1;
763 }
764
765 return 0;
766}
767
768void lapic_watchdog_stop(void)
769{
770 if (wd_ops)
771 wd_ops->stop();
772}
773
774unsigned lapic_adjust_nmi_hz(unsigned hz)
775{
776 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
777 if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
778 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
779 hz = adjust_for_32bit_ctr(hz);
780 return hz;
781}
782
783int __kprobes lapic_wd_event(unsigned nmi_hz)
784{
785 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
786 u64 ctr;
787
788 rdmsrl(wd->perfctr_msr, ctr);
789 if (ctr & wd_ops->checkbit) /* perfctr still running? */
790 return 0;
791
792 wd_ops->rearm(wd, nmi_hz);
793 return 1;
794}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 6e8752c1bd52..8474c998cbd4 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -175,21 +175,21 @@ static const struct stacktrace_ops print_trace_ops = {
175 175
176void 176void
177show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, 177show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
178 unsigned long *stack, unsigned long bp, char *log_lvl) 178 unsigned long *stack, char *log_lvl)
179{ 179{
180 printk("%sCall Trace:\n", log_lvl); 180 printk("%sCall Trace:\n", log_lvl);
181 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); 181 dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
182} 182}
183 183
184void show_trace(struct task_struct *task, struct pt_regs *regs, 184void show_trace(struct task_struct *task, struct pt_regs *regs,
185 unsigned long *stack, unsigned long bp) 185 unsigned long *stack)
186{ 186{
187 show_trace_log_lvl(task, regs, stack, bp, ""); 187 show_trace_log_lvl(task, regs, stack, "");
188} 188}
189 189
190void show_stack(struct task_struct *task, unsigned long *sp) 190void show_stack(struct task_struct *task, unsigned long *sp)
191{ 191{
192 show_stack_log_lvl(task, NULL, sp, 0, ""); 192 show_stack_log_lvl(task, NULL, sp, "");
193} 193}
194 194
195/* 195/*
@@ -210,7 +210,7 @@ void dump_stack(void)
210 init_utsname()->release, 210 init_utsname()->release,
211 (int)strcspn(init_utsname()->version, " "), 211 (int)strcspn(init_utsname()->version, " "),
212 init_utsname()->version); 212 init_utsname()->version);
213 show_trace(NULL, NULL, &stack, bp); 213 show_trace(NULL, NULL, &stack);
214} 214}
215EXPORT_SYMBOL(dump_stack); 215EXPORT_SYMBOL(dump_stack);
216 216
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 1bc7f75a5bda..74cc1eda384b 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -17,11 +17,12 @@
17#include <asm/stacktrace.h> 17#include <asm/stacktrace.h>
18 18
19 19
20void dump_trace(struct task_struct *task, struct pt_regs *regs, 20void dump_trace(struct task_struct *task,
21 unsigned long *stack, unsigned long bp, 21 struct pt_regs *regs, unsigned long *stack,
22 const struct stacktrace_ops *ops, void *data) 22 const struct stacktrace_ops *ops, void *data)
23{ 23{
24 int graph = 0; 24 int graph = 0;
25 unsigned long bp;
25 26
26 if (!task) 27 if (!task)
27 task = current; 28 task = current;
@@ -34,18 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
34 stack = (unsigned long *)task->thread.sp; 35 stack = (unsigned long *)task->thread.sp;
35 } 36 }
36 37
37#ifdef CONFIG_FRAME_POINTER 38 bp = stack_frame(task, regs);
38 if (!bp) {
39 if (task == current) {
40 /* Grab bp right from our regs */
41 get_bp(bp);
42 } else {
43 /* bp is the last reg pushed by switch_to */
44 bp = *(unsigned long *) task->thread.sp;
45 }
46 }
47#endif
48
49 for (;;) { 39 for (;;) {
50 struct thread_info *context; 40 struct thread_info *context;
51 41
@@ -65,7 +55,7 @@ EXPORT_SYMBOL(dump_trace);
65 55
66void 56void
67show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 57show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
68 unsigned long *sp, unsigned long bp, char *log_lvl) 58 unsigned long *sp, char *log_lvl)
69{ 59{
70 unsigned long *stack; 60 unsigned long *stack;
71 int i; 61 int i;
@@ -87,7 +77,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
87 touch_nmi_watchdog(); 77 touch_nmi_watchdog();
88 } 78 }
89 printk(KERN_CONT "\n"); 79 printk(KERN_CONT "\n");
90 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 80 show_trace_log_lvl(task, regs, sp, log_lvl);
91} 81}
92 82
93 83
@@ -112,8 +102,7 @@ void show_registers(struct pt_regs *regs)
112 u8 *ip; 102 u8 *ip;
113 103
114 printk(KERN_EMERG "Stack:\n"); 104 printk(KERN_EMERG "Stack:\n");
115 show_stack_log_lvl(NULL, regs, &regs->sp, 105 show_stack_log_lvl(NULL, regs, &regs->sp, KERN_EMERG);
116 0, KERN_EMERG);
117 106
118 printk(KERN_EMERG "Code: "); 107 printk(KERN_EMERG "Code: ");
119 108
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 6a340485249a..64101335de19 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -139,8 +139,8 @@ fixup_bp_irq_link(unsigned long bp, unsigned long *stack,
139 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack 139 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
140 */ 140 */
141 141
142void dump_trace(struct task_struct *task, struct pt_regs *regs, 142void dump_trace(struct task_struct *task,
143 unsigned long *stack, unsigned long bp, 143 struct pt_regs *regs, unsigned long *stack,
144 const struct stacktrace_ops *ops, void *data) 144 const struct stacktrace_ops *ops, void *data)
145{ 145{
146 const unsigned cpu = get_cpu(); 146 const unsigned cpu = get_cpu();
@@ -149,6 +149,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
149 unsigned used = 0; 149 unsigned used = 0;
150 struct thread_info *tinfo; 150 struct thread_info *tinfo;
151 int graph = 0; 151 int graph = 0;
152 unsigned long bp;
152 153
153 if (!task) 154 if (!task)
154 task = current; 155 task = current;
@@ -160,18 +161,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
160 stack = (unsigned long *)task->thread.sp; 161 stack = (unsigned long *)task->thread.sp;
161 } 162 }
162 163
163#ifdef CONFIG_FRAME_POINTER 164 bp = stack_frame(task, regs);
164 if (!bp) {
165 if (task == current) {
166 /* Grab bp right from our regs */
167 get_bp(bp);
168 } else {
169 /* bp is the last reg pushed by switch_to */
170 bp = *(unsigned long *) task->thread.sp;
171 }
172 }
173#endif
174
175 /* 165 /*
176 * Print function call entries in all stacks, starting at the 166 * Print function call entries in all stacks, starting at the
177 * current stack address. If the stacks consist of nested 167 * current stack address. If the stacks consist of nested
@@ -235,7 +225,7 @@ EXPORT_SYMBOL(dump_trace);
235 225
236void 226void
237show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 227show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
238 unsigned long *sp, unsigned long bp, char *log_lvl) 228 unsigned long *sp, char *log_lvl)
239{ 229{
240 unsigned long *irq_stack_end; 230 unsigned long *irq_stack_end;
241 unsigned long *irq_stack; 231 unsigned long *irq_stack;
@@ -279,7 +269,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
279 preempt_enable(); 269 preempt_enable();
280 270
281 printk(KERN_CONT "\n"); 271 printk(KERN_CONT "\n");
282 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 272 show_trace_log_lvl(task, regs, sp, log_lvl);
283} 273}
284 274
285void show_registers(struct pt_regs *regs) 275void show_registers(struct pt_regs *regs)
@@ -308,7 +298,7 @@ void show_registers(struct pt_regs *regs)
308 298
309 printk(KERN_EMERG "Stack:\n"); 299 printk(KERN_EMERG "Stack:\n");
310 show_stack_log_lvl(NULL, regs, (unsigned long *)sp, 300 show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
311 regs->bp, KERN_EMERG); 301 KERN_EMERG);
312 302
313 printk(KERN_EMERG "Code: "); 303 printk(KERN_EMERG "Code: ");
314 304
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 1cbd54c0df99..5940282bd2f9 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1184,6 +1184,10 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
1184{ 1184{
1185 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 1185 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
1186 1186
1187 /* This is possible if op is under delayed unoptimizing */
1188 if (kprobe_disabled(&op->kp))
1189 return;
1190
1187 preempt_disable(); 1191 preempt_disable();
1188 if (kprobe_running()) { 1192 if (kprobe_running()) {
1189 kprobes_inc_nmissed_count(&op->kp); 1193 kprobes_inc_nmissed_count(&op->kp);
@@ -1401,10 +1405,16 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
1401 return 0; 1405 return 0;
1402} 1406}
1403 1407
1404/* Replace a breakpoint (int3) with a relative jump. */ 1408#define MAX_OPTIMIZE_PROBES 256
1405int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op) 1409static struct text_poke_param *jump_poke_params;
1410static struct jump_poke_buffer {
1411 u8 buf[RELATIVEJUMP_SIZE];
1412} *jump_poke_bufs;
1413
1414static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
1415 u8 *insn_buf,
1416 struct optimized_kprobe *op)
1406{ 1417{
1407 unsigned char jmp_code[RELATIVEJUMP_SIZE];
1408 s32 rel = (s32)((long)op->optinsn.insn - 1418 s32 rel = (s32)((long)op->optinsn.insn -
1409 ((long)op->kp.addr + RELATIVEJUMP_SIZE)); 1419 ((long)op->kp.addr + RELATIVEJUMP_SIZE));
1410 1420
@@ -1412,16 +1422,79 @@ int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
1412 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, 1422 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
1413 RELATIVE_ADDR_SIZE); 1423 RELATIVE_ADDR_SIZE);
1414 1424
1415 jmp_code[0] = RELATIVEJUMP_OPCODE; 1425 insn_buf[0] = RELATIVEJUMP_OPCODE;
1416 *(s32 *)(&jmp_code[1]) = rel; 1426 *(s32 *)(&insn_buf[1]) = rel;
1427
1428 tprm->addr = op->kp.addr;
1429 tprm->opcode = insn_buf;
1430 tprm->len = RELATIVEJUMP_SIZE;
1431}
1432
1433/*
1434 * Replace breakpoints (int3) with relative jumps.
1435 * Caller must call with locking kprobe_mutex and text_mutex.
1436 */
1437void __kprobes arch_optimize_kprobes(struct list_head *oplist)
1438{
1439 struct optimized_kprobe *op, *tmp;
1440 int c = 0;
1441
1442 list_for_each_entry_safe(op, tmp, oplist, list) {
1443 WARN_ON(kprobe_disabled(&op->kp));
1444 /* Setup param */
1445 setup_optimize_kprobe(&jump_poke_params[c],
1446 jump_poke_bufs[c].buf, op);
1447 list_del_init(&op->list);
1448 if (++c >= MAX_OPTIMIZE_PROBES)
1449 break;
1450 }
1417 1451
1418 /* 1452 /*
1419 * text_poke_smp doesn't support NMI/MCE code modifying. 1453 * text_poke_smp doesn't support NMI/MCE code modifying.
1420 * However, since kprobes itself also doesn't support NMI/MCE 1454 * However, since kprobes itself also doesn't support NMI/MCE
1421 * code probing, it's not a problem. 1455 * code probing, it's not a problem.
1422 */ 1456 */
1423 text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE); 1457 text_poke_smp_batch(jump_poke_params, c);
1424 return 0; 1458}
1459
1460static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
1461 u8 *insn_buf,
1462 struct optimized_kprobe *op)
1463{
1464 /* Set int3 to first byte for kprobes */
1465 insn_buf[0] = BREAKPOINT_INSTRUCTION;
1466 memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
1467
1468 tprm->addr = op->kp.addr;
1469 tprm->opcode = insn_buf;
1470 tprm->len = RELATIVEJUMP_SIZE;
1471}
1472
1473/*
1474 * Recover original instructions and breakpoints from relative jumps.
1475 * Caller must call with locking kprobe_mutex.
1476 */
1477extern void arch_unoptimize_kprobes(struct list_head *oplist,
1478 struct list_head *done_list)
1479{
1480 struct optimized_kprobe *op, *tmp;
1481 int c = 0;
1482
1483 list_for_each_entry_safe(op, tmp, oplist, list) {
1484 /* Setup param */
1485 setup_unoptimize_kprobe(&jump_poke_params[c],
1486 jump_poke_bufs[c].buf, op);
1487 list_move(&op->list, done_list);
1488 if (++c >= MAX_OPTIMIZE_PROBES)
1489 break;
1490 }
1491
1492 /*
1493 * text_poke_smp doesn't support NMI/MCE code modifying.
1494 * However, since kprobes itself also doesn't support NMI/MCE
1495 * code probing, it's not a problem.
1496 */
1497 text_poke_smp_batch(jump_poke_params, c);
1425} 1498}
1426 1499
1427/* Replace a relative jump with a breakpoint (int3). */ 1500/* Replace a relative jump with a breakpoint (int3). */
@@ -1453,11 +1526,35 @@ static int __kprobes setup_detour_execution(struct kprobe *p,
1453 } 1526 }
1454 return 0; 1527 return 0;
1455} 1528}
1529
1530static int __kprobes init_poke_params(void)
1531{
1532 /* Allocate code buffer and parameter array */
1533 jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
1534 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
1535 if (!jump_poke_bufs)
1536 return -ENOMEM;
1537
1538 jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
1539 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
1540 if (!jump_poke_params) {
1541 kfree(jump_poke_bufs);
1542 jump_poke_bufs = NULL;
1543 return -ENOMEM;
1544 }
1545
1546 return 0;
1547}
1548#else /* !CONFIG_OPTPROBES */
1549static int __kprobes init_poke_params(void)
1550{
1551 return 0;
1552}
1456#endif 1553#endif
1457 1554
1458int __init arch_init_kprobes(void) 1555int __init arch_init_kprobes(void)
1459{ 1556{
1460 return 0; 1557 return init_poke_params();
1461} 1558}
1462 1559
1463int __kprobes arch_trampoline_kprobe(struct kprobe *p) 1560int __kprobes arch_trampoline_kprobe(struct kprobe *p)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 57d1868a86aa..96ed1aac543a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -91,8 +91,7 @@ void exit_thread(void)
91void show_regs(struct pt_regs *regs) 91void show_regs(struct pt_regs *regs)
92{ 92{
93 show_registers(regs); 93 show_registers(regs);
94 show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), 94 show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs));
95 regs->bp);
96} 95}
97 96
98void show_regs_common(void) 97void show_regs_common(void)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 083e99d1b7df..68f61ac632e1 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -281,6 +281,13 @@ static void __cpuinit smp_callin(void)
281 */ 281 */
282 smp_store_cpu_info(cpuid); 282 smp_store_cpu_info(cpuid);
283 283
284 /*
285 * This must be done before setting cpu_online_mask
286 * or calling notify_cpu_starting.
287 */
288 set_cpu_sibling_map(raw_smp_processor_id());
289 wmb();
290
284 notify_cpu_starting(cpuid); 291 notify_cpu_starting(cpuid);
285 292
286 /* 293 /*
@@ -316,16 +323,6 @@ notrace static void __cpuinit start_secondary(void *unused)
316 */ 323 */
317 check_tsc_sync_target(); 324 check_tsc_sync_target();
318 325
319 if (nmi_watchdog == NMI_IO_APIC) {
320 legacy_pic->mask(0);
321 enable_NMI_through_LVT0();
322 legacy_pic->unmask(0);
323 }
324
325 /* This must be done before setting cpu_online_mask */
326 set_cpu_sibling_map(raw_smp_processor_id());
327 wmb();
328
329 /* 326 /*
330 * We need to hold call_lock, so there is no inconsistency 327 * We need to hold call_lock, so there is no inconsistency
331 * between the time smp_call_function() determines number of 328 * between the time smp_call_function() determines number of
@@ -1061,8 +1058,6 @@ static int __init smp_sanity_check(unsigned max_cpus)
1061 printk(KERN_INFO "SMP mode deactivated.\n"); 1058 printk(KERN_INFO "SMP mode deactivated.\n");
1062 smpboot_clear_io_apic(); 1059 smpboot_clear_io_apic();
1063 1060
1064 localise_nmi_watchdog();
1065
1066 connect_bsp_APIC(); 1061 connect_bsp_APIC();
1067 setup_local_APIC(); 1062 setup_local_APIC();
1068 end_local_APIC_setup(); 1063 end_local_APIC_setup();
@@ -1196,7 +1191,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
1196#ifdef CONFIG_X86_IO_APIC 1191#ifdef CONFIG_X86_IO_APIC
1197 setup_ioapic_dest(); 1192 setup_ioapic_dest();
1198#endif 1193#endif
1199 check_nmi_watchdog();
1200 mtrr_aps_init(); 1194 mtrr_aps_init();
1201} 1195}
1202 1196
@@ -1341,8 +1335,6 @@ int native_cpu_disable(void)
1341 if (cpu == 0) 1335 if (cpu == 0)
1342 return -EBUSY; 1336 return -EBUSY;
1343 1337
1344 if (nmi_watchdog == NMI_LOCAL_APIC)
1345 stop_apic_nmi_watchdog(NULL);
1346 clear_local_APIC(); 1338 clear_local_APIC();
1347 1339
1348 cpu_disable_common(); 1340 cpu_disable_common();
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index b53c525368a7..938c8e10a19a 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -73,22 +73,22 @@ static const struct stacktrace_ops save_stack_ops_nosched = {
73 */ 73 */
74void save_stack_trace(struct stack_trace *trace) 74void save_stack_trace(struct stack_trace *trace)
75{ 75{
76 dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace); 76 dump_trace(current, NULL, NULL, &save_stack_ops, trace);
77 if (trace->nr_entries < trace->max_entries) 77 if (trace->nr_entries < trace->max_entries)
78 trace->entries[trace->nr_entries++] = ULONG_MAX; 78 trace->entries[trace->nr_entries++] = ULONG_MAX;
79} 79}
80EXPORT_SYMBOL_GPL(save_stack_trace); 80EXPORT_SYMBOL_GPL(save_stack_trace);
81 81
82void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp) 82void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs)
83{ 83{
84 dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace); 84 dump_trace(current, regs, NULL, &save_stack_ops, trace);
85 if (trace->nr_entries < trace->max_entries) 85 if (trace->nr_entries < trace->max_entries)
86 trace->entries[trace->nr_entries++] = ULONG_MAX; 86 trace->entries[trace->nr_entries++] = ULONG_MAX;
87} 87}
88 88
89void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) 89void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
90{ 90{
91 dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); 91 dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace);
92 if (trace->nr_entries < trace->max_entries) 92 if (trace->nr_entries < trace->max_entries)
93 trace->entries[trace->nr_entries++] = ULONG_MAX; 93 trace->entries[trace->nr_entries++] = ULONG_MAX;
94} 94}
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index fb5cc5e14cfa..25a28a245937 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -22,10 +22,6 @@
22#include <asm/hpet.h> 22#include <asm/hpet.h>
23#include <asm/time.h> 23#include <asm/time.h>
24 24
25#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
26int timer_ack;
27#endif
28
29#ifdef CONFIG_X86_64 25#ifdef CONFIG_X86_64
30volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; 26volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
31#endif 27#endif
@@ -63,20 +59,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
63 /* Keep nmi watchdog up to date */ 59 /* Keep nmi watchdog up to date */
64 inc_irq_stat(irq0_irqs); 60 inc_irq_stat(irq0_irqs);
65 61
66 /* Optimized out for !IO_APIC and x86_64 */
67 if (timer_ack) {
68 /*
69 * Subtle, when I/O APICs are used we have to ack timer IRQ
70 * manually to deassert NMI lines for the watchdog if run
71 * on an 82489DX-based system.
72 */
73 raw_spin_lock(&i8259A_lock);
74 outb(0x0c, PIC_MASTER_OCW3);
75 /* Ack the IRQ; AEOI will end it automatically. */
76 inb(PIC_MASTER_POLL);
77 raw_spin_unlock(&i8259A_lock);
78 }
79
80 global_clock_event->event_handler(global_clock_event); 62 global_clock_event->event_handler(global_clock_event);
81 63
82 /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ 64 /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index cb838ca42c96..bb6f04167361 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -83,6 +83,8 @@ EXPORT_SYMBOL_GPL(used_vectors);
83 83
84static int ignore_nmis; 84static int ignore_nmis;
85 85
86int unknown_nmi_panic;
87
86static inline void conditional_sti(struct pt_regs *regs) 88static inline void conditional_sti(struct pt_regs *regs)
87{ 89{
88 if (regs->flags & X86_EFLAGS_IF) 90 if (regs->flags & X86_EFLAGS_IF)
@@ -300,6 +302,13 @@ gp_in_kernel:
300 die("general protection fault", regs, error_code); 302 die("general protection fault", regs, error_code);
301} 303}
302 304
305static int __init setup_unknown_nmi_panic(char *str)
306{
307 unknown_nmi_panic = 1;
308 return 1;
309}
310__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
311
303static notrace __kprobes void 312static notrace __kprobes void
304mem_parity_error(unsigned char reason, struct pt_regs *regs) 313mem_parity_error(unsigned char reason, struct pt_regs *regs)
305{ 314{
@@ -371,7 +380,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
371 reason, smp_processor_id()); 380 reason, smp_processor_id());
372 381
373 printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); 382 printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
374 if (panic_on_unrecovered_nmi) 383 if (unknown_nmi_panic || panic_on_unrecovered_nmi)
375 panic("NMI: Not continuing"); 384 panic("NMI: Not continuing");
376 385
377 printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); 386 printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
@@ -397,20 +406,8 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
397 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) 406 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
398 == NOTIFY_STOP) 407 == NOTIFY_STOP)
399 return; 408 return;
400
401#ifndef CONFIG_LOCKUP_DETECTOR
402 /*
403 * Ok, so this is none of the documented NMI sources,
404 * so it must be the NMI watchdog.
405 */
406 if (nmi_watchdog_tick(regs, reason))
407 return;
408 if (!do_nmi_callback(regs, cpu))
409#endif /* !CONFIG_LOCKUP_DETECTOR */
410 unknown_nmi_error(reason, regs);
411#else
412 unknown_nmi_error(reason, regs);
413#endif 409#endif
410 unknown_nmi_error(reason, regs);
414 411
415 return; 412 return;
416 } 413 }
@@ -446,14 +443,12 @@ do_nmi(struct pt_regs *regs, long error_code)
446 443
447void stop_nmi(void) 444void stop_nmi(void)
448{ 445{
449 acpi_nmi_disable();
450 ignore_nmis++; 446 ignore_nmis++;
451} 447}
452 448
453void restart_nmi(void) 449void restart_nmi(void)
454{ 450{
455 ignore_nmis--; 451 ignore_nmis--;
456 acpi_nmi_enable();
457} 452}
458 453
459/* May run on IST stack. */ 454/* May run on IST stack. */
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c
index af3b6c8a436f..704a37cedddb 100644
--- a/arch/x86/mm/kmemcheck/error.c
+++ b/arch/x86/mm/kmemcheck/error.c
@@ -185,7 +185,7 @@ void kmemcheck_error_save(enum kmemcheck_shadow state,
185 e->trace.entries = e->trace_entries; 185 e->trace.entries = e->trace_entries;
186 e->trace.max_entries = ARRAY_SIZE(e->trace_entries); 186 e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
187 e->trace.skip = 0; 187 e->trace.skip = 0;
188 save_stack_trace_bp(&e->trace, regs->bp); 188 save_stack_trace_regs(&e->trace, regs);
189 189
190 /* Round address down to nearest 16 bytes */ 190 /* Round address down to nearest 16 bytes */
191 shadow_copy = kmemcheck_shadow_lookup(address 191 shadow_copy = kmemcheck_shadow_lookup(address
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 2d49d4e19a36..72cbec14d783 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -126,7 +126,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
126 if (!user_mode_vm(regs)) { 126 if (!user_mode_vm(regs)) {
127 unsigned long stack = kernel_stack_pointer(regs); 127 unsigned long stack = kernel_stack_pointer(regs);
128 if (depth) 128 if (depth)
129 dump_trace(NULL, regs, (unsigned long *)stack, 0, 129 dump_trace(NULL, regs, (unsigned long *)stack,
130 &backtrace_ops, &depth); 130 &backtrace_ops, &depth);
131 return; 131 return;
132 } 132 }
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c
index e3ecb71b5790..0636dd93cef8 100644
--- a/arch/x86/oprofile/nmi_timer_int.c
+++ b/arch/x86/oprofile/nmi_timer_int.c
@@ -58,9 +58,6 @@ static void timer_stop(void)
58 58
59int __init op_nmi_timer_init(struct oprofile_operations *ops) 59int __init op_nmi_timer_init(struct oprofile_operations *ops)
60{ 60{
61 if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0))
62 return -ENODEV;
63
64 ops->start = timer_start; 61 ops->start = timer_start;
65 ops->stop = timer_stop; 62 ops->stop = timer_stop;
66 ops->cpu_type = "timer"; 63 ops->cpu_type = "timer";
diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c
index 660a2728908d..0cac7ec0d2ec 100644
--- a/drivers/acpi/acpica/nsinit.c
+++ b/drivers/acpi/acpica/nsinit.c
@@ -577,9 +577,7 @@ acpi_ns_init_one_device(acpi_handle obj_handle,
577 * as possible (without an NMI being received in the middle of 577 * as possible (without an NMI being received in the middle of
578 * this) - so disable NMIs and initialize the device: 578 * this) - so disable NMIs and initialize the device:
579 */ 579 */
580 acpi_nmi_disable();
581 status = acpi_ns_evaluate(info); 580 status = acpi_ns_evaluate(info);
582 acpi_nmi_enable();
583 581
584 if (ACPI_SUCCESS(status)) { 582 if (ACPI_SUCCESS(status)) {
585 walk_info->num_INI++; 583 walk_info->num_INI++;
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index 3d77116e4634..c19f4a20794a 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -649,12 +649,7 @@ static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev)
649 * If nmi_watchdog is turned off then we can turn on 649 * If nmi_watchdog is turned off then we can turn on
650 * our nmi decoding capability. 650 * our nmi decoding capability.
651 */ 651 */
652 if (!nmi_watchdog_active()) 652 hpwdt_nmi_decoding = 1;
653 hpwdt_nmi_decoding = 1;
654 else
655 dev_warn(&dev->dev, "NMI decoding is disabled. To enable this "
656 "functionality you must reboot with nmi_watchdog=0 "
657 "and load the hpwdt driver with priority=1.\n");
658} 653}
659#else 654#else
660static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev) 655static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev)
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 8beabb958f61..725bf6bd39f7 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -154,12 +154,14 @@ enum {
154 TRACE_EVENT_FL_ENABLED_BIT, 154 TRACE_EVENT_FL_ENABLED_BIT,
155 TRACE_EVENT_FL_FILTERED_BIT, 155 TRACE_EVENT_FL_FILTERED_BIT,
156 TRACE_EVENT_FL_RECORDED_CMD_BIT, 156 TRACE_EVENT_FL_RECORDED_CMD_BIT,
157 TRACE_EVENT_FL_CAP_ANY_BIT,
157}; 158};
158 159
159enum { 160enum {
160 TRACE_EVENT_FL_ENABLED = (1 << TRACE_EVENT_FL_ENABLED_BIT), 161 TRACE_EVENT_FL_ENABLED = (1 << TRACE_EVENT_FL_ENABLED_BIT),
161 TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), 162 TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT),
162 TRACE_EVENT_FL_RECORDED_CMD = (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT), 163 TRACE_EVENT_FL_RECORDED_CMD = (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT),
164 TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
163}; 165};
164 166
165struct ftrace_event_call { 167struct ftrace_event_call {
@@ -196,6 +198,14 @@ struct ftrace_event_call {
196#endif 198#endif
197}; 199};
198 200
201#define __TRACE_EVENT_FLAGS(name, value) \
202 static int __init trace_init_flags_##name(void) \
203 { \
204 event_##name.flags = value; \
205 return 0; \
206 } \
207 early_initcall(trace_init_flags_##name);
208
199#define PERF_MAX_TRACE_SIZE 2048 209#define PERF_MAX_TRACE_SIZE 2048
200 210
201#define MAX_FILTER_PRED 32 211#define MAX_FILTER_PRED 32
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index e7d1b2e0070d..b78edb58ee66 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -275,7 +275,9 @@ extern int arch_prepared_optinsn(struct arch_optimized_insn *optinsn);
275extern int arch_check_optimized_kprobe(struct optimized_kprobe *op); 275extern int arch_check_optimized_kprobe(struct optimized_kprobe *op);
276extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op); 276extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op);
277extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op); 277extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op);
278extern int arch_optimize_kprobe(struct optimized_kprobe *op); 278extern void arch_optimize_kprobes(struct list_head *oplist);
279extern void arch_unoptimize_kprobes(struct list_head *oplist,
280 struct list_head *done_list);
279extern void arch_unoptimize_kprobe(struct optimized_kprobe *op); 281extern void arch_unoptimize_kprobe(struct optimized_kprobe *op);
280extern kprobe_opcode_t *get_optinsn_slot(void); 282extern kprobe_opcode_t *get_optinsn_slot(void);
281extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty); 283extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty);
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 06aab5eee134..17ccf44e7dcb 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -17,8 +17,6 @@
17#ifdef ARCH_HAS_NMI_WATCHDOG 17#ifdef ARCH_HAS_NMI_WATCHDOG
18#include <asm/nmi.h> 18#include <asm/nmi.h>
19extern void touch_nmi_watchdog(void); 19extern void touch_nmi_watchdog(void);
20extern void acpi_nmi_disable(void);
21extern void acpi_nmi_enable(void);
22#else 20#else
23#ifndef CONFIG_HARDLOCKUP_DETECTOR 21#ifndef CONFIG_HARDLOCKUP_DETECTOR
24static inline void touch_nmi_watchdog(void) 22static inline void touch_nmi_watchdog(void)
@@ -28,8 +26,6 @@ static inline void touch_nmi_watchdog(void)
28#else 26#else
29extern void touch_nmi_watchdog(void); 27extern void touch_nmi_watchdog(void);
30#endif 28#endif
31static inline void acpi_nmi_disable(void) { }
32static inline void acpi_nmi_enable(void) { }
33#endif 29#endif
34 30
35/* 31/*
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4f1279e105ee..dda5b0a3ff60 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -215,8 +215,9 @@ struct perf_event_attr {
215 */ 215 */
216 precise_ip : 2, /* skid constraint */ 216 precise_ip : 2, /* skid constraint */
217 mmap_data : 1, /* non-exec mmap data */ 217 mmap_data : 1, /* non-exec mmap data */
218 sample_id_all : 1, /* sample_type all events */
218 219
219 __reserved_1 : 46; 220 __reserved_1 : 45;
220 221
221 union { 222 union {
222 __u32 wakeup_events; /* wakeup every n events */ 223 __u32 wakeup_events; /* wakeup every n events */
@@ -327,6 +328,15 @@ struct perf_event_header {
327enum perf_event_type { 328enum perf_event_type {
328 329
329 /* 330 /*
331 * If perf_event_attr.sample_id_all is set then all event types will
332 * have the sample_type selected fields related to where/when
333 * (identity) an event took place (TID, TIME, ID, CPU, STREAM_ID)
334 * described in PERF_RECORD_SAMPLE below, it will be stashed just after
335 * the perf_event_header and the fields already present for the existing
336 * fields, i.e. at the end of the payload. That way a newer perf.data
337 * file will be supported by older perf tools, with these new optional
338 * fields being ignored.
339 *
330 * The MMAP events record the PROT_EXEC mappings so that we can 340 * The MMAP events record the PROT_EXEC mappings so that we can
331 * correlate userspace IPs to code. They have the following structure: 341 * correlate userspace IPs to code. They have the following structure:
332 * 342 *
@@ -578,6 +588,10 @@ struct perf_event;
578struct pmu { 588struct pmu {
579 struct list_head entry; 589 struct list_head entry;
580 590
591 struct device *dev;
592 char *name;
593 int type;
594
581 int * __percpu pmu_disable_count; 595 int * __percpu pmu_disable_count;
582 struct perf_cpu_context * __percpu pmu_cpu_context; 596 struct perf_cpu_context * __percpu pmu_cpu_context;
583 int task_ctx_nr; 597 int task_ctx_nr;
@@ -758,6 +772,9 @@ struct perf_event {
758 u64 shadow_ctx_time; 772 u64 shadow_ctx_time;
759 773
760 struct perf_event_attr attr; 774 struct perf_event_attr attr;
775 u16 header_size;
776 u16 id_header_size;
777 u16 read_size;
761 struct hw_perf_event hw; 778 struct hw_perf_event hw;
762 779
763 struct perf_event_context *ctx; 780 struct perf_event_context *ctx;
@@ -903,7 +920,7 @@ struct perf_output_handle {
903 920
904#ifdef CONFIG_PERF_EVENTS 921#ifdef CONFIG_PERF_EVENTS
905 922
906extern int perf_pmu_register(struct pmu *pmu); 923extern int perf_pmu_register(struct pmu *pmu, char *name, int type);
907extern void perf_pmu_unregister(struct pmu *pmu); 924extern void perf_pmu_unregister(struct pmu *pmu);
908 925
909extern int perf_num_counters(void); 926extern int perf_num_counters(void);
@@ -970,6 +987,11 @@ extern int perf_event_overflow(struct perf_event *event, int nmi,
970 struct perf_sample_data *data, 987 struct perf_sample_data *data,
971 struct pt_regs *regs); 988 struct pt_regs *regs);
972 989
990static inline bool is_sampling_event(struct perf_event *event)
991{
992 return event->attr.sample_period != 0;
993}
994
973/* 995/*
974 * Return 1 for a software event, 0 for a hardware event 996 * Return 1 for a software event, 0 for a hardware event
975 */ 997 */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 223874538b33..a99d735db3df 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -316,6 +316,7 @@ extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
316 size_t *lenp, loff_t *ppos); 316 size_t *lenp, loff_t *ppos);
317extern unsigned int softlockup_panic; 317extern unsigned int softlockup_panic;
318extern int softlockup_thresh; 318extern int softlockup_thresh;
319void lockup_detector_init(void);
319#else 320#else
320static inline void touch_softlockup_watchdog(void) 321static inline void touch_softlockup_watchdog(void)
321{ 322{
@@ -326,6 +327,9 @@ static inline void touch_softlockup_watchdog_sync(void)
326static inline void touch_all_softlockup_watchdogs(void) 327static inline void touch_all_softlockup_watchdogs(void)
327{ 328{
328} 329}
330static inline void lockup_detector_init(void)
331{
332}
329#endif 333#endif
330 334
331#ifdef CONFIG_DETECT_HUNG_TASK 335#ifdef CONFIG_DETECT_HUNG_TASK
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index 51efbef38fb0..25310f1d7f37 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -2,6 +2,7 @@
2#define __LINUX_STACKTRACE_H 2#define __LINUX_STACKTRACE_H
3 3
4struct task_struct; 4struct task_struct;
5struct pt_regs;
5 6
6#ifdef CONFIG_STACKTRACE 7#ifdef CONFIG_STACKTRACE
7struct task_struct; 8struct task_struct;
@@ -13,7 +14,8 @@ struct stack_trace {
13}; 14};
14 15
15extern void save_stack_trace(struct stack_trace *trace); 16extern void save_stack_trace(struct stack_trace *trace);
16extern void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp); 17extern void save_stack_trace_regs(struct stack_trace *trace,
18 struct pt_regs *regs);
17extern void save_stack_trace_tsk(struct task_struct *tsk, 19extern void save_stack_trace_tsk(struct task_struct *tsk,
18 struct stack_trace *trace); 20 struct stack_trace *trace);
19 21
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index cacc27a0e285..18cd0684fc4e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -127,8 +127,6 @@ extern struct trace_event_functions exit_syscall_print_funcs;
127#define SYSCALL_TRACE_ENTER_EVENT(sname) \ 127#define SYSCALL_TRACE_ENTER_EVENT(sname) \
128 static struct syscall_metadata \ 128 static struct syscall_metadata \
129 __attribute__((__aligned__(4))) __syscall_meta_##sname; \ 129 __attribute__((__aligned__(4))) __syscall_meta_##sname; \
130 static struct ftrace_event_call \
131 __attribute__((__aligned__(4))) event_enter_##sname; \
132 static struct ftrace_event_call __used \ 130 static struct ftrace_event_call __used \
133 __attribute__((__aligned__(4))) \ 131 __attribute__((__aligned__(4))) \
134 __attribute__((section("_ftrace_events"))) \ 132 __attribute__((section("_ftrace_events"))) \
@@ -137,13 +135,12 @@ extern struct trace_event_functions exit_syscall_print_funcs;
137 .class = &event_class_syscall_enter, \ 135 .class = &event_class_syscall_enter, \
138 .event.funcs = &enter_syscall_print_funcs, \ 136 .event.funcs = &enter_syscall_print_funcs, \
139 .data = (void *)&__syscall_meta_##sname,\ 137 .data = (void *)&__syscall_meta_##sname,\
140 } 138 }; \
139 __TRACE_EVENT_FLAGS(enter_##sname, TRACE_EVENT_FL_CAP_ANY)
141 140
142#define SYSCALL_TRACE_EXIT_EVENT(sname) \ 141#define SYSCALL_TRACE_EXIT_EVENT(sname) \
143 static struct syscall_metadata \ 142 static struct syscall_metadata \
144 __attribute__((__aligned__(4))) __syscall_meta_##sname; \ 143 __attribute__((__aligned__(4))) __syscall_meta_##sname; \
145 static struct ftrace_event_call \
146 __attribute__((__aligned__(4))) event_exit_##sname; \
147 static struct ftrace_event_call __used \ 144 static struct ftrace_event_call __used \
148 __attribute__((__aligned__(4))) \ 145 __attribute__((__aligned__(4))) \
149 __attribute__((section("_ftrace_events"))) \ 146 __attribute__((section("_ftrace_events"))) \
@@ -152,7 +149,8 @@ extern struct trace_event_functions exit_syscall_print_funcs;
152 .class = &event_class_syscall_exit, \ 149 .class = &event_class_syscall_exit, \
153 .event.funcs = &exit_syscall_print_funcs, \ 150 .event.funcs = &exit_syscall_print_funcs, \
154 .data = (void *)&__syscall_meta_##sname,\ 151 .data = (void *)&__syscall_meta_##sname,\
155 } 152 }; \
153 __TRACE_EVENT_FLAGS(exit_##sname, TRACE_EVENT_FL_CAP_ANY)
156 154
157#define SYSCALL_METADATA(sname, nb) \ 155#define SYSCALL_METADATA(sname, nb) \
158 SYSCALL_TRACE_ENTER_EVENT(sname); \ 156 SYSCALL_TRACE_ENTER_EVENT(sname); \
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index a4a90b6726ce..5a6074fcd81d 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -234,6 +234,8 @@ do_trace: \
234 PARAMS(void *__data, proto), \ 234 PARAMS(void *__data, proto), \
235 PARAMS(__data, args)) 235 PARAMS(__data, args))
236 236
237#define TRACE_EVENT_FLAGS(event, flag)
238
237#endif /* DECLARE_TRACE */ 239#endif /* DECLARE_TRACE */
238 240
239#ifndef TRACE_EVENT 241#ifndef TRACE_EVENT
@@ -354,4 +356,6 @@ do_trace: \
354 assign, print, reg, unreg) \ 356 assign, print, reg, unreg) \
355 DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) 357 DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
356 358
359#define TRACE_EVENT_FLAGS(event, flag)
360
357#endif /* ifdef TRACE_EVENT (see note above) */ 361#endif /* ifdef TRACE_EVENT (see note above) */
diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h
index fb726ac7caee..5a4c04a75b3d 100644
--- a/include/trace/events/syscalls.h
+++ b/include/trace/events/syscalls.h
@@ -40,6 +40,8 @@ TRACE_EVENT_FN(sys_enter,
40 syscall_regfunc, syscall_unregfunc 40 syscall_regfunc, syscall_unregfunc
41); 41);
42 42
43TRACE_EVENT_FLAGS(sys_enter, TRACE_EVENT_FL_CAP_ANY)
44
43TRACE_EVENT_FN(sys_exit, 45TRACE_EVENT_FN(sys_exit,
44 46
45 TP_PROTO(struct pt_regs *regs, long ret), 47 TP_PROTO(struct pt_regs *regs, long ret),
@@ -62,6 +64,8 @@ TRACE_EVENT_FN(sys_exit,
62 syscall_regfunc, syscall_unregfunc 64 syscall_regfunc, syscall_unregfunc
63); 65);
64 66
67TRACE_EVENT_FLAGS(sys_exit, TRACE_EVENT_FL_CAP_ANY)
68
65#endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */ 69#endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */
66 70
67#endif /* _TRACE_EVENTS_SYSCALLS_H */ 71#endif /* _TRACE_EVENTS_SYSCALLS_H */
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index a9377c0083ad..e718a917d897 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -82,6 +82,10 @@
82 TRACE_EVENT(name, PARAMS(proto), PARAMS(args), \ 82 TRACE_EVENT(name, PARAMS(proto), PARAMS(args), \
83 PARAMS(tstruct), PARAMS(assign), PARAMS(print)) \ 83 PARAMS(tstruct), PARAMS(assign), PARAMS(print)) \
84 84
85#undef TRACE_EVENT_FLAGS
86#define TRACE_EVENT_FLAGS(name, value) \
87 __TRACE_EVENT_FLAGS(name, value)
88
85#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) 89#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
86 90
87 91
@@ -129,6 +133,9 @@
129#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ 133#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
130 DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) 134 DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
131 135
136#undef TRACE_EVENT_FLAGS
137#define TRACE_EVENT_FLAGS(event, flag)
138
132#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) 139#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
133 140
134/* 141/*
diff --git a/init/main.c b/init/main.c
index 8646401f7a0e..ea51770c0170 100644
--- a/init/main.c
+++ b/init/main.c
@@ -67,6 +67,7 @@
67#include <linux/sfi.h> 67#include <linux/sfi.h>
68#include <linux/shmem_fs.h> 68#include <linux/shmem_fs.h>
69#include <linux/slab.h> 69#include <linux/slab.h>
70#include <linux/perf_event.h>
70 71
71#include <asm/io.h> 72#include <asm/io.h>
72#include <asm/bugs.h> 73#include <asm/bugs.h>
@@ -603,6 +604,8 @@ asmlinkage void __init start_kernel(void)
603 "enabled *very* early, fixing it\n"); 604 "enabled *very* early, fixing it\n");
604 local_irq_disable(); 605 local_irq_disable();
605 } 606 }
607 idr_init_cache();
608 perf_event_init();
606 rcu_init(); 609 rcu_init();
607 radix_tree_init(); 610 radix_tree_init();
608 /* init some links before init_ISA_irqs() */ 611 /* init some links before init_ISA_irqs() */
@@ -658,7 +661,6 @@ asmlinkage void __init start_kernel(void)
658 enable_debug_pagealloc(); 661 enable_debug_pagealloc();
659 kmemleak_init(); 662 kmemleak_init();
660 debug_objects_mem_init(); 663 debug_objects_mem_init();
661 idr_init_cache();
662 setup_per_cpu_pageset(); 664 setup_per_cpu_pageset();
663 numa_policy_init(); 665 numa_policy_init();
664 if (late_time_init) 666 if (late_time_init)
@@ -882,6 +884,7 @@ static int __init kernel_init(void * unused)
882 smp_prepare_cpus(setup_max_cpus); 884 smp_prepare_cpus(setup_max_cpus);
883 885
884 do_pre_smp_initcalls(); 886 do_pre_smp_initcalls();
887 lockup_detector_init();
885 888
886 smp_init(); 889 smp_init();
887 sched_init_smp(); 890 sched_init_smp();
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index e5325825aeb6..086adf25a55e 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -641,7 +641,7 @@ int __init init_hw_breakpoint(void)
641 641
642 constraints_initialized = 1; 642 constraints_initialized = 1;
643 643
644 perf_pmu_register(&perf_breakpoint); 644 perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);
645 645
646 return register_die_notifier(&hw_breakpoint_exceptions_nb); 646 return register_die_notifier(&hw_breakpoint_exceptions_nb);
647 647
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9737a76e106f..7663e5df0e6f 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -354,13 +354,20 @@ static inline int kprobe_aggrprobe(struct kprobe *p)
354 return p->pre_handler == aggr_pre_handler; 354 return p->pre_handler == aggr_pre_handler;
355} 355}
356 356
357/* Return true(!0) if the kprobe is unused */
358static inline int kprobe_unused(struct kprobe *p)
359{
360 return kprobe_aggrprobe(p) && kprobe_disabled(p) &&
361 list_empty(&p->list);
362}
363
357/* 364/*
358 * Keep all fields in the kprobe consistent 365 * Keep all fields in the kprobe consistent
359 */ 366 */
360static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p) 367static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p)
361{ 368{
362 memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t)); 369 memcpy(&p->opcode, &ap->opcode, sizeof(kprobe_opcode_t));
363 memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn)); 370 memcpy(&p->ainsn, &ap->ainsn, sizeof(struct arch_specific_insn));
364} 371}
365 372
366#ifdef CONFIG_OPTPROBES 373#ifdef CONFIG_OPTPROBES
@@ -384,6 +391,17 @@ void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
384 } 391 }
385} 392}
386 393
394/* Free optimized instructions and optimized_kprobe */
395static __kprobes void free_aggr_kprobe(struct kprobe *p)
396{
397 struct optimized_kprobe *op;
398
399 op = container_of(p, struct optimized_kprobe, kp);
400 arch_remove_optimized_kprobe(op);
401 arch_remove_kprobe(p);
402 kfree(op);
403}
404
387/* Return true(!0) if the kprobe is ready for optimization. */ 405/* Return true(!0) if the kprobe is ready for optimization. */
388static inline int kprobe_optready(struct kprobe *p) 406static inline int kprobe_optready(struct kprobe *p)
389{ 407{
@@ -397,6 +415,33 @@ static inline int kprobe_optready(struct kprobe *p)
397 return 0; 415 return 0;
398} 416}
399 417
418/* Return true(!0) if the kprobe is disarmed. Note: p must be on hash list */
419static inline int kprobe_disarmed(struct kprobe *p)
420{
421 struct optimized_kprobe *op;
422
423 /* If kprobe is not aggr/opt probe, just return kprobe is disabled */
424 if (!kprobe_aggrprobe(p))
425 return kprobe_disabled(p);
426
427 op = container_of(p, struct optimized_kprobe, kp);
428
429 return kprobe_disabled(p) && list_empty(&op->list);
430}
431
432/* Return true(!0) if the probe is queued on (un)optimizing lists */
433static int __kprobes kprobe_queued(struct kprobe *p)
434{
435 struct optimized_kprobe *op;
436
437 if (kprobe_aggrprobe(p)) {
438 op = container_of(p, struct optimized_kprobe, kp);
439 if (!list_empty(&op->list))
440 return 1;
441 }
442 return 0;
443}
444
400/* 445/*
401 * Return an optimized kprobe whose optimizing code replaces 446 * Return an optimized kprobe whose optimizing code replaces
402 * instructions including addr (exclude breakpoint). 447 * instructions including addr (exclude breakpoint).
@@ -422,30 +467,23 @@ static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)
422 467
423/* Optimization staging list, protected by kprobe_mutex */ 468/* Optimization staging list, protected by kprobe_mutex */
424static LIST_HEAD(optimizing_list); 469static LIST_HEAD(optimizing_list);
470static LIST_HEAD(unoptimizing_list);
425 471
426static void kprobe_optimizer(struct work_struct *work); 472static void kprobe_optimizer(struct work_struct *work);
427static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer); 473static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
474static DECLARE_COMPLETION(optimizer_comp);
428#define OPTIMIZE_DELAY 5 475#define OPTIMIZE_DELAY 5
429 476
430/* Kprobe jump optimizer */ 477/*
431static __kprobes void kprobe_optimizer(struct work_struct *work) 478 * Optimize (replace a breakpoint with a jump) kprobes listed on
479 * optimizing_list.
480 */
481static __kprobes void do_optimize_kprobes(void)
432{ 482{
433 struct optimized_kprobe *op, *tmp; 483 /* Optimization never be done when disarmed */
434 484 if (kprobes_all_disarmed || !kprobes_allow_optimization ||
435 /* Lock modules while optimizing kprobes */ 485 list_empty(&optimizing_list))
436 mutex_lock(&module_mutex); 486 return;
437 mutex_lock(&kprobe_mutex);
438 if (kprobes_all_disarmed || !kprobes_allow_optimization)
439 goto end;
440
441 /*
442 * Wait for quiesence period to ensure all running interrupts
443 * are done. Because optprobe may modify multiple instructions
444 * there is a chance that Nth instruction is interrupted. In that
445 * case, running interrupt can return to 2nd-Nth byte of jump
446 * instruction. This wait is for avoiding it.
447 */
448 synchronize_sched();
449 487
450 /* 488 /*
451 * The optimization/unoptimization refers online_cpus via 489 * The optimization/unoptimization refers online_cpus via
@@ -459,17 +497,111 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
459 */ 497 */
460 get_online_cpus(); 498 get_online_cpus();
461 mutex_lock(&text_mutex); 499 mutex_lock(&text_mutex);
462 list_for_each_entry_safe(op, tmp, &optimizing_list, list) { 500 arch_optimize_kprobes(&optimizing_list);
463 WARN_ON(kprobe_disabled(&op->kp)); 501 mutex_unlock(&text_mutex);
464 if (arch_optimize_kprobe(op) < 0) 502 put_online_cpus();
465 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; 503}
466 list_del_init(&op->list); 504
505/*
506 * Unoptimize (replace a jump with a breakpoint and remove the breakpoint
507 * if need) kprobes listed on unoptimizing_list.
508 */
509static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
510{
511 struct optimized_kprobe *op, *tmp;
512
513 /* Unoptimization must be done anytime */
514 if (list_empty(&unoptimizing_list))
515 return;
516
517 /* Ditto to do_optimize_kprobes */
518 get_online_cpus();
519 mutex_lock(&text_mutex);
520 arch_unoptimize_kprobes(&unoptimizing_list, free_list);
521 /* Loop free_list for disarming */
522 list_for_each_entry_safe(op, tmp, free_list, list) {
523 /* Disarm probes if marked disabled */
524 if (kprobe_disabled(&op->kp))
525 arch_disarm_kprobe(&op->kp);
526 if (kprobe_unused(&op->kp)) {
527 /*
528 * Remove unused probes from hash list. After waiting
529 * for synchronization, these probes are reclaimed.
530 * (reclaiming is done by do_free_cleaned_kprobes.)
531 */
532 hlist_del_rcu(&op->kp.hlist);
533 } else
534 list_del_init(&op->list);
467 } 535 }
468 mutex_unlock(&text_mutex); 536 mutex_unlock(&text_mutex);
469 put_online_cpus(); 537 put_online_cpus();
470end: 538}
539
540/* Reclaim all kprobes on the free_list */
541static __kprobes void do_free_cleaned_kprobes(struct list_head *free_list)
542{
543 struct optimized_kprobe *op, *tmp;
544
545 list_for_each_entry_safe(op, tmp, free_list, list) {
546 BUG_ON(!kprobe_unused(&op->kp));
547 list_del_init(&op->list);
548 free_aggr_kprobe(&op->kp);
549 }
550}
551
552/* Start optimizer after OPTIMIZE_DELAY passed */
553static __kprobes void kick_kprobe_optimizer(void)
554{
555 if (!delayed_work_pending(&optimizing_work))
556 schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
557}
558
559/* Kprobe jump optimizer */
560static __kprobes void kprobe_optimizer(struct work_struct *work)
561{
562 LIST_HEAD(free_list);
563
564 /* Lock modules while optimizing kprobes */
565 mutex_lock(&module_mutex);
566 mutex_lock(&kprobe_mutex);
567
568 /*
569 * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
570 * kprobes before waiting for quiesence period.
571 */
572 do_unoptimize_kprobes(&free_list);
573
574 /*
575 * Step 2: Wait for quiesence period to ensure all running interrupts
576 * are done. Because optprobe may modify multiple instructions
577 * there is a chance that Nth instruction is interrupted. In that
578 * case, running interrupt can return to 2nd-Nth byte of jump
579 * instruction. This wait is for avoiding it.
580 */
581 synchronize_sched();
582
583 /* Step 3: Optimize kprobes after quiesence period */
584 do_optimize_kprobes();
585
586 /* Step 4: Free cleaned kprobes after quiesence period */
587 do_free_cleaned_kprobes(&free_list);
588
471 mutex_unlock(&kprobe_mutex); 589 mutex_unlock(&kprobe_mutex);
472 mutex_unlock(&module_mutex); 590 mutex_unlock(&module_mutex);
591
592 /* Step 5: Kick optimizer again if needed */
593 if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
594 kick_kprobe_optimizer();
595 else
596 /* Wake up all waiters */
597 complete_all(&optimizer_comp);
598}
599
600/* Wait for completing optimization and unoptimization */
601static __kprobes void wait_for_kprobe_optimizer(void)
602{
603 if (delayed_work_pending(&optimizing_work))
604 wait_for_completion(&optimizer_comp);
473} 605}
474 606
475/* Optimize kprobe if p is ready to be optimized */ 607/* Optimize kprobe if p is ready to be optimized */
@@ -495,42 +627,99 @@ static __kprobes void optimize_kprobe(struct kprobe *p)
495 /* Check if it is already optimized. */ 627 /* Check if it is already optimized. */
496 if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) 628 if (op->kp.flags & KPROBE_FLAG_OPTIMIZED)
497 return; 629 return;
498
499 op->kp.flags |= KPROBE_FLAG_OPTIMIZED; 630 op->kp.flags |= KPROBE_FLAG_OPTIMIZED;
500 list_add(&op->list, &optimizing_list); 631
501 if (!delayed_work_pending(&optimizing_work)) 632 if (!list_empty(&op->list))
502 schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY); 633 /* This is under unoptimizing. Just dequeue the probe */
634 list_del_init(&op->list);
635 else {
636 list_add(&op->list, &optimizing_list);
637 kick_kprobe_optimizer();
638 }
639}
640
641/* Short cut to direct unoptimizing */
642static __kprobes void force_unoptimize_kprobe(struct optimized_kprobe *op)
643{
644 get_online_cpus();
645 arch_unoptimize_kprobe(op);
646 put_online_cpus();
647 if (kprobe_disabled(&op->kp))
648 arch_disarm_kprobe(&op->kp);
503} 649}
504 650
505/* Unoptimize a kprobe if p is optimized */ 651/* Unoptimize a kprobe if p is optimized */
506static __kprobes void unoptimize_kprobe(struct kprobe *p) 652static __kprobes void unoptimize_kprobe(struct kprobe *p, bool force)
507{ 653{
508 struct optimized_kprobe *op; 654 struct optimized_kprobe *op;
509 655
510 if ((p->flags & KPROBE_FLAG_OPTIMIZED) && kprobe_aggrprobe(p)) { 656 if (!kprobe_aggrprobe(p) || kprobe_disarmed(p))
511 op = container_of(p, struct optimized_kprobe, kp); 657 return; /* This is not an optprobe nor optimized */
512 if (!list_empty(&op->list)) 658
513 /* Dequeue from the optimization queue */ 659 op = container_of(p, struct optimized_kprobe, kp);
660 if (!kprobe_optimized(p)) {
661 /* Unoptimized or unoptimizing case */
662 if (force && !list_empty(&op->list)) {
663 /*
664 * Only if this is unoptimizing kprobe and forced,
665 * forcibly unoptimize it. (No need to unoptimize
666 * unoptimized kprobe again :)
667 */
514 list_del_init(&op->list); 668 list_del_init(&op->list);
515 else 669 force_unoptimize_kprobe(op);
516 /* Replace jump with break */ 670 }
517 arch_unoptimize_kprobe(op); 671 return;
518 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; 672 }
673
674 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
675 if (!list_empty(&op->list)) {
676 /* Dequeue from the optimization queue */
677 list_del_init(&op->list);
678 return;
679 }
680 /* Optimized kprobe case */
681 if (force)
682 /* Forcibly update the code: this is a special case */
683 force_unoptimize_kprobe(op);
684 else {
685 list_add(&op->list, &unoptimizing_list);
686 kick_kprobe_optimizer();
519 } 687 }
520} 688}
521 689
690/* Cancel unoptimizing for reusing */
691static void reuse_unused_kprobe(struct kprobe *ap)
692{
693 struct optimized_kprobe *op;
694
695 BUG_ON(!kprobe_unused(ap));
696 /*
697 * Unused kprobe MUST be on the way of delayed unoptimizing (means
698 * there is still a relative jump) and disabled.
699 */
700 op = container_of(ap, struct optimized_kprobe, kp);
701 if (unlikely(list_empty(&op->list)))
702 printk(KERN_WARNING "Warning: found a stray unused "
703 "aggrprobe@%p\n", ap->addr);
704 /* Enable the probe again */
705 ap->flags &= ~KPROBE_FLAG_DISABLED;
706 /* Optimize it again (remove from op->list) */
707 BUG_ON(!kprobe_optready(ap));
708 optimize_kprobe(ap);
709}
710
522/* Remove optimized instructions */ 711/* Remove optimized instructions */
523static void __kprobes kill_optimized_kprobe(struct kprobe *p) 712static void __kprobes kill_optimized_kprobe(struct kprobe *p)
524{ 713{
525 struct optimized_kprobe *op; 714 struct optimized_kprobe *op;
526 715
527 op = container_of(p, struct optimized_kprobe, kp); 716 op = container_of(p, struct optimized_kprobe, kp);
528 if (!list_empty(&op->list)) { 717 if (!list_empty(&op->list))
529 /* Dequeue from the optimization queue */ 718 /* Dequeue from the (un)optimization queue */
530 list_del_init(&op->list); 719 list_del_init(&op->list);
531 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; 720
532 } 721 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
533 /* Don't unoptimize, because the target code will be freed. */ 722 /* Don't touch the code, because it is already freed. */
534 arch_remove_optimized_kprobe(op); 723 arch_remove_optimized_kprobe(op);
535} 724}
536 725
@@ -543,16 +732,6 @@ static __kprobes void prepare_optimized_kprobe(struct kprobe *p)
543 arch_prepare_optimized_kprobe(op); 732 arch_prepare_optimized_kprobe(op);
544} 733}
545 734
546/* Free optimized instructions and optimized_kprobe */
547static __kprobes void free_aggr_kprobe(struct kprobe *p)
548{
549 struct optimized_kprobe *op;
550
551 op = container_of(p, struct optimized_kprobe, kp);
552 arch_remove_optimized_kprobe(op);
553 kfree(op);
554}
555
556/* Allocate new optimized_kprobe and try to prepare optimized instructions */ 735/* Allocate new optimized_kprobe and try to prepare optimized instructions */
557static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p) 736static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
558{ 737{
@@ -587,7 +766,8 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
587 op = container_of(ap, struct optimized_kprobe, kp); 766 op = container_of(ap, struct optimized_kprobe, kp);
588 if (!arch_prepared_optinsn(&op->optinsn)) { 767 if (!arch_prepared_optinsn(&op->optinsn)) {
589 /* If failed to setup optimizing, fallback to kprobe */ 768 /* If failed to setup optimizing, fallback to kprobe */
590 free_aggr_kprobe(ap); 769 arch_remove_optimized_kprobe(op);
770 kfree(op);
591 return; 771 return;
592 } 772 }
593 773
@@ -631,21 +811,16 @@ static void __kprobes unoptimize_all_kprobes(void)
631 return; 811 return;
632 812
633 kprobes_allow_optimization = false; 813 kprobes_allow_optimization = false;
634 printk(KERN_INFO "Kprobes globally unoptimized\n");
635 get_online_cpus(); /* For avoiding text_mutex deadlock */
636 mutex_lock(&text_mutex);
637 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 814 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
638 head = &kprobe_table[i]; 815 head = &kprobe_table[i];
639 hlist_for_each_entry_rcu(p, node, head, hlist) { 816 hlist_for_each_entry_rcu(p, node, head, hlist) {
640 if (!kprobe_disabled(p)) 817 if (!kprobe_disabled(p))
641 unoptimize_kprobe(p); 818 unoptimize_kprobe(p, false);
642 } 819 }
643 } 820 }
644 821 /* Wait for unoptimizing completion */
645 mutex_unlock(&text_mutex); 822 wait_for_kprobe_optimizer();
646 put_online_cpus(); 823 printk(KERN_INFO "Kprobes globally unoptimized\n");
647 /* Allow all currently running kprobes to complete */
648 synchronize_sched();
649} 824}
650 825
651int sysctl_kprobes_optimization; 826int sysctl_kprobes_optimization;
@@ -669,44 +844,60 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
669} 844}
670#endif /* CONFIG_SYSCTL */ 845#endif /* CONFIG_SYSCTL */
671 846
847/* Put a breakpoint for a probe. Must be called with text_mutex locked */
672static void __kprobes __arm_kprobe(struct kprobe *p) 848static void __kprobes __arm_kprobe(struct kprobe *p)
673{ 849{
674 struct kprobe *old_p; 850 struct kprobe *_p;
675 851
676 /* Check collision with other optimized kprobes */ 852 /* Check collision with other optimized kprobes */
677 old_p = get_optimized_kprobe((unsigned long)p->addr); 853 _p = get_optimized_kprobe((unsigned long)p->addr);
678 if (unlikely(old_p)) 854 if (unlikely(_p))
679 unoptimize_kprobe(old_p); /* Fallback to unoptimized kprobe */ 855 /* Fallback to unoptimized kprobe */
856 unoptimize_kprobe(_p, true);
680 857
681 arch_arm_kprobe(p); 858 arch_arm_kprobe(p);
682 optimize_kprobe(p); /* Try to optimize (add kprobe to a list) */ 859 optimize_kprobe(p); /* Try to optimize (add kprobe to a list) */
683} 860}
684 861
685static void __kprobes __disarm_kprobe(struct kprobe *p) 862/* Remove the breakpoint of a probe. Must be called with text_mutex locked */
863static void __kprobes __disarm_kprobe(struct kprobe *p, bool reopt)
686{ 864{
687 struct kprobe *old_p; 865 struct kprobe *_p;
688 866
689 unoptimize_kprobe(p); /* Try to unoptimize */ 867 unoptimize_kprobe(p, false); /* Try to unoptimize */
690 arch_disarm_kprobe(p);
691 868
692 /* If another kprobe was blocked, optimize it. */ 869 if (!kprobe_queued(p)) {
693 old_p = get_optimized_kprobe((unsigned long)p->addr); 870 arch_disarm_kprobe(p);
694 if (unlikely(old_p)) 871 /* If another kprobe was blocked, optimize it. */
695 optimize_kprobe(old_p); 872 _p = get_optimized_kprobe((unsigned long)p->addr);
873 if (unlikely(_p) && reopt)
874 optimize_kprobe(_p);
875 }
876 /* TODO: reoptimize others after unoptimized this probe */
696} 877}
697 878
698#else /* !CONFIG_OPTPROBES */ 879#else /* !CONFIG_OPTPROBES */
699 880
700#define optimize_kprobe(p) do {} while (0) 881#define optimize_kprobe(p) do {} while (0)
701#define unoptimize_kprobe(p) do {} while (0) 882#define unoptimize_kprobe(p, f) do {} while (0)
702#define kill_optimized_kprobe(p) do {} while (0) 883#define kill_optimized_kprobe(p) do {} while (0)
703#define prepare_optimized_kprobe(p) do {} while (0) 884#define prepare_optimized_kprobe(p) do {} while (0)
704#define try_to_optimize_kprobe(p) do {} while (0) 885#define try_to_optimize_kprobe(p) do {} while (0)
705#define __arm_kprobe(p) arch_arm_kprobe(p) 886#define __arm_kprobe(p) arch_arm_kprobe(p)
706#define __disarm_kprobe(p) arch_disarm_kprobe(p) 887#define __disarm_kprobe(p, o) arch_disarm_kprobe(p)
888#define kprobe_disarmed(p) kprobe_disabled(p)
889#define wait_for_kprobe_optimizer() do {} while (0)
890
891/* There should be no unused kprobes can be reused without optimization */
892static void reuse_unused_kprobe(struct kprobe *ap)
893{
894 printk(KERN_ERR "Error: There should be no unused kprobe here.\n");
895 BUG_ON(kprobe_unused(ap));
896}
707 897
708static __kprobes void free_aggr_kprobe(struct kprobe *p) 898static __kprobes void free_aggr_kprobe(struct kprobe *p)
709{ 899{
900 arch_remove_kprobe(p);
710 kfree(p); 901 kfree(p);
711} 902}
712 903
@@ -732,11 +923,10 @@ static void __kprobes arm_kprobe(struct kprobe *kp)
732/* Disarm a kprobe with text_mutex */ 923/* Disarm a kprobe with text_mutex */
733static void __kprobes disarm_kprobe(struct kprobe *kp) 924static void __kprobes disarm_kprobe(struct kprobe *kp)
734{ 925{
735 get_online_cpus(); /* For avoiding text_mutex deadlock */ 926 /* Ditto */
736 mutex_lock(&text_mutex); 927 mutex_lock(&text_mutex);
737 __disarm_kprobe(kp); 928 __disarm_kprobe(kp, true);
738 mutex_unlock(&text_mutex); 929 mutex_unlock(&text_mutex);
739 put_online_cpus();
740} 930}
741 931
742/* 932/*
@@ -942,7 +1132,7 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
942 BUG_ON(kprobe_gone(ap) || kprobe_gone(p)); 1132 BUG_ON(kprobe_gone(ap) || kprobe_gone(p));
943 1133
944 if (p->break_handler || p->post_handler) 1134 if (p->break_handler || p->post_handler)
945 unoptimize_kprobe(ap); /* Fall back to normal kprobe */ 1135 unoptimize_kprobe(ap, true); /* Fall back to normal kprobe */
946 1136
947 if (p->break_handler) { 1137 if (p->break_handler) {
948 if (ap->break_handler) 1138 if (ap->break_handler)
@@ -993,19 +1183,21 @@ static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
993 * This is the second or subsequent kprobe at the address - handle 1183 * This is the second or subsequent kprobe at the address - handle
994 * the intricacies 1184 * the intricacies
995 */ 1185 */
996static int __kprobes register_aggr_kprobe(struct kprobe *old_p, 1186static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,
997 struct kprobe *p) 1187 struct kprobe *p)
998{ 1188{
999 int ret = 0; 1189 int ret = 0;
1000 struct kprobe *ap = old_p; 1190 struct kprobe *ap = orig_p;
1001 1191
1002 if (!kprobe_aggrprobe(old_p)) { 1192 if (!kprobe_aggrprobe(orig_p)) {
1003 /* If old_p is not an aggr_kprobe, create new aggr_kprobe. */ 1193 /* If orig_p is not an aggr_kprobe, create new aggr_kprobe. */
1004 ap = alloc_aggr_kprobe(old_p); 1194 ap = alloc_aggr_kprobe(orig_p);
1005 if (!ap) 1195 if (!ap)
1006 return -ENOMEM; 1196 return -ENOMEM;
1007 init_aggr_kprobe(ap, old_p); 1197 init_aggr_kprobe(ap, orig_p);
1008 } 1198 } else if (kprobe_unused(ap))
1199 /* This probe is going to die. Rescue it */
1200 reuse_unused_kprobe(ap);
1009 1201
1010 if (kprobe_gone(ap)) { 1202 if (kprobe_gone(ap)) {
1011 /* 1203 /*
@@ -1039,23 +1231,6 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
1039 return add_new_kprobe(ap, p); 1231 return add_new_kprobe(ap, p);
1040} 1232}
1041 1233
1042/* Try to disable aggr_kprobe, and return 1 if succeeded.*/
1043static int __kprobes try_to_disable_aggr_kprobe(struct kprobe *p)
1044{
1045 struct kprobe *kp;
1046
1047 list_for_each_entry_rcu(kp, &p->list, list) {
1048 if (!kprobe_disabled(kp))
1049 /*
1050 * There is an active probe on the list.
1051 * We can't disable aggr_kprobe.
1052 */
1053 return 0;
1054 }
1055 p->flags |= KPROBE_FLAG_DISABLED;
1056 return 1;
1057}
1058
1059static int __kprobes in_kprobes_functions(unsigned long addr) 1234static int __kprobes in_kprobes_functions(unsigned long addr)
1060{ 1235{
1061 struct kprobe_blackpoint *kb; 1236 struct kprobe_blackpoint *kb;
@@ -1098,34 +1273,33 @@ static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
1098/* Check passed kprobe is valid and return kprobe in kprobe_table. */ 1273/* Check passed kprobe is valid and return kprobe in kprobe_table. */
1099static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p) 1274static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
1100{ 1275{
1101 struct kprobe *old_p, *list_p; 1276 struct kprobe *ap, *list_p;
1102 1277
1103 old_p = get_kprobe(p->addr); 1278 ap = get_kprobe(p->addr);
1104 if (unlikely(!old_p)) 1279 if (unlikely(!ap))
1105 return NULL; 1280 return NULL;
1106 1281
1107 if (p != old_p) { 1282 if (p != ap) {
1108 list_for_each_entry_rcu(list_p, &old_p->list, list) 1283 list_for_each_entry_rcu(list_p, &ap->list, list)
1109 if (list_p == p) 1284 if (list_p == p)
1110 /* kprobe p is a valid probe */ 1285 /* kprobe p is a valid probe */
1111 goto valid; 1286 goto valid;
1112 return NULL; 1287 return NULL;
1113 } 1288 }
1114valid: 1289valid:
1115 return old_p; 1290 return ap;
1116} 1291}
1117 1292
1118/* Return error if the kprobe is being re-registered */ 1293/* Return error if the kprobe is being re-registered */
1119static inline int check_kprobe_rereg(struct kprobe *p) 1294static inline int check_kprobe_rereg(struct kprobe *p)
1120{ 1295{
1121 int ret = 0; 1296 int ret = 0;
1122 struct kprobe *old_p;
1123 1297
1124 mutex_lock(&kprobe_mutex); 1298 mutex_lock(&kprobe_mutex);
1125 old_p = __get_valid_kprobe(p); 1299 if (__get_valid_kprobe(p))
1126 if (old_p)
1127 ret = -EINVAL; 1300 ret = -EINVAL;
1128 mutex_unlock(&kprobe_mutex); 1301 mutex_unlock(&kprobe_mutex);
1302
1129 return ret; 1303 return ret;
1130} 1304}
1131 1305
@@ -1229,67 +1403,121 @@ fail_with_jump_label:
1229} 1403}
1230EXPORT_SYMBOL_GPL(register_kprobe); 1404EXPORT_SYMBOL_GPL(register_kprobe);
1231 1405
1406/* Check if all probes on the aggrprobe are disabled */
1407static int __kprobes aggr_kprobe_disabled(struct kprobe *ap)
1408{
1409 struct kprobe *kp;
1410
1411 list_for_each_entry_rcu(kp, &ap->list, list)
1412 if (!kprobe_disabled(kp))
1413 /*
1414 * There is an active probe on the list.
1415 * We can't disable this ap.
1416 */
1417 return 0;
1418
1419 return 1;
1420}
1421
1422/* Disable one kprobe: Make sure called under kprobe_mutex is locked */
1423static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p)
1424{
1425 struct kprobe *orig_p;
1426
1427 /* Get an original kprobe for return */
1428 orig_p = __get_valid_kprobe(p);
1429 if (unlikely(orig_p == NULL))
1430 return NULL;
1431
1432 if (!kprobe_disabled(p)) {
1433 /* Disable probe if it is a child probe */
1434 if (p != orig_p)
1435 p->flags |= KPROBE_FLAG_DISABLED;
1436
1437 /* Try to disarm and disable this/parent probe */
1438 if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
1439 disarm_kprobe(orig_p);
1440 orig_p->flags |= KPROBE_FLAG_DISABLED;
1441 }
1442 }
1443
1444 return orig_p;
1445}
1446
1232/* 1447/*
1233 * Unregister a kprobe without a scheduler synchronization. 1448 * Unregister a kprobe without a scheduler synchronization.
1234 */ 1449 */
1235static int __kprobes __unregister_kprobe_top(struct kprobe *p) 1450static int __kprobes __unregister_kprobe_top(struct kprobe *p)
1236{ 1451{
1237 struct kprobe *old_p, *list_p; 1452 struct kprobe *ap, *list_p;
1238 1453
1239 old_p = __get_valid_kprobe(p); 1454 /* Disable kprobe. This will disarm it if needed. */
1240 if (old_p == NULL) 1455 ap = __disable_kprobe(p);
1456 if (ap == NULL)
1241 return -EINVAL; 1457 return -EINVAL;
1242 1458
1243 if (old_p == p || 1459 if (ap == p)
1244 (kprobe_aggrprobe(old_p) &&
1245 list_is_singular(&old_p->list))) {
1246 /* 1460 /*
1247 * Only probe on the hash list. Disarm only if kprobes are 1461 * This probe is an independent(and non-optimized) kprobe
1248 * enabled and not gone - otherwise, the breakpoint would 1462 * (not an aggrprobe). Remove from the hash list.
1249 * already have been removed. We save on flushing icache.
1250 */ 1463 */
1251 if (!kprobes_all_disarmed && !kprobe_disabled(old_p)) 1464 goto disarmed;
1252 disarm_kprobe(old_p); 1465
1253 hlist_del_rcu(&old_p->hlist); 1466 /* Following process expects this probe is an aggrprobe */
1254 } else { 1467 WARN_ON(!kprobe_aggrprobe(ap));
1468
1469 if (list_is_singular(&ap->list) && kprobe_disarmed(ap))
1470 /*
1471 * !disarmed could be happen if the probe is under delayed
1472 * unoptimizing.
1473 */
1474 goto disarmed;
1475 else {
1476 /* If disabling probe has special handlers, update aggrprobe */
1255 if (p->break_handler && !kprobe_gone(p)) 1477 if (p->break_handler && !kprobe_gone(p))
1256 old_p->break_handler = NULL; 1478 ap->break_handler = NULL;
1257 if (p->post_handler && !kprobe_gone(p)) { 1479 if (p->post_handler && !kprobe_gone(p)) {
1258 list_for_each_entry_rcu(list_p, &old_p->list, list) { 1480 list_for_each_entry_rcu(list_p, &ap->list, list) {
1259 if ((list_p != p) && (list_p->post_handler)) 1481 if ((list_p != p) && (list_p->post_handler))
1260 goto noclean; 1482 goto noclean;
1261 } 1483 }
1262 old_p->post_handler = NULL; 1484 ap->post_handler = NULL;
1263 } 1485 }
1264noclean: 1486noclean:
1487 /*
1488 * Remove from the aggrprobe: this path will do nothing in
1489 * __unregister_kprobe_bottom().
1490 */
1265 list_del_rcu(&p->list); 1491 list_del_rcu(&p->list);
1266 if (!kprobe_disabled(old_p)) { 1492 if (!kprobe_disabled(ap) && !kprobes_all_disarmed)
1267 try_to_disable_aggr_kprobe(old_p); 1493 /*
1268 if (!kprobes_all_disarmed) { 1494 * Try to optimize this probe again, because post
1269 if (kprobe_disabled(old_p)) 1495 * handler may have been changed.
1270 disarm_kprobe(old_p); 1496 */
1271 else 1497 optimize_kprobe(ap);
1272 /* Try to optimize this probe again */
1273 optimize_kprobe(old_p);
1274 }
1275 }
1276 } 1498 }
1277 return 0; 1499 return 0;
1500
1501disarmed:
1502 BUG_ON(!kprobe_disarmed(ap));
1503 hlist_del_rcu(&ap->hlist);
1504 return 0;
1278} 1505}
1279 1506
1280static void __kprobes __unregister_kprobe_bottom(struct kprobe *p) 1507static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
1281{ 1508{
1282 struct kprobe *old_p; 1509 struct kprobe *ap;
1283 1510
1284 if (list_empty(&p->list)) 1511 if (list_empty(&p->list))
1512 /* This is an independent kprobe */
1285 arch_remove_kprobe(p); 1513 arch_remove_kprobe(p);
1286 else if (list_is_singular(&p->list)) { 1514 else if (list_is_singular(&p->list)) {
1287 /* "p" is the last child of an aggr_kprobe */ 1515 /* This is the last child of an aggrprobe */
1288 old_p = list_entry(p->list.next, struct kprobe, list); 1516 ap = list_entry(p->list.next, struct kprobe, list);
1289 list_del(&p->list); 1517 list_del(&p->list);
1290 arch_remove_kprobe(old_p); 1518 free_aggr_kprobe(ap);
1291 free_aggr_kprobe(old_p);
1292 } 1519 }
1520 /* Otherwise, do nothing. */
1293} 1521}
1294 1522
1295int __kprobes register_kprobes(struct kprobe **kps, int num) 1523int __kprobes register_kprobes(struct kprobe **kps, int num)
@@ -1607,29 +1835,13 @@ static void __kprobes kill_kprobe(struct kprobe *p)
1607int __kprobes disable_kprobe(struct kprobe *kp) 1835int __kprobes disable_kprobe(struct kprobe *kp)
1608{ 1836{
1609 int ret = 0; 1837 int ret = 0;
1610 struct kprobe *p;
1611 1838
1612 mutex_lock(&kprobe_mutex); 1839 mutex_lock(&kprobe_mutex);
1613 1840
1614 /* Check whether specified probe is valid. */ 1841 /* Disable this kprobe */
1615 p = __get_valid_kprobe(kp); 1842 if (__disable_kprobe(kp) == NULL)
1616 if (unlikely(p == NULL)) {
1617 ret = -EINVAL; 1843 ret = -EINVAL;
1618 goto out;
1619 }
1620 1844
1621 /* If the probe is already disabled (or gone), just return */
1622 if (kprobe_disabled(kp))
1623 goto out;
1624
1625 kp->flags |= KPROBE_FLAG_DISABLED;
1626 if (p != kp)
1627 /* When kp != p, p is always enabled. */
1628 try_to_disable_aggr_kprobe(p);
1629
1630 if (!kprobes_all_disarmed && kprobe_disabled(p))
1631 disarm_kprobe(p);
1632out:
1633 mutex_unlock(&kprobe_mutex); 1845 mutex_unlock(&kprobe_mutex);
1634 return ret; 1846 return ret;
1635} 1847}
@@ -1927,36 +2139,27 @@ static void __kprobes disarm_all_kprobes(void)
1927 mutex_lock(&kprobe_mutex); 2139 mutex_lock(&kprobe_mutex);
1928 2140
1929 /* If kprobes are already disarmed, just return */ 2141 /* If kprobes are already disarmed, just return */
1930 if (kprobes_all_disarmed) 2142 if (kprobes_all_disarmed) {
1931 goto already_disabled; 2143 mutex_unlock(&kprobe_mutex);
2144 return;
2145 }
1932 2146
1933 kprobes_all_disarmed = true; 2147 kprobes_all_disarmed = true;
1934 printk(KERN_INFO "Kprobes globally disabled\n"); 2148 printk(KERN_INFO "Kprobes globally disabled\n");
1935 2149
1936 /*
1937 * Here we call get_online_cpus() for avoiding text_mutex deadlock,
1938 * because disarming may also unoptimize kprobes.
1939 */
1940 get_online_cpus();
1941 mutex_lock(&text_mutex); 2150 mutex_lock(&text_mutex);
1942 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 2151 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
1943 head = &kprobe_table[i]; 2152 head = &kprobe_table[i];
1944 hlist_for_each_entry_rcu(p, node, head, hlist) { 2153 hlist_for_each_entry_rcu(p, node, head, hlist) {
1945 if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) 2154 if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
1946 __disarm_kprobe(p); 2155 __disarm_kprobe(p, false);
1947 } 2156 }
1948 } 2157 }
1949
1950 mutex_unlock(&text_mutex); 2158 mutex_unlock(&text_mutex);
1951 put_online_cpus();
1952 mutex_unlock(&kprobe_mutex); 2159 mutex_unlock(&kprobe_mutex);
1953 /* Allow all currently running kprobes to complete */
1954 synchronize_sched();
1955 return;
1956 2160
1957already_disabled: 2161 /* Wait for disarming all kprobes by optimizer */
1958 mutex_unlock(&kprobe_mutex); 2162 wait_for_kprobe_optimizer();
1959 return;
1960} 2163}
1961 2164
1962/* 2165/*
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 2870feee81dd..11847bf1e8cc 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -13,6 +13,7 @@
13#include <linux/mm.h> 13#include <linux/mm.h>
14#include <linux/cpu.h> 14#include <linux/cpu.h>
15#include <linux/smp.h> 15#include <linux/smp.h>
16#include <linux/idr.h>
16#include <linux/file.h> 17#include <linux/file.h>
17#include <linux/poll.h> 18#include <linux/poll.h>
18#include <linux/slab.h> 19#include <linux/slab.h>
@@ -21,7 +22,9 @@
21#include <linux/dcache.h> 22#include <linux/dcache.h>
22#include <linux/percpu.h> 23#include <linux/percpu.h>
23#include <linux/ptrace.h> 24#include <linux/ptrace.h>
25#include <linux/reboot.h>
24#include <linux/vmstat.h> 26#include <linux/vmstat.h>
27#include <linux/device.h>
25#include <linux/vmalloc.h> 28#include <linux/vmalloc.h>
26#include <linux/hardirq.h> 29#include <linux/hardirq.h>
27#include <linux/rculist.h> 30#include <linux/rculist.h>
@@ -133,6 +136,28 @@ static void unclone_ctx(struct perf_event_context *ctx)
133 } 136 }
134} 137}
135 138
139static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
140{
141 /*
142 * only top level events have the pid namespace they were created in
143 */
144 if (event->parent)
145 event = event->parent;
146
147 return task_tgid_nr_ns(p, event->ns);
148}
149
150static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
151{
152 /*
153 * only top level events have the pid namespace they were created in
154 */
155 if (event->parent)
156 event = event->parent;
157
158 return task_pid_nr_ns(p, event->ns);
159}
160
136/* 161/*
137 * If we inherit events we want to return the parent event id 162 * If we inherit events we want to return the parent event id
138 * to userspace. 163 * to userspace.
@@ -312,9 +337,84 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
312 ctx->nr_stat++; 337 ctx->nr_stat++;
313} 338}
314 339
340/*
341 * Called at perf_event creation and when events are attached/detached from a
342 * group.
343 */
344static void perf_event__read_size(struct perf_event *event)
345{
346 int entry = sizeof(u64); /* value */
347 int size = 0;
348 int nr = 1;
349
350 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
351 size += sizeof(u64);
352
353 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
354 size += sizeof(u64);
355
356 if (event->attr.read_format & PERF_FORMAT_ID)
357 entry += sizeof(u64);
358
359 if (event->attr.read_format & PERF_FORMAT_GROUP) {
360 nr += event->group_leader->nr_siblings;
361 size += sizeof(u64);
362 }
363
364 size += entry * nr;
365 event->read_size = size;
366}
367
368static void perf_event__header_size(struct perf_event *event)
369{
370 struct perf_sample_data *data;
371 u64 sample_type = event->attr.sample_type;
372 u16 size = 0;
373
374 perf_event__read_size(event);
375
376 if (sample_type & PERF_SAMPLE_IP)
377 size += sizeof(data->ip);
378
379 if (sample_type & PERF_SAMPLE_ADDR)
380 size += sizeof(data->addr);
381
382 if (sample_type & PERF_SAMPLE_PERIOD)
383 size += sizeof(data->period);
384
385 if (sample_type & PERF_SAMPLE_READ)
386 size += event->read_size;
387
388 event->header_size = size;
389}
390
391static void perf_event__id_header_size(struct perf_event *event)
392{
393 struct perf_sample_data *data;
394 u64 sample_type = event->attr.sample_type;
395 u16 size = 0;
396
397 if (sample_type & PERF_SAMPLE_TID)
398 size += sizeof(data->tid_entry);
399
400 if (sample_type & PERF_SAMPLE_TIME)
401 size += sizeof(data->time);
402
403 if (sample_type & PERF_SAMPLE_ID)
404 size += sizeof(data->id);
405
406 if (sample_type & PERF_SAMPLE_STREAM_ID)
407 size += sizeof(data->stream_id);
408
409 if (sample_type & PERF_SAMPLE_CPU)
410 size += sizeof(data->cpu_entry);
411
412 event->id_header_size = size;
413}
414
315static void perf_group_attach(struct perf_event *event) 415static void perf_group_attach(struct perf_event *event)
316{ 416{
317 struct perf_event *group_leader = event->group_leader; 417 struct perf_event *group_leader = event->group_leader, *pos;
318 418
319 /* 419 /*
320 * We can have double attach due to group movement in perf_event_open. 420 * We can have double attach due to group movement in perf_event_open.
@@ -333,6 +433,11 @@ static void perf_group_attach(struct perf_event *event)
333 433
334 list_add_tail(&event->group_entry, &group_leader->sibling_list); 434 list_add_tail(&event->group_entry, &group_leader->sibling_list);
335 group_leader->nr_siblings++; 435 group_leader->nr_siblings++;
436
437 perf_event__header_size(group_leader);
438
439 list_for_each_entry(pos, &group_leader->sibling_list, group_entry)
440 perf_event__header_size(pos);
336} 441}
337 442
338/* 443/*
@@ -391,7 +496,7 @@ static void perf_group_detach(struct perf_event *event)
391 if (event->group_leader != event) { 496 if (event->group_leader != event) {
392 list_del_init(&event->group_entry); 497 list_del_init(&event->group_entry);
393 event->group_leader->nr_siblings--; 498 event->group_leader->nr_siblings--;
394 return; 499 goto out;
395 } 500 }
396 501
397 if (!list_empty(&event->group_entry)) 502 if (!list_empty(&event->group_entry))
@@ -410,6 +515,12 @@ static void perf_group_detach(struct perf_event *event)
410 /* Inherit group flags from the previous leader */ 515 /* Inherit group flags from the previous leader */
411 sibling->group_flags = event->group_flags; 516 sibling->group_flags = event->group_flags;
412 } 517 }
518
519out:
520 perf_event__header_size(event->group_leader);
521
522 list_for_each_entry(tmp, &event->group_leader->sibling_list, group_entry)
523 perf_event__header_size(tmp);
413} 524}
414 525
415static inline int 526static inline int
@@ -1073,7 +1184,7 @@ static int perf_event_refresh(struct perf_event *event, int refresh)
1073 /* 1184 /*
1074 * not supported on inherited events 1185 * not supported on inherited events
1075 */ 1186 */
1076 if (event->attr.inherit) 1187 if (event->attr.inherit || !is_sampling_event(event))
1077 return -EINVAL; 1188 return -EINVAL;
1078 1189
1079 atomic_add(refresh, &event->event_limit); 1190 atomic_add(refresh, &event->event_limit);
@@ -2289,31 +2400,6 @@ static int perf_release(struct inode *inode, struct file *file)
2289 return perf_event_release_kernel(event); 2400 return perf_event_release_kernel(event);
2290} 2401}
2291 2402
2292static int perf_event_read_size(struct perf_event *event)
2293{
2294 int entry = sizeof(u64); /* value */
2295 int size = 0;
2296 int nr = 1;
2297
2298 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
2299 size += sizeof(u64);
2300
2301 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
2302 size += sizeof(u64);
2303
2304 if (event->attr.read_format & PERF_FORMAT_ID)
2305 entry += sizeof(u64);
2306
2307 if (event->attr.read_format & PERF_FORMAT_GROUP) {
2308 nr += event->group_leader->nr_siblings;
2309 size += sizeof(u64);
2310 }
2311
2312 size += entry * nr;
2313
2314 return size;
2315}
2316
2317u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) 2403u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
2318{ 2404{
2319 struct perf_event *child; 2405 struct perf_event *child;
@@ -2428,7 +2514,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
2428 if (event->state == PERF_EVENT_STATE_ERROR) 2514 if (event->state == PERF_EVENT_STATE_ERROR)
2429 return 0; 2515 return 0;
2430 2516
2431 if (count < perf_event_read_size(event)) 2517 if (count < event->read_size)
2432 return -ENOSPC; 2518 return -ENOSPC;
2433 2519
2434 WARN_ON_ONCE(event->ctx->parent_ctx); 2520 WARN_ON_ONCE(event->ctx->parent_ctx);
@@ -2514,7 +2600,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
2514 int ret = 0; 2600 int ret = 0;
2515 u64 value; 2601 u64 value;
2516 2602
2517 if (!event->attr.sample_period) 2603 if (!is_sampling_event(event))
2518 return -EINVAL; 2604 return -EINVAL;
2519 2605
2520 if (copy_from_user(&value, arg, sizeof(value))) 2606 if (copy_from_user(&value, arg, sizeof(value)))
@@ -3305,6 +3391,73 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle,
3305 } while (len); 3391 } while (len);
3306} 3392}
3307 3393
3394static void __perf_event_header__init_id(struct perf_event_header *header,
3395 struct perf_sample_data *data,
3396 struct perf_event *event)
3397{
3398 u64 sample_type = event->attr.sample_type;
3399
3400 data->type = sample_type;
3401 header->size += event->id_header_size;
3402
3403 if (sample_type & PERF_SAMPLE_TID) {
3404 /* namespace issues */
3405 data->tid_entry.pid = perf_event_pid(event, current);
3406 data->tid_entry.tid = perf_event_tid(event, current);
3407 }
3408
3409 if (sample_type & PERF_SAMPLE_TIME)
3410 data->time = perf_clock();
3411
3412 if (sample_type & PERF_SAMPLE_ID)
3413 data->id = primary_event_id(event);
3414
3415 if (sample_type & PERF_SAMPLE_STREAM_ID)
3416 data->stream_id = event->id;
3417
3418 if (sample_type & PERF_SAMPLE_CPU) {
3419 data->cpu_entry.cpu = raw_smp_processor_id();
3420 data->cpu_entry.reserved = 0;
3421 }
3422}
3423
3424static void perf_event_header__init_id(struct perf_event_header *header,
3425 struct perf_sample_data *data,
3426 struct perf_event *event)
3427{
3428 if (event->attr.sample_id_all)
3429 __perf_event_header__init_id(header, data, event);
3430}
3431
3432static void __perf_event__output_id_sample(struct perf_output_handle *handle,
3433 struct perf_sample_data *data)
3434{
3435 u64 sample_type = data->type;
3436
3437 if (sample_type & PERF_SAMPLE_TID)
3438 perf_output_put(handle, data->tid_entry);
3439
3440 if (sample_type & PERF_SAMPLE_TIME)
3441 perf_output_put(handle, data->time);
3442
3443 if (sample_type & PERF_SAMPLE_ID)
3444 perf_output_put(handle, data->id);
3445
3446 if (sample_type & PERF_SAMPLE_STREAM_ID)
3447 perf_output_put(handle, data->stream_id);
3448
3449 if (sample_type & PERF_SAMPLE_CPU)
3450 perf_output_put(handle, data->cpu_entry);
3451}
3452
3453static void perf_event__output_id_sample(struct perf_event *event,
3454 struct perf_output_handle *handle,
3455 struct perf_sample_data *sample)
3456{
3457 if (event->attr.sample_id_all)
3458 __perf_event__output_id_sample(handle, sample);
3459}
3460
3308int perf_output_begin(struct perf_output_handle *handle, 3461int perf_output_begin(struct perf_output_handle *handle,
3309 struct perf_event *event, unsigned int size, 3462 struct perf_event *event, unsigned int size,
3310 int nmi, int sample) 3463 int nmi, int sample)
@@ -3312,6 +3465,7 @@ int perf_output_begin(struct perf_output_handle *handle,
3312 struct perf_buffer *buffer; 3465 struct perf_buffer *buffer;
3313 unsigned long tail, offset, head; 3466 unsigned long tail, offset, head;
3314 int have_lost; 3467 int have_lost;
3468 struct perf_sample_data sample_data;
3315 struct { 3469 struct {
3316 struct perf_event_header header; 3470 struct perf_event_header header;
3317 u64 id; 3471 u64 id;
@@ -3338,8 +3492,12 @@ int perf_output_begin(struct perf_output_handle *handle,
3338 goto out; 3492 goto out;
3339 3493
3340 have_lost = local_read(&buffer->lost); 3494 have_lost = local_read(&buffer->lost);
3341 if (have_lost) 3495 if (have_lost) {
3342 size += sizeof(lost_event); 3496 lost_event.header.size = sizeof(lost_event);
3497 perf_event_header__init_id(&lost_event.header, &sample_data,
3498 event);
3499 size += lost_event.header.size;
3500 }
3343 3501
3344 perf_output_get_handle(handle); 3502 perf_output_get_handle(handle);
3345 3503
@@ -3370,11 +3528,11 @@ int perf_output_begin(struct perf_output_handle *handle,
3370 if (have_lost) { 3528 if (have_lost) {
3371 lost_event.header.type = PERF_RECORD_LOST; 3529 lost_event.header.type = PERF_RECORD_LOST;
3372 lost_event.header.misc = 0; 3530 lost_event.header.misc = 0;
3373 lost_event.header.size = sizeof(lost_event);
3374 lost_event.id = event->id; 3531 lost_event.id = event->id;
3375 lost_event.lost = local_xchg(&buffer->lost, 0); 3532 lost_event.lost = local_xchg(&buffer->lost, 0);
3376 3533
3377 perf_output_put(handle, lost_event); 3534 perf_output_put(handle, lost_event);
3535 perf_event__output_id_sample(event, handle, &sample_data);
3378 } 3536 }
3379 3537
3380 return 0; 3538 return 0;
@@ -3407,28 +3565,6 @@ void perf_output_end(struct perf_output_handle *handle)
3407 rcu_read_unlock(); 3565 rcu_read_unlock();
3408} 3566}
3409 3567
3410static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
3411{
3412 /*
3413 * only top level events have the pid namespace they were created in
3414 */
3415 if (event->parent)
3416 event = event->parent;
3417
3418 return task_tgid_nr_ns(p, event->ns);
3419}
3420
3421static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
3422{
3423 /*
3424 * only top level events have the pid namespace they were created in
3425 */
3426 if (event->parent)
3427 event = event->parent;
3428
3429 return task_pid_nr_ns(p, event->ns);
3430}
3431
3432static void perf_output_read_one(struct perf_output_handle *handle, 3568static void perf_output_read_one(struct perf_output_handle *handle,
3433 struct perf_event *event, 3569 struct perf_event *event,
3434 u64 enabled, u64 running) 3570 u64 enabled, u64 running)
@@ -3603,61 +3739,16 @@ void perf_prepare_sample(struct perf_event_header *header,
3603{ 3739{
3604 u64 sample_type = event->attr.sample_type; 3740 u64 sample_type = event->attr.sample_type;
3605 3741
3606 data->type = sample_type;
3607
3608 header->type = PERF_RECORD_SAMPLE; 3742 header->type = PERF_RECORD_SAMPLE;
3609 header->size = sizeof(*header); 3743 header->size = sizeof(*header) + event->header_size;
3610 3744
3611 header->misc = 0; 3745 header->misc = 0;
3612 header->misc |= perf_misc_flags(regs); 3746 header->misc |= perf_misc_flags(regs);
3613 3747
3614 if (sample_type & PERF_SAMPLE_IP) { 3748 __perf_event_header__init_id(header, data, event);
3615 data->ip = perf_instruction_pointer(regs);
3616
3617 header->size += sizeof(data->ip);
3618 }
3619
3620 if (sample_type & PERF_SAMPLE_TID) {
3621 /* namespace issues */
3622 data->tid_entry.pid = perf_event_pid(event, current);
3623 data->tid_entry.tid = perf_event_tid(event, current);
3624
3625 header->size += sizeof(data->tid_entry);
3626 }
3627
3628 if (sample_type & PERF_SAMPLE_TIME) {
3629 data->time = perf_clock();
3630
3631 header->size += sizeof(data->time);
3632 }
3633
3634 if (sample_type & PERF_SAMPLE_ADDR)
3635 header->size += sizeof(data->addr);
3636
3637 if (sample_type & PERF_SAMPLE_ID) {
3638 data->id = primary_event_id(event);
3639
3640 header->size += sizeof(data->id);
3641 }
3642
3643 if (sample_type & PERF_SAMPLE_STREAM_ID) {
3644 data->stream_id = event->id;
3645
3646 header->size += sizeof(data->stream_id);
3647 }
3648
3649 if (sample_type & PERF_SAMPLE_CPU) {
3650 data->cpu_entry.cpu = raw_smp_processor_id();
3651 data->cpu_entry.reserved = 0;
3652
3653 header->size += sizeof(data->cpu_entry);
3654 }
3655
3656 if (sample_type & PERF_SAMPLE_PERIOD)
3657 header->size += sizeof(data->period);
3658 3749
3659 if (sample_type & PERF_SAMPLE_READ) 3750 if (sample_type & PERF_SAMPLE_IP)
3660 header->size += perf_event_read_size(event); 3751 data->ip = perf_instruction_pointer(regs);
3661 3752
3662 if (sample_type & PERF_SAMPLE_CALLCHAIN) { 3753 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
3663 int size = 1; 3754 int size = 1;
@@ -3722,23 +3813,26 @@ perf_event_read_event(struct perf_event *event,
3722 struct task_struct *task) 3813 struct task_struct *task)
3723{ 3814{
3724 struct perf_output_handle handle; 3815 struct perf_output_handle handle;
3816 struct perf_sample_data sample;
3725 struct perf_read_event read_event = { 3817 struct perf_read_event read_event = {
3726 .header = { 3818 .header = {
3727 .type = PERF_RECORD_READ, 3819 .type = PERF_RECORD_READ,
3728 .misc = 0, 3820 .misc = 0,
3729 .size = sizeof(read_event) + perf_event_read_size(event), 3821 .size = sizeof(read_event) + event->read_size,
3730 }, 3822 },
3731 .pid = perf_event_pid(event, task), 3823 .pid = perf_event_pid(event, task),
3732 .tid = perf_event_tid(event, task), 3824 .tid = perf_event_tid(event, task),
3733 }; 3825 };
3734 int ret; 3826 int ret;
3735 3827
3828 perf_event_header__init_id(&read_event.header, &sample, event);
3736 ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0); 3829 ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0);
3737 if (ret) 3830 if (ret)
3738 return; 3831 return;
3739 3832
3740 perf_output_put(&handle, read_event); 3833 perf_output_put(&handle, read_event);
3741 perf_output_read(&handle, event); 3834 perf_output_read(&handle, event);
3835 perf_event__output_id_sample(event, &handle, &sample);
3742 3836
3743 perf_output_end(&handle); 3837 perf_output_end(&handle);
3744} 3838}
@@ -3768,14 +3862,16 @@ static void perf_event_task_output(struct perf_event *event,
3768 struct perf_task_event *task_event) 3862 struct perf_task_event *task_event)
3769{ 3863{
3770 struct perf_output_handle handle; 3864 struct perf_output_handle handle;
3865 struct perf_sample_data sample;
3771 struct task_struct *task = task_event->task; 3866 struct task_struct *task = task_event->task;
3772 int size, ret; 3867 int ret, size = task_event->event_id.header.size;
3773 3868
3774 size = task_event->event_id.header.size; 3869 perf_event_header__init_id(&task_event->event_id.header, &sample, event);
3775 ret = perf_output_begin(&handle, event, size, 0, 0);
3776 3870
3871 ret = perf_output_begin(&handle, event,
3872 task_event->event_id.header.size, 0, 0);
3777 if (ret) 3873 if (ret)
3778 return; 3874 goto out;
3779 3875
3780 task_event->event_id.pid = perf_event_pid(event, task); 3876 task_event->event_id.pid = perf_event_pid(event, task);
3781 task_event->event_id.ppid = perf_event_pid(event, current); 3877 task_event->event_id.ppid = perf_event_pid(event, current);
@@ -3785,7 +3881,11 @@ static void perf_event_task_output(struct perf_event *event,
3785 3881
3786 perf_output_put(&handle, task_event->event_id); 3882 perf_output_put(&handle, task_event->event_id);
3787 3883
3884 perf_event__output_id_sample(event, &handle, &sample);
3885
3788 perf_output_end(&handle); 3886 perf_output_end(&handle);
3887out:
3888 task_event->event_id.header.size = size;
3789} 3889}
3790 3890
3791static int perf_event_task_match(struct perf_event *event) 3891static int perf_event_task_match(struct perf_event *event)
@@ -3900,11 +4000,16 @@ static void perf_event_comm_output(struct perf_event *event,
3900 struct perf_comm_event *comm_event) 4000 struct perf_comm_event *comm_event)
3901{ 4001{
3902 struct perf_output_handle handle; 4002 struct perf_output_handle handle;
4003 struct perf_sample_data sample;
3903 int size = comm_event->event_id.header.size; 4004 int size = comm_event->event_id.header.size;
3904 int ret = perf_output_begin(&handle, event, size, 0, 0); 4005 int ret;
4006
4007 perf_event_header__init_id(&comm_event->event_id.header, &sample, event);
4008 ret = perf_output_begin(&handle, event,
4009 comm_event->event_id.header.size, 0, 0);
3905 4010
3906 if (ret) 4011 if (ret)
3907 return; 4012 goto out;
3908 4013
3909 comm_event->event_id.pid = perf_event_pid(event, comm_event->task); 4014 comm_event->event_id.pid = perf_event_pid(event, comm_event->task);
3910 comm_event->event_id.tid = perf_event_tid(event, comm_event->task); 4015 comm_event->event_id.tid = perf_event_tid(event, comm_event->task);
@@ -3912,7 +4017,12 @@ static void perf_event_comm_output(struct perf_event *event,
3912 perf_output_put(&handle, comm_event->event_id); 4017 perf_output_put(&handle, comm_event->event_id);
3913 perf_output_copy(&handle, comm_event->comm, 4018 perf_output_copy(&handle, comm_event->comm,
3914 comm_event->comm_size); 4019 comm_event->comm_size);
4020
4021 perf_event__output_id_sample(event, &handle, &sample);
4022
3915 perf_output_end(&handle); 4023 perf_output_end(&handle);
4024out:
4025 comm_event->event_id.header.size = size;
3916} 4026}
3917 4027
3918static int perf_event_comm_match(struct perf_event *event) 4028static int perf_event_comm_match(struct perf_event *event)
@@ -3957,7 +4067,6 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
3957 comm_event->comm_size = size; 4067 comm_event->comm_size = size;
3958 4068
3959 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; 4069 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
3960
3961 rcu_read_lock(); 4070 rcu_read_lock();
3962 list_for_each_entry_rcu(pmu, &pmus, entry) { 4071 list_for_each_entry_rcu(pmu, &pmus, entry) {
3963 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); 4072 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
@@ -4038,11 +4147,15 @@ static void perf_event_mmap_output(struct perf_event *event,
4038 struct perf_mmap_event *mmap_event) 4147 struct perf_mmap_event *mmap_event)
4039{ 4148{
4040 struct perf_output_handle handle; 4149 struct perf_output_handle handle;
4150 struct perf_sample_data sample;
4041 int size = mmap_event->event_id.header.size; 4151 int size = mmap_event->event_id.header.size;
4042 int ret = perf_output_begin(&handle, event, size, 0, 0); 4152 int ret;
4043 4153
4154 perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
4155 ret = perf_output_begin(&handle, event,
4156 mmap_event->event_id.header.size, 0, 0);
4044 if (ret) 4157 if (ret)
4045 return; 4158 goto out;
4046 4159
4047 mmap_event->event_id.pid = perf_event_pid(event, current); 4160 mmap_event->event_id.pid = perf_event_pid(event, current);
4048 mmap_event->event_id.tid = perf_event_tid(event, current); 4161 mmap_event->event_id.tid = perf_event_tid(event, current);
@@ -4050,7 +4163,12 @@ static void perf_event_mmap_output(struct perf_event *event,
4050 perf_output_put(&handle, mmap_event->event_id); 4163 perf_output_put(&handle, mmap_event->event_id);
4051 perf_output_copy(&handle, mmap_event->file_name, 4164 perf_output_copy(&handle, mmap_event->file_name,
4052 mmap_event->file_size); 4165 mmap_event->file_size);
4166
4167 perf_event__output_id_sample(event, &handle, &sample);
4168
4053 perf_output_end(&handle); 4169 perf_output_end(&handle);
4170out:
4171 mmap_event->event_id.header.size = size;
4054} 4172}
4055 4173
4056static int perf_event_mmap_match(struct perf_event *event, 4174static int perf_event_mmap_match(struct perf_event *event,
@@ -4205,6 +4323,7 @@ void perf_event_mmap(struct vm_area_struct *vma)
4205static void perf_log_throttle(struct perf_event *event, int enable) 4323static void perf_log_throttle(struct perf_event *event, int enable)
4206{ 4324{
4207 struct perf_output_handle handle; 4325 struct perf_output_handle handle;
4326 struct perf_sample_data sample;
4208 int ret; 4327 int ret;
4209 4328
4210 struct { 4329 struct {
@@ -4226,11 +4345,15 @@ static void perf_log_throttle(struct perf_event *event, int enable)
4226 if (enable) 4345 if (enable)
4227 throttle_event.header.type = PERF_RECORD_UNTHROTTLE; 4346 throttle_event.header.type = PERF_RECORD_UNTHROTTLE;
4228 4347
4229 ret = perf_output_begin(&handle, event, sizeof(throttle_event), 1, 0); 4348 perf_event_header__init_id(&throttle_event.header, &sample, event);
4349
4350 ret = perf_output_begin(&handle, event,
4351 throttle_event.header.size, 1, 0);
4230 if (ret) 4352 if (ret)
4231 return; 4353 return;
4232 4354
4233 perf_output_put(&handle, throttle_event); 4355 perf_output_put(&handle, throttle_event);
4356 perf_event__output_id_sample(event, &handle, &sample);
4234 perf_output_end(&handle); 4357 perf_output_end(&handle);
4235} 4358}
4236 4359
@@ -4246,6 +4369,13 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
4246 struct hw_perf_event *hwc = &event->hw; 4369 struct hw_perf_event *hwc = &event->hw;
4247 int ret = 0; 4370 int ret = 0;
4248 4371
4372 /*
4373 * Non-sampling counters might still use the PMI to fold short
4374 * hardware counters, ignore those.
4375 */
4376 if (unlikely(!is_sampling_event(event)))
4377 return 0;
4378
4249 if (!throttle) { 4379 if (!throttle) {
4250 hwc->interrupts++; 4380 hwc->interrupts++;
4251 } else { 4381 } else {
@@ -4391,7 +4521,7 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
4391 if (!regs) 4521 if (!regs)
4392 return; 4522 return;
4393 4523
4394 if (!hwc->sample_period) 4524 if (!is_sampling_event(event))
4395 return; 4525 return;
4396 4526
4397 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) 4527 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
@@ -4554,7 +4684,7 @@ static int perf_swevent_add(struct perf_event *event, int flags)
4554 struct hw_perf_event *hwc = &event->hw; 4684 struct hw_perf_event *hwc = &event->hw;
4555 struct hlist_head *head; 4685 struct hlist_head *head;
4556 4686
4557 if (hwc->sample_period) { 4687 if (is_sampling_event(event)) {
4558 hwc->last_period = hwc->sample_period; 4688 hwc->last_period = hwc->sample_period;
4559 perf_swevent_set_period(event); 4689 perf_swevent_set_period(event);
4560 } 4690 }
@@ -4811,15 +4941,6 @@ static int perf_tp_event_init(struct perf_event *event)
4811 if (event->attr.type != PERF_TYPE_TRACEPOINT) 4941 if (event->attr.type != PERF_TYPE_TRACEPOINT)
4812 return -ENOENT; 4942 return -ENOENT;
4813 4943
4814 /*
4815 * Raw tracepoint data is a severe data leak, only allow root to
4816 * have these.
4817 */
4818 if ((event->attr.sample_type & PERF_SAMPLE_RAW) &&
4819 perf_paranoid_tracepoint_raw() &&
4820 !capable(CAP_SYS_ADMIN))
4821 return -EPERM;
4822
4823 err = perf_trace_init(event); 4944 err = perf_trace_init(event);
4824 if (err) 4945 if (err)
4825 return err; 4946 return err;
@@ -4842,7 +4963,7 @@ static struct pmu perf_tracepoint = {
4842 4963
4843static inline void perf_tp_register(void) 4964static inline void perf_tp_register(void)
4844{ 4965{
4845 perf_pmu_register(&perf_tracepoint); 4966 perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT);
4846} 4967}
4847 4968
4848static int perf_event_set_filter(struct perf_event *event, void __user *arg) 4969static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@@ -4932,31 +5053,33 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
4932static void perf_swevent_start_hrtimer(struct perf_event *event) 5053static void perf_swevent_start_hrtimer(struct perf_event *event)
4933{ 5054{
4934 struct hw_perf_event *hwc = &event->hw; 5055 struct hw_perf_event *hwc = &event->hw;
5056 s64 period;
5057
5058 if (!is_sampling_event(event))
5059 return;
4935 5060
4936 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 5061 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
4937 hwc->hrtimer.function = perf_swevent_hrtimer; 5062 hwc->hrtimer.function = perf_swevent_hrtimer;
4938 if (hwc->sample_period) {
4939 s64 period = local64_read(&hwc->period_left);
4940 5063
4941 if (period) { 5064 period = local64_read(&hwc->period_left);
4942 if (period < 0) 5065 if (period) {
4943 period = 10000; 5066 if (period < 0)
5067 period = 10000;
4944 5068
4945 local64_set(&hwc->period_left, 0); 5069 local64_set(&hwc->period_left, 0);
4946 } else { 5070 } else {
4947 period = max_t(u64, 10000, hwc->sample_period); 5071 period = max_t(u64, 10000, hwc->sample_period);
4948 } 5072 }
4949 __hrtimer_start_range_ns(&hwc->hrtimer, 5073 __hrtimer_start_range_ns(&hwc->hrtimer,
4950 ns_to_ktime(period), 0, 5074 ns_to_ktime(period), 0,
4951 HRTIMER_MODE_REL_PINNED, 0); 5075 HRTIMER_MODE_REL_PINNED, 0);
4952 }
4953} 5076}
4954 5077
4955static void perf_swevent_cancel_hrtimer(struct perf_event *event) 5078static void perf_swevent_cancel_hrtimer(struct perf_event *event)
4956{ 5079{
4957 struct hw_perf_event *hwc = &event->hw; 5080 struct hw_perf_event *hwc = &event->hw;
4958 5081
4959 if (hwc->sample_period) { 5082 if (is_sampling_event(event)) {
4960 ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); 5083 ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
4961 local64_set(&hwc->period_left, ktime_to_ns(remaining)); 5084 local64_set(&hwc->period_left, ktime_to_ns(remaining));
4962 5085
@@ -5184,8 +5307,61 @@ static void free_pmu_context(struct pmu *pmu)
5184out: 5307out:
5185 mutex_unlock(&pmus_lock); 5308 mutex_unlock(&pmus_lock);
5186} 5309}
5310static struct idr pmu_idr;
5311
5312static ssize_t
5313type_show(struct device *dev, struct device_attribute *attr, char *page)
5314{
5315 struct pmu *pmu = dev_get_drvdata(dev);
5316
5317 return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type);
5318}
5319
5320static struct device_attribute pmu_dev_attrs[] = {
5321 __ATTR_RO(type),
5322 __ATTR_NULL,
5323};
5324
5325static int pmu_bus_running;
5326static struct bus_type pmu_bus = {
5327 .name = "event_source",
5328 .dev_attrs = pmu_dev_attrs,
5329};
5330
5331static void pmu_dev_release(struct device *dev)
5332{
5333 kfree(dev);
5334}
5335
5336static int pmu_dev_alloc(struct pmu *pmu)
5337{
5338 int ret = -ENOMEM;
5339
5340 pmu->dev = kzalloc(sizeof(struct device), GFP_KERNEL);
5341 if (!pmu->dev)
5342 goto out;
5343
5344 device_initialize(pmu->dev);
5345 ret = dev_set_name(pmu->dev, "%s", pmu->name);
5346 if (ret)
5347 goto free_dev;
5348
5349 dev_set_drvdata(pmu->dev, pmu);
5350 pmu->dev->bus = &pmu_bus;
5351 pmu->dev->release = pmu_dev_release;
5352 ret = device_add(pmu->dev);
5353 if (ret)
5354 goto free_dev;
5355
5356out:
5357 return ret;
5358
5359free_dev:
5360 put_device(pmu->dev);
5361 goto out;
5362}
5187 5363
5188int perf_pmu_register(struct pmu *pmu) 5364int perf_pmu_register(struct pmu *pmu, char *name, int type)
5189{ 5365{
5190 int cpu, ret; 5366 int cpu, ret;
5191 5367
@@ -5195,13 +5371,38 @@ int perf_pmu_register(struct pmu *pmu)
5195 if (!pmu->pmu_disable_count) 5371 if (!pmu->pmu_disable_count)
5196 goto unlock; 5372 goto unlock;
5197 5373
5374 pmu->type = -1;
5375 if (!name)
5376 goto skip_type;
5377 pmu->name = name;
5378
5379 if (type < 0) {
5380 int err = idr_pre_get(&pmu_idr, GFP_KERNEL);
5381 if (!err)
5382 goto free_pdc;
5383
5384 err = idr_get_new_above(&pmu_idr, pmu, PERF_TYPE_MAX, &type);
5385 if (err) {
5386 ret = err;
5387 goto free_pdc;
5388 }
5389 }
5390 pmu->type = type;
5391
5392 if (pmu_bus_running) {
5393 ret = pmu_dev_alloc(pmu);
5394 if (ret)
5395 goto free_idr;
5396 }
5397
5398skip_type:
5198 pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr); 5399 pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr);
5199 if (pmu->pmu_cpu_context) 5400 if (pmu->pmu_cpu_context)
5200 goto got_cpu_context; 5401 goto got_cpu_context;
5201 5402
5202 pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context); 5403 pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context);
5203 if (!pmu->pmu_cpu_context) 5404 if (!pmu->pmu_cpu_context)
5204 goto free_pdc; 5405 goto free_dev;
5205 5406
5206 for_each_possible_cpu(cpu) { 5407 for_each_possible_cpu(cpu) {
5207 struct perf_cpu_context *cpuctx; 5408 struct perf_cpu_context *cpuctx;
@@ -5245,6 +5446,14 @@ unlock:
5245 5446
5246 return ret; 5447 return ret;
5247 5448
5449free_dev:
5450 device_del(pmu->dev);
5451 put_device(pmu->dev);
5452
5453free_idr:
5454 if (pmu->type >= PERF_TYPE_MAX)
5455 idr_remove(&pmu_idr, pmu->type);
5456
5248free_pdc: 5457free_pdc:
5249 free_percpu(pmu->pmu_disable_count); 5458 free_percpu(pmu->pmu_disable_count);
5250 goto unlock; 5459 goto unlock;
@@ -5264,6 +5473,10 @@ void perf_pmu_unregister(struct pmu *pmu)
5264 synchronize_rcu(); 5473 synchronize_rcu();
5265 5474
5266 free_percpu(pmu->pmu_disable_count); 5475 free_percpu(pmu->pmu_disable_count);
5476 if (pmu->type >= PERF_TYPE_MAX)
5477 idr_remove(&pmu_idr, pmu->type);
5478 device_del(pmu->dev);
5479 put_device(pmu->dev);
5267 free_pmu_context(pmu); 5480 free_pmu_context(pmu);
5268} 5481}
5269 5482
@@ -5273,6 +5486,13 @@ struct pmu *perf_init_event(struct perf_event *event)
5273 int idx; 5486 int idx;
5274 5487
5275 idx = srcu_read_lock(&pmus_srcu); 5488 idx = srcu_read_lock(&pmus_srcu);
5489
5490 rcu_read_lock();
5491 pmu = idr_find(&pmu_idr, event->attr.type);
5492 rcu_read_unlock();
5493 if (pmu)
5494 goto unlock;
5495
5276 list_for_each_entry_rcu(pmu, &pmus, entry) { 5496 list_for_each_entry_rcu(pmu, &pmus, entry) {
5277 int ret = pmu->event_init(event); 5497 int ret = pmu->event_init(event);
5278 if (!ret) 5498 if (!ret)
@@ -5738,6 +5958,12 @@ SYSCALL_DEFINE5(perf_event_open,
5738 mutex_unlock(&current->perf_event_mutex); 5958 mutex_unlock(&current->perf_event_mutex);
5739 5959
5740 /* 5960 /*
5961 * Precalculate sample_data sizes
5962 */
5963 perf_event__header_size(event);
5964 perf_event__id_header_size(event);
5965
5966 /*
5741 * Drop the reference on the group_event after placing the 5967 * Drop the reference on the group_event after placing the
5742 * new event on the sibling_list. This ensures destruction 5968 * new event on the sibling_list. This ensures destruction
5743 * of the group leader will find the pointer to itself in 5969 * of the group leader will find the pointer to itself in
@@ -6090,6 +6316,12 @@ inherit_event(struct perf_event *parent_event,
6090 child_event->overflow_handler = parent_event->overflow_handler; 6316 child_event->overflow_handler = parent_event->overflow_handler;
6091 6317
6092 /* 6318 /*
6319 * Precalculate sample_data sizes
6320 */
6321 perf_event__header_size(child_event);
6322 perf_event__id_header_size(child_event);
6323
6324 /*
6093 * Link it up in the child's context: 6325 * Link it up in the child's context:
6094 */ 6326 */
6095 raw_spin_lock_irqsave(&child_ctx->lock, flags); 6327 raw_spin_lock_irqsave(&child_ctx->lock, flags);
@@ -6320,7 +6552,7 @@ static void __cpuinit perf_event_init_cpu(int cpu)
6320 mutex_unlock(&swhash->hlist_mutex); 6552 mutex_unlock(&swhash->hlist_mutex);
6321} 6553}
6322 6554
6323#ifdef CONFIG_HOTPLUG_CPU 6555#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC
6324static void perf_pmu_rotate_stop(struct pmu *pmu) 6556static void perf_pmu_rotate_stop(struct pmu *pmu)
6325{ 6557{
6326 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); 6558 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
@@ -6374,6 +6606,26 @@ static void perf_event_exit_cpu(int cpu)
6374static inline void perf_event_exit_cpu(int cpu) { } 6606static inline void perf_event_exit_cpu(int cpu) { }
6375#endif 6607#endif
6376 6608
6609static int
6610perf_reboot(struct notifier_block *notifier, unsigned long val, void *v)
6611{
6612 int cpu;
6613
6614 for_each_online_cpu(cpu)
6615 perf_event_exit_cpu(cpu);
6616
6617 return NOTIFY_OK;
6618}
6619
6620/*
6621 * Run the perf reboot notifier at the very last possible moment so that
6622 * the generic watchdog code runs as long as possible.
6623 */
6624static struct notifier_block perf_reboot_notifier = {
6625 .notifier_call = perf_reboot,
6626 .priority = INT_MIN,
6627};
6628
6377static int __cpuinit 6629static int __cpuinit
6378perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) 6630perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
6379{ 6631{
@@ -6402,14 +6654,45 @@ void __init perf_event_init(void)
6402{ 6654{
6403 int ret; 6655 int ret;
6404 6656
6657 idr_init(&pmu_idr);
6658
6405 perf_event_init_all_cpus(); 6659 perf_event_init_all_cpus();
6406 init_srcu_struct(&pmus_srcu); 6660 init_srcu_struct(&pmus_srcu);
6407 perf_pmu_register(&perf_swevent); 6661 perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE);
6408 perf_pmu_register(&perf_cpu_clock); 6662 perf_pmu_register(&perf_cpu_clock, NULL, -1);
6409 perf_pmu_register(&perf_task_clock); 6663 perf_pmu_register(&perf_task_clock, NULL, -1);
6410 perf_tp_register(); 6664 perf_tp_register();
6411 perf_cpu_notifier(perf_cpu_notify); 6665 perf_cpu_notifier(perf_cpu_notify);
6666 register_reboot_notifier(&perf_reboot_notifier);
6412 6667
6413 ret = init_hw_breakpoint(); 6668 ret = init_hw_breakpoint();
6414 WARN(ret, "hw_breakpoint initialization failed with: %d", ret); 6669 WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
6415} 6670}
6671
6672static int __init perf_event_sysfs_init(void)
6673{
6674 struct pmu *pmu;
6675 int ret;
6676
6677 mutex_lock(&pmus_lock);
6678
6679 ret = bus_register(&pmu_bus);
6680 if (ret)
6681 goto unlock;
6682
6683 list_for_each_entry(pmu, &pmus, entry) {
6684 if (!pmu->name || pmu->type < 0)
6685 continue;
6686
6687 ret = pmu_dev_alloc(pmu);
6688 WARN(ret, "Failed to register pmu: %s, reason %d\n", pmu->name, ret);
6689 }
6690 pmu_bus_running = 1;
6691 ret = 0;
6692
6693unlock:
6694 mutex_unlock(&pmus_lock);
6695
6696 return ret;
6697}
6698device_initcall(perf_event_sysfs_init);
diff --git a/kernel/sched.c b/kernel/sched.c
index 297d1a0eedb0..c68cead94dd7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -8293,8 +8293,6 @@ void __init sched_init(void)
8293 zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); 8293 zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
8294#endif /* SMP */ 8294#endif /* SMP */
8295 8295
8296 perf_event_init();
8297
8298 scheduler_running = 1; 8296 scheduler_running = 1;
8299} 8297}
8300 8298
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5abfa1518554..46404414d8a7 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -745,21 +745,21 @@ static struct ctl_table kern_table[] = {
745 .extra1 = &zero, 745 .extra1 = &zero,
746 .extra2 = &one, 746 .extra2 = &one,
747 }, 747 },
748#endif
749#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR)
750 { 748 {
751 .procname = "unknown_nmi_panic", 749 .procname = "nmi_watchdog",
752 .data = &unknown_nmi_panic, 750 .data = &watchdog_enabled,
753 .maxlen = sizeof (int), 751 .maxlen = sizeof (int),
754 .mode = 0644, 752 .mode = 0644,
755 .proc_handler = proc_dointvec, 753 .proc_handler = proc_dowatchdog_enabled,
756 }, 754 },
755#endif
756#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
757 { 757 {
758 .procname = "nmi_watchdog", 758 .procname = "unknown_nmi_panic",
759 .data = &nmi_watchdog_enabled, 759 .data = &unknown_nmi_panic,
760 .maxlen = sizeof (int), 760 .maxlen = sizeof (int),
761 .mode = 0644, 761 .mode = 0644,
762 .proc_handler = proc_nmi_enabled, 762 .proc_handler = proc_dointvec,
763 }, 763 },
764#endif 764#endif
765#if defined(CONFIG_X86) 765#if defined(CONFIG_X86)
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 1357c5786064..4b2545a136ff 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -136,7 +136,6 @@ static const struct bin_table bin_kern_table[] = {
136 { CTL_INT, KERN_IA64_UNALIGNED, "ignore-unaligned-usertrap" }, 136 { CTL_INT, KERN_IA64_UNALIGNED, "ignore-unaligned-usertrap" },
137 { CTL_INT, KERN_COMPAT_LOG, "compat-log" }, 137 { CTL_INT, KERN_COMPAT_LOG, "compat-log" },
138 { CTL_INT, KERN_MAX_LOCK_DEPTH, "max_lock_depth" }, 138 { CTL_INT, KERN_MAX_LOCK_DEPTH, "max_lock_depth" },
139 { CTL_INT, KERN_NMI_WATCHDOG, "nmi_watchdog" },
140 { CTL_INT, KERN_PANIC_ON_NMI, "panic_on_unrecovered_nmi" }, 139 { CTL_INT, KERN_PANIC_ON_NMI, "panic_on_unrecovered_nmi" },
141 {} 140 {}
142}; 141};
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 39c059ca670e..19a359d5e6d5 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -21,17 +21,46 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
21/* Count the events in use (per event id, not per instance) */ 21/* Count the events in use (per event id, not per instance) */
22static int total_ref_count; 22static int total_ref_count;
23 23
24static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
25 struct perf_event *p_event)
26{
27 /* No tracing, just counting, so no obvious leak */
28 if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
29 return 0;
30
31 /* Some events are ok to be traced by non-root users... */
32 if (p_event->attach_state == PERF_ATTACH_TASK) {
33 if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
34 return 0;
35 }
36
37 /*
38 * ...otherwise raw tracepoint data can be a severe data leak,
39 * only allow root to have these.
40 */
41 if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
42 return -EPERM;
43
44 return 0;
45}
46
24static int perf_trace_event_init(struct ftrace_event_call *tp_event, 47static int perf_trace_event_init(struct ftrace_event_call *tp_event,
25 struct perf_event *p_event) 48 struct perf_event *p_event)
26{ 49{
27 struct hlist_head __percpu *list; 50 struct hlist_head __percpu *list;
28 int ret = -ENOMEM; 51 int ret;
29 int cpu; 52 int cpu;
30 53
54 ret = perf_trace_event_perm(tp_event, p_event);
55 if (ret)
56 return ret;
57
31 p_event->tp_event = tp_event; 58 p_event->tp_event = tp_event;
32 if (tp_event->perf_refcount++ > 0) 59 if (tp_event->perf_refcount++ > 0)
33 return 0; 60 return 0;
34 61
62 ret = -ENOMEM;
63
35 list = alloc_percpu(struct hlist_head); 64 list = alloc_percpu(struct hlist_head);
36 if (!list) 65 if (!list)
37 goto fail; 66 goto fail;
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 6e3c41a4024c..eb17e143b5da 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -57,6 +57,8 @@ static int __init hardlockup_panic_setup(char *str)
57{ 57{
58 if (!strncmp(str, "panic", 5)) 58 if (!strncmp(str, "panic", 5))
59 hardlockup_panic = 1; 59 hardlockup_panic = 1;
60 else if (!strncmp(str, "0", 1))
61 no_watchdog = 1;
60 return 1; 62 return 1;
61} 63}
62__setup("nmi_watchdog=", hardlockup_panic_setup); 64__setup("nmi_watchdog=", hardlockup_panic_setup);
@@ -547,13 +549,13 @@ static struct notifier_block __cpuinitdata cpu_nfb = {
547 .notifier_call = cpu_callback 549 .notifier_call = cpu_callback
548}; 550};
549 551
550static int __init spawn_watchdog_task(void) 552void __init lockup_detector_init(void)
551{ 553{
552 void *cpu = (void *)(long)smp_processor_id(); 554 void *cpu = (void *)(long)smp_processor_id();
553 int err; 555 int err;
554 556
555 if (no_watchdog) 557 if (no_watchdog)
556 return 0; 558 return;
557 559
558 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); 560 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
559 WARN_ON(notifier_to_errno(err)); 561 WARN_ON(notifier_to_errno(err));
@@ -561,6 +563,5 @@ static int __init spawn_watchdog_task(void)
561 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); 563 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
562 register_cpu_notifier(&cpu_nfb); 564 register_cpu_notifier(&cpu_nfb);
563 565
564 return 0; 566 return;
565} 567}
566early_initcall(spawn_watchdog_task);
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index b2c63309a651..6f5a498608b2 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -24,12 +24,47 @@ OPTIONS
24--input=:: 24--input=::
25 Input file name. (default: perf.data) 25 Input file name. (default: perf.data)
26 26
27-d::
28--dsos=<dso[,dso...]>::
29 Only consider symbols in these dsos.
30-s::
31--symbol=<symbol>::
32 Symbol to annotate.
33
34-f::
35--force::
36 Don't complain, do it.
37
38-v::
39--verbose::
40 Be more verbose. (Show symbol address, etc)
41
42-D::
43--dump-raw-trace::
44 Dump raw trace in ASCII.
45
46-k::
47--vmlinux=<file>::
48 vmlinux pathname.
49
50-m::
51--modules::
52 Load module symbols. WARNING: use only with -k and LIVE kernel.
53
54-l::
55--print-line::
56 Print matching source lines (may be slow).
57
58-P::
59--full-paths::
60 Don't shorten the displayed pathnames.
61
27--stdio:: Use the stdio interface. 62--stdio:: Use the stdio interface.
28 63
29--tui:: Use the TUI interface Use of --tui requires a tty, if one is not 64--tui:: Use the TUI interface Use of --tui requires a tty, if one is not
30 present, as when piping to other commands, the stdio interface is 65 present, as when piping to other commands, the stdio interface is
31 used. This interfaces starts by centering on the line with more 66 used. This interfaces starts by centering on the line with more
32 samples, TAB/UNTAB cycles thru the lines with more samples. 67 samples, TAB/UNTAB cycles through the lines with more samples.
33 68
34SEE ALSO 69SEE ALSO
35-------- 70--------
diff --git a/tools/perf/Documentation/perf-buildid-list.txt b/tools/perf/Documentation/perf-buildid-list.txt
index 01b642c0bf8f..5eaac6f26d51 100644
--- a/tools/perf/Documentation/perf-buildid-list.txt
+++ b/tools/perf/Documentation/perf-buildid-list.txt
@@ -18,6 +18,9 @@ perf report.
18 18
19OPTIONS 19OPTIONS
20------- 20-------
21-H::
22--with-hits::
23 Show only DSOs with hits.
21-i:: 24-i::
22--input=:: 25--input=::
23 Input file name. (default: perf.data) 26 Input file name. (default: perf.data)
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index 20d97d84ea1c..6a9ec2b35310 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -19,6 +19,18 @@ If no parameters are passed it will assume perf.data.old and perf.data.
19 19
20OPTIONS 20OPTIONS
21------- 21-------
22-M::
23--displacement::
24 Show position displacement relative to baseline.
25
26-D::
27--dump-raw-trace::
28 Dump raw trace in ASCII.
29
30-m::
31--modules::
32 Load module symbols. WARNING: use only with -k and LIVE kernel
33
22-d:: 34-d::
23--dsos=:: 35--dsos=::
24 Only consider symbols in these dsos. CSV that understands 36 Only consider symbols in these dsos. CSV that understands
@@ -42,7 +54,7 @@ OPTIONS
42--field-separator=:: 54--field-separator=::
43 55
44 Use a special separator character and don't pad with spaces, replacing 56 Use a special separator character and don't pad with spaces, replacing
45 all occurances of this separator in symbol names (and other output) 57 all occurrences of this separator in symbol names (and other output)
46 with a '.' character, that thus it's the only non valid separator. 58 with a '.' character, that thus it's the only non valid separator.
47 59
48-v:: 60-v::
@@ -50,6 +62,11 @@ OPTIONS
50 Be verbose, for instance, show the raw counts in addition to the 62 Be verbose, for instance, show the raw counts in addition to the
51 diff. 63 diff.
52 64
65-f::
66--force::
67 Don't complain, do it.
68
69
53SEE ALSO 70SEE ALSO
54-------- 71--------
55linkperf:perf-record[1] 72linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-kvm.txt b/tools/perf/Documentation/perf-kvm.txt
index d004e19fe6d6..dd84cb2f0a88 100644
--- a/tools/perf/Documentation/perf-kvm.txt
+++ b/tools/perf/Documentation/perf-kvm.txt
@@ -22,7 +22,7 @@ There are a couple of variants of perf kvm:
22 a performance counter profile of guest os in realtime 22 a performance counter profile of guest os in realtime
23 of an arbitrary workload. 23 of an arbitrary workload.
24 24
25 'perf kvm record <command>' to record the performance couinter profile 25 'perf kvm record <command>' to record the performance counter profile
26 of an arbitrary workload and save it into a perf data file. If both 26 of an arbitrary workload and save it into a perf data file. If both
27 --host and --guest are input, the perf data file name is perf.data.kvm. 27 --host and --guest are input, the perf data file name is perf.data.kvm.
28 If there is no --host but --guest, the file name is perf.data.guest. 28 If there is no --host but --guest, the file name is perf.data.guest.
@@ -40,6 +40,12 @@ There are a couple of variants of perf kvm:
40 40
41OPTIONS 41OPTIONS
42------- 42-------
43-i::
44--input=::
45 Input file name.
46-o::
47--output::
48 Output file name.
43--host=:: 49--host=::
44 Collect host side performance profile. 50 Collect host side performance profile.
45--guest=:: 51--guest=::
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index b317102138c8..921de259ea10 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -24,6 +24,21 @@ and statistics with this 'perf lock' command.
24 24
25 'perf lock report' reports statistical data. 25 'perf lock report' reports statistical data.
26 26
27OPTIONS
28-------
29
30-i::
31--input=<file>::
32 Input file name.
33
34-v::
35--verbose::
36 Be more verbose (show symbol address, etc).
37
38-D::
39--dump-raw-trace::
40 Dump raw trace in ASCII.
41
27SEE ALSO 42SEE ALSO
28-------- 43--------
29linkperf:perf[1] 44linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 62de1b7f4e76..4e2323276984 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -115,7 +115,7 @@ Each probe argument follows below syntax.
115 115
116LINE SYNTAX 116LINE SYNTAX
117----------- 117-----------
118Line range is descripted by following syntax. 118Line range is described by following syntax.
119 119
120 "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]" 120 "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]"
121 121
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index a91f9f9e6e5c..52462ae26455 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -39,15 +39,24 @@ OPTIONS
39 be passed as follows: '\mem:addr[:[r][w][x]]'. 39 be passed as follows: '\mem:addr[:[r][w][x]]'.
40 If you want to profile read-write accesses in 0x1000, just set 40 If you want to profile read-write accesses in 0x1000, just set
41 'mem:0x1000:rw'. 41 'mem:0x1000:rw'.
42
43--filter=<filter>::
44 Event filter.
45
42-a:: 46-a::
43 System-wide collection. 47--all-cpus::
48 System-wide collection from all CPUs.
44 49
45-l:: 50-l::
46 Scale counter values. 51 Scale counter values.
47 52
48-p:: 53-p::
49--pid=:: 54--pid=::
50 Record events on existing pid. 55 Record events on existing process ID.
56
57-t::
58--tid=::
59 Record events on existing thread ID.
51 60
52-r:: 61-r::
53--realtime=:: 62--realtime=::
@@ -99,6 +108,11 @@ OPTIONS
99--data:: 108--data::
100 Sample addresses. 109 Sample addresses.
101 110
111-T::
112--timestamp::
113 Sample timestamps. Use it with 'perf report -D' to see the timestamps,
114 for instance.
115
102-n:: 116-n::
103--no-samples:: 117--no-samples::
104 Don't sample. 118 Don't sample.
@@ -109,8 +123,8 @@ Collect raw sample records from all opened counters (default for tracepoint coun
109 123
110-C:: 124-C::
111--cpu:: 125--cpu::
112Collect samples only on the list of cpus provided. Multiple CPUs can be provided as a 126Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a
113comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. 127comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
114In per-thread mode with inheritance mode on (default), samples are captured only when 128In per-thread mode with inheritance mode on (default), samples are captured only when
115the thread executes on the designated CPUs. Default is to monitor all CPUs. 129the thread executes on the designated CPUs. Default is to monitor all CPUs.
116 130
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 12052c9ed0ba..fefea77ec6e9 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -20,6 +20,11 @@ OPTIONS
20-i:: 20-i::
21--input=:: 21--input=::
22 Input file name. (default: perf.data) 22 Input file name. (default: perf.data)
23
24-v::
25--verbose::
26 Be more verbose. (show symbol address, etc)
27
23-d:: 28-d::
24--dsos=:: 29--dsos=::
25 Only consider symbols in these dsos. CSV that understands 30 Only consider symbols in these dsos. CSV that understands
@@ -27,6 +32,10 @@ OPTIONS
27-n:: 32-n::
28--show-nr-samples:: 33--show-nr-samples::
29 Show the number of samples for each symbol 34 Show the number of samples for each symbol
35
36--showcpuutilization::
37 Show sample percentage for different cpu modes.
38
30-T:: 39-T::
31--threads:: 40--threads::
32 Show per-thread event counters 41 Show per-thread event counters
@@ -39,12 +48,24 @@ OPTIONS
39 Only consider these symbols. CSV that understands 48 Only consider these symbols. CSV that understands
40 file://filename entries. 49 file://filename entries.
41 50
51-U::
52--hide-unresolved::
53 Only display entries resolved to a symbol.
54
42-s:: 55-s::
43--sort=:: 56--sort=::
44 Sort by key(s): pid, comm, dso, symbol, parent. 57 Sort by key(s): pid, comm, dso, symbol, parent.
45 58
59-p::
60--parent=<regex>::
61 regex filter to identify parent, see: '--sort parent'
62
63-x::
64--exclude-other::
65 Only display entries with parent-match.
66
46-w:: 67-w::
47--field-width=:: 68--column-widths=<width[,width...]>::
48 Force each column width to the provided list, for large terminal 69 Force each column width to the provided list, for large terminal
49 readability. 70 readability.
50 71
@@ -52,19 +73,26 @@ OPTIONS
52--field-separator=:: 73--field-separator=::
53 74
54 Use a special separator character and don't pad with spaces, replacing 75 Use a special separator character and don't pad with spaces, replacing
55 all occurances of this separator in symbol names (and other output) 76 all occurrences of this separator in symbol names (and other output)
56 with a '.' character, that thus it's the only non valid separator. 77 with a '.' character, that thus it's the only non valid separator.
57 78
79-D::
80--dump-raw-trace::
81 Dump raw trace in ASCII.
82
58-g [type,min]:: 83-g [type,min]::
59--call-graph:: 84--call-graph::
60 Display callchains using type and min percent threshold. 85 Display call chains using type and min percent threshold.
61 type can be either: 86 type can be either:
62 - flat: single column, linear exposure of callchains. 87 - flat: single column, linear exposure of call chains.
63 - graph: use a graph tree, displaying absolute overhead rates. 88 - graph: use a graph tree, displaying absolute overhead rates.
64 - fractal: like graph, but displays relative rates. Each branch of 89 - fractal: like graph, but displays relative rates. Each branch of
65 the tree is considered as a new profiled object. + 90 the tree is considered as a new profiled object. +
66 Default: fractal,0.5. 91 Default: fractal,0.5.
67 92
93--pretty=<key>::
94 Pretty printing style. key: normal, raw
95
68--stdio:: Use the stdio interface. 96--stdio:: Use the stdio interface.
69 97
70--tui:: Use the TUI interface, that is integrated with annotate and allows 98--tui:: Use the TUI interface, that is integrated with annotate and allows
@@ -72,6 +100,22 @@ OPTIONS
72 requires a tty, if one is not present, as when piping to other 100 requires a tty, if one is not present, as when piping to other
73 commands, the stdio interface is used. 101 commands, the stdio interface is used.
74 102
103-k::
104--vmlinux=<file>::
105 vmlinux pathname
106
107--kallsyms=<file>::
108 kallsyms pathname
109
110-m::
111--modules::
112 Load module symbols. WARNING: This should only be used with -k and
113 a LIVE kernel.
114
115-f::
116--force::
117 Don't complain, do it.
118
75SEE ALSO 119SEE ALSO
76-------- 120--------
77linkperf:perf-stat[1] 121linkperf:perf-stat[1]
diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt
index 8417644a6166..46822d5fde1c 100644
--- a/tools/perf/Documentation/perf-sched.txt
+++ b/tools/perf/Documentation/perf-sched.txt
@@ -8,11 +8,11 @@ perf-sched - Tool to trace/measure scheduler properties (latencies)
8SYNOPSIS 8SYNOPSIS
9-------- 9--------
10[verse] 10[verse]
11'perf sched' {record|latency|replay|trace} 11'perf sched' {record|latency|map|replay|trace}
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
15There are four variants of perf sched: 15There are five variants of perf sched:
16 16
17 'perf sched record <command>' to record the scheduling events 17 'perf sched record <command>' to record the scheduling events
18 of an arbitrary workload. 18 of an arbitrary workload.
@@ -30,8 +30,22 @@ There are four variants of perf sched:
30 of the workload as it occurred when it was recorded - and can repeat 30 of the workload as it occurred when it was recorded - and can repeat
31 it a number of times, measuring its performance.) 31 it a number of times, measuring its performance.)
32 32
33 'perf sched map' to print a textual context-switching outline of
34 workload captured via perf sched record. Columns stand for
35 individual CPUs, and the two-letter shortcuts stand for tasks that
36 are running on a CPU. A '*' denotes the CPU that had the event, and
37 a dot signals an idle CPU.
38
33OPTIONS 39OPTIONS
34------- 40-------
41-i::
42--input=<file>::
43 Input file name. (default: perf.data)
44
45-v::
46--verbose::
47 Be more verbose. (show symbol address, etc)
48
35-D:: 49-D::
36--dump-raw-trace=:: 50--dump-raw-trace=::
37 Display verbose dump of the sched data. 51 Display verbose dump of the sched data.
diff --git a/tools/perf/Documentation/perf-trace-perl.txt b/tools/perf/Documentation/perf-script-perl.txt
index ee6525ee6d69..5bb41e55a3ac 100644
--- a/tools/perf/Documentation/perf-trace-perl.txt
+++ b/tools/perf/Documentation/perf-script-perl.txt
@@ -1,19 +1,19 @@
1perf-trace-perl(1) 1perf-script-perl(1)
2================== 2==================
3 3
4NAME 4NAME
5---- 5----
6perf-trace-perl - Process trace data with a Perl script 6perf-script-perl - Process trace data with a Perl script
7 7
8SYNOPSIS 8SYNOPSIS
9-------- 9--------
10[verse] 10[verse]
11'perf trace' [-s [Perl]:script[.pl] ] 11'perf script' [-s [Perl]:script[.pl] ]
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
15 15
16This perf trace option is used to process perf trace data using perf's 16This perf script option is used to process perf script data using perf's
17built-in Perl interpreter. It reads and processes the input file and 17built-in Perl interpreter. It reads and processes the input file and
18displays the results of the trace analysis implemented in the given 18displays the results of the trace analysis implemented in the given
19Perl script, if any. 19Perl script, if any.
@@ -21,7 +21,7 @@ Perl script, if any.
21STARTER SCRIPTS 21STARTER SCRIPTS
22--------------- 22---------------
23 23
24You can avoid reading the rest of this document by running 'perf trace 24You can avoid reading the rest of this document by running 'perf script
25-g perl' in the same directory as an existing perf.data trace file. 25-g perl' in the same directory as an existing perf.data trace file.
26That will generate a starter script containing a handler for each of 26That will generate a starter script containing a handler for each of
27the event types in the trace file; it simply prints every available 27the event types in the trace file; it simply prints every available
@@ -30,13 +30,13 @@ field for each event in the trace file.
30You can also look at the existing scripts in 30You can also look at the existing scripts in
31~/libexec/perf-core/scripts/perl for typical examples showing how to 31~/libexec/perf-core/scripts/perl for typical examples showing how to
32do basic things like aggregate event data, print results, etc. Also, 32do basic things like aggregate event data, print results, etc. Also,
33the check-perf-trace.pl script, while not interesting for its results, 33the check-perf-script.pl script, while not interesting for its results,
34attempts to exercise all of the main scripting features. 34attempts to exercise all of the main scripting features.
35 35
36EVENT HANDLERS 36EVENT HANDLERS
37-------------- 37--------------
38 38
39When perf trace is invoked using a trace script, a user-defined 39When perf script is invoked using a trace script, a user-defined
40'handler function' is called for each event in the trace. If there's 40'handler function' is called for each event in the trace. If there's
41no handler function defined for a given event type, the event is 41no handler function defined for a given event type, the event is
42ignored (or passed to a 'trace_handled' function, see below) and the 42ignored (or passed to a 'trace_handled' function, see below) and the
@@ -112,13 +112,13 @@ write a useful trace script. The sections below cover the rest.
112SCRIPT LAYOUT 112SCRIPT LAYOUT
113------------- 113-------------
114 114
115Every perf trace Perl script should start by setting up a Perl module 115Every perf script Perl script should start by setting up a Perl module
116search path and 'use'ing a few support modules (see module 116search path and 'use'ing a few support modules (see module
117descriptions below): 117descriptions below):
118 118
119---- 119----
120 use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib"; 120 use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/perf-script-Util/lib";
121 use lib "./Perf-Trace-Util/lib"; 121 use lib "./perf-script-Util/lib";
122 use Perf::Trace::Core; 122 use Perf::Trace::Core;
123 use Perf::Trace::Context; 123 use Perf::Trace::Context;
124 use Perf::Trace::Util; 124 use Perf::Trace::Util;
@@ -162,7 +162,7 @@ sub trace_unhandled
162---- 162----
163 163
164The remaining sections provide descriptions of each of the available 164The remaining sections provide descriptions of each of the available
165built-in perf trace Perl modules and their associated functions. 165built-in perf script Perl modules and their associated functions.
166 166
167AVAILABLE MODULES AND FUNCTIONS 167AVAILABLE MODULES AND FUNCTIONS
168------------------------------- 168-------------------------------
@@ -170,7 +170,7 @@ AVAILABLE MODULES AND FUNCTIONS
170The following sections describe the functions and variables available 170The following sections describe the functions and variables available
171via the various Perf::Trace::* Perl modules. To use the functions and 171via the various Perf::Trace::* Perl modules. To use the functions and
172variables from the given module, add the corresponding 'use 172variables from the given module, add the corresponding 'use
173Perf::Trace::XXX' line to your perf trace script. 173Perf::Trace::XXX' line to your perf script script.
174 174
175Perf::Trace::Core Module 175Perf::Trace::Core Module
176~~~~~~~~~~~~~~~~~~~~~~~~ 176~~~~~~~~~~~~~~~~~~~~~~~~
@@ -204,7 +204,7 @@ argument.
204Perf::Trace::Util Module 204Perf::Trace::Util Module
205~~~~~~~~~~~~~~~~~~~~~~~~ 205~~~~~~~~~~~~~~~~~~~~~~~~
206 206
207Various utility functions for use with perf trace: 207Various utility functions for use with perf script:
208 208
209 nsecs($secs, $nsecs) - returns total nsecs given secs/nsecs pair 209 nsecs($secs, $nsecs) - returns total nsecs given secs/nsecs pair
210 nsecs_secs($nsecs) - returns whole secs portion given nsecs 210 nsecs_secs($nsecs) - returns whole secs portion given nsecs
@@ -214,4 +214,4 @@ Various utility functions for use with perf trace:
214 214
215SEE ALSO 215SEE ALSO
216-------- 216--------
217linkperf:perf-trace[1] 217linkperf:perf-script[1]
diff --git a/tools/perf/Documentation/perf-trace-python.txt b/tools/perf/Documentation/perf-script-python.txt
index 693be804dd3d..36b38277422c 100644
--- a/tools/perf/Documentation/perf-trace-python.txt
+++ b/tools/perf/Documentation/perf-script-python.txt
@@ -1,19 +1,19 @@
1perf-trace-python(1) 1perf-script-python(1)
2==================== 2=====================
3 3
4NAME 4NAME
5---- 5----
6perf-trace-python - Process trace data with a Python script 6perf-script-python - Process trace data with a Python script
7 7
8SYNOPSIS 8SYNOPSIS
9-------- 9--------
10[verse] 10[verse]
11'perf trace' [-s [Python]:script[.py] ] 11'perf script' [-s [Python]:script[.py] ]
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
15 15
16This perf trace option is used to process perf trace data using perf's 16This perf script option is used to process perf script data using perf's
17built-in Python interpreter. It reads and processes the input file and 17built-in Python interpreter. It reads and processes the input file and
18displays the results of the trace analysis implemented in the given 18displays the results of the trace analysis implemented in the given
19Python script, if any. 19Python script, if any.
@@ -23,15 +23,15 @@ A QUICK EXAMPLE
23 23
24This section shows the process, start to finish, of creating a working 24This section shows the process, start to finish, of creating a working
25Python script that aggregates and extracts useful information from a 25Python script that aggregates and extracts useful information from a
26raw perf trace stream. You can avoid reading the rest of this 26raw perf script stream. You can avoid reading the rest of this
27document if an example is enough for you; the rest of the document 27document if an example is enough for you; the rest of the document
28provides more details on each step and lists the library functions 28provides more details on each step and lists the library functions
29available to script writers. 29available to script writers.
30 30
31This example actually details the steps that were used to create the 31This example actually details the steps that were used to create the
32'syscall-counts' script you see when you list the available perf trace 32'syscall-counts' script you see when you list the available perf script
33scripts via 'perf trace -l'. As such, this script also shows how to 33scripts via 'perf script -l'. As such, this script also shows how to
34integrate your script into the list of general-purpose 'perf trace' 34integrate your script into the list of general-purpose 'perf script'
35scripts listed by that command. 35scripts listed by that command.
36 36
37The syscall-counts script is a simple script, but demonstrates all the 37The syscall-counts script is a simple script, but demonstrates all the
@@ -105,31 +105,31 @@ That single stream will be recorded in a file in the current directory
105called perf.data. 105called perf.data.
106 106
107Once we have a perf.data file containing our data, we can use the -g 107Once we have a perf.data file containing our data, we can use the -g
108'perf trace' option to generate a Python script that will contain a 108'perf script' option to generate a Python script that will contain a
109callback handler for each event type found in the perf.data trace 109callback handler for each event type found in the perf.data trace
110stream (for more details, see the STARTER SCRIPTS section). 110stream (for more details, see the STARTER SCRIPTS section).
111 111
112---- 112----
113# perf trace -g python 113# perf script -g python
114generated Python script: perf-trace.py 114generated Python script: perf-script.py
115 115
116The output file created also in the current directory is named 116The output file created also in the current directory is named
117perf-trace.py. Here's the file in its entirety: 117perf-script.py. Here's the file in its entirety:
118 118
119# perf trace event handlers, generated by perf trace -g python 119# perf script event handlers, generated by perf script -g python
120# Licensed under the terms of the GNU GPL License version 2 120# Licensed under the terms of the GNU GPL License version 2
121 121
122# The common_* event handler fields are the most useful fields common to 122# The common_* event handler fields are the most useful fields common to
123# all events. They don't necessarily correspond to the 'common_*' fields 123# all events. They don't necessarily correspond to the 'common_*' fields
124# in the format files. Those fields not available as handler params can 124# in the format files. Those fields not available as handler params can
125# be retrieved using Python functions of the form common_*(context). 125# be retrieved using Python functions of the form common_*(context).
126# See the perf-trace-python Documentation for the list of available functions. 126# See the perf-script-python Documentation for the list of available functions.
127 127
128import os 128import os
129import sys 129import sys
130 130
131sys.path.append(os.environ['PERF_EXEC_PATH'] + \ 131sys.path.append(os.environ['PERF_EXEC_PATH'] + \
132 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') 132 '/scripts/python/perf-script-Util/lib/Perf/Trace')
133 133
134from perf_trace_context import * 134from perf_trace_context import *
135from Core import * 135from Core import *
@@ -160,7 +160,7 @@ def print_header(event_name, cpu, secs, nsecs, pid, comm):
160---- 160----
161 161
162At the top is a comment block followed by some import statements and a 162At the top is a comment block followed by some import statements and a
163path append which every perf trace script should include. 163path append which every perf script script should include.
164 164
165Following that are a couple generated functions, trace_begin() and 165Following that are a couple generated functions, trace_begin() and
166trace_end(), which are called at the beginning and the end of the 166trace_end(), which are called at the beginning and the end of the
@@ -189,8 +189,8 @@ simply a utility function used for that purpose. Let's rename the
189script and run it to see the default output: 189script and run it to see the default output:
190 190
191---- 191----
192# mv perf-trace.py syscall-counts.py 192# mv perf-script.py syscall-counts.py
193# perf trace -s syscall-counts.py 193# perf script -s syscall-counts.py
194 194
195raw_syscalls__sys_enter 1 00840.847582083 7506 perf id=1, args= 195raw_syscalls__sys_enter 1 00840.847582083 7506 perf id=1, args=
196raw_syscalls__sys_enter 1 00840.847595764 7506 perf id=1, args= 196raw_syscalls__sys_enter 1 00840.847595764 7506 perf id=1, args=
@@ -216,7 +216,7 @@ import os
216import sys 216import sys
217 217
218sys.path.append(os.environ['PERF_EXEC_PATH'] + \ 218sys.path.append(os.environ['PERF_EXEC_PATH'] + \
219 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') 219 '/scripts/python/perf-script-Util/lib/Perf/Trace')
220 220
221from perf_trace_context import * 221from perf_trace_context import *
222from Core import * 222from Core import *
@@ -279,7 +279,7 @@ import os
279import sys 279import sys
280 280
281sys.path.append(os.environ['PERF_EXEC_PATH'] + \ 281sys.path.append(os.environ['PERF_EXEC_PATH'] + \
282 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') 282 '/scripts/python/perf-script-Util/lib/Perf/Trace')
283 283
284from perf_trace_context import * 284from perf_trace_context import *
285from Core import * 285from Core import *
@@ -315,7 +315,7 @@ def print_syscall_totals():
315 315
316The script can be run just as before: 316The script can be run just as before:
317 317
318 # perf trace -s syscall-counts.py 318 # perf script -s syscall-counts.py
319 319
320So those are the essential steps in writing and running a script. The 320So those are the essential steps in writing and running a script. The
321process can be generalized to any tracepoint or set of tracepoints 321process can be generalized to any tracepoint or set of tracepoints
@@ -324,17 +324,17 @@ interested in by looking at the list of available events shown by
324'perf list' and/or look in /sys/kernel/debug/tracing events for 324'perf list' and/or look in /sys/kernel/debug/tracing events for
325detailed event and field info, record the corresponding trace data 325detailed event and field info, record the corresponding trace data
326using 'perf record', passing it the list of interesting events, 326using 'perf record', passing it the list of interesting events,
327generate a skeleton script using 'perf trace -g python' and modify the 327generate a skeleton script using 'perf script -g python' and modify the
328code to aggregate and display it for your particular needs. 328code to aggregate and display it for your particular needs.
329 329
330After you've done that you may end up with a general-purpose script 330After you've done that you may end up with a general-purpose script
331that you want to keep around and have available for future use. By 331that you want to keep around and have available for future use. By
332writing a couple of very simple shell scripts and putting them in the 332writing a couple of very simple shell scripts and putting them in the
333right place, you can have your script listed alongside the other 333right place, you can have your script listed alongside the other
334scripts listed by the 'perf trace -l' command e.g.: 334scripts listed by the 'perf script -l' command e.g.:
335 335
336---- 336----
337root@tropicana:~# perf trace -l 337root@tropicana:~# perf script -l
338List of available trace scripts: 338List of available trace scripts:
339 workqueue-stats workqueue stats (ins/exe/create/destroy) 339 workqueue-stats workqueue stats (ins/exe/create/destroy)
340 wakeup-latency system-wide min/max/avg wakeup latency 340 wakeup-latency system-wide min/max/avg wakeup latency
@@ -365,14 +365,14 @@ perf record -a -e raw_syscalls:sys_enter
365The 'report' script is also a shell script with the same base name as 365The 'report' script is also a shell script with the same base name as
366your script, but with -report appended. It should also be located in 366your script, but with -report appended. It should also be located in
367the perf/scripts/python/bin directory. In that script, you write the 367the perf/scripts/python/bin directory. In that script, you write the
368'perf trace -s' command-line needed for running your script: 368'perf script -s' command-line needed for running your script:
369 369
370---- 370----
371# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-report 371# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-report
372 372
373#!/bin/bash 373#!/bin/bash
374# description: system-wide syscall counts 374# description: system-wide syscall counts
375perf trace -s ~/libexec/perf-core/scripts/python/syscall-counts.py 375perf script -s ~/libexec/perf-core/scripts/python/syscall-counts.py
376---- 376----
377 377
378Note that the location of the Python script given in the shell script 378Note that the location of the Python script given in the shell script
@@ -390,17 +390,17 @@ total 32
390drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 . 390drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 .
391drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 .. 391drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 ..
392drwxr-xr-x 2 trz trz 4096 2010-01-26 22:29 bin 392drwxr-xr-x 2 trz trz 4096 2010-01-26 22:29 bin
393-rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-trace.py 393-rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-script.py
394drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 Perf-Trace-Util 394drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 perf-script-Util
395-rw-r--r-- 1 trz trz 1462 2010-01-26 22:30 syscall-counts.py 395-rw-r--r-- 1 trz trz 1462 2010-01-26 22:30 syscall-counts.py
396---- 396----
397 397
398Once you've done that (don't forget to do a new 'make install', 398Once you've done that (don't forget to do a new 'make install',
399otherwise your script won't show up at run-time), 'perf trace -l' 399otherwise your script won't show up at run-time), 'perf script -l'
400should show a new entry for your script: 400should show a new entry for your script:
401 401
402---- 402----
403root@tropicana:~# perf trace -l 403root@tropicana:~# perf script -l
404List of available trace scripts: 404List of available trace scripts:
405 workqueue-stats workqueue stats (ins/exe/create/destroy) 405 workqueue-stats workqueue stats (ins/exe/create/destroy)
406 wakeup-latency system-wide min/max/avg wakeup latency 406 wakeup-latency system-wide min/max/avg wakeup latency
@@ -409,19 +409,19 @@ List of available trace scripts:
409 syscall-counts system-wide syscall counts 409 syscall-counts system-wide syscall counts
410---- 410----
411 411
412You can now perform the record step via 'perf trace record': 412You can now perform the record step via 'perf script record':
413 413
414 # perf trace record syscall-counts 414 # perf script record syscall-counts
415 415
416and display the output using 'perf trace report': 416and display the output using 'perf script report':
417 417
418 # perf trace report syscall-counts 418 # perf script report syscall-counts
419 419
420STARTER SCRIPTS 420STARTER SCRIPTS
421--------------- 421---------------
422 422
423You can quickly get started writing a script for a particular set of 423You can quickly get started writing a script for a particular set of
424trace data by generating a skeleton script using 'perf trace -g 424trace data by generating a skeleton script using 'perf script -g
425python' in the same directory as an existing perf.data trace file. 425python' in the same directory as an existing perf.data trace file.
426That will generate a starter script containing a handler for each of 426That will generate a starter script containing a handler for each of
427the event types in the trace file; it simply prints every available 427the event types in the trace file; it simply prints every available
@@ -430,13 +430,13 @@ field for each event in the trace file.
430You can also look at the existing scripts in 430You can also look at the existing scripts in
431~/libexec/perf-core/scripts/python for typical examples showing how to 431~/libexec/perf-core/scripts/python for typical examples showing how to
432do basic things like aggregate event data, print results, etc. Also, 432do basic things like aggregate event data, print results, etc. Also,
433the check-perf-trace.py script, while not interesting for its results, 433the check-perf-script.py script, while not interesting for its results,
434attempts to exercise all of the main scripting features. 434attempts to exercise all of the main scripting features.
435 435
436EVENT HANDLERS 436EVENT HANDLERS
437-------------- 437--------------
438 438
439When perf trace is invoked using a trace script, a user-defined 439When perf script is invoked using a trace script, a user-defined
440'handler function' is called for each event in the trace. If there's 440'handler function' is called for each event in the trace. If there's
441no handler function defined for a given event type, the event is 441no handler function defined for a given event type, the event is
442ignored (or passed to a 'trace_handled' function, see below) and the 442ignored (or passed to a 'trace_handled' function, see below) and the
@@ -510,7 +510,7 @@ write a useful trace script. The sections below cover the rest.
510SCRIPT LAYOUT 510SCRIPT LAYOUT
511------------- 511-------------
512 512
513Every perf trace Python script should start by setting up a Python 513Every perf script Python script should start by setting up a Python
514module search path and 'import'ing a few support modules (see module 514module search path and 'import'ing a few support modules (see module
515descriptions below): 515descriptions below):
516 516
@@ -519,7 +519,7 @@ descriptions below):
519 import sys 519 import sys
520 520
521 sys.path.append(os.environ['PERF_EXEC_PATH'] + \ 521 sys.path.append(os.environ['PERF_EXEC_PATH'] + \
522 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') 522 '/scripts/python/perf-script-Util/lib/Perf/Trace')
523 523
524 from perf_trace_context import * 524 from perf_trace_context import *
525 from Core import * 525 from Core import *
@@ -559,15 +559,15 @@ def trace_unhandled(event_name, context, common_cpu, common_secs,
559---- 559----
560 560
561The remaining sections provide descriptions of each of the available 561The remaining sections provide descriptions of each of the available
562built-in perf trace Python modules and their associated functions. 562built-in perf script Python modules and their associated functions.
563 563
564AVAILABLE MODULES AND FUNCTIONS 564AVAILABLE MODULES AND FUNCTIONS
565------------------------------- 565-------------------------------
566 566
567The following sections describe the functions and variables available 567The following sections describe the functions and variables available
568via the various perf trace Python modules. To use the functions and 568via the various perf script Python modules. To use the functions and
569variables from the given module, add the corresponding 'from XXXX 569variables from the given module, add the corresponding 'from XXXX
570import' line to your perf trace script. 570import' line to your perf script script.
571 571
572Core.py Module 572Core.py Module
573~~~~~~~~~~~~~~ 573~~~~~~~~~~~~~~
@@ -610,7 +610,7 @@ argument.
610Util.py Module 610Util.py Module
611~~~~~~~~~~~~~~ 611~~~~~~~~~~~~~~
612 612
613Various utility functions for use with perf trace: 613Various utility functions for use with perf script:
614 614
615 nsecs(secs, nsecs) - returns total nsecs given secs/nsecs pair 615 nsecs(secs, nsecs) - returns total nsecs given secs/nsecs pair
616 nsecs_secs(nsecs) - returns whole secs portion given nsecs 616 nsecs_secs(nsecs) - returns whole secs portion given nsecs
@@ -620,4 +620,4 @@ Various utility functions for use with perf trace:
620 620
621SEE ALSO 621SEE ALSO
622-------- 622--------
623linkperf:perf-trace[1] 623linkperf:perf-script[1]
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-script.txt
index 26aff6bf9e50..29ad94293cd2 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -1,71 +1,71 @@
1perf-trace(1) 1perf-script(1)
2============= 2==============
3 3
4NAME 4NAME
5---- 5----
6perf-trace - Read perf.data (created by perf record) and display trace output 6perf-script - Read perf.data (created by perf record) and display trace output
7 7
8SYNOPSIS 8SYNOPSIS
9-------- 9--------
10[verse] 10[verse]
11'perf trace' [<options>] 11'perf script' [<options>]
12'perf trace' [<options>] record <script> [<record-options>] <command> 12'perf script' [<options>] record <script> [<record-options>] <command>
13'perf trace' [<options>] report <script> [script-args] 13'perf script' [<options>] report <script> [script-args]
14'perf trace' [<options>] <script> <required-script-args> [<record-options>] <command> 14'perf script' [<options>] <script> <required-script-args> [<record-options>] <command>
15'perf trace' [<options>] <top-script> [script-args] 15'perf script' [<options>] <top-script> [script-args]
16 16
17DESCRIPTION 17DESCRIPTION
18----------- 18-----------
19This command reads the input file and displays the trace recorded. 19This command reads the input file and displays the trace recorded.
20 20
21There are several variants of perf trace: 21There are several variants of perf script:
22 22
23 'perf trace' to see a detailed trace of the workload that was 23 'perf script' to see a detailed trace of the workload that was
24 recorded. 24 recorded.
25 25
26 You can also run a set of pre-canned scripts that aggregate and 26 You can also run a set of pre-canned scripts that aggregate and
27 summarize the raw trace data in various ways (the list of scripts is 27 summarize the raw trace data in various ways (the list of scripts is
28 available via 'perf trace -l'). The following variants allow you to 28 available via 'perf script -l'). The following variants allow you to
29 record and run those scripts: 29 record and run those scripts:
30 30
31 'perf trace record <script> <command>' to record the events required 31 'perf script record <script> <command>' to record the events required
32 for 'perf trace report'. <script> is the name displayed in the 32 for 'perf script report'. <script> is the name displayed in the
33 output of 'perf trace --list' i.e. the actual script name minus any 33 output of 'perf script --list' i.e. the actual script name minus any
34 language extension. If <command> is not specified, the events are 34 language extension. If <command> is not specified, the events are
35 recorded using the -a (system-wide) 'perf record' option. 35 recorded using the -a (system-wide) 'perf record' option.
36 36
37 'perf trace report <script> [args]' to run and display the results 37 'perf script report <script> [args]' to run and display the results
38 of <script>. <script> is the name displayed in the output of 'perf 38 of <script>. <script> is the name displayed in the output of 'perf
39 trace --list' i.e. the actual script name minus any language 39 script --list' i.e. the actual script name minus any language
40 extension. The perf.data output from a previous run of 'perf trace 40 extension. The perf.data output from a previous run of 'perf script
41 record <script>' is used and should be present for this command to 41 record <script>' is used and should be present for this command to
42 succeed. [args] refers to the (mainly optional) args expected by 42 succeed. [args] refers to the (mainly optional) args expected by
43 the script. 43 the script.
44 44
45 'perf trace <script> <required-script-args> <command>' to both 45 'perf script <script> <required-script-args> <command>' to both
46 record the events required for <script> and to run the <script> 46 record the events required for <script> and to run the <script>
47 using 'live-mode' i.e. without writing anything to disk. <script> 47 using 'live-mode' i.e. without writing anything to disk. <script>
48 is the name displayed in the output of 'perf trace --list' i.e. the 48 is the name displayed in the output of 'perf script --list' i.e. the
49 actual script name minus any language extension. If <command> is 49 actual script name minus any language extension. If <command> is
50 not specified, the events are recorded using the -a (system-wide) 50 not specified, the events are recorded using the -a (system-wide)
51 'perf record' option. If <script> has any required args, they 51 'perf record' option. If <script> has any required args, they
52 should be specified before <command>. This mode doesn't allow for 52 should be specified before <command>. This mode doesn't allow for
53 optional script args to be specified; if optional script args are 53 optional script args to be specified; if optional script args are
54 desired, they can be specified using separate 'perf trace record' 54 desired, they can be specified using separate 'perf script record'
55 and 'perf trace report' commands, with the stdout of the record step 55 and 'perf script report' commands, with the stdout of the record step
56 piped to the stdin of the report script, using the '-o -' and '-i -' 56 piped to the stdin of the report script, using the '-o -' and '-i -'
57 options of the corresponding commands. 57 options of the corresponding commands.
58 58
59 'perf trace <top-script>' to both record the events required for 59 'perf script <top-script>' to both record the events required for
60 <top-script> and to run the <top-script> using 'live-mode' 60 <top-script> and to run the <top-script> using 'live-mode'
61 i.e. without writing anything to disk. <top-script> is the name 61 i.e. without writing anything to disk. <top-script> is the name
62 displayed in the output of 'perf trace --list' i.e. the actual 62 displayed in the output of 'perf script --list' i.e. the actual
63 script name minus any language extension; a <top-script> is defined 63 script name minus any language extension; a <top-script> is defined
64 as any script name ending with the string 'top'. 64 as any script name ending with the string 'top'.
65 65
66 [<record-options>] can be passed to the record steps of 'perf trace 66 [<record-options>] can be passed to the record steps of 'perf script
67 record' and 'live-mode' variants; this isn't possible however for 67 record' and 'live-mode' variants; this isn't possible however for
68 <top-script> 'live-mode' or 'perf trace report' variants. 68 <top-script> 'live-mode' or 'perf script report' variants.
69 69
70 See the 'SEE ALSO' section for links to language-specific 70 See the 'SEE ALSO' section for links to language-specific
71 information on how to write and run your own trace scripts. 71 information on how to write and run your own trace scripts.
@@ -76,7 +76,7 @@ OPTIONS
76 Any command you can specify in a shell. 76 Any command you can specify in a shell.
77 77
78-D:: 78-D::
79--dump-raw-trace=:: 79--dump-raw-script=::
80 Display verbose dump of the trace data. 80 Display verbose dump of the trace data.
81 81
82-L:: 82-L::
@@ -95,7 +95,7 @@ OPTIONS
95 95
96-g:: 96-g::
97--gen-script=:: 97--gen-script=::
98 Generate perf-trace.[ext] starter script for given language, 98 Generate perf-script.[ext] starter script for given language,
99 using current perf.data. 99 using current perf.data.
100 100
101-a:: 101-a::
@@ -104,8 +104,15 @@ OPTIONS
104 normally don't - this option allows the latter to be run in 104 normally don't - this option allows the latter to be run in
105 system-wide mode. 105 system-wide mode.
106 106
107-i::
108--input=::
109 Input file name.
110
111-d::
112--debug-mode::
113 Do various checks like samples ordering and lost events.
107 114
108SEE ALSO 115SEE ALSO
109-------- 116--------
110linkperf:perf-record[1], linkperf:perf-trace-perl[1], 117linkperf:perf-record[1], linkperf:perf-script-perl[1],
111linkperf:perf-trace-python[1] 118linkperf:perf-script-python[1]
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 4b3a2d46b437..b6da7affbbee 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -8,8 +8,8 @@ perf-stat - Run a command and gather performance counter statistics
8SYNOPSIS 8SYNOPSIS
9-------- 9--------
10[verse] 10[verse]
11'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] <command> 11'perf stat' [-e <EVENT> | --event=EVENT] [-a] <command>
12'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] -- <command> [<options>] 12'perf stat' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>]
13 13
14DESCRIPTION 14DESCRIPTION
15----------- 15-----------
@@ -35,24 +35,54 @@ OPTIONS
35 child tasks do not inherit counters 35 child tasks do not inherit counters
36-p:: 36-p::
37--pid=<pid>:: 37--pid=<pid>::
38 stat events on existing pid 38 stat events on existing process id
39
40-t::
41--tid=<tid>::
42 stat events on existing thread id
43
39 44
40-a:: 45-a::
41 system-wide collection 46--all-cpus::
47 system-wide collection from all CPUs
42 48
43-c:: 49-c::
44 scale counter values 50--scale::
51 scale/normalize counter values
52
53-r::
54--repeat=<n>::
55 repeat command and print average + stddev (max: 100)
45 56
46-B:: 57-B::
58--big-num::
47 print large numbers with thousands' separators according to locale 59 print large numbers with thousands' separators according to locale
48 60
49-C:: 61-C::
50--cpu=:: 62--cpu=::
51Count only on the list of cpus provided. Multiple CPUs can be provided as a 63Count only on the list of CPUs provided. Multiple CPUs can be provided as a
52comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. 64comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
53In per-thread mode, this option is ignored. The -a option is still necessary 65In per-thread mode, this option is ignored. The -a option is still necessary
54to activate system-wide monitoring. Default is to count on all CPUs. 66to activate system-wide monitoring. Default is to count on all CPUs.
55 67
68-A::
69--no-aggr::
70Do not aggregate counts across all monitored CPUs in system-wide mode (-a).
71This option is only valid in system-wide mode.
72
73-n::
74--null::
75 null run - don't start any counters
76
77-v::
78--verbose::
79 be more verbose (show counter open errors, etc)
80
81-x SEP::
82--field-separator SEP::
83print counts using a CSV-style output to make it easy to import directly into
84spreadsheets. Columns are separated by the string specified in SEP.
85
56EXAMPLES 86EXAMPLES
57-------- 87--------
58 88
diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt
index 1c4b5f5b7f71..2c3b462f64b0 100644
--- a/tools/perf/Documentation/perf-test.txt
+++ b/tools/perf/Documentation/perf-test.txt
@@ -12,7 +12,7 @@ SYNOPSIS
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
15This command does assorted sanity tests, initially thru linked routines but 15This command does assorted sanity tests, initially through linked routines but
16also will look for a directory with more tests in the form of scripts. 16also will look for a directory with more tests in the form of scripts.
17 17
18OPTIONS 18OPTIONS
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 1f9687663f2a..f6eb1cdafb77 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -12,7 +12,7 @@ SYNOPSIS
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
15This command generates and displays a performance counter profile in realtime. 15This command generates and displays a performance counter profile in real time.
16 16
17 17
18OPTIONS 18OPTIONS
@@ -27,8 +27,8 @@ OPTIONS
27 27
28-C <cpu-list>:: 28-C <cpu-list>::
29--cpu=<cpu>:: 29--cpu=<cpu>::
30Monitor only on the list of cpus provided. Multiple CPUs can be provided as a 30Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a
31comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. 31comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
32Default is to monitor all CPUS. 32Default is to monitor all CPUS.
33 33
34-d <seconds>:: 34-d <seconds>::
@@ -50,6 +50,10 @@ Default is to monitor all CPUS.
50--count-filter=<count>:: 50--count-filter=<count>::
51 Only display functions with more events than this. 51 Only display functions with more events than this.
52 52
53-g::
54--group::
55 Put the counters into a counter group.
56
53-F <freq>:: 57-F <freq>::
54--freq=<freq>:: 58--freq=<freq>::
55 Profile at this frequency. 59 Profile at this frequency.
@@ -68,7 +72,11 @@ Default is to monitor all CPUS.
68 72
69-p <pid>:: 73-p <pid>::
70--pid=<pid>:: 74--pid=<pid>::
71 Profile events on existing pid. 75 Profile events on existing Process ID.
76
77-t <tid>::
78--tid=<tid>::
79 Profile events on existing thread ID.
72 80
73-r <priority>:: 81-r <priority>::
74--realtime=<priority>:: 82--realtime=<priority>::
@@ -78,6 +86,18 @@ Default is to monitor all CPUS.
78--sym-annotate=<symbol>:: 86--sym-annotate=<symbol>::
79 Annotate this symbol. 87 Annotate this symbol.
80 88
89-K::
90--hide_kernel_symbols::
91 Hide kernel symbols.
92
93-U::
94--hide_user_symbols::
95 Hide user symbols.
96
97-D::
98--dump-symtab::
99 Dump the symbol table used for profiling.
100
81-v:: 101-v::
82--verbose:: 102--verbose::
83 Be more verbose (show counter open errors, etc). 103 Be more verbose (show counter open errors, etc).
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 8c7fc0c8f0b8..c12659d8cb26 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -7,6 +7,7 @@ include/linux/stringify.h
7lib/rbtree.c 7lib/rbtree.c
8include/linux/swab.h 8include/linux/swab.h
9arch/*/include/asm/unistd*.h 9arch/*/include/asm/unistd*.h
10arch/*/lib/memcpy*.S
10include/linux/poison.h 11include/linux/poison.h
11include/linux/magic.h 12include/linux/magic.h
12include/linux/hw_breakpoint.h 13include/linux/hw_breakpoint.h
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index d1db0f676a4b..ac6692cf5508 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -185,7 +185,10 @@ ifeq ($(ARCH),i386)
185 ARCH := x86 185 ARCH := x86
186endif 186endif
187ifeq ($(ARCH),x86_64) 187ifeq ($(ARCH),x86_64)
188 RAW_ARCH := x86_64
188 ARCH := x86 189 ARCH := x86
190 ARCH_CFLAGS := -DARCH_X86_64
191 ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S
189endif 192endif
190 193
191# CFLAGS and LDFLAGS are for the users to override from the command line. 194# CFLAGS and LDFLAGS are for the users to override from the command line.
@@ -375,6 +378,7 @@ LIB_H += util/include/linux/prefetch.h
375LIB_H += util/include/linux/rbtree.h 378LIB_H += util/include/linux/rbtree.h
376LIB_H += util/include/linux/string.h 379LIB_H += util/include/linux/string.h
377LIB_H += util/include/linux/types.h 380LIB_H += util/include/linux/types.h
381LIB_H += util/include/linux/linkage.h
378LIB_H += util/include/asm/asm-offsets.h 382LIB_H += util/include/asm/asm-offsets.h
379LIB_H += util/include/asm/bug.h 383LIB_H += util/include/asm/bug.h
380LIB_H += util/include/asm/byteorder.h 384LIB_H += util/include/asm/byteorder.h
@@ -383,6 +387,8 @@ LIB_H += util/include/asm/swab.h
383LIB_H += util/include/asm/system.h 387LIB_H += util/include/asm/system.h
384LIB_H += util/include/asm/uaccess.h 388LIB_H += util/include/asm/uaccess.h
385LIB_H += util/include/dwarf-regs.h 389LIB_H += util/include/dwarf-regs.h
390LIB_H += util/include/asm/dwarf2.h
391LIB_H += util/include/asm/cpufeature.h
386LIB_H += perf.h 392LIB_H += perf.h
387LIB_H += util/cache.h 393LIB_H += util/cache.h
388LIB_H += util/callchain.h 394LIB_H += util/callchain.h
@@ -417,6 +423,7 @@ LIB_H += util/probe-finder.h
417LIB_H += util/probe-event.h 423LIB_H += util/probe-event.h
418LIB_H += util/pstack.h 424LIB_H += util/pstack.h
419LIB_H += util/cpumap.h 425LIB_H += util/cpumap.h
426LIB_H += $(ARCH_INCLUDE)
420 427
421LIB_OBJS += $(OUTPUT)util/abspath.o 428LIB_OBJS += $(OUTPUT)util/abspath.o
422LIB_OBJS += $(OUTPUT)util/alias.o 429LIB_OBJS += $(OUTPUT)util/alias.o
@@ -472,6 +479,9 @@ BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
472# Benchmark modules 479# Benchmark modules
473BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o 480BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o
474BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o 481BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o
482ifeq ($(RAW_ARCH),x86_64)
483BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
484endif
475BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o 485BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
476 486
477BUILTIN_OBJS += $(OUTPUT)builtin-diff.o 487BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
@@ -485,7 +495,7 @@ BUILTIN_OBJS += $(OUTPUT)builtin-report.o
485BUILTIN_OBJS += $(OUTPUT)builtin-stat.o 495BUILTIN_OBJS += $(OUTPUT)builtin-stat.o
486BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o 496BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o
487BUILTIN_OBJS += $(OUTPUT)builtin-top.o 497BUILTIN_OBJS += $(OUTPUT)builtin-top.o
488BUILTIN_OBJS += $(OUTPUT)builtin-trace.o 498BUILTIN_OBJS += $(OUTPUT)builtin-script.o
489BUILTIN_OBJS += $(OUTPUT)builtin-probe.o 499BUILTIN_OBJS += $(OUTPUT)builtin-probe.o
490BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o 500BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o
491BUILTIN_OBJS += $(OUTPUT)builtin-lock.o 501BUILTIN_OBJS += $(OUTPUT)builtin-lock.o
@@ -507,7 +517,7 @@ PERFLIBS = $(LIB_FILE)
507-include config.mak 517-include config.mak
508 518
509ifndef NO_DWARF 519ifndef NO_DWARF
510FLAGS_DWARF=$(ALL_CFLAGS) -I/usr/include/elfutils -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS) 520FLAGS_DWARF=$(ALL_CFLAGS) -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS)
511ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF)),y) 521ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF)),y)
512 msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev); 522 msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev);
513 NO_DWARF := 1 523 NO_DWARF := 1
@@ -554,7 +564,7 @@ ifndef NO_DWARF
554ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined) 564ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
555 msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled); 565 msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);
556else 566else
557 BASIC_CFLAGS += -I/usr/include/elfutils -DDWARF_SUPPORT 567 BASIC_CFLAGS += -DDWARF_SUPPORT
558 EXTLIBS += -lelf -ldw 568 EXTLIBS += -lelf -ldw
559 LIB_OBJS += $(OUTPUT)util/probe-finder.o 569 LIB_OBJS += $(OUTPUT)util/probe-finder.o
560endif # PERF_HAVE_DWARF_REGS 570endif # PERF_HAVE_DWARF_REGS
@@ -891,13 +901,14 @@ prefix_SQ = $(subst ','\'',$(prefix))
891SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH)) 901SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
892PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH)) 902PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH))
893 903
894LIBS = $(PERFLIBS) $(EXTLIBS) 904LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive $(EXTLIBS)
895 905
896BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \ 906BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \
897 $(COMPAT_CFLAGS) 907 $(COMPAT_CFLAGS)
898LIB_OBJS += $(COMPAT_OBJS) 908LIB_OBJS += $(COMPAT_OBJS)
899 909
900ALL_CFLAGS += $(BASIC_CFLAGS) 910ALL_CFLAGS += $(BASIC_CFLAGS)
911ALL_CFLAGS += $(ARCH_CFLAGS)
901ALL_LDFLAGS += $(BASIC_LDFLAGS) 912ALL_LDFLAGS += $(BASIC_LDFLAGS)
902 913
903export TAR INSTALL DESTDIR SHELL_PATH 914export TAR INSTALL DESTDIR SHELL_PATH
diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h
new file mode 100644
index 000000000000..a72e36cb5394
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy-arch.h
@@ -0,0 +1,12 @@
1
2#ifdef ARCH_X86_64
3
4#define MEMCPY_FN(fn, name, desc) \
5 extern void *fn(void *, const void *, size_t);
6
7#include "mem-memcpy-x86-64-asm-def.h"
8
9#undef MEMCPY_FN
10
11#endif
12
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
new file mode 100644
index 000000000000..d588b87696fc
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
@@ -0,0 +1,4 @@
1
2MEMCPY_FN(__memcpy,
3 "x86-64-unrolled",
4 "unrolled memcpy() in arch/x86/lib/memcpy_64.S")
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S
new file mode 100644
index 000000000000..a57b66e853c2
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S
@@ -0,0 +1,2 @@
1
2#include "../../../arch/x86/lib/memcpy_64.S"
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index 38dae7465142..db82021f4b91 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -12,6 +12,7 @@
12#include "../util/parse-options.h" 12#include "../util/parse-options.h"
13#include "../util/header.h" 13#include "../util/header.h"
14#include "bench.h" 14#include "bench.h"
15#include "mem-memcpy-arch.h"
15 16
16#include <stdio.h> 17#include <stdio.h>
17#include <stdlib.h> 18#include <stdlib.h>
@@ -23,8 +24,10 @@
23 24
24static const char *length_str = "1MB"; 25static const char *length_str = "1MB";
25static const char *routine = "default"; 26static const char *routine = "default";
26static bool use_clock = false; 27static bool use_clock;
27static int clock_fd; 28static int clock_fd;
29static bool only_prefault;
30static bool no_prefault;
28 31
29static const struct option options[] = { 32static const struct option options[] = {
30 OPT_STRING('l', "length", &length_str, "1MB", 33 OPT_STRING('l', "length", &length_str, "1MB",
@@ -34,19 +37,33 @@ static const struct option options[] = {
34 "Specify routine to copy"), 37 "Specify routine to copy"),
35 OPT_BOOLEAN('c', "clock", &use_clock, 38 OPT_BOOLEAN('c', "clock", &use_clock,
36 "Use CPU clock for measuring"), 39 "Use CPU clock for measuring"),
40 OPT_BOOLEAN('o', "only-prefault", &only_prefault,
41 "Show only the result with page faults before memcpy()"),
42 OPT_BOOLEAN('n', "no-prefault", &no_prefault,
43 "Show only the result without page faults before memcpy()"),
37 OPT_END() 44 OPT_END()
38}; 45};
39 46
47typedef void *(*memcpy_t)(void *, const void *, size_t);
48
40struct routine { 49struct routine {
41 const char *name; 50 const char *name;
42 const char *desc; 51 const char *desc;
43 void * (*fn)(void *dst, const void *src, size_t len); 52 memcpy_t fn;
44}; 53};
45 54
46struct routine routines[] = { 55struct routine routines[] = {
47 { "default", 56 { "default",
48 "Default memcpy() provided by glibc", 57 "Default memcpy() provided by glibc",
49 memcpy }, 58 memcpy },
59#ifdef ARCH_X86_64
60
61#define MEMCPY_FN(fn, name, desc) { name, desc, fn },
62#include "mem-memcpy-x86-64-asm-def.h"
63#undef MEMCPY_FN
64
65#endif
66
50 { NULL, 67 { NULL,
51 NULL, 68 NULL,
52 NULL } 69 NULL }
@@ -89,29 +106,98 @@ static double timeval2double(struct timeval *ts)
89 (double)ts->tv_usec / (double)1000000; 106 (double)ts->tv_usec / (double)1000000;
90} 107}
91 108
109static void alloc_mem(void **dst, void **src, size_t length)
110{
111 *dst = zalloc(length);
112 if (!dst)
113 die("memory allocation failed - maybe length is too large?\n");
114
115 *src = zalloc(length);
116 if (!src)
117 die("memory allocation failed - maybe length is too large?\n");
118}
119
120static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
121{
122 u64 clock_start = 0ULL, clock_end = 0ULL;
123 void *src = NULL, *dst = NULL;
124
125 alloc_mem(&src, &dst, len);
126
127 if (prefault)
128 fn(dst, src, len);
129
130 clock_start = get_clock();
131 fn(dst, src, len);
132 clock_end = get_clock();
133
134 free(src);
135 free(dst);
136 return clock_end - clock_start;
137}
138
139static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
140{
141 struct timeval tv_start, tv_end, tv_diff;
142 void *src = NULL, *dst = NULL;
143
144 alloc_mem(&src, &dst, len);
145
146 if (prefault)
147 fn(dst, src, len);
148
149 BUG_ON(gettimeofday(&tv_start, NULL));
150 fn(dst, src, len);
151 BUG_ON(gettimeofday(&tv_end, NULL));
152
153 timersub(&tv_end, &tv_start, &tv_diff);
154
155 free(src);
156 free(dst);
157 return (double)((double)len / timeval2double(&tv_diff));
158}
159
160#define pf (no_prefault ? 0 : 1)
161
162#define print_bps(x) do { \
163 if (x < K) \
164 printf(" %14lf B/Sec", x); \
165 else if (x < K * K) \
166 printf(" %14lfd KB/Sec", x / K); \
167 else if (x < K * K * K) \
168 printf(" %14lf MB/Sec", x / K / K); \
169 else \
170 printf(" %14lf GB/Sec", x / K / K / K); \
171 } while (0)
172
92int bench_mem_memcpy(int argc, const char **argv, 173int bench_mem_memcpy(int argc, const char **argv,
93 const char *prefix __used) 174 const char *prefix __used)
94{ 175{
95 int i; 176 int i;
96 void *dst, *src; 177 size_t len;
97 size_t length; 178 double result_bps[2];
98 double bps = 0.0; 179 u64 result_clock[2];
99 struct timeval tv_start, tv_end, tv_diff;
100 u64 clock_start, clock_end, clock_diff;
101 180
102 clock_start = clock_end = clock_diff = 0ULL;
103 argc = parse_options(argc, argv, options, 181 argc = parse_options(argc, argv, options,
104 bench_mem_memcpy_usage, 0); 182 bench_mem_memcpy_usage, 0);
105 183
106 tv_diff.tv_sec = 0; 184 if (use_clock)
107 tv_diff.tv_usec = 0; 185 init_clock();
108 length = (size_t)perf_atoll((char *)length_str); 186
187 len = (size_t)perf_atoll((char *)length_str);
109 188
110 if ((s64)length <= 0) { 189 result_clock[0] = result_clock[1] = 0ULL;
190 result_bps[0] = result_bps[1] = 0.0;
191
192 if ((s64)len <= 0) {
111 fprintf(stderr, "Invalid length:%s\n", length_str); 193 fprintf(stderr, "Invalid length:%s\n", length_str);
112 return 1; 194 return 1;
113 } 195 }
114 196
197 /* same to without specifying either of prefault and no-prefault */
198 if (only_prefault && no_prefault)
199 only_prefault = no_prefault = false;
200
115 for (i = 0; routines[i].name; i++) { 201 for (i = 0; routines[i].name; i++) {
116 if (!strcmp(routines[i].name, routine)) 202 if (!strcmp(routines[i].name, routine))
117 break; 203 break;
@@ -126,61 +212,80 @@ int bench_mem_memcpy(int argc, const char **argv,
126 return 1; 212 return 1;
127 } 213 }
128 214
129 dst = zalloc(length); 215 if (bench_format == BENCH_FORMAT_DEFAULT)
130 if (!dst) 216 printf("# Copying %s Bytes ...\n\n", length_str);
131 die("memory allocation failed - maybe length is too large?\n");
132
133 src = zalloc(length);
134 if (!src)
135 die("memory allocation failed - maybe length is too large?\n");
136
137 if (bench_format == BENCH_FORMAT_DEFAULT) {
138 printf("# Copying %s Bytes from %p to %p ...\n\n",
139 length_str, src, dst);
140 }
141
142 if (use_clock) {
143 init_clock();
144 clock_start = get_clock();
145 } else {
146 BUG_ON(gettimeofday(&tv_start, NULL));
147 }
148
149 routines[i].fn(dst, src, length);
150 217
151 if (use_clock) { 218 if (!only_prefault && !no_prefault) {
152 clock_end = get_clock(); 219 /* show both of results */
153 clock_diff = clock_end - clock_start; 220 if (use_clock) {
221 result_clock[0] =
222 do_memcpy_clock(routines[i].fn, len, false);
223 result_clock[1] =
224 do_memcpy_clock(routines[i].fn, len, true);
225 } else {
226 result_bps[0] =
227 do_memcpy_gettimeofday(routines[i].fn,
228 len, false);
229 result_bps[1] =
230 do_memcpy_gettimeofday(routines[i].fn,
231 len, true);
232 }
154 } else { 233 } else {
155 BUG_ON(gettimeofday(&tv_end, NULL)); 234 if (use_clock) {
156 timersub(&tv_end, &tv_start, &tv_diff); 235 result_clock[pf] =
157 bps = (double)((double)length / timeval2double(&tv_diff)); 236 do_memcpy_clock(routines[i].fn,
237 len, only_prefault);
238 } else {
239 result_bps[pf] =
240 do_memcpy_gettimeofday(routines[i].fn,
241 len, only_prefault);
242 }
158 } 243 }
159 244
160 switch (bench_format) { 245 switch (bench_format) {
161 case BENCH_FORMAT_DEFAULT: 246 case BENCH_FORMAT_DEFAULT:
162 if (use_clock) { 247 if (!only_prefault && !no_prefault) {
163 printf(" %14lf Clock/Byte\n", 248 if (use_clock) {
164 (double)clock_diff / (double)length); 249 printf(" %14lf Clock/Byte\n",
165 } else { 250 (double)result_clock[0]
166 if (bps < K) 251 / (double)len);
167 printf(" %14lf B/Sec\n", bps); 252 printf(" %14lf Clock/Byte (with prefault)\n",
168 else if (bps < K * K) 253 (double)result_clock[1]
169 printf(" %14lfd KB/Sec\n", bps / 1024); 254 / (double)len);
170 else if (bps < K * K * K) 255 } else {
171 printf(" %14lf MB/Sec\n", bps / 1024 / 1024); 256 print_bps(result_bps[0]);
172 else { 257 printf("\n");
173 printf(" %14lf GB/Sec\n", 258 print_bps(result_bps[1]);
174 bps / 1024 / 1024 / 1024); 259 printf(" (with prefault)\n");
175 } 260 }
261 } else {
262 if (use_clock) {
263 printf(" %14lf Clock/Byte",
264 (double)result_clock[pf]
265 / (double)len);
266 } else
267 print_bps(result_bps[pf]);
268
269 printf("%s\n", only_prefault ? " (with prefault)" : "");
176 } 270 }
177 break; 271 break;
178 case BENCH_FORMAT_SIMPLE: 272 case BENCH_FORMAT_SIMPLE:
179 if (use_clock) { 273 if (!only_prefault && !no_prefault) {
180 printf("%14lf\n", 274 if (use_clock) {
181 (double)clock_diff / (double)length); 275 printf("%lf %lf\n",
182 } else 276 (double)result_clock[0] / (double)len,
183 printf("%lf\n", bps); 277 (double)result_clock[1] / (double)len);
278 } else {
279 printf("%lf %lf\n",
280 result_bps[0], result_bps[1]);
281 }
282 } else {
283 if (use_clock) {
284 printf("%lf\n", (double)result_clock[pf]
285 / (double)len);
286 } else
287 printf("%lf\n", result_bps[pf]);
288 }
184 break; 289 break;
185 default: 290 default:
186 /* reaching this means there's some disaster: */ 291 /* reaching this means there's some disaster: */
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 6d5604d8df95..569a2761b90a 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -58,12 +58,12 @@ static int hists__add_entry(struct hists *self, struct addr_location *al)
58 return hist_entry__inc_addr_samples(he, al->addr); 58 return hist_entry__inc_addr_samples(he, al->addr);
59} 59}
60 60
61static int process_sample_event(event_t *event, struct perf_session *session) 61static int process_sample_event(event_t *event, struct sample_data *sample,
62 struct perf_session *session)
62{ 63{
63 struct addr_location al; 64 struct addr_location al;
64 struct sample_data data;
65 65
66 if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) { 66 if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
67 pr_warning("problem processing %d event, skipping it.\n", 67 pr_warning("problem processing %d event, skipping it.\n",
68 event->header.type); 68 event->header.type);
69 return -1; 69 return -1;
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index fca1d4402910..5e1a043aae03 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -30,12 +30,13 @@ static int hists__add_entry(struct hists *self,
30 return -ENOMEM; 30 return -ENOMEM;
31} 31}
32 32
33static int diff__process_sample_event(event_t *event, struct perf_session *session) 33static int diff__process_sample_event(event_t *event,
34 struct sample_data *sample,
35 struct perf_session *session)
34{ 36{
35 struct addr_location al; 37 struct addr_location al;
36 struct sample_data data = { .period = 1, };
37 38
38 if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) { 39 if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
39 pr_warning("problem processing %d event, skipping it.\n", 40 pr_warning("problem processing %d event, skipping it.\n",
40 event->header.type); 41 event->header.type);
41 return -1; 42 return -1;
@@ -44,12 +45,12 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi
44 if (al.filtered || al.sym == NULL) 45 if (al.filtered || al.sym == NULL)
45 return 0; 46 return 0;
46 47
47 if (hists__add_entry(&session->hists, &al, data.period)) { 48 if (hists__add_entry(&session->hists, &al, sample->period)) {
48 pr_warning("problem incrementing symbol period, skipping event\n"); 49 pr_warning("problem incrementing symbol period, skipping event\n");
49 return -1; 50 return -1;
50 } 51 }
51 52
52 session->hists.stats.total_period += data.period; 53 session->hists.stats.total_period += sample->period;
53 return 0; 54 return 0;
54} 55}
55 56
@@ -173,7 +174,7 @@ static const char * const diff_usage[] = {
173static const struct option options[] = { 174static const struct option options[] = {
174 OPT_INCR('v', "verbose", &verbose, 175 OPT_INCR('v', "verbose", &verbose,
175 "be more verbose (show symbol address, etc)"), 176 "be more verbose (show symbol address, etc)"),
176 OPT_BOOLEAN('m', "displacement", &show_displacement, 177 OPT_BOOLEAN('M', "displacement", &show_displacement,
177 "Show position displacement relative to baseline"), 178 "Show position displacement relative to baseline"),
178 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 179 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
179 "dump raw trace in ASCII"), 180 "dump raw trace in ASCII"),
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 8e3e47b064ce..4b66b8579410 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -16,8 +16,8 @@
16static char const *input_name = "-"; 16static char const *input_name = "-";
17static bool inject_build_ids; 17static bool inject_build_ids;
18 18
19static int event__repipe(event_t *event __used, 19static int event__repipe_synth(event_t *event,
20 struct perf_session *session __used) 20 struct perf_session *session __used)
21{ 21{
22 uint32_t size; 22 uint32_t size;
23 void *buf = event; 23 void *buf = event;
@@ -36,22 +36,30 @@ static int event__repipe(event_t *event __used,
36 return 0; 36 return 0;
37} 37}
38 38
39static int event__repipe_mmap(event_t *self, struct perf_session *session) 39static int event__repipe(event_t *event, struct sample_data *sample __used,
40 struct perf_session *session)
41{
42 return event__repipe_synth(event, session);
43}
44
45static int event__repipe_mmap(event_t *self, struct sample_data *sample,
46 struct perf_session *session)
40{ 47{
41 int err; 48 int err;
42 49
43 err = event__process_mmap(self, session); 50 err = event__process_mmap(self, sample, session);
44 event__repipe(self, session); 51 event__repipe(self, sample, session);
45 52
46 return err; 53 return err;
47} 54}
48 55
49static int event__repipe_task(event_t *self, struct perf_session *session) 56static int event__repipe_task(event_t *self, struct sample_data *sample,
57 struct perf_session *session)
50{ 58{
51 int err; 59 int err;
52 60
53 err = event__process_task(self, session); 61 err = event__process_task(self, sample, session);
54 event__repipe(self, session); 62 event__repipe(self, sample, session);
55 63
56 return err; 64 return err;
57} 65}
@@ -61,7 +69,7 @@ static int event__repipe_tracing_data(event_t *self,
61{ 69{
62 int err; 70 int err;
63 71
64 event__repipe(self, session); 72 event__repipe_synth(self, session);
65 err = event__process_tracing_data(self, session); 73 err = event__process_tracing_data(self, session);
66 74
67 return err; 75 return err;
@@ -111,7 +119,8 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session)
111 return 0; 119 return 0;
112} 120}
113 121
114static int event__inject_buildid(event_t *event, struct perf_session *session) 122static int event__inject_buildid(event_t *event, struct sample_data *sample,
123 struct perf_session *session)
115{ 124{
116 struct addr_location al; 125 struct addr_location al;
117 struct thread *thread; 126 struct thread *thread;
@@ -146,7 +155,7 @@ static int event__inject_buildid(event_t *event, struct perf_session *session)
146 } 155 }
147 156
148repipe: 157repipe:
149 event__repipe(event, session); 158 event__repipe(event, sample, session);
150 return 0; 159 return 0;
151} 160}
152 161
@@ -160,10 +169,10 @@ struct perf_event_ops inject_ops = {
160 .read = event__repipe, 169 .read = event__repipe,
161 .throttle = event__repipe, 170 .throttle = event__repipe,
162 .unthrottle = event__repipe, 171 .unthrottle = event__repipe,
163 .attr = event__repipe, 172 .attr = event__repipe_synth,
164 .event_type = event__repipe, 173 .event_type = event__repipe_synth,
165 .tracing_data = event__repipe, 174 .tracing_data = event__repipe_synth,
166 .build_id = event__repipe, 175 .build_id = event__repipe_synth,
167}; 176};
168 177
169extern volatile int session_done; 178extern volatile int session_done;
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 31f60a2535e0..c9620ff6496f 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -304,22 +304,11 @@ process_raw_event(event_t *raw_event __used, void *data,
304 } 304 }
305} 305}
306 306
307static int process_sample_event(event_t *event, struct perf_session *session) 307static int process_sample_event(event_t *event, struct sample_data *sample,
308 struct perf_session *session)
308{ 309{
309 struct sample_data data; 310 struct thread *thread = perf_session__findnew(session, event->ip.pid);
310 struct thread *thread;
311 311
312 memset(&data, 0, sizeof(data));
313 data.time = -1;
314 data.cpu = -1;
315 data.period = 1;
316
317 event__parse_sample(event, session->sample_type, &data);
318
319 dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
320 data.pid, data.tid, data.ip, data.period);
321
322 thread = perf_session__findnew(session, event->ip.pid);
323 if (thread == NULL) { 312 if (thread == NULL) {
324 pr_debug("problem processing %d event, skipping it.\n", 313 pr_debug("problem processing %d event, skipping it.\n",
325 event->header.type); 314 event->header.type);
@@ -328,8 +317,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
328 317
329 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); 318 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
330 319
331 process_raw_event(event, data.raw_data, data.cpu, 320 process_raw_event(event, sample->raw_data, sample->cpu,
332 data.time, thread); 321 sample->time, thread);
333 322
334 return 0; 323 return 0;
335} 324}
@@ -747,6 +736,9 @@ static int __cmd_record(int argc, const char **argv)
747 rec_argc = ARRAY_SIZE(record_args) + argc - 1; 736 rec_argc = ARRAY_SIZE(record_args) + argc - 1;
748 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 737 rec_argv = calloc(rec_argc + 1, sizeof(char *));
749 738
739 if (rec_argv == NULL)
740 return -ENOMEM;
741
750 for (i = 0; i < ARRAY_SIZE(record_args); i++) 742 for (i = 0; i < ARRAY_SIZE(record_args); i++)
751 rec_argv[i] = strdup(record_args[i]); 743 rec_argv[i] = strdup(record_args[i]);
752 744
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 821c1586a22b..b41b4492b1cc 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -834,22 +834,18 @@ static void dump_info(void)
834 die("Unknown type of information\n"); 834 die("Unknown type of information\n");
835} 835}
836 836
837static int process_sample_event(event_t *self, struct perf_session *s) 837static int process_sample_event(event_t *self, struct sample_data *sample,
838 struct perf_session *s)
838{ 839{
839 struct sample_data data; 840 struct thread *thread = perf_session__findnew(s, sample->tid);
840 struct thread *thread;
841 841
842 bzero(&data, sizeof(data));
843 event__parse_sample(self, s->sample_type, &data);
844
845 thread = perf_session__findnew(s, data.tid);
846 if (thread == NULL) { 842 if (thread == NULL) {
847 pr_debug("problem processing %d event, skipping it.\n", 843 pr_debug("problem processing %d event, skipping it.\n",
848 self->header.type); 844 self->header.type);
849 return -1; 845 return -1;
850 } 846 }
851 847
852 process_raw_event(data.raw_data, data.cpu, data.time, thread); 848 process_raw_event(sample->raw_data, sample->cpu, sample->time, thread);
853 849
854 return 0; 850 return 0;
855} 851}
@@ -947,6 +943,9 @@ static int __cmd_record(int argc, const char **argv)
947 rec_argc = ARRAY_SIZE(record_args) + argc - 1; 943 rec_argc = ARRAY_SIZE(record_args) + argc - 1;
948 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 944 rec_argv = calloc(rec_argc + 1, sizeof(char *));
949 945
946 if (rec_argv == NULL)
947 return -ENOMEM;
948
950 for (i = 0; i < ARRAY_SIZE(record_args); i++) 949 for (i = 0; i < ARRAY_SIZE(record_args); i++)
951 rec_argv[i] = strdup(record_args[i]); 950 rec_argv[i] = strdup(record_args[i]);
952 951
@@ -982,9 +981,9 @@ int cmd_lock(int argc, const char **argv, const char *prefix __used)
982 usage_with_options(report_usage, report_options); 981 usage_with_options(report_usage, report_options);
983 } 982 }
984 __cmd_report(); 983 __cmd_report();
985 } else if (!strcmp(argv[0], "trace")) { 984 } else if (!strcmp(argv[0], "script")) {
986 /* Aliased to 'perf trace' */ 985 /* Aliased to 'perf script' */
987 return cmd_trace(argc, argv, prefix); 986 return cmd_script(argc, argv, prefix);
988 } else if (!strcmp(argv[0], "info")) { 987 } else if (!strcmp(argv[0], "info")) {
989 if (argc) { 988 if (argc) {
990 argc = parse_options(argc, argv, 989 argc = parse_options(argc, argv,
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 564491fa18b2..e9be6ae87a27 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -36,6 +36,7 @@ static int *fd[MAX_NR_CPUS][MAX_COUNTERS];
36 36
37static u64 user_interval = ULLONG_MAX; 37static u64 user_interval = ULLONG_MAX;
38static u64 default_interval = 0; 38static u64 default_interval = 0;
39static u64 sample_type;
39 40
40static int nr_cpus = 0; 41static int nr_cpus = 0;
41static unsigned int page_size; 42static unsigned int page_size;
@@ -48,6 +49,7 @@ static const char *output_name = "perf.data";
48static int group = 0; 49static int group = 0;
49static int realtime_prio = 0; 50static int realtime_prio = 0;
50static bool raw_samples = false; 51static bool raw_samples = false;
52static bool sample_id_all_avail = true;
51static bool system_wide = false; 53static bool system_wide = false;
52static pid_t target_pid = -1; 54static pid_t target_pid = -1;
53static pid_t target_tid = -1; 55static pid_t target_tid = -1;
@@ -60,7 +62,9 @@ static bool call_graph = false;
60static bool inherit_stat = false; 62static bool inherit_stat = false;
61static bool no_samples = false; 63static bool no_samples = false;
62static bool sample_address = false; 64static bool sample_address = false;
65static bool sample_time = false;
63static bool no_buildid = false; 66static bool no_buildid = false;
67static bool no_buildid_cache = false;
64 68
65static long samples = 0; 69static long samples = 0;
66static u64 bytes_written = 0; 70static u64 bytes_written = 0;
@@ -128,6 +132,7 @@ static void write_output(void *buf, size_t size)
128} 132}
129 133
130static int process_synthesized_event(event_t *event, 134static int process_synthesized_event(event_t *event,
135 struct sample_data *sample __used,
131 struct perf_session *self __used) 136 struct perf_session *self __used)
132{ 137{
133 write_output(event, event->header.size); 138 write_output(event, event->header.size);
@@ -280,12 +285,18 @@ static void create_counter(int counter, int cpu)
280 if (system_wide) 285 if (system_wide)
281 attr->sample_type |= PERF_SAMPLE_CPU; 286 attr->sample_type |= PERF_SAMPLE_CPU;
282 287
288 if (sample_time)
289 attr->sample_type |= PERF_SAMPLE_TIME;
290
283 if (raw_samples) { 291 if (raw_samples) {
284 attr->sample_type |= PERF_SAMPLE_TIME; 292 attr->sample_type |= PERF_SAMPLE_TIME;
285 attr->sample_type |= PERF_SAMPLE_RAW; 293 attr->sample_type |= PERF_SAMPLE_RAW;
286 attr->sample_type |= PERF_SAMPLE_CPU; 294 attr->sample_type |= PERF_SAMPLE_CPU;
287 } 295 }
288 296
297 if (!sample_type)
298 sample_type = attr->sample_type;
299
289 attr->mmap = track; 300 attr->mmap = track;
290 attr->comm = track; 301 attr->comm = track;
291 attr->inherit = !no_inherit; 302 attr->inherit = !no_inherit;
@@ -293,6 +304,8 @@ static void create_counter(int counter, int cpu)
293 attr->disabled = 1; 304 attr->disabled = 1;
294 attr->enable_on_exec = 1; 305 attr->enable_on_exec = 1;
295 } 306 }
307retry_sample_id:
308 attr->sample_id_all = sample_id_all_avail ? 1 : 0;
296 309
297 for (thread_index = 0; thread_index < thread_num; thread_index++) { 310 for (thread_index = 0; thread_index < thread_num; thread_index++) {
298try_again: 311try_again:
@@ -309,6 +322,12 @@ try_again:
309 else if (err == ENODEV && cpu_list) { 322 else if (err == ENODEV && cpu_list) {
310 die("No such device - did you specify" 323 die("No such device - did you specify"
311 " an out-of-range profile CPU?\n"); 324 " an out-of-range profile CPU?\n");
325 } else if (err == EINVAL && sample_id_all_avail) {
326 /*
327 * Old kernel, no attr->sample_id_type_all field
328 */
329 sample_id_all_avail = false;
330 goto retry_sample_id;
312 } 331 }
313 332
314 /* 333 /*
@@ -326,7 +345,7 @@ try_again:
326 goto try_again; 345 goto try_again;
327 } 346 }
328 printf("\n"); 347 printf("\n");
329 error("perfcounter syscall returned with %d (%s)\n", 348 error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
330 fd[nr_cpu][counter][thread_index], strerror(err)); 349 fd[nr_cpu][counter][thread_index], strerror(err));
331 350
332#if defined(__i386__) || defined(__x86_64__) 351#if defined(__i386__) || defined(__x86_64__)
@@ -437,7 +456,8 @@ static void atexit_header(void)
437 if (!pipe_output) { 456 if (!pipe_output) {
438 session->header.data_size += bytes_written; 457 session->header.data_size += bytes_written;
439 458
440 process_buildids(); 459 if (!no_buildid)
460 process_buildids();
441 perf_header__write(&session->header, output, true); 461 perf_header__write(&session->header, output, true);
442 perf_session__delete(session); 462 perf_session__delete(session);
443 symbol__exit(); 463 symbol__exit();
@@ -558,6 +578,9 @@ static int __cmd_record(int argc, const char **argv)
558 return -1; 578 return -1;
559 } 579 }
560 580
581 if (!no_buildid)
582 perf_header__set_feat(&session->header, HEADER_BUILD_ID);
583
561 if (!file_new) { 584 if (!file_new) {
562 err = perf_header__read(session, output); 585 err = perf_header__read(session, output);
563 if (err < 0) 586 if (err < 0)
@@ -639,6 +662,8 @@ static int __cmd_record(int argc, const char **argv)
639 open_counters(cpumap[i]); 662 open_counters(cpumap[i]);
640 } 663 }
641 664
665 perf_session__set_sample_type(session, sample_type);
666
642 if (pipe_output) { 667 if (pipe_output) {
643 err = perf_header__write_pipe(output); 668 err = perf_header__write_pipe(output);
644 if (err < 0) 669 if (err < 0)
@@ -651,6 +676,8 @@ static int __cmd_record(int argc, const char **argv)
651 676
652 post_processing_offset = lseek(output, 0, SEEK_CUR); 677 post_processing_offset = lseek(output, 0, SEEK_CUR);
653 678
679 perf_session__set_sample_id_all(session, sample_id_all_avail);
680
654 if (pipe_output) { 681 if (pipe_output) {
655 err = event__synthesize_attrs(&session->header, 682 err = event__synthesize_attrs(&session->header,
656 process_synthesized_event, 683 process_synthesized_event,
@@ -831,10 +858,13 @@ const struct option record_options[] = {
831 "per thread counts"), 858 "per thread counts"),
832 OPT_BOOLEAN('d', "data", &sample_address, 859 OPT_BOOLEAN('d', "data", &sample_address,
833 "Sample addresses"), 860 "Sample addresses"),
861 OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
834 OPT_BOOLEAN('n', "no-samples", &no_samples, 862 OPT_BOOLEAN('n', "no-samples", &no_samples,
835 "don't sample"), 863 "don't sample"),
836 OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid, 864 OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
837 "do not update the buildid cache"), 865 "do not update the buildid cache"),
866 OPT_BOOLEAN('B', "no-buildid", &no_buildid,
867 "do not collect buildids in perf.data"),
838 OPT_END() 868 OPT_END()
839}; 869};
840 870
@@ -859,7 +889,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
859 } 889 }
860 890
861 symbol__init(); 891 symbol__init();
862 if (no_buildid) 892
893 if (no_buildid_cache || no_buildid)
863 disable_buildid_cache(); 894 disable_buildid_cache();
864 895
865 if (!nr_counters) { 896 if (!nr_counters) {
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 5de405d45230..b6a2a899aa8f 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -150,13 +150,13 @@ static int add_event_total(struct perf_session *session,
150 return 0; 150 return 0;
151} 151}
152 152
153static int process_sample_event(event_t *event, struct perf_session *session) 153static int process_sample_event(event_t *event, struct sample_data *sample,
154 struct perf_session *session)
154{ 155{
155 struct sample_data data = { .period = 1, };
156 struct addr_location al; 156 struct addr_location al;
157 struct perf_event_attr *attr; 157 struct perf_event_attr *attr;
158 158
159 if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) { 159 if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
160 fprintf(stderr, "problem processing %d event, skipping it.\n", 160 fprintf(stderr, "problem processing %d event, skipping it.\n",
161 event->header.type); 161 event->header.type);
162 return -1; 162 return -1;
@@ -165,14 +165,14 @@ static int process_sample_event(event_t *event, struct perf_session *session)
165 if (al.filtered || (hide_unresolved && al.sym == NULL)) 165 if (al.filtered || (hide_unresolved && al.sym == NULL))
166 return 0; 166 return 0;
167 167
168 if (perf_session__add_hist_entry(session, &al, &data)) { 168 if (perf_session__add_hist_entry(session, &al, sample)) {
169 pr_debug("problem incrementing symbol period, skipping event\n"); 169 pr_debug("problem incrementing symbol period, skipping event\n");
170 return -1; 170 return -1;
171 } 171 }
172 172
173 attr = perf_header__find_attr(data.id, &session->header); 173 attr = perf_header__find_attr(sample->id, &session->header);
174 174
175 if (add_event_total(session, &data, attr)) { 175 if (add_event_total(session, sample, attr)) {
176 pr_debug("problem adding event period\n"); 176 pr_debug("problem adding event period\n");
177 return -1; 177 return -1;
178 } 178 }
@@ -180,7 +180,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
180 return 0; 180 return 0;
181} 181}
182 182
183static int process_read_event(event_t *event, struct perf_session *session __used) 183static int process_read_event(event_t *event, struct sample_data *sample __used,
184 struct perf_session *session __used)
184{ 185{
185 struct perf_event_attr *attr; 186 struct perf_event_attr *attr;
186 187
@@ -442,6 +443,8 @@ static const struct option options[] = {
442 "dump raw trace in ASCII"), 443 "dump raw trace in ASCII"),
443 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 444 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
444 "file", "vmlinux pathname"), 445 "file", "vmlinux pathname"),
446 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
447 "file", "kallsyms pathname"),
445 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), 448 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
446 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, 449 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
447 "load module symbols - WARNING: use only with -k and LIVE kernel"), 450 "load module symbols - WARNING: use only with -k and LIVE kernel"),
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 55f3b5dcc731..c7753940aea0 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1606,25 +1606,15 @@ process_raw_event(event_t *raw_event __used, struct perf_session *session,
1606 process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread); 1606 process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread);
1607} 1607}
1608 1608
1609static int process_sample_event(event_t *event, struct perf_session *session) 1609static int process_sample_event(event_t *event, struct sample_data *sample,
1610 struct perf_session *session)
1610{ 1611{
1611 struct sample_data data;
1612 struct thread *thread; 1612 struct thread *thread;
1613 1613
1614 if (!(session->sample_type & PERF_SAMPLE_RAW)) 1614 if (!(session->sample_type & PERF_SAMPLE_RAW))
1615 return 0; 1615 return 0;
1616 1616
1617 memset(&data, 0, sizeof(data)); 1617 thread = perf_session__findnew(session, sample->pid);
1618 data.time = -1;
1619 data.cpu = -1;
1620 data.period = -1;
1621
1622 event__parse_sample(event, session->sample_type, &data);
1623
1624 dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
1625 data.pid, data.tid, data.ip, data.period);
1626
1627 thread = perf_session__findnew(session, data.pid);
1628 if (thread == NULL) { 1618 if (thread == NULL) {
1629 pr_debug("problem processing %d event, skipping it.\n", 1619 pr_debug("problem processing %d event, skipping it.\n",
1630 event->header.type); 1620 event->header.type);
@@ -1633,10 +1623,11 @@ static int process_sample_event(event_t *event, struct perf_session *session)
1633 1623
1634 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); 1624 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
1635 1625
1636 if (profile_cpu != -1 && profile_cpu != (int)data.cpu) 1626 if (profile_cpu != -1 && profile_cpu != (int)sample->cpu)
1637 return 0; 1627 return 0;
1638 1628
1639 process_raw_event(event, session, data.raw_data, data.cpu, data.time, thread); 1629 process_raw_event(event, session, sample->raw_data, sample->cpu,
1630 sample->time, thread);
1640 1631
1641 return 0; 1632 return 0;
1642} 1633}
@@ -1869,6 +1860,9 @@ static int __cmd_record(int argc, const char **argv)
1869 rec_argc = ARRAY_SIZE(record_args) + argc - 1; 1860 rec_argc = ARRAY_SIZE(record_args) + argc - 1;
1870 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 1861 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1871 1862
1863 if (rec_argv)
1864 return -ENOMEM;
1865
1872 for (i = 0; i < ARRAY_SIZE(record_args); i++) 1866 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1873 rec_argv[i] = strdup(record_args[i]); 1867 rec_argv[i] = strdup(record_args[i]);
1874 1868
@@ -1888,10 +1882,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used)
1888 usage_with_options(sched_usage, sched_options); 1882 usage_with_options(sched_usage, sched_options);
1889 1883
1890 /* 1884 /*
1891 * Aliased to 'perf trace' for now: 1885 * Aliased to 'perf script' for now:
1892 */ 1886 */
1893 if (!strcmp(argv[0], "trace")) 1887 if (!strcmp(argv[0], "script"))
1894 return cmd_trace(argc, argv, prefix); 1888 return cmd_script(argc, argv, prefix);
1895 1889
1896 symbol__init(); 1890 symbol__init();
1897 if (!strncmp(argv[0], "rec", 3)) { 1891 if (!strncmp(argv[0], "rec", 3)) {
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-script.c
index 86cfe3800e6b..54f1ea808db5 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-script.c
@@ -56,29 +56,18 @@ static void setup_scripting(void)
56 56
57static int cleanup_scripting(void) 57static int cleanup_scripting(void)
58{ 58{
59 pr_debug("\nperf trace script stopped\n"); 59 pr_debug("\nperf script stopped\n");
60 60
61 return scripting_ops->stop_script(); 61 return scripting_ops->stop_script();
62} 62}
63 63
64static char const *input_name = "perf.data"; 64static char const *input_name = "perf.data";
65 65
66static int process_sample_event(event_t *event, struct perf_session *session) 66static int process_sample_event(event_t *event, struct sample_data *sample,
67 struct perf_session *session)
67{ 68{
68 struct sample_data data; 69 struct thread *thread = perf_session__findnew(session, event->ip.pid);
69 struct thread *thread;
70 70
71 memset(&data, 0, sizeof(data));
72 data.time = -1;
73 data.cpu = -1;
74 data.period = 1;
75
76 event__parse_sample(event, session->sample_type, &data);
77
78 dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
79 data.pid, data.tid, data.ip, data.period);
80
81 thread = perf_session__findnew(session, event->ip.pid);
82 if (thread == NULL) { 71 if (thread == NULL) {
83 pr_debug("problem processing %d event, skipping it.\n", 72 pr_debug("problem processing %d event, skipping it.\n",
84 event->header.type); 73 event->header.type);
@@ -87,13 +76,13 @@ static int process_sample_event(event_t *event, struct perf_session *session)
87 76
88 if (session->sample_type & PERF_SAMPLE_RAW) { 77 if (session->sample_type & PERF_SAMPLE_RAW) {
89 if (debug_mode) { 78 if (debug_mode) {
90 if (data.time < last_timestamp) { 79 if (sample->time < last_timestamp) {
91 pr_err("Samples misordered, previous: %llu " 80 pr_err("Samples misordered, previous: %llu "
92 "this: %llu\n", last_timestamp, 81 "this: %llu\n", last_timestamp,
93 data.time); 82 sample->time);
94 nr_unordered++; 83 nr_unordered++;
95 } 84 }
96 last_timestamp = data.time; 85 last_timestamp = sample->time;
97 return 0; 86 return 0;
98 } 87 }
99 /* 88 /*
@@ -101,18 +90,19 @@ static int process_sample_event(event_t *event, struct perf_session *session)
101 * field, although it should be the same than this perf 90 * field, although it should be the same than this perf
102 * event pid 91 * event pid
103 */ 92 */
104 scripting_ops->process_event(data.cpu, data.raw_data, 93 scripting_ops->process_event(sample->cpu, sample->raw_data,
105 data.raw_size, 94 sample->raw_size,
106 data.time, thread->comm); 95 sample->time, thread->comm);
107 } 96 }
108 97
109 session->hists.stats.total_period += data.period; 98 session->hists.stats.total_period += sample->period;
110 return 0; 99 return 0;
111} 100}
112 101
113static u64 nr_lost; 102static u64 nr_lost;
114 103
115static int process_lost_event(event_t *event, struct perf_session *session __used) 104static int process_lost_event(event_t *event, struct sample_data *sample __used,
105 struct perf_session *session __used)
116{ 106{
117 nr_lost += event->lost.lost; 107 nr_lost += event->lost.lost;
118 108
@@ -137,7 +127,7 @@ static void sig_handler(int sig __unused)
137 session_done = 1; 127 session_done = 1;
138} 128}
139 129
140static int __cmd_trace(struct perf_session *session) 130static int __cmd_script(struct perf_session *session)
141{ 131{
142 int ret; 132 int ret;
143 133
@@ -247,7 +237,7 @@ static void list_available_languages(void)
247 237
248 fprintf(stderr, "\n"); 238 fprintf(stderr, "\n");
249 fprintf(stderr, "Scripting language extensions (used in " 239 fprintf(stderr, "Scripting language extensions (used in "
250 "perf trace -s [spec:]script.[spec]):\n\n"); 240 "perf script -s [spec:]script.[spec]):\n\n");
251 241
252 list_for_each_entry(s, &script_specs, node) 242 list_for_each_entry(s, &script_specs, node)
253 fprintf(stderr, " %-42s [%s]\n", s->spec, s->ops->name); 243 fprintf(stderr, " %-42s [%s]\n", s->spec, s->ops->name);
@@ -301,17 +291,34 @@ static int parse_scriptname(const struct option *opt __used,
301 return 0; 291 return 0;
302} 292}
303 293
304#define for_each_lang(scripts_dir, lang_dirent, lang_next) \ 294/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */
295static int is_directory(const char *base_path, const struct dirent *dent)
296{
297 char path[PATH_MAX];
298 struct stat st;
299
300 sprintf(path, "%s/%s", base_path, dent->d_name);
301 if (stat(path, &st))
302 return 0;
303
304 return S_ISDIR(st.st_mode);
305}
306
307#define for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next)\
305 while (!readdir_r(scripts_dir, &lang_dirent, &lang_next) && \ 308 while (!readdir_r(scripts_dir, &lang_dirent, &lang_next) && \
306 lang_next) \ 309 lang_next) \
307 if (lang_dirent.d_type == DT_DIR && \ 310 if ((lang_dirent.d_type == DT_DIR || \
311 (lang_dirent.d_type == DT_UNKNOWN && \
312 is_directory(scripts_path, &lang_dirent))) && \
308 (strcmp(lang_dirent.d_name, ".")) && \ 313 (strcmp(lang_dirent.d_name, ".")) && \
309 (strcmp(lang_dirent.d_name, ".."))) 314 (strcmp(lang_dirent.d_name, "..")))
310 315
311#define for_each_script(lang_dir, script_dirent, script_next) \ 316#define for_each_script(lang_path, lang_dir, script_dirent, script_next)\
312 while (!readdir_r(lang_dir, &script_dirent, &script_next) && \ 317 while (!readdir_r(lang_dir, &script_dirent, &script_next) && \
313 script_next) \ 318 script_next) \
314 if (script_dirent.d_type != DT_DIR) 319 if (script_dirent.d_type != DT_DIR && \
320 (script_dirent.d_type != DT_UNKNOWN || \
321 !is_directory(lang_path, &script_dirent)))
315 322
316 323
317#define RECORD_SUFFIX "-record" 324#define RECORD_SUFFIX "-record"
@@ -380,10 +387,10 @@ out_delete_desc:
380 return NULL; 387 return NULL;
381} 388}
382 389
383static char *ends_with(char *str, const char *suffix) 390static const char *ends_with(const char *str, const char *suffix)
384{ 391{
385 size_t suffix_len = strlen(suffix); 392 size_t suffix_len = strlen(suffix);
386 char *p = str; 393 const char *p = str;
387 394
388 if (strlen(str) > suffix_len) { 395 if (strlen(str) > suffix_len) {
389 p = str + strlen(str) - suffix_len; 396 p = str + strlen(str) - suffix_len;
@@ -466,16 +473,16 @@ static int list_available_scripts(const struct option *opt __used,
466 if (!scripts_dir) 473 if (!scripts_dir)
467 return -1; 474 return -1;
468 475
469 for_each_lang(scripts_dir, lang_dirent, lang_next) { 476 for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) {
470 snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, 477 snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
471 lang_dirent.d_name); 478 lang_dirent.d_name);
472 lang_dir = opendir(lang_path); 479 lang_dir = opendir(lang_path);
473 if (!lang_dir) 480 if (!lang_dir)
474 continue; 481 continue;
475 482
476 for_each_script(lang_dir, script_dirent, script_next) { 483 for_each_script(lang_path, lang_dir, script_dirent, script_next) {
477 script_root = strdup(script_dirent.d_name); 484 script_root = strdup(script_dirent.d_name);
478 str = ends_with(script_root, REPORT_SUFFIX); 485 str = (char *)ends_with(script_root, REPORT_SUFFIX);
479 if (str) { 486 if (str) {
480 *str = '\0'; 487 *str = '\0';
481 desc = script_desc__findnew(script_root); 488 desc = script_desc__findnew(script_root);
@@ -514,16 +521,16 @@ static char *get_script_path(const char *script_root, const char *suffix)
514 if (!scripts_dir) 521 if (!scripts_dir)
515 return NULL; 522 return NULL;
516 523
517 for_each_lang(scripts_dir, lang_dirent, lang_next) { 524 for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) {
518 snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, 525 snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
519 lang_dirent.d_name); 526 lang_dirent.d_name);
520 lang_dir = opendir(lang_path); 527 lang_dir = opendir(lang_path);
521 if (!lang_dir) 528 if (!lang_dir)
522 continue; 529 continue;
523 530
524 for_each_script(lang_dir, script_dirent, script_next) { 531 for_each_script(lang_path, lang_dir, script_dirent, script_next) {
525 __script_root = strdup(script_dirent.d_name); 532 __script_root = strdup(script_dirent.d_name);
526 str = ends_with(__script_root, suffix); 533 str = (char *)ends_with(__script_root, suffix);
527 if (str) { 534 if (str) {
528 *str = '\0'; 535 *str = '\0';
529 if (strcmp(__script_root, script_root)) 536 if (strcmp(__script_root, script_root))
@@ -543,7 +550,7 @@ static char *get_script_path(const char *script_root, const char *suffix)
543 550
544static bool is_top_script(const char *script_path) 551static bool is_top_script(const char *script_path)
545{ 552{
546 return ends_with((char *)script_path, "top") == NULL ? false : true; 553 return ends_with(script_path, "top") == NULL ? false : true;
547} 554}
548 555
549static int has_required_arg(char *script_path) 556static int has_required_arg(char *script_path)
@@ -569,12 +576,12 @@ out:
569 return n_args; 576 return n_args;
570} 577}
571 578
572static const char * const trace_usage[] = { 579static const char * const script_usage[] = {
573 "perf trace [<options>]", 580 "perf script [<options>]",
574 "perf trace [<options>] record <script> [<record-options>] <command>", 581 "perf script [<options>] record <script> [<record-options>] <command>",
575 "perf trace [<options>] report <script> [script-args]", 582 "perf script [<options>] report <script> [script-args]",
576 "perf trace [<options>] <script> [<record-options>] <command>", 583 "perf script [<options>] <script> [<record-options>] <command>",
577 "perf trace [<options>] <top-script> [script-args]", 584 "perf script [<options>] <top-script> [script-args]",
578 NULL 585 NULL
579}; 586};
580 587
@@ -591,7 +598,7 @@ static const struct option options[] = {
591 "script file name (lang:script name, script name, or *)", 598 "script file name (lang:script name, script name, or *)",
592 parse_scriptname), 599 parse_scriptname),
593 OPT_STRING('g', "gen-script", &generate_script_lang, "lang", 600 OPT_STRING('g', "gen-script", &generate_script_lang, "lang",
594 "generate perf-trace.xx script in specified language"), 601 "generate perf-script.xx script in specified language"),
595 OPT_STRING('i', "input", &input_name, "file", 602 OPT_STRING('i', "input", &input_name, "file",
596 "input file name"), 603 "input file name"),
597 OPT_BOOLEAN('d', "debug-mode", &debug_mode, 604 OPT_BOOLEAN('d', "debug-mode", &debug_mode,
@@ -614,7 +621,7 @@ static bool have_cmd(int argc, const char **argv)
614 return argc != 0; 621 return argc != 0;
615} 622}
616 623
617int cmd_trace(int argc, const char **argv, const char *prefix __used) 624int cmd_script(int argc, const char **argv, const char *prefix __used)
618{ 625{
619 char *rec_script_path = NULL; 626 char *rec_script_path = NULL;
620 char *rep_script_path = NULL; 627 char *rep_script_path = NULL;
@@ -626,7 +633,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
626 633
627 setup_scripting(); 634 setup_scripting();
628 635
629 argc = parse_options(argc, argv, options, trace_usage, 636 argc = parse_options(argc, argv, options, script_usage,
630 PARSE_OPT_STOP_AT_NON_OPTION); 637 PARSE_OPT_STOP_AT_NON_OPTION);
631 638
632 if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) { 639 if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) {
@@ -640,7 +647,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
640 if (!rep_script_path) { 647 if (!rep_script_path) {
641 fprintf(stderr, 648 fprintf(stderr,
642 "Please specify a valid report script" 649 "Please specify a valid report script"
643 "(see 'perf trace -l' for listing)\n"); 650 "(see 'perf script -l' for listing)\n");
644 return -1; 651 return -1;
645 } 652 }
646 } 653 }
@@ -658,8 +665,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
658 665
659 if (!rec_script_path && !rep_script_path) { 666 if (!rec_script_path && !rep_script_path) {
660 fprintf(stderr, " Couldn't find script %s\n\n See perf" 667 fprintf(stderr, " Couldn't find script %s\n\n See perf"
661 " trace -l for available scripts.\n", argv[0]); 668 " script -l for available scripts.\n", argv[0]);
662 usage_with_options(trace_usage, options); 669 usage_with_options(script_usage, options);
663 } 670 }
664 671
665 if (is_top_script(argv[0])) { 672 if (is_top_script(argv[0])) {
@@ -671,9 +678,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
671 rec_args = (argc - 1) - rep_args; 678 rec_args = (argc - 1) - rep_args;
672 if (rec_args < 0) { 679 if (rec_args < 0) {
673 fprintf(stderr, " %s script requires options." 680 fprintf(stderr, " %s script requires options."
674 "\n\n See perf trace -l for available " 681 "\n\n See perf script -l for available "
675 "scripts and options.\n", argv[0]); 682 "scripts and options.\n", argv[0]);
676 usage_with_options(trace_usage, options); 683 usage_with_options(script_usage, options);
677 } 684 }
678 } 685 }
679 686
@@ -806,7 +813,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
806 return -1; 813 return -1;
807 } 814 }
808 815
809 err = scripting_ops->generate_script("perf-trace"); 816 err = scripting_ops->generate_script("perf-script");
810 goto out; 817 goto out;
811 } 818 }
812 819
@@ -814,10 +821,10 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
814 err = scripting_ops->start_script(script_name, argc, argv); 821 err = scripting_ops->start_script(script_name, argc, argv);
815 if (err) 822 if (err)
816 goto out; 823 goto out;
817 pr_debug("perf trace started with script %s\n\n", script_name); 824 pr_debug("perf script started with script %s\n\n", script_name);
818 } 825 }
819 826
820 err = __cmd_trace(session); 827 err = __cmd_script(session);
821 828
822 perf_session__delete(session); 829 perf_session__delete(session);
823 cleanup_scripting(); 830 cleanup_scripting();
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a6b4d44f9502..7ff746da7e6c 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -52,6 +52,8 @@
52#include <math.h> 52#include <math.h>
53#include <locale.h> 53#include <locale.h>
54 54
55#define DEFAULT_SEPARATOR " "
56
55static struct perf_event_attr default_attrs[] = { 57static struct perf_event_attr default_attrs[] = {
56 58
57 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 59 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
@@ -75,20 +77,30 @@ static int run_idx = 0;
75static int run_count = 1; 77static int run_count = 1;
76static bool no_inherit = false; 78static bool no_inherit = false;
77static bool scale = true; 79static bool scale = true;
80static bool no_aggr = false;
78static pid_t target_pid = -1; 81static pid_t target_pid = -1;
79static pid_t target_tid = -1; 82static pid_t target_tid = -1;
80static pid_t *all_tids = NULL; 83static pid_t *all_tids = NULL;
81static int thread_num = 0; 84static int thread_num = 0;
82static pid_t child_pid = -1; 85static pid_t child_pid = -1;
83static bool null_run = false; 86static bool null_run = false;
84static bool big_num = false; 87static bool big_num = true;
88static int big_num_opt = -1;
85static const char *cpu_list; 89static const char *cpu_list;
90static const char *csv_sep = NULL;
91static bool csv_output = false;
86 92
87 93
88static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; 94static int *fd[MAX_NR_CPUS][MAX_COUNTERS];
89 95
90static int event_scaled[MAX_COUNTERS]; 96static int event_scaled[MAX_COUNTERS];
91 97
98static struct {
99 u64 val;
100 u64 ena;
101 u64 run;
102} cpu_counts[MAX_NR_CPUS][MAX_COUNTERS];
103
92static volatile int done = 0; 104static volatile int done = 0;
93 105
94struct stats 106struct stats
@@ -136,19 +148,19 @@ static double stddev_stats(struct stats *stats)
136} 148}
137 149
138struct stats event_res_stats[MAX_COUNTERS][3]; 150struct stats event_res_stats[MAX_COUNTERS][3];
139struct stats runtime_nsecs_stats; 151struct stats runtime_nsecs_stats[MAX_NR_CPUS];
152struct stats runtime_cycles_stats[MAX_NR_CPUS];
153struct stats runtime_branches_stats[MAX_NR_CPUS];
140struct stats walltime_nsecs_stats; 154struct stats walltime_nsecs_stats;
141struct stats runtime_cycles_stats;
142struct stats runtime_branches_stats;
143 155
144#define MATCH_EVENT(t, c, counter) \ 156#define MATCH_EVENT(t, c, counter) \
145 (attrs[counter].type == PERF_TYPE_##t && \ 157 (attrs[counter].type == PERF_TYPE_##t && \
146 attrs[counter].config == PERF_COUNT_##c) 158 attrs[counter].config == PERF_COUNT_##c)
147 159
148#define ERR_PERF_OPEN \ 160#define ERR_PERF_OPEN \
149"Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n" 161"counter %d, sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information."
150 162
151static int create_perf_stat_counter(int counter) 163static int create_perf_stat_counter(int counter, bool *perm_err)
152{ 164{
153 struct perf_event_attr *attr = attrs + counter; 165 struct perf_event_attr *attr = attrs + counter;
154 int thread; 166 int thread;
@@ -164,11 +176,14 @@ static int create_perf_stat_counter(int counter)
164 for (cpu = 0; cpu < nr_cpus; cpu++) { 176 for (cpu = 0; cpu < nr_cpus; cpu++) {
165 fd[cpu][counter][0] = sys_perf_event_open(attr, 177 fd[cpu][counter][0] = sys_perf_event_open(attr,
166 -1, cpumap[cpu], -1, 0); 178 -1, cpumap[cpu], -1, 0);
167 if (fd[cpu][counter][0] < 0) 179 if (fd[cpu][counter][0] < 0) {
168 pr_debug(ERR_PERF_OPEN, counter, 180 if (errno == EPERM || errno == EACCES)
181 *perm_err = true;
182 error(ERR_PERF_OPEN, counter,
169 fd[cpu][counter][0], strerror(errno)); 183 fd[cpu][counter][0], strerror(errno));
170 else 184 } else {
171 ++ncreated; 185 ++ncreated;
186 }
172 } 187 }
173 } else { 188 } else {
174 attr->inherit = !no_inherit; 189 attr->inherit = !no_inherit;
@@ -179,12 +194,15 @@ static int create_perf_stat_counter(int counter)
179 for (thread = 0; thread < thread_num; thread++) { 194 for (thread = 0; thread < thread_num; thread++) {
180 fd[0][counter][thread] = sys_perf_event_open(attr, 195 fd[0][counter][thread] = sys_perf_event_open(attr,
181 all_tids[thread], -1, -1, 0); 196 all_tids[thread], -1, -1, 0);
182 if (fd[0][counter][thread] < 0) 197 if (fd[0][counter][thread] < 0) {
183 pr_debug(ERR_PERF_OPEN, counter, 198 if (errno == EPERM || errno == EACCES)
199 *perm_err = true;
200 error(ERR_PERF_OPEN, counter,
184 fd[0][counter][thread], 201 fd[0][counter][thread],
185 strerror(errno)); 202 strerror(errno));
186 else 203 } else {
187 ++ncreated; 204 ++ncreated;
205 }
188 } 206 }
189 } 207 }
190 208
@@ -205,8 +223,9 @@ static inline int nsec_counter(int counter)
205 223
206/* 224/*
207 * Read out the results of a single counter: 225 * Read out the results of a single counter:
226 * aggregate counts across CPUs in system-wide mode
208 */ 227 */
209static void read_counter(int counter) 228static void read_counter_aggr(int counter)
210{ 229{
211 u64 count[3], single_count[3]; 230 u64 count[3], single_count[3];
212 int cpu; 231 int cpu;
@@ -264,11 +283,58 @@ static void read_counter(int counter)
264 * Save the full runtime - to allow normalization during printout: 283 * Save the full runtime - to allow normalization during printout:
265 */ 284 */
266 if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) 285 if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
267 update_stats(&runtime_nsecs_stats, count[0]); 286 update_stats(&runtime_nsecs_stats[0], count[0]);
268 if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) 287 if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
269 update_stats(&runtime_cycles_stats, count[0]); 288 update_stats(&runtime_cycles_stats[0], count[0]);
270 if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter)) 289 if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter))
271 update_stats(&runtime_branches_stats, count[0]); 290 update_stats(&runtime_branches_stats[0], count[0]);
291}
292
293/*
294 * Read out the results of a single counter:
295 * do not aggregate counts across CPUs in system-wide mode
296 */
297static void read_counter(int counter)
298{
299 u64 count[3];
300 int cpu;
301 size_t res, nv;
302
303 count[0] = count[1] = count[2] = 0;
304
305 nv = scale ? 3 : 1;
306
307 for (cpu = 0; cpu < nr_cpus; cpu++) {
308
309 if (fd[cpu][counter][0] < 0)
310 continue;
311
312 res = read(fd[cpu][counter][0], count, nv * sizeof(u64));
313
314 assert(res == nv * sizeof(u64));
315
316 close(fd[cpu][counter][0]);
317 fd[cpu][counter][0] = -1;
318
319 if (scale) {
320 if (count[2] == 0) {
321 count[0] = 0;
322 } else if (count[2] < count[1]) {
323 count[0] = (unsigned long long)
324 ((double)count[0] * count[1] / count[2] + 0.5);
325 }
326 }
327 cpu_counts[cpu][counter].val = count[0]; /* scaled count */
328 cpu_counts[cpu][counter].ena = count[1];
329 cpu_counts[cpu][counter].run = count[2];
330
331 if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
332 update_stats(&runtime_nsecs_stats[cpu], count[0]);
333 if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
334 update_stats(&runtime_cycles_stats[cpu], count[0]);
335 if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter))
336 update_stats(&runtime_branches_stats[cpu], count[0]);
337 }
272} 338}
273 339
274static int run_perf_stat(int argc __used, const char **argv) 340static int run_perf_stat(int argc __used, const char **argv)
@@ -277,6 +343,7 @@ static int run_perf_stat(int argc __used, const char **argv)
277 int status = 0; 343 int status = 0;
278 int counter, ncreated = 0; 344 int counter, ncreated = 0;
279 int child_ready_pipe[2], go_pipe[2]; 345 int child_ready_pipe[2], go_pipe[2];
346 bool perm_err = false;
280 const bool forks = (argc > 0); 347 const bool forks = (argc > 0);
281 char buf; 348 char buf;
282 349
@@ -335,12 +402,15 @@ static int run_perf_stat(int argc __used, const char **argv)
335 } 402 }
336 403
337 for (counter = 0; counter < nr_counters; counter++) 404 for (counter = 0; counter < nr_counters; counter++)
338 ncreated += create_perf_stat_counter(counter); 405 ncreated += create_perf_stat_counter(counter, &perm_err);
339 406
340 if (ncreated == 0) { 407 if (ncreated < nr_counters) {
341 pr_err("No permission to collect %sstats.\n" 408 if (perm_err)
342 "Consider tweaking /proc/sys/kernel/perf_event_paranoid.\n", 409 error("You may not have permission to collect %sstats.\n"
343 system_wide ? "system-wide " : ""); 410 "\t Consider tweaking"
411 " /proc/sys/kernel/perf_event_paranoid or running as root.",
412 system_wide ? "system-wide " : "");
413 die("Not all events could be opened.\n");
344 if (child_pid != -1) 414 if (child_pid != -1)
345 kill(child_pid, SIGTERM); 415 kill(child_pid, SIGTERM);
346 return -1; 416 return -1;
@@ -362,9 +432,13 @@ static int run_perf_stat(int argc __used, const char **argv)
362 432
363 update_stats(&walltime_nsecs_stats, t1 - t0); 433 update_stats(&walltime_nsecs_stats, t1 - t0);
364 434
365 for (counter = 0; counter < nr_counters; counter++) 435 if (no_aggr) {
366 read_counter(counter); 436 for (counter = 0; counter < nr_counters; counter++)
367 437 read_counter(counter);
438 } else {
439 for (counter = 0; counter < nr_counters; counter++)
440 read_counter_aggr(counter);
441 }
368 return WEXITSTATUS(status); 442 return WEXITSTATUS(status);
369} 443}
370 444
@@ -377,11 +451,21 @@ static void print_noise(int counter, double avg)
377 100 * stddev_stats(&event_res_stats[counter][0]) / avg); 451 100 * stddev_stats(&event_res_stats[counter][0]) / avg);
378} 452}
379 453
380static void nsec_printout(int counter, double avg) 454static void nsec_printout(int cpu, int counter, double avg)
381{ 455{
382 double msecs = avg / 1e6; 456 double msecs = avg / 1e6;
457 char cpustr[16] = { '\0', };
458 const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-24s";
459
460 if (no_aggr)
461 sprintf(cpustr, "CPU%*d%s",
462 csv_output ? 0 : -4,
463 cpumap[cpu], csv_sep);
383 464
384 fprintf(stderr, " %18.6f %-24s", msecs, event_name(counter)); 465 fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(counter));
466
467 if (csv_output)
468 return;
385 469
386 if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) { 470 if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) {
387 fprintf(stderr, " # %10.3f CPUs ", 471 fprintf(stderr, " # %10.3f CPUs ",
@@ -389,33 +473,49 @@ static void nsec_printout(int counter, double avg)
389 } 473 }
390} 474}
391 475
392static void abs_printout(int counter, double avg) 476static void abs_printout(int cpu, int counter, double avg)
393{ 477{
394 double total, ratio = 0.0; 478 double total, ratio = 0.0;
479 char cpustr[16] = { '\0', };
480 const char *fmt;
481
482 if (csv_output)
483 fmt = "%s%.0f%s%s";
484 else if (big_num)
485 fmt = "%s%'18.0f%s%-24s";
486 else
487 fmt = "%s%18.0f%s%-24s";
395 488
396 if (big_num) 489 if (no_aggr)
397 fprintf(stderr, " %'18.0f %-24s", avg, event_name(counter)); 490 sprintf(cpustr, "CPU%*d%s",
491 csv_output ? 0 : -4,
492 cpumap[cpu], csv_sep);
398 else 493 else
399 fprintf(stderr, " %18.0f %-24s", avg, event_name(counter)); 494 cpu = 0;
495
496 fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(counter));
497
498 if (csv_output)
499 return;
400 500
401 if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) { 501 if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) {
402 total = avg_stats(&runtime_cycles_stats); 502 total = avg_stats(&runtime_cycles_stats[cpu]);
403 503
404 if (total) 504 if (total)
405 ratio = avg / total; 505 ratio = avg / total;
406 506
407 fprintf(stderr, " # %10.3f IPC ", ratio); 507 fprintf(stderr, " # %10.3f IPC ", ratio);
408 } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) && 508 } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) &&
409 runtime_branches_stats.n != 0) { 509 runtime_branches_stats[cpu].n != 0) {
410 total = avg_stats(&runtime_branches_stats); 510 total = avg_stats(&runtime_branches_stats[cpu]);
411 511
412 if (total) 512 if (total)
413 ratio = avg * 100 / total; 513 ratio = avg * 100 / total;
414 514
415 fprintf(stderr, " # %10.3f %% ", ratio); 515 fprintf(stderr, " # %10.3f %% ", ratio);
416 516
417 } else if (runtime_nsecs_stats.n != 0) { 517 } else if (runtime_nsecs_stats[cpu].n != 0) {
418 total = avg_stats(&runtime_nsecs_stats); 518 total = avg_stats(&runtime_nsecs_stats[cpu]);
419 519
420 if (total) 520 if (total)
421 ratio = 1000.0 * avg / total; 521 ratio = 1000.0 * avg / total;
@@ -426,22 +526,29 @@ static void abs_printout(int counter, double avg)
426 526
427/* 527/*
428 * Print out the results of a single counter: 528 * Print out the results of a single counter:
529 * aggregated counts in system-wide mode
429 */ 530 */
430static void print_counter(int counter) 531static void print_counter_aggr(int counter)
431{ 532{
432 double avg = avg_stats(&event_res_stats[counter][0]); 533 double avg = avg_stats(&event_res_stats[counter][0]);
433 int scaled = event_scaled[counter]; 534 int scaled = event_scaled[counter];
434 535
435 if (scaled == -1) { 536 if (scaled == -1) {
436 fprintf(stderr, " %18s %-24s\n", 537 fprintf(stderr, "%*s%s%-24s\n",
437 "<not counted>", event_name(counter)); 538 csv_output ? 0 : 18,
539 "<not counted>", csv_sep, event_name(counter));
438 return; 540 return;
439 } 541 }
440 542
441 if (nsec_counter(counter)) 543 if (nsec_counter(counter))
442 nsec_printout(counter, avg); 544 nsec_printout(-1, counter, avg);
443 else 545 else
444 abs_printout(counter, avg); 546 abs_printout(-1, counter, avg);
547
548 if (csv_output) {
549 fputc('\n', stderr);
550 return;
551 }
445 552
446 print_noise(counter, avg); 553 print_noise(counter, avg);
447 554
@@ -458,40 +565,91 @@ static void print_counter(int counter)
458 fprintf(stderr, "\n"); 565 fprintf(stderr, "\n");
459} 566}
460 567
568/*
569 * Print out the results of a single counter:
570 * does not use aggregated count in system-wide
571 */
572static void print_counter(int counter)
573{
574 u64 ena, run, val;
575 int cpu;
576
577 for (cpu = 0; cpu < nr_cpus; cpu++) {
578 val = cpu_counts[cpu][counter].val;
579 ena = cpu_counts[cpu][counter].ena;
580 run = cpu_counts[cpu][counter].run;
581 if (run == 0 || ena == 0) {
582 fprintf(stderr, "CPU%*d%s%*s%s%-24s",
583 csv_output ? 0 : -4,
584 cpumap[cpu], csv_sep,
585 csv_output ? 0 : 18,
586 "<not counted>", csv_sep,
587 event_name(counter));
588
589 fprintf(stderr, "\n");
590 continue;
591 }
592
593 if (nsec_counter(counter))
594 nsec_printout(cpu, counter, val);
595 else
596 abs_printout(cpu, counter, val);
597
598 if (!csv_output) {
599 print_noise(counter, 1.0);
600
601 if (run != ena) {
602 fprintf(stderr, " (scaled from %.2f%%)",
603 100.0 * run / ena);
604 }
605 }
606 fprintf(stderr, "\n");
607 }
608}
609
461static void print_stat(int argc, const char **argv) 610static void print_stat(int argc, const char **argv)
462{ 611{
463 int i, counter; 612 int i, counter;
464 613
465 fflush(stdout); 614 fflush(stdout);
466 615
467 fprintf(stderr, "\n"); 616 if (!csv_output) {
468 fprintf(stderr, " Performance counter stats for "); 617 fprintf(stderr, "\n");
469 if(target_pid == -1 && target_tid == -1) { 618 fprintf(stderr, " Performance counter stats for ");
470 fprintf(stderr, "\'%s", argv[0]); 619 if(target_pid == -1 && target_tid == -1) {
471 for (i = 1; i < argc; i++) 620 fprintf(stderr, "\'%s", argv[0]);
472 fprintf(stderr, " %s", argv[i]); 621 for (i = 1; i < argc; i++)
473 } else if (target_pid != -1) 622 fprintf(stderr, " %s", argv[i]);
474 fprintf(stderr, "process id \'%d", target_pid); 623 } else if (target_pid != -1)
475 else 624 fprintf(stderr, "process id \'%d", target_pid);
476 fprintf(stderr, "thread id \'%d", target_tid); 625 else
477 626 fprintf(stderr, "thread id \'%d", target_tid);
478 fprintf(stderr, "\'"); 627
479 if (run_count > 1) 628 fprintf(stderr, "\'");
480 fprintf(stderr, " (%d runs)", run_count); 629 if (run_count > 1)
481 fprintf(stderr, ":\n\n"); 630 fprintf(stderr, " (%d runs)", run_count);
631 fprintf(stderr, ":\n\n");
632 }
482 633
483 for (counter = 0; counter < nr_counters; counter++) 634 if (no_aggr) {
484 print_counter(counter); 635 for (counter = 0; counter < nr_counters; counter++)
636 print_counter(counter);
637 } else {
638 for (counter = 0; counter < nr_counters; counter++)
639 print_counter_aggr(counter);
640 }
485 641
486 fprintf(stderr, "\n"); 642 if (!csv_output) {
487 fprintf(stderr, " %18.9f seconds time elapsed", 643 fprintf(stderr, "\n");
488 avg_stats(&walltime_nsecs_stats)/1e9); 644 fprintf(stderr, " %18.9f seconds time elapsed",
489 if (run_count > 1) { 645 avg_stats(&walltime_nsecs_stats)/1e9);
490 fprintf(stderr, " ( +- %7.3f%% )", 646 if (run_count > 1) {
647 fprintf(stderr, " ( +- %7.3f%% )",
491 100*stddev_stats(&walltime_nsecs_stats) / 648 100*stddev_stats(&walltime_nsecs_stats) /
492 avg_stats(&walltime_nsecs_stats)); 649 avg_stats(&walltime_nsecs_stats));
650 }
651 fprintf(stderr, "\n\n");
493 } 652 }
494 fprintf(stderr, "\n\n");
495} 653}
496 654
497static volatile int signr = -1; 655static volatile int signr = -1;
@@ -521,6 +679,13 @@ static const char * const stat_usage[] = {
521 NULL 679 NULL
522}; 680};
523 681
682static int stat__set_big_num(const struct option *opt __used,
683 const char *s __used, int unset)
684{
685 big_num_opt = unset ? 0 : 1;
686 return 0;
687}
688
524static const struct option options[] = { 689static const struct option options[] = {
525 OPT_CALLBACK('e', "event", NULL, "event", 690 OPT_CALLBACK('e', "event", NULL, "event",
526 "event selector. use 'perf list' to list available events", 691 "event selector. use 'perf list' to list available events",
@@ -541,10 +706,15 @@ static const struct option options[] = {
541 "repeat command and print average + stddev (max: 100)"), 706 "repeat command and print average + stddev (max: 100)"),
542 OPT_BOOLEAN('n', "null", &null_run, 707 OPT_BOOLEAN('n', "null", &null_run,
543 "null run - dont start any counters"), 708 "null run - dont start any counters"),
544 OPT_BOOLEAN('B', "big-num", &big_num, 709 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
545 "print large numbers with thousands\' separators"), 710 "print large numbers with thousands\' separators",
711 stat__set_big_num),
546 OPT_STRING('C', "cpu", &cpu_list, "cpu", 712 OPT_STRING('C', "cpu", &cpu_list, "cpu",
547 "list of cpus to monitor in system-wide"), 713 "list of cpus to monitor in system-wide"),
714 OPT_BOOLEAN('A', "no-aggr", &no_aggr,
715 "disable CPU count aggregation"),
716 OPT_STRING('x', "field-separator", &csv_sep, "separator",
717 "print counts with custom separator"),
548 OPT_END() 718 OPT_END()
549}; 719};
550 720
@@ -557,11 +727,34 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
557 727
558 argc = parse_options(argc, argv, options, stat_usage, 728 argc = parse_options(argc, argv, options, stat_usage,
559 PARSE_OPT_STOP_AT_NON_OPTION); 729 PARSE_OPT_STOP_AT_NON_OPTION);
730
731 if (csv_sep)
732 csv_output = true;
733 else
734 csv_sep = DEFAULT_SEPARATOR;
735
736 /*
737 * let the spreadsheet do the pretty-printing
738 */
739 if (csv_output) {
740 /* User explicitely passed -B? */
741 if (big_num_opt == 1) {
742 fprintf(stderr, "-B option not supported with -x\n");
743 usage_with_options(stat_usage, options);
744 } else /* Nope, so disable big number formatting */
745 big_num = false;
746 } else if (big_num_opt == 0) /* User passed --no-big-num */
747 big_num = false;
748
560 if (!argc && target_pid == -1 && target_tid == -1) 749 if (!argc && target_pid == -1 && target_tid == -1)
561 usage_with_options(stat_usage, options); 750 usage_with_options(stat_usage, options);
562 if (run_count <= 0) 751 if (run_count <= 0)
563 usage_with_options(stat_usage, options); 752 usage_with_options(stat_usage, options);
564 753
754 /* no_aggr is for system-wide only */
755 if (no_aggr && !system_wide)
756 usage_with_options(stat_usage, options);
757
565 /* Set attrs and nr_counters if no event is selected and !null_run */ 758 /* Set attrs and nr_counters if no event is selected and !null_run */
566 if (!null_run && !nr_counters) { 759 if (!null_run && !nr_counters) {
567 memcpy(attrs, default_attrs, sizeof(default_attrs)); 760 memcpy(attrs, default_attrs, sizeof(default_attrs));
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 9bcc38f0b706..d2fc46103f83 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -272,19 +272,22 @@ static int cpus_cstate_state[MAX_CPUS];
272static u64 cpus_pstate_start_times[MAX_CPUS]; 272static u64 cpus_pstate_start_times[MAX_CPUS];
273static u64 cpus_pstate_state[MAX_CPUS]; 273static u64 cpus_pstate_state[MAX_CPUS];
274 274
275static int process_comm_event(event_t *event, struct perf_session *session __used) 275static int process_comm_event(event_t *event, struct sample_data *sample __used,
276 struct perf_session *session __used)
276{ 277{
277 pid_set_comm(event->comm.tid, event->comm.comm); 278 pid_set_comm(event->comm.tid, event->comm.comm);
278 return 0; 279 return 0;
279} 280}
280 281
281static int process_fork_event(event_t *event, struct perf_session *session __used) 282static int process_fork_event(event_t *event, struct sample_data *sample __used,
283 struct perf_session *session __used)
282{ 284{
283 pid_fork(event->fork.pid, event->fork.ppid, event->fork.time); 285 pid_fork(event->fork.pid, event->fork.ppid, event->fork.time);
284 return 0; 286 return 0;
285} 287}
286 288
287static int process_exit_event(event_t *event, struct perf_session *session __used) 289static int process_exit_event(event_t *event, struct sample_data *sample __used,
290 struct perf_session *session __used)
288{ 291{
289 pid_exit(event->fork.pid, event->fork.time); 292 pid_exit(event->fork.pid, event->fork.time);
290 return 0; 293 return 0;
@@ -470,24 +473,21 @@ static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te)
470} 473}
471 474
472 475
473static int process_sample_event(event_t *event, struct perf_session *session) 476static int process_sample_event(event_t *event __used,
477 struct sample_data *sample,
478 struct perf_session *session)
474{ 479{
475 struct sample_data data;
476 struct trace_entry *te; 480 struct trace_entry *te;
477 481
478 memset(&data, 0, sizeof(data));
479
480 event__parse_sample(event, session->sample_type, &data);
481
482 if (session->sample_type & PERF_SAMPLE_TIME) { 482 if (session->sample_type & PERF_SAMPLE_TIME) {
483 if (!first_time || first_time > data.time) 483 if (!first_time || first_time > sample->time)
484 first_time = data.time; 484 first_time = sample->time;
485 if (last_time < data.time) 485 if (last_time < sample->time)
486 last_time = data.time; 486 last_time = sample->time;
487 } 487 }
488 488
489 te = (void *)data.raw_data; 489 te = (void *)sample->raw_data;
490 if (session->sample_type & PERF_SAMPLE_RAW && data.raw_size > 0) { 490 if (session->sample_type & PERF_SAMPLE_RAW && sample->raw_size > 0) {
491 char *event_str; 491 char *event_str;
492 struct power_entry *pe; 492 struct power_entry *pe;
493 493
@@ -499,19 +499,19 @@ static int process_sample_event(event_t *event, struct perf_session *session)
499 return 0; 499 return 0;
500 500
501 if (strcmp(event_str, "power:power_start") == 0) 501 if (strcmp(event_str, "power:power_start") == 0)
502 c_state_start(pe->cpu_id, data.time, pe->value); 502 c_state_start(pe->cpu_id, sample->time, pe->value);
503 503
504 if (strcmp(event_str, "power:power_end") == 0) 504 if (strcmp(event_str, "power:power_end") == 0)
505 c_state_end(pe->cpu_id, data.time); 505 c_state_end(pe->cpu_id, sample->time);
506 506
507 if (strcmp(event_str, "power:power_frequency") == 0) 507 if (strcmp(event_str, "power:power_frequency") == 0)
508 p_state_change(pe->cpu_id, data.time, pe->value); 508 p_state_change(pe->cpu_id, sample->time, pe->value);
509 509
510 if (strcmp(event_str, "sched:sched_wakeup") == 0) 510 if (strcmp(event_str, "sched:sched_wakeup") == 0)
511 sched_wakeup(data.cpu, data.time, data.pid, te); 511 sched_wakeup(sample->cpu, sample->time, sample->pid, te);
512 512
513 if (strcmp(event_str, "sched:sched_switch") == 0) 513 if (strcmp(event_str, "sched:sched_switch") == 0)
514 sched_switch(data.cpu, data.time, te); 514 sched_switch(sample->cpu, sample->time, te);
515 } 515 }
516 return 0; 516 return 0;
517} 517}
@@ -989,6 +989,9 @@ static int __cmd_record(int argc, const char **argv)
989 rec_argc = ARRAY_SIZE(record_args) + argc - 1; 989 rec_argc = ARRAY_SIZE(record_args) + argc - 1;
990 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 990 rec_argv = calloc(rec_argc + 1, sizeof(char *));
991 991
992 if (rec_argv == NULL)
993 return -ENOMEM;
994
992 for (i = 0; i < ARRAY_SIZE(record_args); i++) 995 for (i = 0; i < ARRAY_SIZE(record_args); i++)
993 rec_argv[i] = strdup(record_args[i]); 996 rec_argv[i] = strdup(record_args[i]);
994 997
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index dd625808c2a5..0515ce9d3d3e 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -977,12 +977,12 @@ static int symbol_filter(struct map *map, struct symbol *sym)
977} 977}
978 978
979static void event__process_sample(const event_t *self, 979static void event__process_sample(const event_t *self,
980 struct perf_session *session, int counter) 980 struct sample_data *sample,
981 struct perf_session *session, int counter)
981{ 982{
982 u64 ip = self->ip.ip; 983 u64 ip = self->ip.ip;
983 struct sym_entry *syme; 984 struct sym_entry *syme;
984 struct addr_location al; 985 struct addr_location al;
985 struct sample_data data;
986 struct machine *machine; 986 struct machine *machine;
987 u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 987 u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
988 988
@@ -1025,7 +1025,7 @@ static void event__process_sample(const event_t *self,
1025 if (self->header.misc & PERF_RECORD_MISC_EXACT_IP) 1025 if (self->header.misc & PERF_RECORD_MISC_EXACT_IP)
1026 exact_samples++; 1026 exact_samples++;
1027 1027
1028 if (event__preprocess_sample(self, session, &al, &data, 1028 if (event__preprocess_sample(self, session, &al, sample,
1029 symbol_filter) < 0 || 1029 symbol_filter) < 0 ||
1030 al.filtered) 1030 al.filtered)
1031 return; 1031 return;
@@ -1105,6 +1105,7 @@ static void perf_session__mmap_read_counter(struct perf_session *self,
1105 unsigned int head = mmap_read_head(md); 1105 unsigned int head = mmap_read_head(md);
1106 unsigned int old = md->prev; 1106 unsigned int old = md->prev;
1107 unsigned char *data = md->base + page_size; 1107 unsigned char *data = md->base + page_size;
1108 struct sample_data sample;
1108 int diff; 1109 int diff;
1109 1110
1110 /* 1111 /*
@@ -1152,10 +1153,11 @@ static void perf_session__mmap_read_counter(struct perf_session *self,
1152 event = &event_copy; 1153 event = &event_copy;
1153 } 1154 }
1154 1155
1156 event__parse_sample(event, self, &sample);
1155 if (event->header.type == PERF_RECORD_SAMPLE) 1157 if (event->header.type == PERF_RECORD_SAMPLE)
1156 event__process_sample(event, self, md->counter); 1158 event__process_sample(event, &sample, self, md->counter);
1157 else 1159 else
1158 event__process(event, self); 1160 event__process(event, &sample, self);
1159 old += size; 1161 old += size;
1160 } 1162 }
1161 1163
@@ -1214,7 +1216,9 @@ try_again:
1214 int err = errno; 1216 int err = errno;
1215 1217
1216 if (err == EPERM || err == EACCES) 1218 if (err == EPERM || err == EACCES)
1217 die("No permission - are you root?\n"); 1219 die("Permission error - are you root?\n"
1220 "\t Consider tweaking"
1221 " /proc/sys/kernel/perf_event_paranoid.\n");
1218 /* 1222 /*
1219 * If it's cycles then fall back to hrtimer 1223 * If it's cycles then fall back to hrtimer
1220 * based cpu-clock-tick sw counter, which 1224 * based cpu-clock-tick sw counter, which
@@ -1231,7 +1235,7 @@ try_again:
1231 goto try_again; 1235 goto try_again;
1232 } 1236 }
1233 printf("\n"); 1237 printf("\n");
1234 error("perfcounter syscall returned with %d (%s)\n", 1238 error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
1235 fd[i][counter][thread_index], strerror(err)); 1239 fd[i][counter][thread_index], strerror(err));
1236 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 1240 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
1237 exit(-1); 1241 exit(-1);
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 921245b28583..c7798c7f24ed 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -27,7 +27,7 @@ extern int cmd_report(int argc, const char **argv, const char *prefix);
27extern int cmd_stat(int argc, const char **argv, const char *prefix); 27extern int cmd_stat(int argc, const char **argv, const char *prefix);
28extern int cmd_timechart(int argc, const char **argv, const char *prefix); 28extern int cmd_timechart(int argc, const char **argv, const char *prefix);
29extern int cmd_top(int argc, const char **argv, const char *prefix); 29extern int cmd_top(int argc, const char **argv, const char *prefix);
30extern int cmd_trace(int argc, const char **argv, const char *prefix); 30extern int cmd_script(int argc, const char **argv, const char *prefix);
31extern int cmd_version(int argc, const char **argv, const char *prefix); 31extern int cmd_version(int argc, const char **argv, const char *prefix);
32extern int cmd_probe(int argc, const char **argv, const char *prefix); 32extern int cmd_probe(int argc, const char **argv, const char *prefix);
33extern int cmd_kmem(int argc, const char **argv, const char *prefix); 33extern int cmd_kmem(int argc, const char **argv, const char *prefix);
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index 949d77fc0b97..16b5088cf8f4 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -16,7 +16,7 @@ perf-report mainporcelain common
16perf-stat mainporcelain common 16perf-stat mainporcelain common
17perf-timechart mainporcelain common 17perf-timechart mainporcelain common
18perf-top mainporcelain common 18perf-top mainporcelain common
19perf-trace mainporcelain common 19perf-script mainporcelain common
20perf-probe mainporcelain common 20perf-probe mainporcelain common
21perf-kmem mainporcelain common 21perf-kmem mainporcelain common
22perf-lock mainporcelain common 22perf-lock mainporcelain common
diff --git a/tools/perf/feature-tests.mak b/tools/perf/feature-tests.mak
index b253db634f04..b041ca67a2cb 100644
--- a/tools/perf/feature-tests.mak
+++ b/tools/perf/feature-tests.mak
@@ -9,8 +9,8 @@ endef
9ifndef NO_DWARF 9ifndef NO_DWARF
10define SOURCE_DWARF 10define SOURCE_DWARF
11#include <dwarf.h> 11#include <dwarf.h>
12#include <libdw.h> 12#include <elfutils/libdw.h>
13#include <version.h> 13#include <elfutils/version.h>
14#ifndef _ELFUTILS_PREREQ 14#ifndef _ELFUTILS_PREREQ
15#error 15#error
16#endif 16#endif
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index cdd6c03f1e14..595d0f4a7103 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -323,7 +323,7 @@ static void handle_internal_command(int argc, const char **argv)
323 { "top", cmd_top, 0 }, 323 { "top", cmd_top, 0 },
324 { "annotate", cmd_annotate, 0 }, 324 { "annotate", cmd_annotate, 0 },
325 { "version", cmd_version, 0 }, 325 { "version", cmd_version, 0 },
326 { "trace", cmd_trace, 0 }, 326 { "script", cmd_script, 0 },
327 { "sched", cmd_sched, 0 }, 327 { "sched", cmd_sched, 0 },
328 { "probe", cmd_probe, 0 }, 328 { "probe", cmd_probe, 0 },
329 { "kmem", cmd_kmem, 0 }, 329 { "kmem", cmd_kmem, 0 },
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Context.c b/tools/perf/scripts/python/Perf-Trace-Util/Context.c
index 957085dd5d8d..315067b8f552 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/Context.c
+++ b/tools/perf/scripts/python/Perf-Trace-Util/Context.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Context.c. Python interfaces for perf trace. 2 * Context.c. Python interfaces for perf script.
3 * 3 *
4 * Copyright (C) 2010 Tom Zanussi <tzanussi@gmail.com> 4 * Copyright (C) 2010 Tom Zanussi <tzanussi@gmail.com>
5 * 5 *
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index e437edb72417..deffb8c96071 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -14,7 +14,9 @@
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include "debug.h" 15#include "debug.h"
16 16
17static int build_id__mark_dso_hit(event_t *event, struct perf_session *session) 17static int build_id__mark_dso_hit(event_t *event,
18 struct sample_data *sample __used,
19 struct perf_session *session)
18{ 20{
19 struct addr_location al; 21 struct addr_location al;
20 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 22 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
@@ -35,7 +37,8 @@ static int build_id__mark_dso_hit(event_t *event, struct perf_session *session)
35 return 0; 37 return 0;
36} 38}
37 39
38static int event__exit_del_thread(event_t *self, struct perf_session *session) 40static int event__exit_del_thread(event_t *self, struct sample_data *sample __used,
41 struct perf_session *session)
39{ 42{
40 struct thread *thread = perf_session__findnew(session, self->fork.tid); 43 struct thread *thread = perf_session__findnew(session, self->fork.tid);
41 44
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index c8d81b00089d..01bbe8ecec3f 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -46,20 +46,16 @@ int dump_printf(const char *fmt, ...)
46 return ret; 46 return ret;
47} 47}
48 48
49static int dump_printf_color(const char *fmt, const char *color, ...) 49#ifdef NO_NEWT_SUPPORT
50void ui__warning(const char *format, ...)
50{ 51{
51 va_list args; 52 va_list args;
52 int ret = 0;
53 53
54 if (dump_trace) { 54 va_start(args, format);
55 va_start(args, color); 55 vfprintf(stderr, format, args);
56 ret = color_vfprintf(stdout, color, fmt, args); 56 va_end(args);
57 va_end(args);
58 }
59
60 return ret;
61} 57}
62 58#endif
63 59
64void trace_event(event_t *event) 60void trace_event(event_t *event)
65{ 61{
@@ -70,29 +66,29 @@ void trace_event(event_t *event)
70 if (!dump_trace) 66 if (!dump_trace)
71 return; 67 return;
72 68
73 dump_printf("."); 69 printf(".");
74 dump_printf_color("\n. ... raw event: size %d bytes\n", color, 70 color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n",
75 event->header.size); 71 event->header.size);
76 72
77 for (i = 0; i < event->header.size; i++) { 73 for (i = 0; i < event->header.size; i++) {
78 if ((i & 15) == 0) { 74 if ((i & 15) == 0) {
79 dump_printf("."); 75 printf(".");
80 dump_printf_color(" %04x: ", color, i); 76 color_fprintf(stdout, color, " %04x: ", i);
81 } 77 }
82 78
83 dump_printf_color(" %02x", color, raw_event[i]); 79 color_fprintf(stdout, color, " %02x", raw_event[i]);
84 80
85 if (((i & 15) == 15) || i == event->header.size-1) { 81 if (((i & 15) == 15) || i == event->header.size-1) {
86 dump_printf_color(" ", color); 82 color_fprintf(stdout, color, " ");
87 for (j = 0; j < 15-(i & 15); j++) 83 for (j = 0; j < 15-(i & 15); j++)
88 dump_printf_color(" ", color); 84 color_fprintf(stdout, color, " ");
89 for (j = i & ~15; j <= i; j++) { 85 for (j = i & ~15; j <= i; j++) {
90 dump_printf_color("%c", color, 86 color_fprintf(stdout, color, "%c",
91 isprint(raw_event[j]) ? 87 isprint(raw_event[j]) ?
92 raw_event[j] : '.'); 88 raw_event[j] : '.');
93 } 89 }
94 dump_printf_color("\n", color); 90 color_fprintf(stdout, color, "\n");
95 } 91 }
96 } 92 }
97 dump_printf(".\n"); 93 printf(".\n");
98} 94}
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index 7b514082bbaf..ca35fd66b5df 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -35,4 +35,6 @@ int ui_helpline__show_help(const char *format, va_list ap);
35#include "ui/progress.h" 35#include "ui/progress.h"
36#endif 36#endif
37 37
38void ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2)));
39
38#endif /* __PERF_DEBUG_H */ 40#endif /* __PERF_DEBUG_H */
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index dab9e754a281..183aedd4db83 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -7,7 +7,7 @@
7#include "strlist.h" 7#include "strlist.h"
8#include "thread.h" 8#include "thread.h"
9 9
10const char *event__name[] = { 10static const char *event__name[] = {
11 [0] = "TOTAL", 11 [0] = "TOTAL",
12 [PERF_RECORD_MMAP] = "MMAP", 12 [PERF_RECORD_MMAP] = "MMAP",
13 [PERF_RECORD_LOST] = "LOST", 13 [PERF_RECORD_LOST] = "LOST",
@@ -22,13 +22,31 @@ const char *event__name[] = {
22 [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", 22 [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
23 [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", 23 [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
24 [PERF_RECORD_HEADER_BUILD_ID] = "BUILD_ID", 24 [PERF_RECORD_HEADER_BUILD_ID] = "BUILD_ID",
25 [PERF_RECORD_FINISHED_ROUND] = "FINISHED_ROUND",
25}; 26};
26 27
27static pid_t event__synthesize_comm(pid_t pid, int full, 28const char *event__get_event_name(unsigned int id)
29{
30 if (id >= ARRAY_SIZE(event__name))
31 return "INVALID";
32 if (!event__name[id])
33 return "UNKNOWN";
34 return event__name[id];
35}
36
37static struct sample_data synth_sample = {
38 .pid = -1,
39 .tid = -1,
40 .time = -1,
41 .stream_id = -1,
42 .cpu = -1,
43 .period = 1,
44};
45
46static pid_t event__synthesize_comm(event_t *event, pid_t pid, int full,
28 event__handler_t process, 47 event__handler_t process,
29 struct perf_session *session) 48 struct perf_session *session)
30{ 49{
31 event_t ev;
32 char filename[PATH_MAX]; 50 char filename[PATH_MAX];
33 char bf[BUFSIZ]; 51 char bf[BUFSIZ];
34 FILE *fp; 52 FILE *fp;
@@ -49,34 +67,39 @@ out_race:
49 return 0; 67 return 0;
50 } 68 }
51 69
52 memset(&ev.comm, 0, sizeof(ev.comm)); 70 memset(&event->comm, 0, sizeof(event->comm));
53 while (!ev.comm.comm[0] || !ev.comm.pid) { 71
54 if (fgets(bf, sizeof(bf), fp) == NULL) 72 while (!event->comm.comm[0] || !event->comm.pid) {
55 goto out_failure; 73 if (fgets(bf, sizeof(bf), fp) == NULL) {
74 pr_warning("couldn't get COMM and pgid, malformed %s\n", filename);
75 goto out;
76 }
56 77
57 if (memcmp(bf, "Name:", 5) == 0) { 78 if (memcmp(bf, "Name:", 5) == 0) {
58 char *name = bf + 5; 79 char *name = bf + 5;
59 while (*name && isspace(*name)) 80 while (*name && isspace(*name))
60 ++name; 81 ++name;
61 size = strlen(name) - 1; 82 size = strlen(name) - 1;
62 memcpy(ev.comm.comm, name, size++); 83 memcpy(event->comm.comm, name, size++);
63 } else if (memcmp(bf, "Tgid:", 5) == 0) { 84 } else if (memcmp(bf, "Tgid:", 5) == 0) {
64 char *tgids = bf + 5; 85 char *tgids = bf + 5;
65 while (*tgids && isspace(*tgids)) 86 while (*tgids && isspace(*tgids))
66 ++tgids; 87 ++tgids;
67 tgid = ev.comm.pid = atoi(tgids); 88 tgid = event->comm.pid = atoi(tgids);
68 } 89 }
69 } 90 }
70 91
71 ev.comm.header.type = PERF_RECORD_COMM; 92 event->comm.header.type = PERF_RECORD_COMM;
72 size = ALIGN(size, sizeof(u64)); 93 size = ALIGN(size, sizeof(u64));
73 ev.comm.header.size = sizeof(ev.comm) - (sizeof(ev.comm.comm) - size); 94 memset(event->comm.comm + size, 0, session->id_hdr_size);
74 95 event->comm.header.size = (sizeof(event->comm) -
96 (sizeof(event->comm.comm) - size) +
97 session->id_hdr_size);
75 if (!full) { 98 if (!full) {
76 ev.comm.tid = pid; 99 event->comm.tid = pid;
77 100
78 process(&ev, session); 101 process(event, &synth_sample, session);
79 goto out_fclose; 102 goto out;
80 } 103 }
81 104
82 snprintf(filename, sizeof(filename), "/proc/%d/task", pid); 105 snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
@@ -91,22 +114,19 @@ out_race:
91 if (*end) 114 if (*end)
92 continue; 115 continue;
93 116
94 ev.comm.tid = pid; 117 event->comm.tid = pid;
95 118
96 process(&ev, session); 119 process(event, &synth_sample, session);
97 } 120 }
98 closedir(tasks);
99 121
100out_fclose: 122 closedir(tasks);
123out:
101 fclose(fp); 124 fclose(fp);
102 return tgid;
103 125
104out_failure: 126 return tgid;
105 pr_warning("couldn't get COMM and pgid, malformed %s\n", filename);
106 return -1;
107} 127}
108 128
109static int event__synthesize_mmap_events(pid_t pid, pid_t tgid, 129static int event__synthesize_mmap_events(event_t *event, pid_t pid, pid_t tgid,
110 event__handler_t process, 130 event__handler_t process,
111 struct perf_session *session) 131 struct perf_session *session)
112{ 132{
@@ -124,29 +144,25 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
124 return -1; 144 return -1;
125 } 145 }
126 146
147 event->header.type = PERF_RECORD_MMAP;
148 /*
149 * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c
150 */
151 event->header.misc = PERF_RECORD_MISC_USER;
152
127 while (1) { 153 while (1) {
128 char bf[BUFSIZ], *pbf = bf; 154 char bf[BUFSIZ], *pbf = bf;
129 event_t ev = {
130 .header = {
131 .type = PERF_RECORD_MMAP,
132 /*
133 * Just like the kernel, see __perf_event_mmap
134 * in kernel/perf_event.c
135 */
136 .misc = PERF_RECORD_MISC_USER,
137 },
138 };
139 int n; 155 int n;
140 size_t size; 156 size_t size;
141 if (fgets(bf, sizeof(bf), fp) == NULL) 157 if (fgets(bf, sizeof(bf), fp) == NULL)
142 break; 158 break;
143 159
144 /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ 160 /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
145 n = hex2u64(pbf, &ev.mmap.start); 161 n = hex2u64(pbf, &event->mmap.start);
146 if (n < 0) 162 if (n < 0)
147 continue; 163 continue;
148 pbf += n + 1; 164 pbf += n + 1;
149 n = hex2u64(pbf, &ev.mmap.len); 165 n = hex2u64(pbf, &event->mmap.len);
150 if (n < 0) 166 if (n < 0)
151 continue; 167 continue;
152 pbf += n + 3; 168 pbf += n + 3;
@@ -161,19 +177,21 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
161 continue; 177 continue;
162 178
163 pbf += 3; 179 pbf += 3;
164 n = hex2u64(pbf, &ev.mmap.pgoff); 180 n = hex2u64(pbf, &event->mmap.pgoff);
165 181
166 size = strlen(execname); 182 size = strlen(execname);
167 execname[size - 1] = '\0'; /* Remove \n */ 183 execname[size - 1] = '\0'; /* Remove \n */
168 memcpy(ev.mmap.filename, execname, size); 184 memcpy(event->mmap.filename, execname, size);
169 size = ALIGN(size, sizeof(u64)); 185 size = ALIGN(size, sizeof(u64));
170 ev.mmap.len -= ev.mmap.start; 186 event->mmap.len -= event->mmap.start;
171 ev.mmap.header.size = (sizeof(ev.mmap) - 187 event->mmap.header.size = (sizeof(event->mmap) -
172 (sizeof(ev.mmap.filename) - size)); 188 (sizeof(event->mmap.filename) - size));
173 ev.mmap.pid = tgid; 189 memset(event->mmap.filename + size, 0, session->id_hdr_size);
174 ev.mmap.tid = pid; 190 event->mmap.header.size += session->id_hdr_size;
175 191 event->mmap.pid = tgid;
176 process(&ev, session); 192 event->mmap.tid = pid;
193
194 process(event, &synth_sample, session);
177 } 195 }
178 } 196 }
179 197
@@ -187,20 +205,27 @@ int event__synthesize_modules(event__handler_t process,
187{ 205{
188 struct rb_node *nd; 206 struct rb_node *nd;
189 struct map_groups *kmaps = &machine->kmaps; 207 struct map_groups *kmaps = &machine->kmaps;
190 u16 misc; 208 event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size);
209
210 if (event == NULL) {
211 pr_debug("Not enough memory synthesizing mmap event "
212 "for kernel modules\n");
213 return -1;
214 }
215
216 event->header.type = PERF_RECORD_MMAP;
191 217
192 /* 218 /*
193 * kernel uses 0 for user space maps, see kernel/perf_event.c 219 * kernel uses 0 for user space maps, see kernel/perf_event.c
194 * __perf_event_mmap 220 * __perf_event_mmap
195 */ 221 */
196 if (machine__is_host(machine)) 222 if (machine__is_host(machine))
197 misc = PERF_RECORD_MISC_KERNEL; 223 event->header.misc = PERF_RECORD_MISC_KERNEL;
198 else 224 else
199 misc = PERF_RECORD_MISC_GUEST_KERNEL; 225 event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
200 226
201 for (nd = rb_first(&kmaps->maps[MAP__FUNCTION]); 227 for (nd = rb_first(&kmaps->maps[MAP__FUNCTION]);
202 nd; nd = rb_next(nd)) { 228 nd; nd = rb_next(nd)) {
203 event_t ev;
204 size_t size; 229 size_t size;
205 struct map *pos = rb_entry(nd, struct map, rb_node); 230 struct map *pos = rb_entry(nd, struct map, rb_node);
206 231
@@ -208,39 +233,78 @@ int event__synthesize_modules(event__handler_t process,
208 continue; 233 continue;
209 234
210 size = ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); 235 size = ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
211 memset(&ev, 0, sizeof(ev)); 236 event->mmap.header.type = PERF_RECORD_MMAP;
212 ev.mmap.header.misc = misc; 237 event->mmap.header.size = (sizeof(event->mmap) -
213 ev.mmap.header.type = PERF_RECORD_MMAP; 238 (sizeof(event->mmap.filename) - size));
214 ev.mmap.header.size = (sizeof(ev.mmap) - 239 memset(event->mmap.filename + size, 0, session->id_hdr_size);
215 (sizeof(ev.mmap.filename) - size)); 240 event->mmap.header.size += session->id_hdr_size;
216 ev.mmap.start = pos->start; 241 event->mmap.start = pos->start;
217 ev.mmap.len = pos->end - pos->start; 242 event->mmap.len = pos->end - pos->start;
218 ev.mmap.pid = machine->pid; 243 event->mmap.pid = machine->pid;
219 244
220 memcpy(ev.mmap.filename, pos->dso->long_name, 245 memcpy(event->mmap.filename, pos->dso->long_name,
221 pos->dso->long_name_len + 1); 246 pos->dso->long_name_len + 1);
222 process(&ev, session); 247 process(event, &synth_sample, session);
223 } 248 }
224 249
250 free(event);
225 return 0; 251 return 0;
226} 252}
227 253
228int event__synthesize_thread(pid_t pid, event__handler_t process, 254static int __event__synthesize_thread(event_t *comm_event, event_t *mmap_event,
229 struct perf_session *session) 255 pid_t pid, event__handler_t process,
256 struct perf_session *session)
230{ 257{
231 pid_t tgid = event__synthesize_comm(pid, 1, process, session); 258 pid_t tgid = event__synthesize_comm(comm_event, pid, 1, process,
259 session);
232 if (tgid == -1) 260 if (tgid == -1)
233 return -1; 261 return -1;
234 return event__synthesize_mmap_events(pid, tgid, process, session); 262 return event__synthesize_mmap_events(mmap_event, pid, tgid,
263 process, session);
264}
265
266int event__synthesize_thread(pid_t pid, event__handler_t process,
267 struct perf_session *session)
268{
269 event_t *comm_event, *mmap_event;
270 int err = -1;
271
272 comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size);
273 if (comm_event == NULL)
274 goto out;
275
276 mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size);
277 if (mmap_event == NULL)
278 goto out_free_comm;
279
280 err = __event__synthesize_thread(comm_event, mmap_event, pid,
281 process, session);
282 free(mmap_event);
283out_free_comm:
284 free(comm_event);
285out:
286 return err;
235} 287}
236 288
237void event__synthesize_threads(event__handler_t process, 289int event__synthesize_threads(event__handler_t process,
238 struct perf_session *session) 290 struct perf_session *session)
239{ 291{
240 DIR *proc; 292 DIR *proc;
241 struct dirent dirent, *next; 293 struct dirent dirent, *next;
294 event_t *comm_event, *mmap_event;
295 int err = -1;
296
297 comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size);
298 if (comm_event == NULL)
299 goto out;
300
301 mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size);
302 if (mmap_event == NULL)
303 goto out_free_comm;
242 304
243 proc = opendir("/proc"); 305 proc = opendir("/proc");
306 if (proc == NULL)
307 goto out_free_mmap;
244 308
245 while (!readdir_r(proc, &dirent, &next) && next) { 309 while (!readdir_r(proc, &dirent, &next) && next) {
246 char *end; 310 char *end;
@@ -249,10 +313,18 @@ void event__synthesize_threads(event__handler_t process,
249 if (*end) /* only interested in proper numerical dirents */ 313 if (*end) /* only interested in proper numerical dirents */
250 continue; 314 continue;
251 315
252 event__synthesize_thread(pid, process, session); 316 __event__synthesize_thread(comm_event, mmap_event, pid,
317 process, session);
253 } 318 }
254 319
255 closedir(proc); 320 closedir(proc);
321 err = 0;
322out_free_mmap:
323 free(mmap_event);
324out_free_comm:
325 free(comm_event);
326out:
327 return err;
256} 328}
257 329
258struct process_symbol_args { 330struct process_symbol_args {
@@ -286,18 +358,20 @@ int event__synthesize_kernel_mmap(event__handler_t process,
286 char path[PATH_MAX]; 358 char path[PATH_MAX];
287 char name_buff[PATH_MAX]; 359 char name_buff[PATH_MAX];
288 struct map *map; 360 struct map *map;
289 361 int err;
290 event_t ev = {
291 .header = {
292 .type = PERF_RECORD_MMAP,
293 },
294 };
295 /* 362 /*
296 * We should get this from /sys/kernel/sections/.text, but till that is 363 * We should get this from /sys/kernel/sections/.text, but till that is
297 * available use this, and after it is use this as a fallback for older 364 * available use this, and after it is use this as a fallback for older
298 * kernels. 365 * kernels.
299 */ 366 */
300 struct process_symbol_args args = { .name = symbol_name, }; 367 struct process_symbol_args args = { .name = symbol_name, };
368 event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size);
369
370 if (event == NULL) {
371 pr_debug("Not enough memory synthesizing mmap event "
372 "for kernel modules\n");
373 return -1;
374 }
301 375
302 mmap_name = machine__mmap_name(machine, name_buff, sizeof(name_buff)); 376 mmap_name = machine__mmap_name(machine, name_buff, sizeof(name_buff));
303 if (machine__is_host(machine)) { 377 if (machine__is_host(machine)) {
@@ -305,10 +379,10 @@ int event__synthesize_kernel_mmap(event__handler_t process,
305 * kernel uses PERF_RECORD_MISC_USER for user space maps, 379 * kernel uses PERF_RECORD_MISC_USER for user space maps,
306 * see kernel/perf_event.c __perf_event_mmap 380 * see kernel/perf_event.c __perf_event_mmap
307 */ 381 */
308 ev.header.misc = PERF_RECORD_MISC_KERNEL; 382 event->header.misc = PERF_RECORD_MISC_KERNEL;
309 filename = "/proc/kallsyms"; 383 filename = "/proc/kallsyms";
310 } else { 384 } else {
311 ev.header.misc = PERF_RECORD_MISC_GUEST_KERNEL; 385 event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
312 if (machine__is_default_guest(machine)) 386 if (machine__is_default_guest(machine))
313 filename = (char *) symbol_conf.default_guest_kallsyms; 387 filename = (char *) symbol_conf.default_guest_kallsyms;
314 else { 388 else {
@@ -321,17 +395,21 @@ int event__synthesize_kernel_mmap(event__handler_t process,
321 return -ENOENT; 395 return -ENOENT;
322 396
323 map = machine->vmlinux_maps[MAP__FUNCTION]; 397 map = machine->vmlinux_maps[MAP__FUNCTION];
324 size = snprintf(ev.mmap.filename, sizeof(ev.mmap.filename), 398 size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
325 "%s%s", mmap_name, symbol_name) + 1; 399 "%s%s", mmap_name, symbol_name) + 1;
326 size = ALIGN(size, sizeof(u64)); 400 size = ALIGN(size, sizeof(u64));
327 ev.mmap.header.size = (sizeof(ev.mmap) - 401 event->mmap.header.type = PERF_RECORD_MMAP;
328 (sizeof(ev.mmap.filename) - size)); 402 event->mmap.header.size = (sizeof(event->mmap) -
329 ev.mmap.pgoff = args.start; 403 (sizeof(event->mmap.filename) - size) + session->id_hdr_size);
330 ev.mmap.start = map->start; 404 event->mmap.pgoff = args.start;
331 ev.mmap.len = map->end - ev.mmap.start; 405 event->mmap.start = map->start;
332 ev.mmap.pid = machine->pid; 406 event->mmap.len = map->end - event->mmap.start;
333 407 event->mmap.pid = machine->pid;
334 return process(&ev, session); 408
409 err = process(event, &synth_sample, session);
410 free(event);
411
412 return err;
335} 413}
336 414
337static void thread__comm_adjust(struct thread *self, struct hists *hists) 415static void thread__comm_adjust(struct thread *self, struct hists *hists)
@@ -361,7 +439,8 @@ static int thread__set_comm_adjust(struct thread *self, const char *comm,
361 return 0; 439 return 0;
362} 440}
363 441
364int event__process_comm(event_t *self, struct perf_session *session) 442int event__process_comm(event_t *self, struct sample_data *sample __used,
443 struct perf_session *session)
365{ 444{
366 struct thread *thread = perf_session__findnew(session, self->comm.tid); 445 struct thread *thread = perf_session__findnew(session, self->comm.tid);
367 446
@@ -376,7 +455,8 @@ int event__process_comm(event_t *self, struct perf_session *session)
376 return 0; 455 return 0;
377} 456}
378 457
379int event__process_lost(event_t *self, struct perf_session *session) 458int event__process_lost(event_t *self, struct sample_data *sample __used,
459 struct perf_session *session)
380{ 460{
381 dump_printf(": id:%Ld: lost:%Ld\n", self->lost.id, self->lost.lost); 461 dump_printf(": id:%Ld: lost:%Ld\n", self->lost.id, self->lost.lost);
382 session->hists.stats.total_lost += self->lost.lost; 462 session->hists.stats.total_lost += self->lost.lost;
@@ -392,7 +472,7 @@ static void event_set_kernel_mmap_len(struct map **maps, event_t *self)
392 * a zero sized synthesized MMAP event for the kernel. 472 * a zero sized synthesized MMAP event for the kernel.
393 */ 473 */
394 if (maps[MAP__FUNCTION]->end == 0) 474 if (maps[MAP__FUNCTION]->end == 0)
395 maps[MAP__FUNCTION]->end = ~0UL; 475 maps[MAP__FUNCTION]->end = ~0ULL;
396} 476}
397 477
398static int event__process_kernel_mmap(event_t *self, 478static int event__process_kernel_mmap(event_t *self,
@@ -485,7 +565,8 @@ out_problem:
485 return -1; 565 return -1;
486} 566}
487 567
488int event__process_mmap(event_t *self, struct perf_session *session) 568int event__process_mmap(event_t *self, struct sample_data *sample __used,
569 struct perf_session *session)
489{ 570{
490 struct machine *machine; 571 struct machine *machine;
491 struct thread *thread; 572 struct thread *thread;
@@ -526,7 +607,8 @@ out_problem:
526 return 0; 607 return 0;
527} 608}
528 609
529int event__process_task(event_t *self, struct perf_session *session) 610int event__process_task(event_t *self, struct sample_data *sample __used,
611 struct perf_session *session)
530{ 612{
531 struct thread *thread = perf_session__findnew(session, self->fork.tid); 613 struct thread *thread = perf_session__findnew(session, self->fork.tid);
532 struct thread *parent = perf_session__findnew(session, self->fork.ptid); 614 struct thread *parent = perf_session__findnew(session, self->fork.ptid);
@@ -548,18 +630,19 @@ int event__process_task(event_t *self, struct perf_session *session)
548 return 0; 630 return 0;
549} 631}
550 632
551int event__process(event_t *event, struct perf_session *session) 633int event__process(event_t *event, struct sample_data *sample,
634 struct perf_session *session)
552{ 635{
553 switch (event->header.type) { 636 switch (event->header.type) {
554 case PERF_RECORD_COMM: 637 case PERF_RECORD_COMM:
555 event__process_comm(event, session); 638 event__process_comm(event, sample, session);
556 break; 639 break;
557 case PERF_RECORD_MMAP: 640 case PERF_RECORD_MMAP:
558 event__process_mmap(event, session); 641 event__process_mmap(event, sample, session);
559 break; 642 break;
560 case PERF_RECORD_FORK: 643 case PERF_RECORD_FORK:
561 case PERF_RECORD_EXIT: 644 case PERF_RECORD_EXIT:
562 event__process_task(event, session); 645 event__process_task(event, sample, session);
563 break; 646 break;
564 default: 647 default:
565 break; 648 break;
@@ -674,32 +757,8 @@ int event__preprocess_sample(const event_t *self, struct perf_session *session,
674 symbol_filter_t filter) 757 symbol_filter_t filter)
675{ 758{
676 u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 759 u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
677 struct thread *thread; 760 struct thread *thread = perf_session__findnew(session, self->ip.pid);
678
679 event__parse_sample(self, session->sample_type, data);
680
681 dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld cpu:%d\n",
682 self->header.misc, data->pid, data->tid, data->ip,
683 data->period, data->cpu);
684
685 if (session->sample_type & PERF_SAMPLE_CALLCHAIN) {
686 unsigned int i;
687
688 dump_printf("... chain: nr:%Lu\n", data->callchain->nr);
689 761
690 if (!ip_callchain__valid(data->callchain, self)) {
691 pr_debug("call-chain problem with event, "
692 "skipping it.\n");
693 goto out_filtered;
694 }
695
696 if (dump_trace) {
697 for (i = 0; i < data->callchain->nr; i++)
698 dump_printf("..... %2d: %016Lx\n",
699 i, data->callchain->ips[i]);
700 }
701 }
702 thread = perf_session__findnew(session, self->ip.pid);
703 if (thread == NULL) 762 if (thread == NULL)
704 return -1; 763 return -1;
705 764
@@ -766,9 +825,65 @@ out_filtered:
766 return 0; 825 return 0;
767} 826}
768 827
769int event__parse_sample(const event_t *event, u64 type, struct sample_data *data) 828static int event__parse_id_sample(const event_t *event,
829 struct perf_session *session,
830 struct sample_data *sample)
770{ 831{
771 const u64 *array = event->sample.array; 832 const u64 *array;
833 u64 type;
834
835 sample->cpu = sample->pid = sample->tid = -1;
836 sample->stream_id = sample->id = sample->time = -1ULL;
837
838 if (!session->sample_id_all)
839 return 0;
840
841 array = event->sample.array;
842 array += ((event->header.size -
843 sizeof(event->header)) / sizeof(u64)) - 1;
844 type = session->sample_type;
845
846 if (type & PERF_SAMPLE_CPU) {
847 u32 *p = (u32 *)array;
848 sample->cpu = *p;
849 array--;
850 }
851
852 if (type & PERF_SAMPLE_STREAM_ID) {
853 sample->stream_id = *array;
854 array--;
855 }
856
857 if (type & PERF_SAMPLE_ID) {
858 sample->id = *array;
859 array--;
860 }
861
862 if (type & PERF_SAMPLE_TIME) {
863 sample->time = *array;
864 array--;
865 }
866
867 if (type & PERF_SAMPLE_TID) {
868 u32 *p = (u32 *)array;
869 sample->pid = p[0];
870 sample->tid = p[1];
871 }
872
873 return 0;
874}
875
876int event__parse_sample(const event_t *event, struct perf_session *session,
877 struct sample_data *data)
878{
879 const u64 *array;
880 u64 type;
881
882 if (event->header.type != PERF_RECORD_SAMPLE)
883 return event__parse_id_sample(event, session, data);
884
885 array = event->sample.array;
886 type = session->sample_type;
772 887
773 if (type & PERF_SAMPLE_IP) { 888 if (type & PERF_SAMPLE_IP) {
774 data->ip = event->ip.ip; 889 data->ip = event->ip.ip;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 8e790dae7026..2b7e91902f10 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -85,6 +85,7 @@ struct build_id_event {
85}; 85};
86 86
87enum perf_user_event_type { /* above any possible kernel type */ 87enum perf_user_event_type { /* above any possible kernel type */
88 PERF_RECORD_USER_TYPE_START = 64,
88 PERF_RECORD_HEADER_ATTR = 64, 89 PERF_RECORD_HEADER_ATTR = 64,
89 PERF_RECORD_HEADER_EVENT_TYPE = 65, 90 PERF_RECORD_HEADER_EVENT_TYPE = 65,
90 PERF_RECORD_HEADER_TRACING_DATA = 66, 91 PERF_RECORD_HEADER_TRACING_DATA = 66,
@@ -135,12 +136,15 @@ void event__print_totals(void);
135 136
136struct perf_session; 137struct perf_session;
137 138
138typedef int (*event__handler_t)(event_t *event, struct perf_session *session); 139typedef int (*event__handler_synth_t)(event_t *event,
140 struct perf_session *session);
141typedef int (*event__handler_t)(event_t *event, struct sample_data *sample,
142 struct perf_session *session);
139 143
140int event__synthesize_thread(pid_t pid, event__handler_t process, 144int event__synthesize_thread(pid_t pid, event__handler_t process,
141 struct perf_session *session); 145 struct perf_session *session);
142void event__synthesize_threads(event__handler_t process, 146int event__synthesize_threads(event__handler_t process,
143 struct perf_session *session); 147 struct perf_session *session);
144int event__synthesize_kernel_mmap(event__handler_t process, 148int event__synthesize_kernel_mmap(event__handler_t process,
145 struct perf_session *session, 149 struct perf_session *session,
146 struct machine *machine, 150 struct machine *machine,
@@ -150,18 +154,24 @@ int event__synthesize_modules(event__handler_t process,
150 struct perf_session *session, 154 struct perf_session *session,
151 struct machine *machine); 155 struct machine *machine);
152 156
153int event__process_comm(event_t *self, struct perf_session *session); 157int event__process_comm(event_t *self, struct sample_data *sample,
154int event__process_lost(event_t *self, struct perf_session *session); 158 struct perf_session *session);
155int event__process_mmap(event_t *self, struct perf_session *session); 159int event__process_lost(event_t *self, struct sample_data *sample,
156int event__process_task(event_t *self, struct perf_session *session); 160 struct perf_session *session);
157int event__process(event_t *event, struct perf_session *session); 161int event__process_mmap(event_t *self, struct sample_data *sample,
162 struct perf_session *session);
163int event__process_task(event_t *self, struct sample_data *sample,
164 struct perf_session *session);
165int event__process(event_t *event, struct sample_data *sample,
166 struct perf_session *session);
158 167
159struct addr_location; 168struct addr_location;
160int event__preprocess_sample(const event_t *self, struct perf_session *session, 169int event__preprocess_sample(const event_t *self, struct perf_session *session,
161 struct addr_location *al, struct sample_data *data, 170 struct addr_location *al, struct sample_data *data,
162 symbol_filter_t filter); 171 symbol_filter_t filter);
163int event__parse_sample(const event_t *event, u64 type, struct sample_data *data); 172int event__parse_sample(const event_t *event, struct perf_session *session,
173 struct sample_data *sample);
164 174
165extern const char *event__name[]; 175const char *event__get_event_name(unsigned int id);
166 176
167#endif /* __PERF_RECORD_H */ 177#endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 64a85bafde63..16a16021eaa6 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -152,6 +152,11 @@ void perf_header__set_feat(struct perf_header *self, int feat)
152 set_bit(feat, self->adds_features); 152 set_bit(feat, self->adds_features);
153} 153}
154 154
155void perf_header__clear_feat(struct perf_header *self, int feat)
156{
157 clear_bit(feat, self->adds_features);
158}
159
155bool perf_header__has_feat(const struct perf_header *self, int feat) 160bool perf_header__has_feat(const struct perf_header *self, int feat)
156{ 161{
157 return test_bit(feat, self->adds_features); 162 return test_bit(feat, self->adds_features);
@@ -431,8 +436,10 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
431 int idx = 0, err; 436 int idx = 0, err;
432 437
433 session = container_of(self, struct perf_session, header); 438 session = container_of(self, struct perf_session, header);
434 if (perf_session__read_build_ids(session, true)) 439
435 perf_header__set_feat(self, HEADER_BUILD_ID); 440 if (perf_header__has_feat(self, HEADER_BUILD_ID &&
441 !perf_session__read_build_ids(session, true)))
442 perf_header__clear_feat(self, HEADER_BUILD_ID);
436 443
437 nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); 444 nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS);
438 if (!nr_sections) 445 if (!nr_sections)
@@ -939,6 +946,24 @@ u64 perf_header__sample_type(struct perf_header *header)
939 return type; 946 return type;
940} 947}
941 948
949bool perf_header__sample_id_all(const struct perf_header *header)
950{
951 bool value = false, first = true;
952 int i;
953
954 for (i = 0; i < header->attrs; i++) {
955 struct perf_header_attr *attr = header->attr[i];
956
957 if (first) {
958 value = attr->attr.sample_id_all;
959 first = false;
960 } else if (value != attr->attr.sample_id_all)
961 die("non matching sample_id_all");
962 }
963
964 return value;
965}
966
942struct perf_event_attr * 967struct perf_event_attr *
943perf_header__find_attr(u64 id, struct perf_header *header) 968perf_header__find_attr(u64 id, struct perf_header *header)
944{ 969{
@@ -985,21 +1010,23 @@ int event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
985 1010
986 ev = malloc(size); 1011 ev = malloc(size);
987 1012
1013 if (ev == NULL)
1014 return -ENOMEM;
1015
988 ev->attr.attr = *attr; 1016 ev->attr.attr = *attr;
989 memcpy(ev->attr.id, id, ids * sizeof(u64)); 1017 memcpy(ev->attr.id, id, ids * sizeof(u64));
990 1018
991 ev->attr.header.type = PERF_RECORD_HEADER_ATTR; 1019 ev->attr.header.type = PERF_RECORD_HEADER_ATTR;
992 ev->attr.header.size = size; 1020 ev->attr.header.size = size;
993 1021
994 err = process(ev, session); 1022 err = process(ev, NULL, session);
995 1023
996 free(ev); 1024 free(ev);
997 1025
998 return err; 1026 return err;
999} 1027}
1000 1028
1001int event__synthesize_attrs(struct perf_header *self, 1029int event__synthesize_attrs(struct perf_header *self, event__handler_t process,
1002 event__handler_t process,
1003 struct perf_session *session) 1030 struct perf_session *session)
1004{ 1031{
1005 struct perf_header_attr *attr; 1032 struct perf_header_attr *attr;
@@ -1069,7 +1096,7 @@ int event__synthesize_event_type(u64 event_id, char *name,
1069 ev.event_type.header.size = sizeof(ev.event_type) - 1096 ev.event_type.header.size = sizeof(ev.event_type) -
1070 (sizeof(ev.event_type.event_type.name) - size); 1097 (sizeof(ev.event_type.event_type.name) - size);
1071 1098
1072 err = process(&ev, session); 1099 err = process(&ev, NULL, session);
1073 1100
1074 return err; 1101 return err;
1075} 1102}
@@ -1124,7 +1151,7 @@ int event__synthesize_tracing_data(int fd, struct perf_event_attr *pattrs,
1124 ev.tracing_data.header.size = sizeof(ev.tracing_data); 1151 ev.tracing_data.header.size = sizeof(ev.tracing_data);
1125 ev.tracing_data.size = aligned_size; 1152 ev.tracing_data.size = aligned_size;
1126 1153
1127 process(&ev, session); 1154 process(&ev, NULL, session);
1128 1155
1129 err = read_tracing_data(fd, pattrs, nb_events); 1156 err = read_tracing_data(fd, pattrs, nb_events);
1130 write_padded(fd, NULL, 0, padding); 1157 write_padded(fd, NULL, 0, padding);
@@ -1184,7 +1211,7 @@ int event__synthesize_build_id(struct dso *pos, u16 misc,
1184 ev.build_id.header.size = sizeof(ev.build_id) + len; 1211 ev.build_id.header.size = sizeof(ev.build_id) + len;
1185 memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len); 1212 memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len);
1186 1213
1187 err = process(&ev, session); 1214 err = process(&ev, NULL, session);
1188 1215
1189 return err; 1216 return err;
1190} 1217}
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 402ac2454cf8..6335965e1f93 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -81,9 +81,11 @@ void perf_header_attr__delete(struct perf_header_attr *self);
81int perf_header_attr__add_id(struct perf_header_attr *self, u64 id); 81int perf_header_attr__add_id(struct perf_header_attr *self, u64 id);
82 82
83u64 perf_header__sample_type(struct perf_header *header); 83u64 perf_header__sample_type(struct perf_header *header);
84bool perf_header__sample_id_all(const struct perf_header *header);
84struct perf_event_attr * 85struct perf_event_attr *
85perf_header__find_attr(u64 id, struct perf_header *header); 86perf_header__find_attr(u64 id, struct perf_header *header);
86void perf_header__set_feat(struct perf_header *self, int feat); 87void perf_header__set_feat(struct perf_header *self, int feat);
88void perf_header__clear_feat(struct perf_header *self, int feat);
87bool perf_header__has_feat(const struct perf_header *self, int feat); 89bool perf_header__has_feat(const struct perf_header *self, int feat);
88 90
89int perf_header__process_sections(struct perf_header *self, int fd, 91int perf_header__process_sections(struct perf_header *self, int fd,
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 2022e8740994..a3b84160c42e 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1168,10 +1168,13 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp)
1168 size_t ret = 0; 1168 size_t ret = 0;
1169 1169
1170 for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) { 1170 for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) {
1171 if (!event__name[i]) 1171 const char *name = event__get_event_name(i);
1172
1173 if (!strcmp(name, "UNKNOWN"))
1172 continue; 1174 continue;
1173 ret += fprintf(fp, "%10s events: %10d\n", 1175
1174 event__name[i], self->stats.nr_events[i]); 1176 ret += fprintf(fp, "%16s events: %10d\n", name,
1177 self->stats.nr_events[i]);
1175 } 1178 }
1176 1179
1177 return ret; 1180 return ret;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 587d375d3430..ee789856a8c9 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -52,8 +52,10 @@ struct sym_priv {
52struct events_stats { 52struct events_stats {
53 u64 total_period; 53 u64 total_period;
54 u64 total_lost; 54 u64 total_lost;
55 u64 total_invalid_chains;
55 u32 nr_events[PERF_RECORD_HEADER_MAX]; 56 u32 nr_events[PERF_RECORD_HEADER_MAX];
56 u32 nr_unknown_events; 57 u32 nr_unknown_events;
58 u32 nr_invalid_chains;
57}; 59};
58 60
59enum hist_column { 61enum hist_column {
diff --git a/tools/perf/util/include/asm/cpufeature.h b/tools/perf/util/include/asm/cpufeature.h
new file mode 100644
index 000000000000..acffd5e4d1d4
--- /dev/null
+++ b/tools/perf/util/include/asm/cpufeature.h
@@ -0,0 +1,9 @@
1
2#ifndef PERF_CPUFEATURE_H
3#define PERF_CPUFEATURE_H
4
5/* cpufeature.h ... dummy header file for including arch/x86/lib/memcpy_64.S */
6
7#define X86_FEATURE_REP_GOOD 0
8
9#endif /* PERF_CPUFEATURE_H */
diff --git a/tools/perf/util/include/asm/dwarf2.h b/tools/perf/util/include/asm/dwarf2.h
new file mode 100644
index 000000000000..bb4198e7837a
--- /dev/null
+++ b/tools/perf/util/include/asm/dwarf2.h
@@ -0,0 +1,11 @@
1
2#ifndef PERF_DWARF2_H
3#define PERF_DWARF2_H
4
5/* dwarf2.h ... dummy header file for including arch/x86/lib/memcpy_64.S */
6
7#define CFI_STARTPROC
8#define CFI_ENDPROC
9
10#endif /* PERF_DWARF2_H */
11
diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h
index bb4ac2e05385..8be0b968ca0b 100644
--- a/tools/perf/util/include/linux/bitops.h
+++ b/tools/perf/util/include/linux/bitops.h
@@ -13,6 +13,11 @@ static inline void set_bit(int nr, unsigned long *addr)
13 addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG); 13 addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
14} 14}
15 15
16static inline void clear_bit(int nr, unsigned long *addr)
17{
18 addr[nr / BITS_PER_LONG] &= ~(1UL << (nr % BITS_PER_LONG));
19}
20
16static __always_inline int test_bit(unsigned int nr, const unsigned long *addr) 21static __always_inline int test_bit(unsigned int nr, const unsigned long *addr)
17{ 22{
18 return ((1UL << (nr % BITS_PER_LONG)) & 23 return ((1UL << (nr % BITS_PER_LONG)) &
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h
new file mode 100644
index 000000000000..06387cffe125
--- /dev/null
+++ b/tools/perf/util/include/linux/linkage.h
@@ -0,0 +1,13 @@
1
2#ifndef PERF_LINUX_LINKAGE_H_
3#define PERF_LINUX_LINKAGE_H_
4
5/* linkage.h ... for including arch/x86/lib/memcpy_64.S */
6
7#define ENTRY(name) \
8 .globl name; \
9 name:
10
11#define ENDPROC(name)
12
13#endif /* PERF_LINUX_LINKAGE_H_ */
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4af5bd59cfd1..c305305a3884 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -434,7 +434,7 @@ parse_single_tracepoint_event(char *sys_name,
434 id = atoll(id_buf); 434 id = atoll(id_buf);
435 attr->config = id; 435 attr->config = id;
436 attr->type = PERF_TYPE_TRACEPOINT; 436 attr->type = PERF_TYPE_TRACEPOINT;
437 *strp = evt_name + evt_length; 437 *strp += strlen(sys_name) + evt_length + 1; /* + 1 for the ':' */
438 438
439 attr->sample_type |= PERF_SAMPLE_RAW; 439 attr->sample_type |= PERF_SAMPLE_RAW;
440 attr->sample_type |= PERF_SAMPLE_TIME; 440 attr->sample_type |= PERF_SAMPLE_TIME;
@@ -495,7 +495,7 @@ static enum event_result parse_tracepoint_event(const char **strp,
495 struct perf_event_attr *attr) 495 struct perf_event_attr *attr)
496{ 496{
497 const char *evt_name; 497 const char *evt_name;
498 char *flags; 498 char *flags = NULL, *comma_loc;
499 char sys_name[MAX_EVENT_LENGTH]; 499 char sys_name[MAX_EVENT_LENGTH];
500 unsigned int sys_length, evt_length; 500 unsigned int sys_length, evt_length;
501 501
@@ -514,6 +514,11 @@ static enum event_result parse_tracepoint_event(const char **strp,
514 sys_name[sys_length] = '\0'; 514 sys_name[sys_length] = '\0';
515 evt_name = evt_name + 1; 515 evt_name = evt_name + 1;
516 516
517 comma_loc = strchr(evt_name, ',');
518 if (comma_loc) {
519 /* take the event name up to the comma */
520 evt_name = strndup(evt_name, comma_loc - evt_name);
521 }
517 flags = strchr(evt_name, ':'); 522 flags = strchr(evt_name, ':');
518 if (flags) { 523 if (flags) {
519 /* split it out: */ 524 /* split it out: */
@@ -524,9 +529,8 @@ static enum event_result parse_tracepoint_event(const char **strp,
524 evt_length = strlen(evt_name); 529 evt_length = strlen(evt_name);
525 if (evt_length >= MAX_EVENT_LENGTH) 530 if (evt_length >= MAX_EVENT_LENGTH)
526 return EVT_FAILED; 531 return EVT_FAILED;
527
528 if (strpbrk(evt_name, "*?")) { 532 if (strpbrk(evt_name, "*?")) {
529 *strp = evt_name + evt_length; 533 *strp += strlen(sys_name) + evt_length;
530 return parse_multiple_tracepoint_event(sys_name, evt_name, 534 return parse_multiple_tracepoint_event(sys_name, evt_name,
531 flags); 535 flags);
532 } else 536 } else
diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h
index c7d72dce54b2..abc31a1dac1a 100644
--- a/tools/perf/util/parse-options.h
+++ b/tools/perf/util/parse-options.h
@@ -119,6 +119,10 @@ struct option {
119 { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG } 119 { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG }
120#define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \ 120#define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \
121 { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d, .flags = PARSE_OPT_LASTARG_DEFAULT } 121 { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d, .flags = PARSE_OPT_LASTARG_DEFAULT }
122#define OPT_CALLBACK_DEFAULT_NOOPT(s, l, v, a, h, f, d) \
123 { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l),\
124 .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d,\
125 .flags = PARSE_OPT_LASTARG_DEFAULT | PARSE_OPT_NOARG}
122 126
123/* parse_options() will filter out the processed options and leave the 127/* parse_options() will filter out the processed options and leave the
124 * non-option argments in argv[]. 128 * non-option argments in argv[].
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index bba69d455699..beaefc3c1223 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -34,9 +34,9 @@ extern int find_available_vars_at(int fd, struct perf_probe_event *pev,
34 bool externs); 34 bool externs);
35 35
36#include <dwarf.h> 36#include <dwarf.h>
37#include <libdw.h> 37#include <elfutils/libdw.h>
38#include <libdwfl.h> 38#include <elfutils/libdwfl.h>
39#include <version.h> 39#include <elfutils/version.h>
40 40
41struct probe_finder { 41struct probe_finder {
42 struct perf_probe_event *pev; /* Target probe event */ 42 struct perf_probe_event *pev; /* Target probe event */
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index b059dc50cc2d..93680818e244 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * trace-event-perl. Feed perf trace events to an embedded Perl interpreter. 2 * trace-event-perl. Feed perf script events to an embedded Perl interpreter.
3 * 3 *
4 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> 4 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
5 * 5 *
@@ -411,8 +411,8 @@ static int perl_generate_script(const char *outfile)
411 return -1; 411 return -1;
412 } 412 }
413 413
414 fprintf(ofp, "# perf trace event handlers, " 414 fprintf(ofp, "# perf script event handlers, "
415 "generated by perf trace -g perl\n"); 415 "generated by perf script -g perl\n");
416 416
417 fprintf(ofp, "# Licensed under the terms of the GNU GPL" 417 fprintf(ofp, "# Licensed under the terms of the GNU GPL"
418 " License version 2\n\n"); 418 " License version 2\n\n");
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 33a632523743..c6d99334bdfa 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -442,8 +442,8 @@ static int python_generate_script(const char *outfile)
442 fprintf(stderr, "couldn't open %s\n", fname); 442 fprintf(stderr, "couldn't open %s\n", fname);
443 return -1; 443 return -1;
444 } 444 }
445 fprintf(ofp, "# perf trace event handlers, " 445 fprintf(ofp, "# perf script event handlers, "
446 "generated by perf trace -g python\n"); 446 "generated by perf script -g python\n");
447 447
448 fprintf(ofp, "# Licensed under the terms of the GNU GPL" 448 fprintf(ofp, "# Licensed under the terms of the GNU GPL"
449 " License version 2\n\n"); 449 " License version 2\n\n");
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index fa9d652c2dc3..b59abf5aba36 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -65,9 +65,49 @@ out_close:
65 return -1; 65 return -1;
66} 66}
67 67
68static void perf_session__id_header_size(struct perf_session *session)
69{
70 struct sample_data *data;
71 u64 sample_type = session->sample_type;
72 u16 size = 0;
73
74 if (!session->sample_id_all)
75 goto out;
76
77 if (sample_type & PERF_SAMPLE_TID)
78 size += sizeof(data->tid) * 2;
79
80 if (sample_type & PERF_SAMPLE_TIME)
81 size += sizeof(data->time);
82
83 if (sample_type & PERF_SAMPLE_ID)
84 size += sizeof(data->id);
85
86 if (sample_type & PERF_SAMPLE_STREAM_ID)
87 size += sizeof(data->stream_id);
88
89 if (sample_type & PERF_SAMPLE_CPU)
90 size += sizeof(data->cpu) * 2;
91out:
92 session->id_hdr_size = size;
93}
94
95void perf_session__set_sample_id_all(struct perf_session *session, bool value)
96{
97 session->sample_id_all = value;
98 perf_session__id_header_size(session);
99}
100
101void perf_session__set_sample_type(struct perf_session *session, u64 type)
102{
103 session->sample_type = type;
104}
105
68void perf_session__update_sample_type(struct perf_session *self) 106void perf_session__update_sample_type(struct perf_session *self)
69{ 107{
70 self->sample_type = perf_header__sample_type(&self->header); 108 self->sample_type = perf_header__sample_type(&self->header);
109 self->sample_id_all = perf_header__sample_id_all(&self->header);
110 perf_session__id_header_size(self);
71} 111}
72 112
73int perf_session__create_kernel_maps(struct perf_session *self) 113int perf_session__create_kernel_maps(struct perf_session *self)
@@ -101,10 +141,20 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
101 INIT_LIST_HEAD(&self->dead_threads); 141 INIT_LIST_HEAD(&self->dead_threads);
102 self->hists_tree = RB_ROOT; 142 self->hists_tree = RB_ROOT;
103 self->last_match = NULL; 143 self->last_match = NULL;
104 self->mmap_window = 32; 144 /*
145 * On 64bit we can mmap the data file in one go. No need for tiny mmap
146 * slices. On 32bit we use 32MB.
147 */
148#if BITS_PER_LONG == 64
149 self->mmap_window = ULLONG_MAX;
150#else
151 self->mmap_window = 32 * 1024 * 1024ULL;
152#endif
105 self->machines = RB_ROOT; 153 self->machines = RB_ROOT;
106 self->repipe = repipe; 154 self->repipe = repipe;
107 INIT_LIST_HEAD(&self->ordered_samples.samples_head); 155 INIT_LIST_HEAD(&self->ordered_samples.samples);
156 INIT_LIST_HEAD(&self->ordered_samples.sample_cache);
157 INIT_LIST_HEAD(&self->ordered_samples.to_free);
108 machine__init(&self->host_machine, "", HOST_KERNEL_ID); 158 machine__init(&self->host_machine, "", HOST_KERNEL_ID);
109 159
110 if (mode == O_RDONLY) { 160 if (mode == O_RDONLY) {
@@ -230,7 +280,15 @@ struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
230 return syms; 280 return syms;
231} 281}
232 282
283static int process_event_synth_stub(event_t *event __used,
284 struct perf_session *session __used)
285{
286 dump_printf(": unhandled!\n");
287 return 0;
288}
289
233static int process_event_stub(event_t *event __used, 290static int process_event_stub(event_t *event __used,
291 struct sample_data *sample __used,
234 struct perf_session *session __used) 292 struct perf_session *session __used)
235{ 293{
236 dump_printf(": unhandled!\n"); 294 dump_printf(": unhandled!\n");
@@ -262,7 +320,7 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
262 if (handler->exit == NULL) 320 if (handler->exit == NULL)
263 handler->exit = process_event_stub; 321 handler->exit = process_event_stub;
264 if (handler->lost == NULL) 322 if (handler->lost == NULL)
265 handler->lost = process_event_stub; 323 handler->lost = event__process_lost;
266 if (handler->read == NULL) 324 if (handler->read == NULL)
267 handler->read = process_event_stub; 325 handler->read = process_event_stub;
268 if (handler->throttle == NULL) 326 if (handler->throttle == NULL)
@@ -270,13 +328,13 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
270 if (handler->unthrottle == NULL) 328 if (handler->unthrottle == NULL)
271 handler->unthrottle = process_event_stub; 329 handler->unthrottle = process_event_stub;
272 if (handler->attr == NULL) 330 if (handler->attr == NULL)
273 handler->attr = process_event_stub; 331 handler->attr = process_event_synth_stub;
274 if (handler->event_type == NULL) 332 if (handler->event_type == NULL)
275 handler->event_type = process_event_stub; 333 handler->event_type = process_event_synth_stub;
276 if (handler->tracing_data == NULL) 334 if (handler->tracing_data == NULL)
277 handler->tracing_data = process_event_stub; 335 handler->tracing_data = process_event_synth_stub;
278 if (handler->build_id == NULL) 336 if (handler->build_id == NULL)
279 handler->build_id = process_event_stub; 337 handler->build_id = process_event_synth_stub;
280 if (handler->finished_round == NULL) { 338 if (handler->finished_round == NULL) {
281 if (handler->ordered_samples) 339 if (handler->ordered_samples)
282 handler->finished_round = process_finished_round; 340 handler->finished_round = process_finished_round;
@@ -386,33 +444,61 @@ static event__swap_op event__swap_ops[] = {
386 444
387struct sample_queue { 445struct sample_queue {
388 u64 timestamp; 446 u64 timestamp;
389 struct sample_event *event; 447 u64 file_offset;
448 event_t *event;
390 struct list_head list; 449 struct list_head list;
391}; 450};
392 451
452static void perf_session_free_sample_buffers(struct perf_session *session)
453{
454 struct ordered_samples *os = &session->ordered_samples;
455
456 while (!list_empty(&os->to_free)) {
457 struct sample_queue *sq;
458
459 sq = list_entry(os->to_free.next, struct sample_queue, list);
460 list_del(&sq->list);
461 free(sq);
462 }
463}
464
465static int perf_session_deliver_event(struct perf_session *session,
466 event_t *event,
467 struct sample_data *sample,
468 struct perf_event_ops *ops,
469 u64 file_offset);
470
393static void flush_sample_queue(struct perf_session *s, 471static void flush_sample_queue(struct perf_session *s,
394 struct perf_event_ops *ops) 472 struct perf_event_ops *ops)
395{ 473{
396 struct list_head *head = &s->ordered_samples.samples_head; 474 struct ordered_samples *os = &s->ordered_samples;
397 u64 limit = s->ordered_samples.next_flush; 475 struct list_head *head = &os->samples;
398 struct sample_queue *tmp, *iter; 476 struct sample_queue *tmp, *iter;
477 struct sample_data sample;
478 u64 limit = os->next_flush;
479 u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
399 480
400 if (!ops->ordered_samples || !limit) 481 if (!ops->ordered_samples || !limit)
401 return; 482 return;
402 483
403 list_for_each_entry_safe(iter, tmp, head, list) { 484 list_for_each_entry_safe(iter, tmp, head, list) {
404 if (iter->timestamp > limit) 485 if (iter->timestamp > limit)
405 return; 486 break;
406
407 if (iter == s->ordered_samples.last_inserted)
408 s->ordered_samples.last_inserted = NULL;
409 487
410 ops->sample((event_t *)iter->event, s); 488 event__parse_sample(iter->event, s, &sample);
489 perf_session_deliver_event(s, iter->event, &sample, ops,
490 iter->file_offset);
411 491
412 s->ordered_samples.last_flush = iter->timestamp; 492 os->last_flush = iter->timestamp;
413 list_del(&iter->list); 493 list_del(&iter->list);
414 free(iter->event); 494 list_add(&iter->list, &os->sample_cache);
415 free(iter); 495 }
496
497 if (list_empty(head)) {
498 os->last_sample = NULL;
499 } else if (last_ts <= limit) {
500 os->last_sample =
501 list_entry(head->prev, struct sample_queue, list);
416 } 502 }
417} 503}
418 504
@@ -465,178 +551,265 @@ static int process_finished_round(event_t *event __used,
465 return 0; 551 return 0;
466} 552}
467 553
468static void __queue_sample_end(struct sample_queue *new, struct list_head *head)
469{
470 struct sample_queue *iter;
471
472 list_for_each_entry_reverse(iter, head, list) {
473 if (iter->timestamp < new->timestamp) {
474 list_add(&new->list, &iter->list);
475 return;
476 }
477 }
478
479 list_add(&new->list, head);
480}
481
482static void __queue_sample_before(struct sample_queue *new,
483 struct sample_queue *iter,
484 struct list_head *head)
485{
486 list_for_each_entry_continue_reverse(iter, head, list) {
487 if (iter->timestamp < new->timestamp) {
488 list_add(&new->list, &iter->list);
489 return;
490 }
491 }
492
493 list_add(&new->list, head);
494}
495
496static void __queue_sample_after(struct sample_queue *new,
497 struct sample_queue *iter,
498 struct list_head *head)
499{
500 list_for_each_entry_continue(iter, head, list) {
501 if (iter->timestamp > new->timestamp) {
502 list_add_tail(&new->list, &iter->list);
503 return;
504 }
505 }
506 list_add_tail(&new->list, head);
507}
508
509/* The queue is ordered by time */ 554/* The queue is ordered by time */
510static void __queue_sample_event(struct sample_queue *new, 555static void __queue_event(struct sample_queue *new, struct perf_session *s)
511 struct perf_session *s)
512{ 556{
513 struct sample_queue *last_inserted = s->ordered_samples.last_inserted; 557 struct ordered_samples *os = &s->ordered_samples;
514 struct list_head *head = &s->ordered_samples.samples_head; 558 struct sample_queue *sample = os->last_sample;
559 u64 timestamp = new->timestamp;
560 struct list_head *p;
515 561
562 os->last_sample = new;
516 563
517 if (!last_inserted) { 564 if (!sample) {
518 __queue_sample_end(new, head); 565 list_add(&new->list, &os->samples);
566 os->max_timestamp = timestamp;
519 return; 567 return;
520 } 568 }
521 569
522 /* 570 /*
523 * Most of the time the current event has a timestamp 571 * last_sample might point to some random place in the list as it's
524 * very close to the last event inserted, unless we just switched 572 * the last queued event. We expect that the new event is close to
525 * to another event buffer. Having a sorting based on a list and 573 * this.
526 * on the last inserted event that is close to the current one is
527 * probably more efficient than an rbtree based sorting.
528 */ 574 */
529 if (last_inserted->timestamp >= new->timestamp) 575 if (sample->timestamp <= timestamp) {
530 __queue_sample_before(new, last_inserted, head); 576 while (sample->timestamp <= timestamp) {
531 else 577 p = sample->list.next;
532 __queue_sample_after(new, last_inserted, head); 578 if (p == &os->samples) {
579 list_add_tail(&new->list, &os->samples);
580 os->max_timestamp = timestamp;
581 return;
582 }
583 sample = list_entry(p, struct sample_queue, list);
584 }
585 list_add_tail(&new->list, &sample->list);
586 } else {
587 while (sample->timestamp > timestamp) {
588 p = sample->list.prev;
589 if (p == &os->samples) {
590 list_add(&new->list, &os->samples);
591 return;
592 }
593 sample = list_entry(p, struct sample_queue, list);
594 }
595 list_add(&new->list, &sample->list);
596 }
533} 597}
534 598
535static int queue_sample_event(event_t *event, struct sample_data *data, 599#define MAX_SAMPLE_BUFFER (64 * 1024 / sizeof(struct sample_queue))
536 struct perf_session *s) 600
601static int perf_session_queue_event(struct perf_session *s, event_t *event,
602 struct sample_data *data, u64 file_offset)
537{ 603{
604 struct ordered_samples *os = &s->ordered_samples;
605 struct list_head *sc = &os->sample_cache;
538 u64 timestamp = data->time; 606 u64 timestamp = data->time;
539 struct sample_queue *new; 607 struct sample_queue *new;
540 608
609 if (!timestamp || timestamp == ~0ULL)
610 return -ETIME;
541 611
542 if (timestamp < s->ordered_samples.last_flush) { 612 if (timestamp < s->ordered_samples.last_flush) {
543 printf("Warning: Timestamp below last timeslice flush\n"); 613 printf("Warning: Timestamp below last timeslice flush\n");
544 return -EINVAL; 614 return -EINVAL;
545 } 615 }
546 616
547 new = malloc(sizeof(*new)); 617 if (!list_empty(sc)) {
548 if (!new) 618 new = list_entry(sc->next, struct sample_queue, list);
549 return -ENOMEM; 619 list_del(&new->list);
620 } else if (os->sample_buffer) {
621 new = os->sample_buffer + os->sample_buffer_idx;
622 if (++os->sample_buffer_idx == MAX_SAMPLE_BUFFER)
623 os->sample_buffer = NULL;
624 } else {
625 os->sample_buffer = malloc(MAX_SAMPLE_BUFFER * sizeof(*new));
626 if (!os->sample_buffer)
627 return -ENOMEM;
628 list_add(&os->sample_buffer->list, &os->to_free);
629 os->sample_buffer_idx = 2;
630 new = os->sample_buffer + 1;
631 }
550 632
551 new->timestamp = timestamp; 633 new->timestamp = timestamp;
634 new->file_offset = file_offset;
635 new->event = event;
552 636
553 new->event = malloc(event->header.size); 637 __queue_event(new, s);
554 if (!new->event) {
555 free(new);
556 return -ENOMEM;
557 }
558 638
559 memcpy(new->event, event, event->header.size); 639 return 0;
640}
560 641
561 __queue_sample_event(new, s); 642static void callchain__printf(struct sample_data *sample)
562 s->ordered_samples.last_inserted = new; 643{
644 unsigned int i;
563 645
564 if (new->timestamp > s->ordered_samples.max_timestamp) 646 printf("... chain: nr:%Lu\n", sample->callchain->nr);
565 s->ordered_samples.max_timestamp = new->timestamp;
566 647
567 return 0; 648 for (i = 0; i < sample->callchain->nr; i++)
649 printf("..... %2d: %016Lx\n", i, sample->callchain->ips[i]);
568} 650}
569 651
570static int perf_session__process_sample(event_t *event, struct perf_session *s, 652static void perf_session__print_tstamp(struct perf_session *session,
571 struct perf_event_ops *ops) 653 event_t *event,
654 struct sample_data *sample)
572{ 655{
573 struct sample_data data; 656 if (event->header.type != PERF_RECORD_SAMPLE &&
657 !session->sample_id_all) {
658 fputs("-1 -1 ", stdout);
659 return;
660 }
574 661
575 if (!ops->ordered_samples) 662 if ((session->sample_type & PERF_SAMPLE_CPU))
576 return ops->sample(event, s); 663 printf("%u ", sample->cpu);
577 664
578 bzero(&data, sizeof(struct sample_data)); 665 if (session->sample_type & PERF_SAMPLE_TIME)
579 event__parse_sample(event, s->sample_type, &data); 666 printf("%Lu ", sample->time);
667}
580 668
581 queue_sample_event(event, &data, s); 669static void dump_event(struct perf_session *session, event_t *event,
670 u64 file_offset, struct sample_data *sample)
671{
672 if (!dump_trace)
673 return;
582 674
583 return 0; 675 printf("\n%#Lx [%#x]: event: %d\n", file_offset, event->header.size,
676 event->header.type);
677
678 trace_event(event);
679
680 if (sample)
681 perf_session__print_tstamp(session, event, sample);
682
683 printf("%#Lx [%#x]: PERF_RECORD_%s", file_offset, event->header.size,
684 event__get_event_name(event->header.type));
584} 685}
585 686
586static int perf_session__process_event(struct perf_session *self, 687static void dump_sample(struct perf_session *session, event_t *event,
587 event_t *event, 688 struct sample_data *sample)
588 struct perf_event_ops *ops,
589 u64 offset, u64 head)
590{ 689{
591 trace_event(event); 690 if (!dump_trace)
691 return;
592 692
593 if (event->header.type < PERF_RECORD_HEADER_MAX) { 693 printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
594 dump_printf("%#Lx [%#x]: PERF_RECORD_%s", 694 sample->pid, sample->tid, sample->ip, sample->period);
595 offset + head, event->header.size,
596 event__name[event->header.type]);
597 hists__inc_nr_events(&self->hists, event->header.type);
598 }
599 695
600 if (self->header.needs_swap && event__swap_ops[event->header.type]) 696 if (session->sample_type & PERF_SAMPLE_CALLCHAIN)
601 event__swap_ops[event->header.type](event); 697 callchain__printf(sample);
698}
699
700static int perf_session_deliver_event(struct perf_session *session,
701 event_t *event,
702 struct sample_data *sample,
703 struct perf_event_ops *ops,
704 u64 file_offset)
705{
706 dump_event(session, event, file_offset, sample);
602 707
603 switch (event->header.type) { 708 switch (event->header.type) {
604 case PERF_RECORD_SAMPLE: 709 case PERF_RECORD_SAMPLE:
605 return perf_session__process_sample(event, self, ops); 710 dump_sample(session, event, sample);
711 return ops->sample(event, sample, session);
606 case PERF_RECORD_MMAP: 712 case PERF_RECORD_MMAP:
607 return ops->mmap(event, self); 713 return ops->mmap(event, sample, session);
608 case PERF_RECORD_COMM: 714 case PERF_RECORD_COMM:
609 return ops->comm(event, self); 715 return ops->comm(event, sample, session);
610 case PERF_RECORD_FORK: 716 case PERF_RECORD_FORK:
611 return ops->fork(event, self); 717 return ops->fork(event, sample, session);
612 case PERF_RECORD_EXIT: 718 case PERF_RECORD_EXIT:
613 return ops->exit(event, self); 719 return ops->exit(event, sample, session);
614 case PERF_RECORD_LOST: 720 case PERF_RECORD_LOST:
615 return ops->lost(event, self); 721 return ops->lost(event, sample, session);
616 case PERF_RECORD_READ: 722 case PERF_RECORD_READ:
617 return ops->read(event, self); 723 return ops->read(event, sample, session);
618 case PERF_RECORD_THROTTLE: 724 case PERF_RECORD_THROTTLE:
619 return ops->throttle(event, self); 725 return ops->throttle(event, sample, session);
620 case PERF_RECORD_UNTHROTTLE: 726 case PERF_RECORD_UNTHROTTLE:
621 return ops->unthrottle(event, self); 727 return ops->unthrottle(event, sample, session);
728 default:
729 ++session->hists.stats.nr_unknown_events;
730 return -1;
731 }
732}
733
734static int perf_session__preprocess_sample(struct perf_session *session,
735 event_t *event, struct sample_data *sample)
736{
737 if (event->header.type != PERF_RECORD_SAMPLE ||
738 !(session->sample_type & PERF_SAMPLE_CALLCHAIN))
739 return 0;
740
741 if (!ip_callchain__valid(sample->callchain, event)) {
742 pr_debug("call-chain problem with event, skipping it.\n");
743 ++session->hists.stats.nr_invalid_chains;
744 session->hists.stats.total_invalid_chains += sample->period;
745 return -EINVAL;
746 }
747 return 0;
748}
749
750static int perf_session__process_user_event(struct perf_session *session, event_t *event,
751 struct perf_event_ops *ops, u64 file_offset)
752{
753 dump_event(session, event, file_offset, NULL);
754
755 /* These events are processed right away */
756 switch (event->header.type) {
622 case PERF_RECORD_HEADER_ATTR: 757 case PERF_RECORD_HEADER_ATTR:
623 return ops->attr(event, self); 758 return ops->attr(event, session);
624 case PERF_RECORD_HEADER_EVENT_TYPE: 759 case PERF_RECORD_HEADER_EVENT_TYPE:
625 return ops->event_type(event, self); 760 return ops->event_type(event, session);
626 case PERF_RECORD_HEADER_TRACING_DATA: 761 case PERF_RECORD_HEADER_TRACING_DATA:
627 /* setup for reading amidst mmap */ 762 /* setup for reading amidst mmap */
628 lseek(self->fd, offset + head, SEEK_SET); 763 lseek(session->fd, file_offset, SEEK_SET);
629 return ops->tracing_data(event, self); 764 return ops->tracing_data(event, session);
630 case PERF_RECORD_HEADER_BUILD_ID: 765 case PERF_RECORD_HEADER_BUILD_ID:
631 return ops->build_id(event, self); 766 return ops->build_id(event, session);
632 case PERF_RECORD_FINISHED_ROUND: 767 case PERF_RECORD_FINISHED_ROUND:
633 return ops->finished_round(event, self, ops); 768 return ops->finished_round(event, session, ops);
634 default: 769 default:
635 ++self->hists.stats.nr_unknown_events; 770 return -EINVAL;
636 return -1;
637 } 771 }
638} 772}
639 773
774static int perf_session__process_event(struct perf_session *session,
775 event_t *event,
776 struct perf_event_ops *ops,
777 u64 file_offset)
778{
779 struct sample_data sample;
780 int ret;
781
782 if (session->header.needs_swap && event__swap_ops[event->header.type])
783 event__swap_ops[event->header.type](event);
784
785 if (event->header.type >= PERF_RECORD_HEADER_MAX)
786 return -EINVAL;
787
788 hists__inc_nr_events(&session->hists, event->header.type);
789
790 if (event->header.type >= PERF_RECORD_USER_TYPE_START)
791 return perf_session__process_user_event(session, event, ops, file_offset);
792
793 /*
794 * For all kernel events we get the sample data
795 */
796 event__parse_sample(event, session, &sample);
797
798 /* Preprocess sample records - precheck callchains */
799 if (perf_session__preprocess_sample(session, event, &sample))
800 return 0;
801
802 if (ops->ordered_samples) {
803 ret = perf_session_queue_event(session, event, &sample,
804 file_offset);
805 if (ret != -ETIME)
806 return ret;
807 }
808
809 return perf_session_deliver_event(session, event, &sample, ops,
810 file_offset);
811}
812
640void perf_event_header__bswap(struct perf_event_header *self) 813void perf_event_header__bswap(struct perf_event_header *self)
641{ 814{
642 self->type = bswap_32(self->type); 815 self->type = bswap_32(self->type);
@@ -724,8 +897,7 @@ more:
724 } 897 }
725 898
726 if (size == 0 || 899 if (size == 0 ||
727 (skip = perf_session__process_event(self, &event, ops, 900 (skip = perf_session__process_event(self, &event, ops, head)) < 0) {
728 0, head)) < 0) {
729 dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n", 901 dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
730 head, event.header.size, event.header.type); 902 head, event.header.size, event.header.type);
731 /* 903 /*
@@ -740,9 +912,6 @@ more:
740 912
741 head += size; 913 head += size;
742 914
743 dump_printf("\n%#Lx [%#x]: event: %d\n",
744 head, event.header.size, event.header.type);
745
746 if (skip > 0) 915 if (skip > 0)
747 head += skip; 916 head += skip;
748 917
@@ -751,82 +920,90 @@ more:
751done: 920done:
752 err = 0; 921 err = 0;
753out_err: 922out_err:
923 perf_session_free_sample_buffers(self);
754 return err; 924 return err;
755} 925}
756 926
757int __perf_session__process_events(struct perf_session *self, 927int __perf_session__process_events(struct perf_session *session,
758 u64 data_offset, u64 data_size, 928 u64 data_offset, u64 data_size,
759 u64 file_size, struct perf_event_ops *ops) 929 u64 file_size, struct perf_event_ops *ops)
760{ 930{
761 int err, mmap_prot, mmap_flags; 931 u64 head, page_offset, file_offset, file_pos, progress_next;
762 u64 head, shift; 932 int err, mmap_prot, mmap_flags, map_idx = 0;
763 u64 offset = 0; 933 struct ui_progress *progress;
764 size_t page_size; 934 size_t page_size, mmap_size;
935 char *buf, *mmaps[8];
765 event_t *event; 936 event_t *event;
766 uint32_t size; 937 uint32_t size;
767 char *buf;
768 struct ui_progress *progress = ui_progress__new("Processing events...",
769 self->size);
770 if (progress == NULL)
771 return -1;
772 938
773 perf_event_ops__fill_defaults(ops); 939 perf_event_ops__fill_defaults(ops);
774 940
775 page_size = sysconf(_SC_PAGESIZE); 941 page_size = sysconf(_SC_PAGESIZE);
776 942
777 head = data_offset; 943 page_offset = page_size * (data_offset / page_size);
778 shift = page_size * (head / page_size); 944 file_offset = page_offset;
779 offset += shift; 945 head = data_offset - page_offset;
780 head -= shift; 946
947 if (data_offset + data_size < file_size)
948 file_size = data_offset + data_size;
949
950 progress_next = file_size / 16;
951 progress = ui_progress__new("Processing events...", file_size);
952 if (progress == NULL)
953 return -1;
954
955 mmap_size = session->mmap_window;
956 if (mmap_size > file_size)
957 mmap_size = file_size;
958
959 memset(mmaps, 0, sizeof(mmaps));
781 960
782 mmap_prot = PROT_READ; 961 mmap_prot = PROT_READ;
783 mmap_flags = MAP_SHARED; 962 mmap_flags = MAP_SHARED;
784 963
785 if (self->header.needs_swap) { 964 if (session->header.needs_swap) {
786 mmap_prot |= PROT_WRITE; 965 mmap_prot |= PROT_WRITE;
787 mmap_flags = MAP_PRIVATE; 966 mmap_flags = MAP_PRIVATE;
788 } 967 }
789remap: 968remap:
790 buf = mmap(NULL, page_size * self->mmap_window, mmap_prot, 969 buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, session->fd,
791 mmap_flags, self->fd, offset); 970 file_offset);
792 if (buf == MAP_FAILED) { 971 if (buf == MAP_FAILED) {
793 pr_err("failed to mmap file\n"); 972 pr_err("failed to mmap file\n");
794 err = -errno; 973 err = -errno;
795 goto out_err; 974 goto out_err;
796 } 975 }
976 mmaps[map_idx] = buf;
977 map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
978 file_pos = file_offset + head;
797 979
798more: 980more:
799 event = (event_t *)(buf + head); 981 event = (event_t *)(buf + head);
800 ui_progress__update(progress, offset);
801 982
802 if (self->header.needs_swap) 983 if (session->header.needs_swap)
803 perf_event_header__bswap(&event->header); 984 perf_event_header__bswap(&event->header);
804 size = event->header.size; 985 size = event->header.size;
805 if (size == 0) 986 if (size == 0)
806 size = 8; 987 size = 8;
807 988
808 if (head + event->header.size >= page_size * self->mmap_window) { 989 if (head + event->header.size >= mmap_size) {
809 int munmap_ret; 990 if (mmaps[map_idx]) {
810 991 munmap(mmaps[map_idx], mmap_size);
811 shift = page_size * (head / page_size); 992 mmaps[map_idx] = NULL;
812 993 }
813 munmap_ret = munmap(buf, page_size * self->mmap_window);
814 assert(munmap_ret == 0);
815 994
816 offset += shift; 995 page_offset = page_size * (head / page_size);
817 head -= shift; 996 file_offset += page_offset;
997 head -= page_offset;
818 goto remap; 998 goto remap;
819 } 999 }
820 1000
821 size = event->header.size; 1001 size = event->header.size;
822 1002
823 dump_printf("\n%#Lx [%#x]: event: %d\n",
824 offset + head, event->header.size, event->header.type);
825
826 if (size == 0 || 1003 if (size == 0 ||
827 perf_session__process_event(self, event, ops, offset, head) < 0) { 1004 perf_session__process_event(session, event, ops, file_pos) < 0) {
828 dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n", 1005 dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
829 offset + head, event->header.size, 1006 file_offset + head, event->header.size,
830 event->header.type); 1007 event->header.type);
831 /* 1008 /*
832 * assume we lost track of the stream, check alignment, and 1009 * assume we lost track of the stream, check alignment, and
@@ -839,19 +1016,49 @@ more:
839 } 1016 }
840 1017
841 head += size; 1018 head += size;
1019 file_pos += size;
842 1020
843 if (offset + head >= data_offset + data_size) 1021 if (file_pos >= progress_next) {
844 goto done; 1022 progress_next += file_size / 16;
1023 ui_progress__update(progress, file_pos);
1024 }
845 1025
846 if (offset + head < file_size) 1026 if (file_pos < file_size)
847 goto more; 1027 goto more;
848done: 1028
849 err = 0; 1029 err = 0;
850 /* do the final flush for ordered samples */ 1030 /* do the final flush for ordered samples */
851 self->ordered_samples.next_flush = ULLONG_MAX; 1031 session->ordered_samples.next_flush = ULLONG_MAX;
852 flush_sample_queue(self, ops); 1032 flush_sample_queue(session, ops);
853out_err: 1033out_err:
854 ui_progress__delete(progress); 1034 ui_progress__delete(progress);
1035
1036 if (ops->lost == event__process_lost &&
1037 session->hists.stats.total_lost != 0) {
1038 ui__warning("Processed %Lu events and LOST %Lu!\n\n"
1039 "Check IO/CPU overload!\n\n",
1040 session->hists.stats.total_period,
1041 session->hists.stats.total_lost);
1042 }
1043
1044 if (session->hists.stats.nr_unknown_events != 0) {
1045 ui__warning("Found %u unknown events!\n\n"
1046 "Is this an older tool processing a perf.data "
1047 "file generated by a more recent tool?\n\n"
1048 "If that is not the case, consider "
1049 "reporting to linux-kernel@vger.kernel.org.\n\n",
1050 session->hists.stats.nr_unknown_events);
1051 }
1052
1053 if (session->hists.stats.nr_invalid_chains != 0) {
1054 ui__warning("Found invalid callchains!\n\n"
1055 "%u out of %u events were discarded for this reason.\n\n"
1056 "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
1057 session->hists.stats.nr_invalid_chains,
1058 session->hists.stats.nr_events[PERF_RECORD_SAMPLE]);
1059 }
1060
1061 perf_session_free_sample_buffers(session);
855 return err; 1062 return err;
856} 1063}
857 1064
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 9fa0fc2a863f..ac36f99f14af 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -17,8 +17,12 @@ struct ordered_samples {
17 u64 last_flush; 17 u64 last_flush;
18 u64 next_flush; 18 u64 next_flush;
19 u64 max_timestamp; 19 u64 max_timestamp;
20 struct list_head samples_head; 20 struct list_head samples;
21 struct sample_queue *last_inserted; 21 struct list_head sample_cache;
22 struct list_head to_free;
23 struct sample_queue *sample_buffer;
24 struct sample_queue *last_sample;
25 int sample_buffer_idx;
22}; 26};
23 27
24struct perf_session { 28struct perf_session {
@@ -42,6 +46,8 @@ struct perf_session {
42 int fd; 46 int fd;
43 bool fd_pipe; 47 bool fd_pipe;
44 bool repipe; 48 bool repipe;
49 bool sample_id_all;
50 u16 id_hdr_size;
45 int cwdlen; 51 int cwdlen;
46 char *cwd; 52 char *cwd;
47 struct ordered_samples ordered_samples; 53 struct ordered_samples ordered_samples;
@@ -50,7 +56,9 @@ struct perf_session {
50 56
51struct perf_event_ops; 57struct perf_event_ops;
52 58
53typedef int (*event_op)(event_t *self, struct perf_session *session); 59typedef int (*event_op)(event_t *self, struct sample_data *sample,
60 struct perf_session *session);
61typedef int (*event_synth_op)(event_t *self, struct perf_session *session);
54typedef int (*event_op2)(event_t *self, struct perf_session *session, 62typedef int (*event_op2)(event_t *self, struct perf_session *session,
55 struct perf_event_ops *ops); 63 struct perf_event_ops *ops);
56 64
@@ -63,8 +71,8 @@ struct perf_event_ops {
63 lost, 71 lost,
64 read, 72 read,
65 throttle, 73 throttle,
66 unthrottle, 74 unthrottle;
67 attr, 75 event_synth_op attr,
68 event_type, 76 event_type,
69 tracing_data, 77 tracing_data,
70 build_id; 78 build_id;
@@ -100,6 +108,8 @@ int perf_session__create_kernel_maps(struct perf_session *self);
100 108
101int do_read(int fd, void *buf, size_t size); 109int do_read(int fd, void *buf, size_t size);
102void perf_session__update_sample_type(struct perf_session *self); 110void perf_session__update_sample_type(struct perf_session *self);
111void perf_session__set_sample_id_all(struct perf_session *session, bool value);
112void perf_session__set_sample_type(struct perf_session *session, u64 type);
103void perf_session__remove_thread(struct perf_session *self, struct thread *th); 113void perf_session__remove_thread(struct perf_session *self, struct thread *th);
104 114
105static inline 115static inline
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index b62a553cc67d..f44fa541d56e 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -170,7 +170,7 @@ static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf,
170 return repsep_snprintf(bf, size, "%-*s", width, dso_name); 170 return repsep_snprintf(bf, size, "%-*s", width, dso_name);
171 } 171 }
172 172
173 return repsep_snprintf(bf, size, "%*Lx", width, self->ip); 173 return repsep_snprintf(bf, size, "%-*s", width, "[unknown]");
174} 174}
175 175
176/* --sort symbol */ 176/* --sort symbol */
@@ -196,7 +196,7 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
196 196
197 if (verbose) { 197 if (verbose) {
198 char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!'; 198 char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!';
199 ret += repsep_snprintf(bf, size, "%*Lx %c ", 199 ret += repsep_snprintf(bf, size, "%-#*llx %c ",
200 BITS_PER_LONG / 4, self->ip, o); 200 BITS_PER_LONG / 4, self->ip, o);
201 } 201 }
202 202
@@ -205,7 +205,7 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
205 ret += repsep_snprintf(bf + ret, size - ret, "%s", 205 ret += repsep_snprintf(bf + ret, size - ret, "%s",
206 self->ms.sym->name); 206 self->ms.sym->name);
207 else 207 else
208 ret += repsep_snprintf(bf + ret, size - ret, "%*Lx", 208 ret += repsep_snprintf(bf + ret, size - ret, "%-#*llx",
209 BITS_PER_LONG / 4, self->ip); 209 BITS_PER_LONG / 4, self->ip);
210 210
211 return ret; 211 return ret;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index d628c8d1cf5e..ceefa6568def 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -121,7 +121,7 @@ static void __map_groups__fixup_end(struct map_groups *self, enum map_type type)
121 * We still haven't the actual symbols, so guess the 121 * We still haven't the actual symbols, so guess the
122 * last map final address. 122 * last map final address.
123 */ 123 */
124 curr->end = ~0UL; 124 curr->end = ~0ULL;
125} 125}
126 126
127static void map_groups__fixup_end(struct map_groups *self) 127static void map_groups__fixup_end(struct map_groups *self)
@@ -1836,8 +1836,8 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
1836 const char *kallsyms_filename = NULL; 1836 const char *kallsyms_filename = NULL;
1837 char *kallsyms_allocated_filename = NULL; 1837 char *kallsyms_allocated_filename = NULL;
1838 /* 1838 /*
1839 * Step 1: if the user specified a vmlinux filename, use it and only 1839 * Step 1: if the user specified a kallsyms or vmlinux filename, use
1840 * it, reporting errors to the user if it cannot be used. 1840 * it and only it, reporting errors to the user if it cannot be used.
1841 * 1841 *
1842 * For instance, try to analyse an ARM perf.data file _without_ a 1842 * For instance, try to analyse an ARM perf.data file _without_ a
1843 * build-id, or if the user specifies the wrong path to the right 1843 * build-id, or if the user specifies the wrong path to the right
@@ -1850,6 +1850,11 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
1850 * validation in dso__load_vmlinux and will bail out if they don't 1850 * validation in dso__load_vmlinux and will bail out if they don't
1851 * match. 1851 * match.
1852 */ 1852 */
1853 if (symbol_conf.kallsyms_name != NULL) {
1854 kallsyms_filename = symbol_conf.kallsyms_name;
1855 goto do_kallsyms;
1856 }
1857
1853 if (symbol_conf.vmlinux_name != NULL) { 1858 if (symbol_conf.vmlinux_name != NULL) {
1854 err = dso__load_vmlinux(self, map, 1859 err = dso__load_vmlinux(self, map,
1855 symbol_conf.vmlinux_name, filter); 1860 symbol_conf.vmlinux_name, filter);
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 038f2201ee09..12defbe18c13 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -72,6 +72,7 @@ struct symbol_conf {
72 show_cpu_utilization, 72 show_cpu_utilization,
73 initialized; 73 initialized;
74 const char *vmlinux_name, 74 const char *vmlinux_name,
75 *kallsyms_name,
75 *source_prefix, 76 *source_prefix,
76 *field_sep; 77 *field_sep;
77 const char *default_guest_vmlinux_name, 78 const char *default_guest_vmlinux_name,
diff --git a/tools/perf/util/ui/util.c b/tools/perf/util/ui/util.c
index 056c69521a38..7b5a8926624e 100644
--- a/tools/perf/util/ui/util.c
+++ b/tools/perf/util/ui/util.c
@@ -104,10 +104,24 @@ out_destroy_form:
104 return rc; 104 return rc;
105} 105}
106 106
107static const char yes[] = "Yes", no[] = "No"; 107static const char yes[] = "Yes", no[] = "No",
108 warning_str[] = "Warning!", ok[] = "Ok";
108 109
109bool ui__dialog_yesno(const char *msg) 110bool ui__dialog_yesno(const char *msg)
110{ 111{
111 /* newtWinChoice should really be accepting const char pointers... */ 112 /* newtWinChoice should really be accepting const char pointers... */
112 return newtWinChoice(NULL, (char *)yes, (char *)no, (char *)msg) == 1; 113 return newtWinChoice(NULL, (char *)yes, (char *)no, (char *)msg) == 1;
113} 114}
115
116void ui__warning(const char *format, ...)
117{
118 va_list args;
119
120 va_start(args, format);
121 if (use_browser > 0)
122 newtWinMessagev((char *)warning_str, (char *)ok,
123 (char *)format, args);
124 else
125 vfprintf(stderr, format, args);
126 va_end(args);
127}