138 files changed, 3121 insertions, 2699 deletions
@@ -2365,8 +2365,6 @@ E: acme@redhat.com
 W: http://oops.ghostprotocols.net:81/blog/
 P: 1024D/9224DF01 D5DF E3BB E3C8 BCBB F8AD 841A B6AB 4681 9224 DF01
 D: IPX, LLC, DCCP, cyc2x, wl3501_cs, net/ hacks
-S: R. Brasílio Itiberê, 4270/1010 - Água Verde
-S: 80240-060 - Curitiba - Paraná
 S: Brazil
 
 N: Karsten Merker
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 8b61c9360999..316c723a950c 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1579,20 +1579,12 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	nmi_watchdog=	[KNL,BUGS=X86] Debugging features for SMP kernels
 			Format: [panic,][num]
-			Valid num: 0,1,2
+			Valid num: 0
 			0 - turn nmi_watchdog off
-			1 - use the IO-APIC timer for the NMI watchdog
-			2 - use the local APIC for the NMI watchdog using
-			a performance counter. Note: This will use one
-			performance counter and the local APIC's performance
-			vector.
 			When panic is specified, panic when an NMI watchdog
 			timeout occurs.
 			This is useful when you use a panic=... timeout and
 			need the box quickly up again.
-			Instead of 1 and 2 it is possible to use the following
-			symbolic names: lapic and ioapic
-			Example: nmi_watchdog=2 or nmi_watchdog=panic,lapic
 
 	netpoll.carrier_timeout=
 			[NET] Specifies amount of time (in seconds) that
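
With the IO-APIC and LAPIC modes removed above, the only forms the remaining documentation describes are disabling the watchdog and panicking on a timeout. Per the surviving Format line ([panic,][num], valid num: 0), boot lines matching the new text would be, for example:

	nmi_watchdog=0		(turn the NMI watchdog off)
	nmi_watchdog=panic	(panic when an NMI watchdog timeout occurs)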
diff --git a/MAINTAINERS b/MAINTAINERS
index 6a588873cf8d..f1f803c6674a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4612,7 +4612,7 @@ PERFORMANCE EVENTS SUBSYSTEM
 M:	Peter Zijlstra <a.p.zijlstra@chello.nl>
 M:	Paul Mackerras <paulus@samba.org>
 M:	Ingo Molnar <mingo@elte.hu>
-M:	Arnaldo Carvalho de Melo <acme@redhat.com>
+M:	Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
 S:	Supported
 F:	kernel/perf_event*.c
 F:	include/linux/perf_event.h
diff --git a/arch/alpha/include/asm/perf_event.h b/arch/alpha/include/asm/perf_event.h
index fe792ca818f6..5996e7a6757e 100644
--- a/arch/alpha/include/asm/perf_event.h
+++ b/arch/alpha/include/asm/perf_event.h
@@ -1,10 +1,4 @@
 #ifndef __ASM_ALPHA_PERF_EVENT_H
 #define __ASM_ALPHA_PERF_EVENT_H
 
-#ifdef CONFIG_PERF_EVENTS
-extern void init_hw_perf_events(void);
-#else
-static inline void init_hw_perf_events(void) { }
-#endif
-
 #endif /* __ASM_ALPHA_PERF_EVENT_H */
diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c
index 5f77afb88e89..4c8bb374eb0a 100644
--- a/arch/alpha/kernel/irq_alpha.c
+++ b/arch/alpha/kernel/irq_alpha.c
@@ -112,8 +112,6 @@ init_IRQ(void)
 	wrent(entInt, 0);
 
 	alpha_mv.init_irq();
-
-	init_hw_perf_events();
 }
 
 /*
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 1cc49683fb69..90561c45e7d8 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/kdebug.h>
 #include <linux/mutex.h>
+#include <linux/init.h>
 
 #include <asm/hwrpb.h>
 #include <asm/atomic.h>
@@ -863,13 +864,13 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
 /*
  * Init call to initialise performance events at kernel startup.
  */
-void __init init_hw_perf_events(void)
+int __init init_hw_perf_events(void)
 {
 	pr_info("Performance events: ");
 
 	if (!supported_cpu()) {
 		pr_cont("No support for your CPU.\n");
-		return;
+		return 0;
 	}
 
 	pr_cont("Supported CPU type!\n");
@@ -881,6 +882,8 @@ void __init init_hw_perf_events(void)
 	/* And set up PMU specification */
 	alpha_pmu = &ev67_pmu;
 
-	perf_pmu_register(&pmu);
-}
+	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 
+	return 0;
+}
+early_initcall(init_hw_perf_events);
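
The alpha hunk above is representative of the pattern this series applies on every architecture: the init routine now returns int, registers its PMU under the name "cpu" with type PERF_TYPE_RAW via the new three-argument perf_pmu_register(), and hooks itself in through early_initcall() instead of being called from arch IRQ setup. A minimal sketch of that pattern, assuming supported_cpu() and pmu stand in for each architecture's own symbols:

	#include <linux/init.h>
	#include <linux/perf_event.h>

	static struct pmu pmu;			/* arch-specific PMU description */

	int __init init_hw_perf_events(void)
	{
		if (!supported_cpu())		/* arch capability check (name borrowed from alpha) */
			return 0;		/* an absent PMU is not an init error */

		perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
		return 0;
	}
	early_initcall(init_hw_perf_events);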
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 07a50357492a..fdfa4976b0bf 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -3034,11 +3034,11 @@ init_hw_perf_events(void)
 		pr_info("no hardware support available\n");
 	}
 
-	perf_pmu_register(&pmu);
+	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 
 	return 0;
 }
-arch_initcall(init_hw_perf_events);
+early_initcall(init_hw_perf_events);
 
 /*
  * Callchain handling code.
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 5c7c6fc07565..183e0d226669 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -1047,6 +1047,6 @@ init_hw_perf_events(void)
 
 	return 0;
 }
-arch_initcall(init_hw_perf_events);
+early_initcall(init_hw_perf_events);
 
 #endif /* defined(CONFIG_CPU_MIPS32)... */
diff --git a/arch/powerpc/kernel/e500-pmu.c b/arch/powerpc/kernel/e500-pmu.c
index 7c07de0d8943..b150b510510f 100644
--- a/arch/powerpc/kernel/e500-pmu.c
+++ b/arch/powerpc/kernel/e500-pmu.c
@@ -126,4 +126,4 @@ static int init_e500_pmu(void)
 	return register_fsl_emb_pmu(&e500_pmu);
 }
 
-arch_initcall(init_e500_pmu);
+early_initcall(init_e500_pmu);
diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c
index 09d72028f317..2cc5e0301d0b 100644
--- a/arch/powerpc/kernel/mpc7450-pmu.c
+++ b/arch/powerpc/kernel/mpc7450-pmu.c
@@ -414,4 +414,4 @@ static int init_mpc7450_pmu(void)
 	return register_power_pmu(&mpc7450_pmu);
 }
 
-arch_initcall(init_mpc7450_pmu);
+early_initcall(init_mpc7450_pmu);
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 3129c855933c..567480705789 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -1379,7 +1379,7 @@ int register_power_pmu(struct power_pmu *pmu)
 		freeze_events_kernel = MMCR0_FCHV;
 #endif /* CONFIG_PPC64 */
 
-	perf_pmu_register(&power_pmu);
+	perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW);
 	perf_cpu_notifier(power_pmu_notifier);
 
 	return 0;
diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c
index 7ecca59ddf77..4dcf5f831e9d 100644
--- a/arch/powerpc/kernel/perf_event_fsl_emb.c
+++ b/arch/powerpc/kernel/perf_event_fsl_emb.c
@@ -681,7 +681,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
 	pr_info("%s performance monitor hardware support registered\n",
 		pmu->name);
 
-	perf_pmu_register(&fsl_emb_pmu);
+	perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW);
 
 	return 0;
 }
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
index 2a361cdda635..ead8b3c2649e 100644
--- a/arch/powerpc/kernel/power4-pmu.c
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -613,4 +613,4 @@ static int init_power4_pmu(void)
 	return register_power_pmu(&power4_pmu);
 }
 
-arch_initcall(init_power4_pmu);
+early_initcall(init_power4_pmu);
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
index 199de527d411..eca0ac595cb6 100644
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -682,4 +682,4 @@ static int init_power5p_pmu(void)
 	return register_power_pmu(&power5p_pmu);
 }
 
-arch_initcall(init_power5p_pmu);
+early_initcall(init_power5p_pmu);
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
index 98b6a729a9dd..d5ff0f64a5e6 100644
--- a/arch/powerpc/kernel/power5-pmu.c
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -621,4 +621,4 @@ static int init_power5_pmu(void)
 	return register_power_pmu(&power5_pmu);
 }
 
-arch_initcall(init_power5_pmu);
+early_initcall(init_power5_pmu);
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
index 84a607bda8fb..31603927e376 100644
--- a/arch/powerpc/kernel/power6-pmu.c
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -544,4 +544,4 @@ static int init_power6_pmu(void)
 	return register_power_pmu(&power6_pmu);
 }
 
-arch_initcall(init_power6_pmu);
+early_initcall(init_power6_pmu);
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
index 852f7b7f6b40..593740fcb799 100644
--- a/arch/powerpc/kernel/power7-pmu.c
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -369,4 +369,4 @@ static int init_power7_pmu(void)
 	return register_power_pmu(&power7_pmu);
 }
 
-arch_initcall(init_power7_pmu);
+early_initcall(init_power7_pmu);
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
index 3fee685de4df..9a6e093858fe 100644
--- a/arch/powerpc/kernel/ppc970-pmu.c
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -494,4 +494,4 @@ static int init_ppc970_pmu(void)
 	return register_power_pmu(&ppc970_pmu);
 }
 
-arch_initcall(init_ppc970_pmu);
+early_initcall(init_ppc970_pmu);
diff --git a/arch/sh/kernel/cpu/sh4/perf_event.c b/arch/sh/kernel/cpu/sh4/perf_event.c
index dbf3b4bb71fe..748955df018d 100644
--- a/arch/sh/kernel/cpu/sh4/perf_event.c
+++ b/arch/sh/kernel/cpu/sh4/perf_event.c
@@ -250,4 +250,4 @@ static int __init sh7750_pmu_init(void)
 
 	return register_sh_pmu(&sh7750_pmu);
 }
-arch_initcall(sh7750_pmu_init);
+early_initcall(sh7750_pmu_init);
diff --git a/arch/sh/kernel/cpu/sh4a/perf_event.c b/arch/sh/kernel/cpu/sh4a/perf_event.c
index 580276525731..17e6bebfede0 100644
--- a/arch/sh/kernel/cpu/sh4a/perf_event.c
+++ b/arch/sh/kernel/cpu/sh4a/perf_event.c
@@ -284,4 +284,4 @@ static int __init sh4a_pmu_init(void)
 
 	return register_sh_pmu(&sh4a_pmu);
 }
-arch_initcall(sh4a_pmu_init);
+early_initcall(sh4a_pmu_init);
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 5a4b33435650..2ee21a47b5af 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -389,7 +389,7 @@ int __cpuinit register_sh_pmu(struct sh_pmu *_pmu)
 
 	WARN_ON(_pmu->num_events > MAX_HWEVENTS);
 
-	perf_pmu_register(&pmu);
+	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 	perf_cpu_notifier(sh_pmu_notifier);
 	return 0;
 }
diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h
index 6e8bfa1786da..4d3dbe3703e9 100644
--- a/arch/sparc/include/asm/perf_event.h
+++ b/arch/sparc/include/asm/perf_event.h
@@ -4,8 +4,6 @@
 #ifdef CONFIG_PERF_EVENTS
 #include <asm/ptrace.h>
 
-extern void init_hw_perf_events(void);
-
 #define perf_arch_fetch_caller_regs(regs, ip)		\
 do {							\
 	unsigned long _pstate, _asi, _pil, _i7, _fp;	\
@@ -26,8 +24,6 @@ do {							\
 	(regs)->u_regs[UREG_I6] = _fp;			\
 	(regs)->u_regs[UREG_I7] = _i7;			\
 } while (0)
-#else
-static inline void init_hw_perf_events(void)	{ }
 #endif
 
 #endif
diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c
index a4bd7ba74c89..300f810142f5 100644
--- a/arch/sparc/kernel/nmi.c
+++ b/arch/sparc/kernel/nmi.c
@@ -270,8 +270,6 @@ int __init nmi_init(void)
 			atomic_set(&nmi_active, -1);
 		}
 	}
-	if (!err)
-		init_hw_perf_events();
 
 	return err;
 }
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 0d6deb55a2ae..760578687e7c 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1307,20 +1307,23 @@ static bool __init supported_pmu(void)
 	return false;
 }
 
-void __init init_hw_perf_events(void)
+int __init init_hw_perf_events(void)
 {
 	pr_info("Performance events: ");
 
 	if (!supported_pmu()) {
 		pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
-		return;
+		return 0;
 	}
 
 	pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);
 
-	perf_pmu_register(&pmu);
+	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 	register_die_notifier(&perf_event_nmi_notifier);
+
+	return 0;
 }
+early_initcall(init_hw_perf_events);
 
 void perf_callchain_kernel(struct perf_callchain_entry *entry,
 			   struct pt_regs *regs)
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 76561d20ea2f..4a2adaa9aefc 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -180,8 +180,15 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len);
  * On the local CPU you need to be protected again NMI or MCE handlers seeing an
  * inconsistent instruction while you patch.
  */
+struct text_poke_param {
+	void *addr;
+	const void *opcode;
+	size_t len;
+};
+
 extern void *text_poke(void *addr, const void *opcode, size_t len);
 extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
+extern void text_poke_smp_batch(struct text_poke_param *params, int n);
 
 #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
 #define IDEAL_NOP_SIZE_5 5
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 5bdfca86581b..f23eb2528464 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -28,7 +28,7 @@ extern void die(const char *, struct pt_regs *,long);
 extern int __must_check __die(const char *, struct pt_regs *, long);
 extern void show_registers(struct pt_regs *regs);
 extern void show_trace(struct task_struct *t, struct pt_regs *regs,
-		       unsigned long *sp, unsigned long bp);
+		       unsigned long *sp);
 extern void __show_regs(struct pt_regs *regs, int all);
 extern void show_regs(struct pt_regs *regs);
 extern unsigned long oops_begin(void);
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 932f0f86b4b7..3545838cddeb 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -7,39 +7,13 @@
 
 #ifdef ARCH_HAS_NMI_WATCHDOG
 
-/**
- * do_nmi_callback
- *
- * Check to see if a callback exists and execute it.  Return 1
- * if the handler exists and was handled successfully.
- */
-int do_nmi_callback(struct pt_regs *regs, int cpu);
-
 extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
-extern int check_nmi_watchdog(void);
-#if !defined(CONFIG_LOCKUP_DETECTOR)
-extern int nmi_watchdog_enabled;
-#endif
 extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
 extern int reserve_perfctr_nmi(unsigned int);
 extern void release_perfctr_nmi(unsigned int);
 extern int reserve_evntsel_nmi(unsigned int);
 extern void release_evntsel_nmi(unsigned int);
 
-extern void setup_apic_nmi_watchdog(void *);
-extern void stop_apic_nmi_watchdog(void *);
-extern void disable_timer_nmi_watchdog(void);
-extern void enable_timer_nmi_watchdog(void);
-extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason);
-extern void cpu_nmi_set_wd_enabled(void);
-
-extern atomic_t nmi_active;
-extern unsigned int nmi_watchdog;
-#define NMI_NONE	0
-#define NMI_IO_APIC	1
-#define NMI_LOCAL_APIC	2
-#define NMI_INVALID	3
-
 struct ctl_table;
 extern int proc_nmi_enabled(struct ctl_table *, int ,
 			void __user *, size_t *, loff_t *);
@@ -47,33 +21,8 @@ extern int unknown_nmi_panic
 
 void arch_trigger_all_cpu_backtrace(void);
 #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
-
-static inline void localise_nmi_watchdog(void)
-{
-	if (nmi_watchdog == NMI_IO_APIC)
-		nmi_watchdog = NMI_LOCAL_APIC;
-}
-
-/* check if nmi_watchdog is active (ie was specified at boot) */
-static inline int nmi_watchdog_active(void)
-{
-	/*
-	 * actually it should be:
-	 *	return (nmi_watchdog == NMI_LOCAL_APIC ||
-	 *		nmi_watchdog == NMI_IO_APIC)
-	 * but since they are power of two we could use a
-	 * cheaper way --cvg
-	 */
-	return nmi_watchdog & (NMI_LOCAL_APIC | NMI_IO_APIC);
-}
 #endif
 
-void lapic_watchdog_stop(void);
-int lapic_watchdog_init(unsigned nmi_hz);
-int lapic_wd_event(unsigned nmi_hz);
-unsigned lapic_adjust_nmi_hz(unsigned hz);
-void disable_lapic_nmi_watchdog(void);
-void enable_lapic_nmi_watchdog(void);
 void stop_nmi(void);
 void restart_nmi(void);
 
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 550e26b1dbb3..d9d4dae305f6 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -125,7 +125,6 @@ union cpuid10_edx {
 #define IBS_OP_MAX_CNT_EXT	0x007FFFFFULL	/* not a register bit mask */
 
 #ifdef CONFIG_PERF_EVENTS
-extern void init_hw_perf_events(void);
 extern void perf_events_lapic_init(void);
 
 #define PERF_EVENT_INDEX_OFFSET			0
@@ -156,7 +155,6 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
 }
 
 #else
-static inline void init_hw_perf_events(void)		{ }
 static inline void perf_events_lapic_init(void)	{ }
 #endif
 
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index a70cd216be5d..295e2ff18a6a 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -744,14 +744,6 @@ enum P4_ESCR_EMASKS {
 };
 
 /*
- * P4 PEBS specifics (Replay Event only)
- *
- * Format (bits):
- *   0-6: metric from P4_PEBS_METRIC enum
- *    7 : reserved
- *    8 : reserved
- * 9-11 : reserved
- *
  * Note we have UOP and PEBS bits reserved for now
  * just in case if we will need them once
  */
@@ -788,5 +780,60 @@ enum P4_PEBS_METRIC {
 	P4_PEBS_METRIC__max
 };
 
+/*
+ * Notes on internal configuration of ESCR+CCCR tuples
+ *
+ * Since P4 has quite the different architecture of
+ * performance registers in compare with "architectural"
+ * once and we have on 64 bits to keep configuration
+ * of performance event, the following trick is used.
+ *
+ * 1) Since both ESCR and CCCR registers have only low
+ *    32 bits valuable, we pack them into a single 64 bit
+ *    configuration. Low 32 bits of such config correspond
+ *    to low 32 bits of CCCR register and high 32 bits
+ *    correspond to low 32 bits of ESCR register.
+ *
+ * 2) The meaning of every bit of such config field can
+ *    be found in Intel SDM but it should be noted that
+ *    we "borrow" some reserved bits for own usage and
+ *    clean them or set to a proper value when we do
+ *    a real write to hardware registers.
+ *
+ * 3) The format of bits of config is the following
+ *    and should be either 0 or set to some predefined
+ *    values:
+ *
+ *    Low 32 bits
+ *    -----------
+ *      0-6: P4_PEBS_METRIC enum
+ *     7-11: reserved
+ *       12: reserved (Enable)
+ *    13-15: reserved (ESCR select)
+ *    16-17: Active Thread
+ *       18: Compare
+ *       19: Complement
+ *    20-23: Threshold
+ *       24: Edge
+ *       25: reserved (FORCE_OVF)
+ *       26: reserved (OVF_PMI_T0)
+ *       27: reserved (OVF_PMI_T1)
+ *    28-29: reserved
+ *       30: reserved (Cascade)
+ *       31: reserved (OVF)
+ *
+ *    High 32 bits
+ *    ------------
+ *        0: reserved (T1_USR)
+ *        1: reserved (T1_OS)
+ *        2: reserved (T0_USR)
+ *        3: reserved (T0_OS)
+ *        4: Tag Enable
+ *      5-8: Tag Value
+ *     9-24: Event Mask (may use P4_ESCR_EMASK_BIT helper)
+ *    25-30: enum P4_EVENTS
+ *       31: reserved (HT thread)
+ */
+
 #endif /* PERF_EVENT_P4_H */
 
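
The comment block added above fully specifies how a P4 event config packs the two 32-bit register images into one u64: CCCR in bits 0-31, ESCR in bits 32-63. A minimal illustration of exactly that packing, with helper names that are hypothetical (they are not part of this patch):

	#include <linux/types.h>

	/* Illustration only: combine the low 32 bits of ESCR and CCCR into
	 * one 64-bit config word, as the comment above describes. */
	static inline u64 p4_pack_example(u32 escr, u32 cccr)
	{
		return ((u64)escr << 32) | (u64)cccr;
	}

	static inline u32 p4_unpack_cccr_example(u64 config)
	{
		return (u32)config;		/* low 32 bits: CCCR image */
	}

	static inline u32 p4_unpack_escr_example(u64 config)
	{
		return (u32)(config >> 32);	/* high 32 bits: ESCR image */
	}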
diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h
index 1def60114906..6c22bf353f26 100644
--- a/arch/x86/include/asm/smpboot_hooks.h
+++ b/arch/x86/include/asm/smpboot_hooks.h
@@ -48,7 +48,6 @@ static inline void __init smpboot_setup_io_apic(void)
 		setup_IO_APIC();
 	else {
 		nr_ioapics = 0;
-		localise_nmi_watchdog();
 	}
 #endif
 }
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 2b16a2ad23dc..52b5c7ed3608 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -7,6 +7,7 @@
 #define _ASM_X86_STACKTRACE_H
 
 #include <linux/uaccess.h>
+#include <linux/ptrace.h>
 
 extern int kstack_depth_to_print;
 
@@ -46,7 +47,7 @@
 };
 
 void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp,
+		unsigned long *stack,
 		const struct stacktrace_ops *ops, void *data);
 
 #ifdef CONFIG_X86_32
@@ -57,13 +58,39 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
 #endif
 
+#ifdef CONFIG_FRAME_POINTER
+static inline unsigned long
+stack_frame(struct task_struct *task, struct pt_regs *regs)
+{
+	unsigned long bp;
+
+	if (regs)
+		return regs->bp;
+
+	if (task == current) {
+		/* Grab bp right from our regs */
+		get_bp(bp);
+		return bp;
+	}
+
+	/* bp is the last reg pushed by switch_to */
+	return *(unsigned long *)task->thread.sp;
+}
+#else
+static inline unsigned long
+stack_frame(struct task_struct *task, struct pt_regs *regs)
+{
+	return 0;
+}
+#endif
+
 extern void
 show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		   unsigned long *stack, unsigned long bp, char *log_lvl);
+		   unsigned long *stack, char *log_lvl);
 
 extern void
 show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		   unsigned long *sp, unsigned long bp, char *log_lvl);
+		   unsigned long *sp, char *log_lvl);
 
 extern unsigned int code_bytes;
 
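
With stack_frame() in place, the bp argument disappears from the prototypes above: a stack walker derives the starting frame pointer itself, from regs->bp when a register snapshot exists, from the live %rbp for current, or from the bp that switch_to saved on a sleeping task's stack. A minimal sketch of a caller under the new prototypes (the function and its use of pr_debug are hypothetical, not from this patch):

	/* Hypothetical walker: no bp is threaded through as a parameter. */
	static void sketch_backtrace(struct task_struct *task, struct pt_regs *regs)
	{
		unsigned long bp = stack_frame(task, regs);	/* 0 if !CONFIG_FRAME_POINTER */

		pr_debug("frame walk would start at bp=%lx\n", bp);
		show_trace_log_lvl(task, regs, NULL, "");	/* new 4-argument form */
	}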
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 5469630b27f5..fa7b9176b76c 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -10,12 +10,6 @@
 unsigned long long native_sched_clock(void);
 extern int recalibrate_cpu_khz(void);
 
-#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
-extern int timer_ack;
-#else
-# define timer_ack (0)
-#endif
-
 extern int no_timer_check;
 
 /* Accelerators for sched_clock()
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5079f24c955a..553d0b0d639b 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -591,17 +591,21 @@ static atomic_t stop_machine_first;
 static int wrote_text;
 
 struct text_poke_params {
-	void *addr;
-	const void *opcode;
-	size_t len;
+	struct text_poke_param *params;
+	int nparams;
 };
 
 static int __kprobes stop_machine_text_poke(void *data)
 {
 	struct text_poke_params *tpp = data;
+	struct text_poke_param *p;
+	int i;
 
 	if (atomic_dec_and_test(&stop_machine_first)) {
-		text_poke(tpp->addr, tpp->opcode, tpp->len);
+		for (i = 0; i < tpp->nparams; i++) {
+			p = &tpp->params[i];
+			text_poke(p->addr, p->opcode, p->len);
+		}
 		smp_wmb();	/* Make sure other cpus see that this has run */
 		wrote_text = 1;
 	} else {
@@ -610,8 +614,12 @@ static int __kprobes stop_machine_text_poke(void *data)
 		smp_mb();	/* Load wrote_text before following execution */
 	}
 
-	flush_icache_range((unsigned long)tpp->addr,
-			   (unsigned long)tpp->addr + tpp->len);
+	for (i = 0; i < tpp->nparams; i++) {
+		p = &tpp->params[i];
+		flush_icache_range((unsigned long)p->addr,
+				   (unsigned long)p->addr + p->len);
+	}
+
 	return 0;
 }
 
@@ -631,10 +639,13 @@ static int __kprobes stop_machine_text_poke(void *data)
 void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
 {
 	struct text_poke_params tpp;
+	struct text_poke_param p;
 
-	tpp.addr = addr;
-	tpp.opcode = opcode;
-	tpp.len = len;
+	p.addr = addr;
+	p.opcode = opcode;
+	p.len = len;
+	tpp.params = &p;
+	tpp.nparams = 1;
 	atomic_set(&stop_machine_first, 1);
 	wrote_text = 0;
 	/* Use __stop_machine() because the caller already got online_cpus. */
@@ -642,6 +653,26 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
 	return addr;
 }
 
+/**
+ * text_poke_smp_batch - Update instructions on a live kernel on SMP
+ * @params: an array of text_poke parameters
+ * @n: the number of elements in params.
+ *
+ * Modify multi-byte instruction by using stop_machine() on SMP. Since the
+ * stop_machine() is heavy task, it is better to aggregate text_poke requests
+ * and do it once if possible.
+ *
+ * Note: Must be called under get_online_cpus() and text_mutex.
+ */
+void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
+{
+	struct text_poke_params tpp = {.params = params, .nparams = n};
+
+	atomic_set(&stop_machine_first, 1);
+	wrote_text = 0;
+	stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+}
+
 #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
 
 #ifdef CONFIG_X86_64
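
The kernel-doc above states the point of the batch API: aggregate patch sites so the heavy stop_machine() round-trip happens once rather than once per instruction. A usage sketch under those constraints (MAX_SITES and the caller are hypothetical; the locking requirements are the ones named in the kernel-doc):

	#include <asm/alternative.h>

	#define MAX_SITES 16
	static struct text_poke_param params[MAX_SITES];

	/* Sketch: patch up to MAX_SITES locations with the same opcode in a
	 * single stop_machine() invocation.  Caller must hold text_mutex and
	 * be inside get_online_cpus(), per the kernel-doc above. */
	static void patch_sites(void **addrs, const void *opcode, size_t len, int n)
	{
		int i;

		for (i = 0; i < n && i < MAX_SITES; i++) {
			params[i].addr   = addrs[i];
			params[i].opcode = opcode;
			params[i].len    = len;
		}
		text_poke_smp_batch(params, i);
	}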
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 910f20b457c4..3966b564ea47 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -3,10 +3,7 @@
 #
 
 obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o probe_$(BITS).o ipi.o
-ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y)
-obj-$(CONFIG_X86_LOCAL_APIC)	+= nmi.o
-endif
-obj-$(CONFIG_HARDLOCKUP_DETECTOR)	+= hw_nmi.o
+obj-y				+= hw_nmi.o
 
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
 obj-$(CONFIG_SMP)		+= ipi.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 78218135b48e..fb7657822aad 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -31,7 +31,6 @@
 #include <linux/init.h>
 #include <linux/cpu.h>
 #include <linux/dmi.h>
-#include <linux/nmi.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
 
@@ -799,11 +798,7 @@ void __init setup_boot_APIC_clock(void)
 	 * PIT/HPET going.  Otherwise register lapic as a dummy
 	 * device.
 	 */
-	if (nmi_watchdog != NMI_IO_APIC)
-		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
-	else
-		pr_warning("APIC timer registered as dummy,"
-			" due to nmi_watchdog=%d!\n", nmi_watchdog);
+	lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
 
 	/* Setup the lapic or request the broadcast */
 	setup_APIC_timer();
@@ -1387,7 +1382,6 @@ void __cpuinit end_local_APIC_setup(void)
 	}
 #endif
 
-	setup_apic_nmi_watchdog(NULL);
 	apic_pm_activate();
 
 	/*
@@ -1758,17 +1752,10 @@ int __init APIC_init_uniprocessor(void)
 		setup_IO_APIC();
 	else {
 		nr_ioapics = 0;
-		localise_nmi_watchdog();
 	}
-#else
-	localise_nmi_watchdog();
 #endif
 
 	x86_init.timers.setup_percpu_clockev();
-#ifdef CONFIG_X86_64
-	check_nmi_watchdog();
-#endif
-
 	return 0;
 }
 
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 62f6e1e55b90..93da91df5b38 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -17,13 +17,24 @@
 #include <linux/nmi.h>
 #include <linux/module.h>
 
+#ifdef ARCH_HAS_NMI_WATCHDOG
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
 u64 hw_nmi_get_sample_period(void)
 {
 	return (u64)(cpu_khz) * 1000 * 60;
 }
+#endif
 
-#ifdef ARCH_HAS_NMI_WATCHDOG
+#ifndef CONFIG_HARDLOCKUP_DETECTOR
+void touch_nmi_watchdog(void)
+{
+	touch_softlockup_watchdog();
+}
+EXPORT_SYMBOL(touch_nmi_watchdog);
+#endif
+#endif
 
+#ifdef arch_trigger_all_cpu_backtrace
 /* For reliability, we're prepared to waste bits here. */
 static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
 
@@ -91,18 +102,3 @@ static int __init register_trigger_all_cpu_backtrace(void)
 }
 early_initcall(register_trigger_all_cpu_backtrace);
 #endif
-
-/* STUB calls to mimic old nmi_watchdog behaviour */
-#if defined(CONFIG_X86_LOCAL_APIC)
-unsigned int nmi_watchdog = NMI_NONE;
-EXPORT_SYMBOL(nmi_watchdog);
-void acpi_nmi_enable(void) { return; }
-void acpi_nmi_disable(void) { return; }
-#endif
-atomic_t nmi_active = ATOMIC_INIT(0);	/* oprofile uses this */
-EXPORT_SYMBOL(nmi_active);
-int unknown_nmi_panic;
-void cpu_nmi_set_wd_enabled(void) { return; }
-void stop_apic_nmi_watchdog(void *unused) { return; }
-void setup_apic_nmi_watchdog(void *unused) { return; }
-int __init check_nmi_watchdog(void) { return 0; }
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index fadcd743a74f..16c2db8750a2 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -54,7 +54,6 @@
 #include <asm/dma.h>
 #include <asm/timer.h>
 #include <asm/i8259.h>
-#include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
 #include <asm/setup.h>
@@ -2642,24 +2641,6 @@ static void lapic_register_intr(int irq)
 				      "edge");
 }
 
-static void __init setup_nmi(void)
-{
-	/*
-	 * Dirty trick to enable the NMI watchdog ...
-	 * We put the 8259A master into AEOI mode and
-	 * unmask on all local APICs LVT0 as NMI.
-	 *
-	 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
-	 * is from Maciej W. Rozycki - so we do not have to EOI from
-	 * the NMI handler or the timer interrupt.
-	 */
-	apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
-
-	enable_NMI_through_LVT0();
-
-	apic_printk(APIC_VERBOSE, " done.\n");
-}
-
 /*
  * This looks a bit hackish but it's about the only one way of sending
  * a few INTA cycles to 8259As and any associated glue logic.  ICR does
@@ -2765,15 +2746,6 @@ static inline void __init check_timer(void)
 	 */
 	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
 	legacy_pic->init(1);
-#ifdef CONFIG_X86_32
-	{
-		unsigned int ver;
-
-		ver = apic_read(APIC_LVR);
-		ver = GET_APIC_VERSION(ver);
-		timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
-	}
-#endif
 
 	pin1  = find_isa_irq_pin(0, mp_INT);
 	apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2821,10 +2793,6 @@ static inline void __init check_timer(void)
 			unmask_ioapic(cfg);
 	}
 	if (timer_irq_works()) {
-		if (nmi_watchdog == NMI_IO_APIC) {
-			setup_nmi();
-			legacy_pic->unmask(0);
-		}
 		if (disable_timer_pin_1 > 0)
 			clear_IO_APIC_pin(0, pin1);
 		goto out;
@@ -2850,11 +2818,6 @@ static inline void __init check_timer(void)
 		if (timer_irq_works()) {
 			apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
 			timer_through_8259 = 1;
-			if (nmi_watchdog == NMI_IO_APIC) {
-				legacy_pic->mask(0);
-				setup_nmi();
-				legacy_pic->unmask(0);
-			}
 			goto out;
 		}
 		/*
@@ -2866,15 +2829,6 @@ static inline void __init check_timer(void)
 		apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
 	}
 
-	if (nmi_watchdog == NMI_IO_APIC) {
-		apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
-			    "through the IO-APIC - disabling NMI Watchdog!\n");
-		nmi_watchdog = NMI_NONE;
-	}
-#ifdef CONFIG_X86_32
-	timer_ack = 0;
-#endif
-
 	apic_printk(APIC_QUIET, KERN_INFO
 		    "...trying to set up timer as Virtual Wire IRQ...\n");
 
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c deleted file mode 100644 index c90041ccb742..000000000000 --- a/arch/x86/kernel/apic/nmi.c +++ /dev/null | |||
@@ -1,567 +0,0 @@ | |||
1 | /* | ||
2 | * NMI watchdog support on APIC systems | ||
3 | * | ||
4 | * Started by Ingo Molnar <mingo@redhat.com> | ||
5 | * | ||
6 | * Fixes: | ||
7 | * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. | ||
8 | * Mikael Pettersson : Power Management for local APIC NMI watchdog. | ||
9 | * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog. | ||
10 | * Pavel Machek and | ||
11 | * Mikael Pettersson : PM converted to driver model. Disable/enable API. | ||
12 | */ | ||
13 | |||
14 | #include <asm/apic.h> | ||
15 | |||
16 | #include <linux/nmi.h> | ||
17 | #include <linux/mm.h> | ||
18 | #include <linux/delay.h> | ||
19 | #include <linux/interrupt.h> | ||
20 | #include <linux/module.h> | ||
21 | #include <linux/slab.h> | ||
22 | #include <linux/sysdev.h> | ||
23 | #include <linux/sysctl.h> | ||
24 | #include <linux/percpu.h> | ||
25 | #include <linux/kprobes.h> | ||
26 | #include <linux/cpumask.h> | ||
27 | #include <linux/kernel_stat.h> | ||
28 | #include <linux/kdebug.h> | ||
29 | #include <linux/smp.h> | ||
30 | |||
31 | #include <asm/i8259.h> | ||
32 | #include <asm/io_apic.h> | ||
33 | #include <asm/proto.h> | ||
34 | #include <asm/timer.h> | ||
35 | |||
36 | #include <asm/mce.h> | ||
37 | |||
38 | #include <asm/mach_traps.h> | ||
39 | |||
40 | int unknown_nmi_panic; | ||
41 | int nmi_watchdog_enabled; | ||
42 | |||
43 | /* For reliability, we're prepared to waste bits here. */ | ||
44 | static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; | ||
45 | |||
46 | /* nmi_active: | ||
47 | * >0: the lapic NMI watchdog is active, but can be disabled | ||
48 | * <0: the lapic NMI watchdog has not been set up, and cannot | ||
49 | * be enabled | ||
50 | * 0: the lapic NMI watchdog is disabled, but can be enabled | ||
51 | */ | ||
52 | atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ | ||
53 | EXPORT_SYMBOL(nmi_active); | ||
54 | |||
55 | unsigned int nmi_watchdog = NMI_NONE; | ||
56 | EXPORT_SYMBOL(nmi_watchdog); | ||
57 | |||
58 | static int panic_on_timeout; | ||
59 | |||
60 | static unsigned int nmi_hz = HZ; | ||
61 | static DEFINE_PER_CPU(short, wd_enabled); | ||
62 | static int endflag __initdata; | ||
63 | |||
64 | static inline unsigned int get_nmi_count(int cpu) | ||
65 | { | ||
66 | return per_cpu(irq_stat, cpu).__nmi_count; | ||
67 | } | ||
68 | |||
69 | static inline int mce_in_progress(void) | ||
70 | { | ||
71 | #if defined(CONFIG_X86_MCE) | ||
72 | return atomic_read(&mce_entry) > 0; | ||
73 | #endif | ||
74 | return 0; | ||
75 | } | ||
76 | |||
77 | /* | ||
78 | * Take the local apic timer and PIT/HPET into account. We don't | ||
79 | * know which one is active, when we have highres/dyntick on | ||
80 | */ | ||
81 | static inline unsigned int get_timer_irqs(int cpu) | ||
82 | { | ||
83 | return per_cpu(irq_stat, cpu).apic_timer_irqs + | ||
84 | per_cpu(irq_stat, cpu).irq0_irqs; | ||
85 | } | ||
86 | |||
87 | #ifdef CONFIG_SMP | ||
88 | /* | ||
89 | * The performance counters used by NMI_LOCAL_APIC don't trigger when | ||
90 | * the CPU is idle. To make sure the NMI watchdog really ticks on all | ||
91 | * CPUs during the test make them busy. | ||
92 | */ | ||
93 | static __init void nmi_cpu_busy(void *data) | ||
94 | { | ||
95 | local_irq_enable_in_hardirq(); | ||
96 | /* | ||
97 | * Intentionally don't use cpu_relax here. This is | ||
98 | * to make sure that the performance counter really ticks, | ||
99 | * even if there is a simulator or similar that catches the | ||
100 | * pause instruction. On a real HT machine this is fine because | ||
101 | * all other CPUs are busy with "useless" delay loops and don't | ||
102 | * care if they get somewhat less cycles. | ||
103 | */ | ||
104 | while (endflag == 0) | ||
105 | mb(); | ||
106 | } | ||
107 | #endif | ||
108 | |||
109 | static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count) | ||
110 | { | ||
111 | printk(KERN_CONT "\n"); | ||
112 | |||
113 | printk(KERN_WARNING | ||
114 | "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n", | ||
115 | cpu, prev_nmi_count[cpu], get_nmi_count(cpu)); | ||
116 | |||
117 | printk(KERN_WARNING | ||
118 | "Please report this to bugzilla.kernel.org,\n"); | ||
119 | printk(KERN_WARNING | ||
120 | "and attach the output of the 'dmesg' command.\n"); | ||
121 | |||
122 | per_cpu(wd_enabled, cpu) = 0; | ||
123 | atomic_dec(&nmi_active); | ||
124 | } | ||
125 | |||
126 | static void __acpi_nmi_disable(void *__unused) | ||
127 | { | ||
128 | apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); | ||
129 | } | ||
130 | |||
131 | int __init check_nmi_watchdog(void) | ||
132 | { | ||
133 | unsigned int *prev_nmi_count; | ||
134 | int cpu; | ||
135 | |||
136 | if (!nmi_watchdog_active() || !atomic_read(&nmi_active)) | ||
137 | return 0; | ||
138 | |||
139 | prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL); | ||
140 | if (!prev_nmi_count) | ||
141 | goto error; | ||
142 | |||
143 | printk(KERN_INFO "Testing NMI watchdog ... "); | ||
144 | |||
145 | #ifdef CONFIG_SMP | ||
146 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
147 | smp_call_function(nmi_cpu_busy, (void *)&endflag, 0); | ||
148 | #endif | ||
149 | |||
150 | for_each_possible_cpu(cpu) | ||
151 | prev_nmi_count[cpu] = get_nmi_count(cpu); | ||
152 | local_irq_enable(); | ||
153 | mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */ | ||
154 | |||
155 | for_each_online_cpu(cpu) { | ||
156 | if (!per_cpu(wd_enabled, cpu)) | ||
157 | continue; | ||
158 | if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) | ||
159 | report_broken_nmi(cpu, prev_nmi_count); | ||
160 | } | ||
161 | endflag = 1; | ||
162 | if (!atomic_read(&nmi_active)) { | ||
163 | kfree(prev_nmi_count); | ||
164 | atomic_set(&nmi_active, -1); | ||
165 | goto error; | ||
166 | } | ||
167 | printk("OK.\n"); | ||
168 | |||
169 | /* | ||
170 | * now that we know it works we can reduce NMI frequency to | ||
171 | * something more reasonable; makes a difference in some configs | ||
172 | */ | ||
173 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
174 | nmi_hz = lapic_adjust_nmi_hz(1); | ||
175 | |||
176 | kfree(prev_nmi_count); | ||
177 | return 0; | ||
178 | error: | ||
179 | if (nmi_watchdog == NMI_IO_APIC) { | ||
180 | if (!timer_through_8259) | ||
181 | legacy_pic->mask(0); | ||
182 | on_each_cpu(__acpi_nmi_disable, NULL, 1); | ||
183 | } | ||
184 | |||
185 | #ifdef CONFIG_X86_32 | ||
186 | timer_ack = 0; | ||
187 | #endif | ||
188 | return -1; | ||
189 | } | ||
190 | |||
191 | static int __init setup_nmi_watchdog(char *str) | ||
192 | { | ||
193 | unsigned int nmi; | ||
194 | |||
195 | if (!strncmp(str, "panic", 5)) { | ||
196 | panic_on_timeout = 1; | ||
197 | str = strchr(str, ','); | ||
198 | if (!str) | ||
199 | return 1; | ||
200 | ++str; | ||
201 | } | ||
202 | |||
203 | if (!strncmp(str, "lapic", 5)) | ||
204 | nmi_watchdog = NMI_LOCAL_APIC; | ||
205 | else if (!strncmp(str, "ioapic", 6)) | ||
206 | nmi_watchdog = NMI_IO_APIC; | ||
207 | else { | ||
208 | get_option(&str, &nmi); | ||
209 | if (nmi >= NMI_INVALID) | ||
210 | return 0; | ||
211 | nmi_watchdog = nmi; | ||
212 | } | ||
213 | |||
214 | return 1; | ||
215 | } | ||
216 | __setup("nmi_watchdog=", setup_nmi_watchdog); | ||
217 | |||
218 | /* | ||
219 | * Suspend/resume support | ||
220 | */ | ||
221 | #ifdef CONFIG_PM | ||
222 | |||
223 | static int nmi_pm_active; /* nmi_active before suspend */ | ||
224 | |||
225 | static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) | ||
226 | { | ||
227 | /* only CPU0 goes here, other CPUs should be offline */ | ||
228 | nmi_pm_active = atomic_read(&nmi_active); | ||
229 | stop_apic_nmi_watchdog(NULL); | ||
230 | BUG_ON(atomic_read(&nmi_active) != 0); | ||
231 | return 0; | ||
232 | } | ||
233 | |||
234 | static int lapic_nmi_resume(struct sys_device *dev) | ||
235 | { | ||
236 | /* only CPU0 goes here, other CPUs should be offline */ | ||
237 | if (nmi_pm_active > 0) { | ||
238 | setup_apic_nmi_watchdog(NULL); | ||
239 | touch_nmi_watchdog(); | ||
240 | } | ||
241 | return 0; | ||
242 | } | ||
243 | |||
244 | static struct sysdev_class nmi_sysclass = { | ||
245 | .name = "lapic_nmi", | ||
246 | .resume = lapic_nmi_resume, | ||
247 | .suspend = lapic_nmi_suspend, | ||
248 | }; | ||
249 | |||
250 | static struct sys_device device_lapic_nmi = { | ||
251 | .id = 0, | ||
252 | .cls = &nmi_sysclass, | ||
253 | }; | ||
254 | |||
255 | static int __init init_lapic_nmi_sysfs(void) | ||
256 | { | ||
257 | int error; | ||
258 | |||
259 | /* | ||
260 | * should really be a BUG_ON but b/c this is an | ||
261 | * init call, it just doesn't work. -dcz | ||
262 | */ | ||
263 | if (nmi_watchdog != NMI_LOCAL_APIC) | ||
264 | return 0; | ||
265 | |||
266 | if (atomic_read(&nmi_active) < 0) | ||
267 | return 0; | ||
268 | |||
269 | error = sysdev_class_register(&nmi_sysclass); | ||
270 | if (!error) | ||
271 | error = sysdev_register(&device_lapic_nmi); | ||
272 | return error; | ||
273 | } | ||
274 | |||
275 | /* must come after the local APIC's device_initcall() */ | ||
276 | late_initcall(init_lapic_nmi_sysfs); | ||
277 | |||
278 | #endif /* CONFIG_PM */ | ||
279 | |||
280 | static void __acpi_nmi_enable(void *__unused) | ||
281 | { | ||
282 | apic_write(APIC_LVT0, APIC_DM_NMI); | ||
283 | } | ||
284 | |||
285 | /* | ||
286 | * Enable timer based NMIs on all CPUs: | ||
287 | */ | ||
288 | void acpi_nmi_enable(void) | ||
289 | { | ||
290 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) | ||
291 | on_each_cpu(__acpi_nmi_enable, NULL, 1); | ||
292 | } | ||
293 | |||
294 | /* | ||
295 | * Disable timer based NMIs on all CPUs: | ||
296 | */ | ||
297 | void acpi_nmi_disable(void) | ||
298 | { | ||
299 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) | ||
300 | on_each_cpu(__acpi_nmi_disable, NULL, 1); | ||
301 | } | ||
302 | |||
303 | /* | ||
304 | * This function is called as soon the LAPIC NMI watchdog driver has everything | ||
305 | * in place and it's ready to check if the NMIs belong to the NMI watchdog | ||
306 | */ | ||
307 | void cpu_nmi_set_wd_enabled(void) | ||
308 | { | ||
309 | __get_cpu_var(wd_enabled) = 1; | ||
310 | } | ||
311 | |||
312 | void setup_apic_nmi_watchdog(void *unused) | ||
313 | { | ||
314 | if (__get_cpu_var(wd_enabled)) | ||
315 | return; | ||
316 | |||
317 | /* cheap hack to support suspend/resume */ | ||
318 | /* if cpu0 is not active neither should the other cpus */ | ||
319 | if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0) | ||
320 | return; | ||
321 | |||
322 | switch (nmi_watchdog) { | ||
323 | case NMI_LOCAL_APIC: | ||
324 | if (lapic_watchdog_init(nmi_hz) < 0) { | ||
325 | __get_cpu_var(wd_enabled) = 0; | ||
326 | return; | ||
327 | } | ||
328 | /* FALL THROUGH */ | ||
329 | case NMI_IO_APIC: | ||
330 | __get_cpu_var(wd_enabled) = 1; | ||
331 | atomic_inc(&nmi_active); | ||
332 | } | ||
333 | } | ||
334 | |||
335 | void stop_apic_nmi_watchdog(void *unused) | ||
336 | { | ||
337 | /* only support LOCAL and IO APICs for now */ | ||
338 | if (!nmi_watchdog_active()) | ||
339 | return; | ||
340 | if (__get_cpu_var(wd_enabled) == 0) | ||
341 | return; | ||
342 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
343 | lapic_watchdog_stop(); | ||
344 | else | ||
345 | __acpi_nmi_disable(NULL); | ||
346 | __get_cpu_var(wd_enabled) = 0; | ||
347 | atomic_dec(&nmi_active); | ||
348 | } | ||
349 | |||
350 | /* | ||
351 | * the best way to detect whether a CPU has a 'hard lockup' problem | ||
352 | * is to check its local APIC timer IRQ counts. If they are not | ||
353 | * changing, then that CPU has some problem. | ||
354 | * | ||
355 | * as these watchdog NMI IRQs are generated on every CPU, we only | ||
356 | * have to check the current processor. | ||
357 | * | ||
358 | * since NMIs don't listen to _any_ locks, we have to be extremely | ||
359 | * careful not to rely on unsafe variables. The printk might lock | ||
360 | * up though, so we have to break up any console locks first ... | ||
361 | * [if more tty-related locks appear, break them up here too!] | ||
362 | */ | ||
363 | |||
364 | static DEFINE_PER_CPU(unsigned, last_irq_sum); | ||
365 | static DEFINE_PER_CPU(long, alert_counter); | ||
366 | static DEFINE_PER_CPU(int, nmi_touch); | ||
367 | |||
368 | void touch_nmi_watchdog(void) | ||
369 | { | ||
370 | if (nmi_watchdog_active()) { | ||
371 | unsigned cpu; | ||
372 | |||
373 | /* | ||
374 | * Tell other CPUs to reset their alert counters. We cannot | ||
375 | * do it ourselves because the alert count increase is not | ||
376 | * atomic. | ||
377 | */ | ||
378 | for_each_present_cpu(cpu) { | ||
379 | if (per_cpu(nmi_touch, cpu) != 1) | ||
380 | per_cpu(nmi_touch, cpu) = 1; | ||
381 | } | ||
382 | } | ||
383 | |||
384 | /* | ||
385 | * Tickle the softlockup detector too: | ||
386 | */ | ||
387 | touch_softlockup_watchdog(); | ||
388 | } | ||
389 | EXPORT_SYMBOL(touch_nmi_watchdog); | ||
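The export matters because any kernel path that legitimately keeps a CPU busy for seconds at a time must pet the watchdog, or the per-CPU alert counter below will count it as a lockup. A minimal sketch of such a caller; device_ready() and dev are invented for illustration and are not kernel APIs:

	/* hypothetical slow busy-wait in a driver */
	while (!device_ready(dev)) {
		cpu_relax();
		touch_nmi_watchdog();	/* flags every CPU to reset its alert counter */
	}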
390 | |||
391 | notrace __kprobes int | ||
392 | nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | ||
393 | { | ||
394 | /* | ||
395 | * Since current_thread_info() is always on the stack, and we | ||
396 | * always switch the stack NMI-atomically, it's safe to use | ||
397 | * smp_processor_id(). | ||
398 | */ | ||
399 | unsigned int sum; | ||
400 | int touched = 0; | ||
401 | int cpu = smp_processor_id(); | ||
402 | int rc = 0; | ||
403 | |||
404 | sum = get_timer_irqs(cpu); | ||
405 | |||
406 | if (__get_cpu_var(nmi_touch)) { | ||
407 | __get_cpu_var(nmi_touch) = 0; | ||
408 | touched = 1; | ||
409 | } | ||
410 | |||
411 | /* We can be called before check_nmi_watchdog, hence NULL check. */ | ||
412 | if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { | ||
413 | static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */ | ||
414 | |||
415 | raw_spin_lock(&lock); | ||
416 | printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); | ||
417 | show_regs(regs); | ||
418 | dump_stack(); | ||
419 | raw_spin_unlock(&lock); | ||
420 | cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); | ||
421 | |||
422 | rc = 1; | ||
423 | } | ||
424 | |||
425 | /* Could check oops_in_progress here too, but it's safer not to */ | ||
426 | if (mce_in_progress()) | ||
427 | touched = 1; | ||
428 | |||
429 | /* if none of the timers is firing, this cpu isn't doing much */ | ||
430 | if (!touched && __get_cpu_var(last_irq_sum) == sum) { | ||
431 | /* | ||
432 | * Ayiee, looks like this CPU is stuck ... | ||
433 | * wait a few IRQs (5 seconds) before doing the oops ... | ||
434 | */ | ||
435 | __this_cpu_inc(alert_counter); | ||
436 | if (__this_cpu_read(alert_counter) == 5 * nmi_hz) | ||
437 | /* | ||
438 | * die_nmi will return ONLY if NOTIFY_STOP happens.. | ||
439 | */ | ||
440 | die_nmi("BUG: NMI Watchdog detected LOCKUP", | ||
441 | regs, panic_on_timeout); | ||
442 | } else { | ||
443 | __get_cpu_var(last_irq_sum) = sum; | ||
444 | __this_cpu_write(alert_counter, 0); | ||
445 | } | ||
446 | |||
447 | /* see if the nmi watchdog went off */ | ||
448 | if (!__get_cpu_var(wd_enabled)) | ||
449 | return rc; | ||
450 | switch (nmi_watchdog) { | ||
451 | case NMI_LOCAL_APIC: | ||
452 | rc |= lapic_wd_event(nmi_hz); | ||
453 | break; | ||
454 | case NMI_IO_APIC: | ||
455 | /* | ||
456 | * don't know how to accurately check for this. | ||
457 | * just assume it was a watchdog timer interrupt; | ||
458 | * this matches the old behaviour. | ||
459 | */ | ||
460 | rc = 1; | ||
461 | break; | ||
462 | } | ||
463 | return rc; | ||
464 | } | ||
465 | |||
466 | #ifdef CONFIG_SYSCTL | ||
467 | |||
468 | static void enable_ioapic_nmi_watchdog_single(void *unused) | ||
469 | { | ||
470 | __get_cpu_var(wd_enabled) = 1; | ||
471 | atomic_inc(&nmi_active); | ||
472 | __acpi_nmi_enable(NULL); | ||
473 | } | ||
474 | |||
475 | static void enable_ioapic_nmi_watchdog(void) | ||
476 | { | ||
477 | on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1); | ||
478 | touch_nmi_watchdog(); | ||
479 | } | ||
480 | |||
481 | static void disable_ioapic_nmi_watchdog(void) | ||
482 | { | ||
483 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); | ||
484 | } | ||
485 | |||
486 | static int __init setup_unknown_nmi_panic(char *str) | ||
487 | { | ||
488 | unknown_nmi_panic = 1; | ||
489 | return 1; | ||
490 | } | ||
491 | __setup("unknown_nmi_panic", setup_unknown_nmi_panic); | ||
492 | |||
493 | static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) | ||
494 | { | ||
495 | unsigned char reason = get_nmi_reason(); | ||
496 | char buf[64]; | ||
497 | |||
498 | sprintf(buf, "NMI received for unknown reason %02x\n", reason); | ||
499 | die_nmi(buf, regs, 1); /* Always panic here */ | ||
500 | return 0; | ||
501 | } | ||
502 | |||
503 | /* | ||
504 | * proc handler for /proc/sys/kernel/nmi_watchdog | ||
505 | */ | ||
506 | int proc_nmi_enabled(struct ctl_table *table, int write, | ||
507 | void __user *buffer, size_t *length, loff_t *ppos) | ||
508 | { | ||
509 | int old_state; | ||
510 | |||
511 | nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; | ||
512 | old_state = nmi_watchdog_enabled; | ||
513 | proc_dointvec(table, write, buffer, length, ppos); | ||
514 | if (!!old_state == !!nmi_watchdog_enabled) | ||
515 | return 0; | ||
516 | |||
517 | if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) { | ||
518 | printk(KERN_WARNING | ||
519 | "NMI watchdog is permanently disabled\n"); | ||
520 | return -EIO; | ||
521 | } | ||
522 | |||
523 | if (nmi_watchdog == NMI_LOCAL_APIC) { | ||
524 | if (nmi_watchdog_enabled) | ||
525 | enable_lapic_nmi_watchdog(); | ||
526 | else | ||
527 | disable_lapic_nmi_watchdog(); | ||
528 | } else if (nmi_watchdog == NMI_IO_APIC) { | ||
529 | if (nmi_watchdog_enabled) | ||
530 | enable_ioapic_nmi_watchdog(); | ||
531 | else | ||
532 | disable_ioapic_nmi_watchdog(); | ||
533 | } else { | ||
534 | printk(KERN_WARNING | ||
535 | "NMI watchdog doesn't know what hardware to touch\n"); | ||
536 | return -EIO; | ||
537 | } | ||
538 | return 0; | ||
539 | } | ||
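For reference, this is the handler behind `echo 0 > /proc/sys/kernel/nmi_watchdog` (and `echo 1` to re-enable): proc_dointvec() parses the write into nmi_watchdog_enabled, the old/new comparison filters out no-op writes, and the branch above dispatches to the LAPIC or IO-APIC enable/disable path.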
540 | |||
541 | #endif /* CONFIG_SYSCTL */ | ||
542 | |||
543 | int do_nmi_callback(struct pt_regs *regs, int cpu) | ||
544 | { | ||
545 | #ifdef CONFIG_SYSCTL | ||
546 | if (unknown_nmi_panic) | ||
547 | return unknown_nmi_panic_callback(regs, cpu); | ||
548 | #endif | ||
549 | return 0; | ||
550 | } | ||
551 | |||
552 | void arch_trigger_all_cpu_backtrace(void) | ||
553 | { | ||
554 | int i; | ||
555 | |||
556 | cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); | ||
557 | |||
558 | printk(KERN_INFO "sending NMI to all CPUs:\n"); | ||
559 | apic->send_IPI_all(NMI_VECTOR); | ||
560 | |||
561 | /* Wait for up to 10 seconds for all CPUs to do the backtrace */ | ||
562 | for (i = 0; i < 10 * 1000; i++) { | ||
563 | if (cpumask_empty(to_cpumask(backtrace_mask))) | ||
564 | break; | ||
565 | mdelay(1); | ||
566 | } | ||
567 | } | ||
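The wait arithmetic is simple: 10 * 1000 iterations of mdelay(1) is 10,000 ms, the 10 seconds the comment promises, and the loop exits early once nmi_watchdog_tick() on each CPU has printed its backtrace and cleared that CPU's bit in backtrace_mask. On kernels of this vintage the usual way to reach this path is the SysRq 'l' (show all CPUs) handler, e.g. `echo l > /proc/sysrq-trigger`.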
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4b68bda30938..1d59834396bd 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -894,7 +894,6 @@ void __init identify_boot_cpu(void) | |||
894 | #else | 894 | #else |
895 | vgetcpu_set_mode(); | 895 | vgetcpu_set_mode(); |
896 | #endif | 896 | #endif |
897 | init_hw_perf_events(); | ||
898 | } | 897 | } |
899 | 898 | ||
900 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) | 899 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 6d75b9145b13..0a360d146596 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -330,9 +330,6 @@ static bool reserve_pmc_hardware(void) | |||
330 | { | 330 | { |
331 | int i; | 331 | int i; |
332 | 332 | ||
333 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
334 | disable_lapic_nmi_watchdog(); | ||
335 | |||
336 | for (i = 0; i < x86_pmu.num_counters; i++) { | 333 | for (i = 0; i < x86_pmu.num_counters; i++) { |
337 | if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) | 334 | if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) |
338 | goto perfctr_fail; | 335 | goto perfctr_fail; |
@@ -355,9 +352,6 @@ perfctr_fail: | |||
355 | for (i--; i >= 0; i--) | 352 | for (i--; i >= 0; i--) |
356 | release_perfctr_nmi(x86_pmu.perfctr + i); | 353 | release_perfctr_nmi(x86_pmu.perfctr + i); |
357 | 354 | ||
358 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
359 | enable_lapic_nmi_watchdog(); | ||
360 | |||
361 | return false; | 355 | return false; |
362 | } | 356 | } |
363 | 357 | ||
@@ -369,9 +363,6 @@ static void release_pmc_hardware(void) | |||
369 | release_perfctr_nmi(x86_pmu.perfctr + i); | 363 | release_perfctr_nmi(x86_pmu.perfctr + i); |
370 | release_evntsel_nmi(x86_pmu.eventsel + i); | 364 | release_evntsel_nmi(x86_pmu.eventsel + i); |
371 | } | 365 | } |
372 | |||
373 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
374 | enable_lapic_nmi_watchdog(); | ||
375 | } | 366 | } |
376 | 367 | ||
377 | #else | 368 | #else |
@@ -384,15 +375,53 @@ static void release_pmc_hardware(void) {} | |||
384 | static bool check_hw_exists(void) | 375 | static bool check_hw_exists(void) |
385 | { | 376 | { |
386 | u64 val, val_new = 0; | 377 | u64 val, val_new = 0; |
387 | int ret = 0; | 378 | int i, reg, ret = 0; |
379 | |||
380 | /* | ||
381 | * Check to see if the BIOS enabled any of the counters; if so, | ||
382 | * complain and bail. | ||
383 | */ | ||
384 | for (i = 0; i < x86_pmu.num_counters; i++) { | ||
385 | reg = x86_pmu.eventsel + i; | ||
386 | ret = rdmsrl_safe(reg, &val); | ||
387 | if (ret) | ||
388 | goto msr_fail; | ||
389 | if (val & ARCH_PERFMON_EVENTSEL_ENABLE) | ||
390 | goto bios_fail; | ||
391 | } | ||
388 | 392 | ||
393 | if (x86_pmu.num_counters_fixed) { | ||
394 | reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; | ||
395 | ret = rdmsrl_safe(reg, &val); | ||
396 | if (ret) | ||
397 | goto msr_fail; | ||
398 | for (i = 0; i < x86_pmu.num_counters_fixed; i++) { | ||
399 | if (val & (0x03 << i*4)) | ||
400 | goto bios_fail; | ||
401 | } | ||
402 | } | ||
403 | |||
404 | /* | ||
405 | * Now write a value and read it back to see if it matches; | ||
406 | * this is needed to detect certain hardware emulators (qemu/kvm) | ||
407 | * that don't trap on the MSR access and always return 0s. | ||
408 | */ | ||
389 | val = 0xabcdUL; | 409 | val = 0xabcdUL; |
390 | ret |= checking_wrmsrl(x86_pmu.perfctr, val); | 410 | ret = checking_wrmsrl(x86_pmu.perfctr, val); |
391 | ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new); | 411 | ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new); |
392 | if (ret || val != val_new) | 412 | if (ret || val != val_new) |
393 | return false; | 413 | goto msr_fail; |
394 | 414 | ||
395 | return true; | 415 | return true; |
416 | |||
417 | bios_fail: | ||
418 | printk(KERN_CONT "Broken BIOS detected, using software events only.\n"); | ||
419 | printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val); | ||
420 | return false; | ||
421 | |||
422 | msr_fail: | ||
423 | printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n"); | ||
424 | return false; | ||
396 | } | 425 | } |
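The final write/read-back probe is what catches emulated PMUs: checking_wrmsrl() and rdmsrl_safe() return nonzero only when the MSR access faults, so an emulator that silently accepts the write and always reads back 0 faults nowhere, yet still fails the val != val_new comparison (0xabcd against 0) and lands in msr_fail.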
397 | 426 | ||
398 | static void reserve_ds_buffers(void); | 427 | static void reserve_ds_buffers(void); |
@@ -451,7 +480,7 @@ static int x86_setup_perfctr(struct perf_event *event) | |||
451 | struct hw_perf_event *hwc = &event->hw; | 480 | struct hw_perf_event *hwc = &event->hw; |
452 | u64 config; | 481 | u64 config; |
453 | 482 | ||
454 | if (!hwc->sample_period) { | 483 | if (!is_sampling_event(event)) { |
455 | hwc->sample_period = x86_pmu.max_period; | 484 | hwc->sample_period = x86_pmu.max_period; |
456 | hwc->last_period = hwc->sample_period; | 485 | hwc->last_period = hwc->sample_period; |
457 | local64_set(&hwc->period_left, hwc->sample_period); | 486 | local64_set(&hwc->period_left, hwc->sample_period); |
@@ -1362,7 +1391,7 @@ static void __init pmu_check_apic(void) | |||
1362 | pr_info("no hardware sampling interrupt available.\n"); | 1391 | pr_info("no hardware sampling interrupt available.\n"); |
1363 | } | 1392 | } |
1364 | 1393 | ||
1365 | void __init init_hw_perf_events(void) | 1394 | int __init init_hw_perf_events(void) |
1366 | { | 1395 | { |
1367 | struct event_constraint *c; | 1396 | struct event_constraint *c; |
1368 | int err; | 1397 | int err; |
@@ -1377,20 +1406,18 @@ void __init init_hw_perf_events(void) | |||
1377 | err = amd_pmu_init(); | 1406 | err = amd_pmu_init(); |
1378 | break; | 1407 | break; |
1379 | default: | 1408 | default: |
1380 | return; | 1409 | return 0; |
1381 | } | 1410 | } |
1382 | if (err != 0) { | 1411 | if (err != 0) { |
1383 | pr_cont("no PMU driver, software events only.\n"); | 1412 | pr_cont("no PMU driver, software events only.\n"); |
1384 | return; | 1413 | return 0; |
1385 | } | 1414 | } |
1386 | 1415 | ||
1387 | pmu_check_apic(); | 1416 | pmu_check_apic(); |
1388 | 1417 | ||
1389 | /* sanity check that the hardware exists or is emulated */ | 1418 | /* sanity check that the hardware exists or is emulated */ |
1390 | if (!check_hw_exists()) { | 1419 | if (!check_hw_exists()) |
1391 | pr_cont("Broken PMU hardware detected, software events only.\n"); | 1420 | return 0; |
1392 | return; | ||
1393 | } | ||
1394 | 1421 | ||
1395 | pr_cont("%s PMU driver.\n", x86_pmu.name); | 1422 | pr_cont("%s PMU driver.\n", x86_pmu.name); |
1396 | 1423 | ||
@@ -1438,9 +1465,12 @@ void __init init_hw_perf_events(void) | |||
1438 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); | 1465 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); |
1439 | pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); | 1466 | pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); |
1440 | 1467 | ||
1441 | perf_pmu_register(&pmu); | 1468 | perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); |
1442 | perf_cpu_notifier(x86_pmu_notifier); | 1469 | perf_cpu_notifier(x86_pmu_notifier); |
1470 | |||
1471 | return 0; | ||
1443 | } | 1472 | } |
1473 | early_initcall(init_hw_perf_events); | ||
1444 | 1474 | ||
1445 | static inline void x86_pmu_read(struct perf_event *event) | 1475 | static inline void x86_pmu_read(struct perf_event *event) |
1446 | { | 1476 | { |
@@ -1686,7 +1716,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) | |||
1686 | 1716 | ||
1687 | perf_callchain_store(entry, regs->ip); | 1717 | perf_callchain_store(entry, regs->ip); |
1688 | 1718 | ||
1689 | dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); | 1719 | dump_trace(NULL, regs, NULL, &backtrace_ops, entry); |
1690 | } | 1720 | } |
1691 | 1721 | ||
1692 | #ifdef CONFIG_COMPAT | 1722 | #ifdef CONFIG_COMPAT |
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index e421b8cd6944..67e2202a6039 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -1,7 +1,5 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_AMD | 1 | #ifdef CONFIG_CPU_SUP_AMD |
2 | 2 | ||
3 | static DEFINE_RAW_SPINLOCK(amd_nb_lock); | ||
4 | |||
5 | static __initconst const u64 amd_hw_cache_event_ids | 3 | static __initconst const u64 amd_hw_cache_event_ids |
6 | [PERF_COUNT_HW_CACHE_MAX] | 4 | [PERF_COUNT_HW_CACHE_MAX] |
7 | [PERF_COUNT_HW_CACHE_OP_MAX] | 5 | [PERF_COUNT_HW_CACHE_OP_MAX] |
@@ -275,7 +273,7 @@ done: | |||
275 | return &emptyconstraint; | 273 | return &emptyconstraint; |
276 | } | 274 | } |
277 | 275 | ||
278 | static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) | 276 | static struct amd_nb *amd_alloc_nb(int cpu) |
279 | { | 277 | { |
280 | struct amd_nb *nb; | 278 | struct amd_nb *nb; |
281 | int i; | 279 | int i; |
@@ -285,7 +283,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) | |||
285 | if (!nb) | 283 | if (!nb) |
286 | return NULL; | 284 | return NULL; |
287 | 285 | ||
288 | nb->nb_id = nb_id; | 286 | nb->nb_id = -1; |
289 | 287 | ||
290 | /* | 288 | /* |
291 | * initialize all possible NB constraints | 289 | * initialize all possible NB constraints |
@@ -306,7 +304,7 @@ static int amd_pmu_cpu_prepare(int cpu) | |||
306 | if (boot_cpu_data.x86_max_cores < 2) | 304 | if (boot_cpu_data.x86_max_cores < 2) |
307 | return NOTIFY_OK; | 305 | return NOTIFY_OK; |
308 | 306 | ||
309 | cpuc->amd_nb = amd_alloc_nb(cpu, -1); | 307 | cpuc->amd_nb = amd_alloc_nb(cpu); |
310 | if (!cpuc->amd_nb) | 308 | if (!cpuc->amd_nb) |
311 | return NOTIFY_BAD; | 309 | return NOTIFY_BAD; |
312 | 310 | ||
@@ -325,8 +323,6 @@ static void amd_pmu_cpu_starting(int cpu) | |||
325 | nb_id = amd_get_nb_id(cpu); | 323 | nb_id = amd_get_nb_id(cpu); |
326 | WARN_ON_ONCE(nb_id == BAD_APICID); | 324 | WARN_ON_ONCE(nb_id == BAD_APICID); |
327 | 325 | ||
328 | raw_spin_lock(&amd_nb_lock); | ||
329 | |||
330 | for_each_online_cpu(i) { | 326 | for_each_online_cpu(i) { |
331 | nb = per_cpu(cpu_hw_events, i).amd_nb; | 327 | nb = per_cpu(cpu_hw_events, i).amd_nb; |
332 | if (WARN_ON_ONCE(!nb)) | 328 | if (WARN_ON_ONCE(!nb)) |
@@ -341,8 +337,6 @@ static void amd_pmu_cpu_starting(int cpu) | |||
341 | 337 | ||
342 | cpuc->amd_nb->nb_id = nb_id; | 338 | cpuc->amd_nb->nb_id = nb_id; |
343 | cpuc->amd_nb->refcnt++; | 339 | cpuc->amd_nb->refcnt++; |
344 | |||
345 | raw_spin_unlock(&amd_nb_lock); | ||
346 | } | 340 | } |
347 | 341 | ||
348 | static void amd_pmu_cpu_dead(int cpu) | 342 | static void amd_pmu_cpu_dead(int cpu) |
@@ -354,8 +348,6 @@ static void amd_pmu_cpu_dead(int cpu) | |||
354 | 348 | ||
355 | cpuhw = &per_cpu(cpu_hw_events, cpu); | 349 | cpuhw = &per_cpu(cpu_hw_events, cpu); |
356 | 350 | ||
357 | raw_spin_lock(&amd_nb_lock); | ||
358 | |||
359 | if (cpuhw->amd_nb) { | 351 | if (cpuhw->amd_nb) { |
360 | struct amd_nb *nb = cpuhw->amd_nb; | 352 | struct amd_nb *nb = cpuhw->amd_nb; |
361 | 353 | ||
@@ -364,8 +356,6 @@ static void amd_pmu_cpu_dead(int cpu) | |||
364 | 356 | ||
365 | cpuhw->amd_nb = NULL; | 357 | cpuhw->amd_nb = NULL; |
366 | } | 358 | } |
367 | |||
368 | raw_spin_unlock(&amd_nb_lock); | ||
369 | } | 359 | } |
370 | 360 | ||
371 | static __initconst const struct x86_pmu amd_pmu = { | 361 | static __initconst const struct x86_pmu amd_pmu = { |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index c8f5c088cad1..24e390e40f2e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -816,6 +816,32 @@ static int intel_pmu_hw_config(struct perf_event *event) | |||
816 | if (ret) | 816 | if (ret) |
817 | return ret; | 817 | return ret; |
818 | 818 | ||
819 | if (event->attr.precise_ip && | ||
820 | (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { | ||
821 | /* | ||
822 | * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P | ||
823 | * (0x003c) so that we can use it with PEBS. | ||
824 | * | ||
825 | * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't | ||
826 | * PEBS capable. However we can use INST_RETIRED.ANY_P | ||
827 | * (0x00c0), which is a PEBS capable event, to get the same | ||
828 | * count. | ||
829 | * | ||
830 | * INST_RETIRED.ANY_P counts the number of cycles that retire | ||
831 | * at least CNTMASK instructions. By setting CNTMASK to a value (16) | ||
832 | * larger than the maximum number of instructions that can be | ||
833 | * retired per cycle (4) and then inverting the condition, we | ||
834 | * count all cycles that retire 16 or fewer instructions, which | ||
835 | * is every cycle. | ||
836 | * | ||
837 | * Thereby we gain a PEBS capable cycle counter. | ||
838 | */ | ||
839 | u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */ | ||
840 | |||
841 | alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); | ||
842 | event->hw.config = alt_config; | ||
843 | } | ||
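As a cross-check on the magic 0x108000c0, the constant decomposes exactly into the fields the comment walks through. A sketch using the architectural event-select layout; the macro names here are illustrative, not the kernel's:

	#define SEL_EVENT(e)	((u64)(e) & 0xff)	   /* 0xc0 = INST_RETIRED.ANY_P */
	#define SEL_UMASK(u)	(((u64)(u) & 0xff) << 8)   /* 0x00 here */
	#define SEL_INV		(1ULL << 23)		   /* invert the CNTMASK compare */
	#define SEL_CMASK(c)	(((u64)(c) & 0xff) << 24)  /* 16 > max 4 insns/cycle */

	/* SEL_CMASK(16) | SEL_INV | SEL_EVENT(0xc0) == 0x108000c0 */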
844 | |||
819 | if (event->attr.type != PERF_TYPE_RAW) | 845 | if (event->attr.type != PERF_TYPE_RAW) |
820 | return 0; | 846 | return 0; |
821 | 847 | ||
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index d9f4ff8fcd69..14d45928c282 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
@@ -22,26 +22,6 @@ | |||
22 | #include <asm/apic.h> | 22 | #include <asm/apic.h> |
23 | #include <asm/perf_event.h> | 23 | #include <asm/perf_event.h> |
24 | 24 | ||
25 | struct nmi_watchdog_ctlblk { | ||
26 | unsigned int cccr_msr; | ||
27 | unsigned int perfctr_msr; /* the MSR to reset in NMI handler */ | ||
28 | unsigned int evntsel_msr; /* the MSR to select the events to handle */ | ||
29 | }; | ||
30 | |||
31 | /* Interface defining a CPU specific perfctr watchdog */ | ||
32 | struct wd_ops { | ||
33 | int (*reserve)(void); | ||
34 | void (*unreserve)(void); | ||
35 | int (*setup)(unsigned nmi_hz); | ||
36 | void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz); | ||
37 | void (*stop)(void); | ||
38 | unsigned perfctr; | ||
39 | unsigned evntsel; | ||
40 | u64 checkbit; | ||
41 | }; | ||
42 | |||
43 | static const struct wd_ops *wd_ops; | ||
44 | |||
45 | /* | 25 | /* |
46 | * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's | 26 | * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's |
47 | * offset from MSR_P4_BSU_ESCR0. | 27 | * offset from MSR_P4_BSU_ESCR0. |
@@ -60,8 +40,6 @@ static const struct wd_ops *wd_ops; | |||
60 | static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS); | 40 | static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS); |
61 | static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS); | 41 | static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS); |
62 | 42 | ||
63 | static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); | ||
64 | |||
65 | /* converts an msr to an appropriate reservation bit */ | 43 | /* converts an msr to an appropriate reservation bit */ |
66 | static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) | 44 | static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) |
67 | { | 45 | { |
@@ -172,623 +150,3 @@ void release_evntsel_nmi(unsigned int msr) | |||
172 | clear_bit(counter, evntsel_nmi_owner); | 150 | clear_bit(counter, evntsel_nmi_owner); |
173 | } | 151 | } |
174 | EXPORT_SYMBOL(release_evntsel_nmi); | 152 | EXPORT_SYMBOL(release_evntsel_nmi); |
175 | |||
176 | void disable_lapic_nmi_watchdog(void) | ||
177 | { | ||
178 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); | ||
179 | |||
180 | if (atomic_read(&nmi_active) <= 0) | ||
181 | return; | ||
182 | |||
183 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); | ||
184 | |||
185 | if (wd_ops) | ||
186 | wd_ops->unreserve(); | ||
187 | |||
188 | BUG_ON(atomic_read(&nmi_active) != 0); | ||
189 | } | ||
190 | |||
191 | void enable_lapic_nmi_watchdog(void) | ||
192 | { | ||
193 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); | ||
194 | |||
195 | /* are we already enabled */ | ||
196 | if (atomic_read(&nmi_active) != 0) | ||
197 | return; | ||
198 | |||
199 | /* are we lapic aware */ | ||
200 | if (!wd_ops) | ||
201 | return; | ||
202 | if (!wd_ops->reserve()) { | ||
203 | printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n"); | ||
204 | return; | ||
205 | } | ||
206 | |||
207 | on_each_cpu(setup_apic_nmi_watchdog, NULL, 1); | ||
208 | touch_nmi_watchdog(); | ||
209 | } | ||
210 | |||
211 | /* | ||
212 | * Activate the NMI watchdog via the local APIC. | ||
213 | */ | ||
214 | |||
215 | static unsigned int adjust_for_32bit_ctr(unsigned int hz) | ||
216 | { | ||
217 | u64 counter_val; | ||
218 | unsigned int retval = hz; | ||
219 | |||
220 | /* | ||
221 | * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter | ||
222 | * are writable, with higher bits sign extending from bit 31. | ||
223 | * So, we can only program the counter with 31-bit values, and | ||
224 | * bit 31 must be 1 so that bits 32 and above sign-extend to 1. | ||
225 | * Find the appropriate nmi_hz for that. | ||
226 | */ | ||
227 | counter_val = (u64)cpu_khz * 1000; | ||
228 | do_div(counter_val, retval); | ||
229 | if (counter_val > 0x7fffffffULL) { | ||
230 | u64 count = (u64)cpu_khz * 1000; | ||
231 | do_div(count, 0x7fffffffUL); | ||
232 | retval = count + 1; | ||
233 | } | ||
234 | return retval; | ||
235 | } | ||
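A worked example, assuming a 3 GHz CPU (cpu_khz = 3,000,000) and nmi_hz = 1: counter_val = 3,000,000,000, which exceeds 0x7fffffff (2,147,483,647), so retval becomes 3,000,000,000 / 2,147,483,647 + 1 = 2 and the watchdog runs at 2 Hz, the slowest rate whose per-period count still fits in 31 bits. A 2 GHz CPU stays under the limit and keeps nmi_hz = 1.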
236 | |||
237 | static void write_watchdog_counter(unsigned int perfctr_msr, | ||
238 | const char *descr, unsigned nmi_hz) | ||
239 | { | ||
240 | u64 count = (u64)cpu_khz * 1000; | ||
241 | |||
242 | do_div(count, nmi_hz); | ||
243 | if (descr) | ||
244 | pr_debug("setting %s to -0x%08Lx\n", descr, count); | ||
245 | wrmsrl(perfctr_msr, 0 - count); | ||
246 | } | ||
247 | |||
248 | static void write_watchdog_counter32(unsigned int perfctr_msr, | ||
249 | const char *descr, unsigned nmi_hz) | ||
250 | { | ||
251 | u64 count = (u64)cpu_khz * 1000; | ||
252 | |||
253 | do_div(count, nmi_hz); | ||
254 | if (descr) | ||
255 | pr_debug("setting %s to -0x%08Lx\n", descr, count); | ||
256 | wrmsr(perfctr_msr, (u32)(-count), 0); | ||
257 | } | ||
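Both helpers program the counter with the negated per-period count, so counting up from -(cpu_khz * 1000 / nmi_hz) overflows, and thus fires the NMI, exactly nmi_hz times per second. Continuing the example above at 3 GHz with nmi_hz = 2: count = 1,500,000,000, the MSR is loaded with -1,500,000,000, and the overflow arrives half a second later.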
258 | |||
259 | /* | ||
260 | * AMD K7/K8/Family10h/Family11h support. | ||
261 | * AMD keeps this interface nicely stable so there is not much variety | ||
262 | */ | ||
263 | #define K7_EVNTSEL_ENABLE (1 << 22) | ||
264 | #define K7_EVNTSEL_INT (1 << 20) | ||
265 | #define K7_EVNTSEL_OS (1 << 17) | ||
266 | #define K7_EVNTSEL_USR (1 << 16) | ||
267 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 | ||
268 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING | ||
269 | |||
270 | static int setup_k7_watchdog(unsigned nmi_hz) | ||
271 | { | ||
272 | unsigned int perfctr_msr, evntsel_msr; | ||
273 | unsigned int evntsel; | ||
274 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
275 | |||
276 | perfctr_msr = wd_ops->perfctr; | ||
277 | evntsel_msr = wd_ops->evntsel; | ||
278 | |||
279 | wrmsrl(perfctr_msr, 0UL); | ||
280 | |||
281 | evntsel = K7_EVNTSEL_INT | ||
282 | | K7_EVNTSEL_OS | ||
283 | | K7_EVNTSEL_USR | ||
284 | | K7_NMI_EVENT; | ||
285 | |||
286 | /* setup the timer */ | ||
287 | wrmsr(evntsel_msr, evntsel, 0); | ||
288 | write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz); | ||
289 | |||
290 | /* initialize the wd struct before enabling */ | ||
291 | wd->perfctr_msr = perfctr_msr; | ||
292 | wd->evntsel_msr = evntsel_msr; | ||
293 | wd->cccr_msr = 0; /* unused */ | ||
294 | |||
295 | /* ok, everything is initialized, announce that we're set */ | ||
296 | cpu_nmi_set_wd_enabled(); | ||
297 | |||
298 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
299 | evntsel |= K7_EVNTSEL_ENABLE; | ||
300 | wrmsr(evntsel_msr, evntsel, 0); | ||
301 | |||
302 | return 1; | ||
303 | } | ||
304 | |||
305 | static void single_msr_stop_watchdog(void) | ||
306 | { | ||
307 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
308 | |||
309 | wrmsr(wd->evntsel_msr, 0, 0); | ||
310 | } | ||
311 | |||
312 | static int single_msr_reserve(void) | ||
313 | { | ||
314 | if (!reserve_perfctr_nmi(wd_ops->perfctr)) | ||
315 | return 0; | ||
316 | |||
317 | if (!reserve_evntsel_nmi(wd_ops->evntsel)) { | ||
318 | release_perfctr_nmi(wd_ops->perfctr); | ||
319 | return 0; | ||
320 | } | ||
321 | return 1; | ||
322 | } | ||
323 | |||
324 | static void single_msr_unreserve(void) | ||
325 | { | ||
326 | release_evntsel_nmi(wd_ops->evntsel); | ||
327 | release_perfctr_nmi(wd_ops->perfctr); | ||
328 | } | ||
329 | |||
330 | static void __kprobes | ||
331 | single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) | ||
332 | { | ||
333 | /* start the cycle over again */ | ||
334 | write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); | ||
335 | } | ||
336 | |||
337 | static const struct wd_ops k7_wd_ops = { | ||
338 | .reserve = single_msr_reserve, | ||
339 | .unreserve = single_msr_unreserve, | ||
340 | .setup = setup_k7_watchdog, | ||
341 | .rearm = single_msr_rearm, | ||
342 | .stop = single_msr_stop_watchdog, | ||
343 | .perfctr = MSR_K7_PERFCTR0, | ||
344 | .evntsel = MSR_K7_EVNTSEL0, | ||
345 | .checkbit = 1ULL << 47, | ||
346 | }; | ||
347 | |||
348 | /* | ||
349 | * Intel Model 6 (PPro+,P2,P3,P-M,Core1) | ||
350 | */ | ||
351 | #define P6_EVNTSEL0_ENABLE (1 << 22) | ||
352 | #define P6_EVNTSEL_INT (1 << 20) | ||
353 | #define P6_EVNTSEL_OS (1 << 17) | ||
354 | #define P6_EVNTSEL_USR (1 << 16) | ||
355 | #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 | ||
356 | #define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED | ||
357 | |||
358 | static int setup_p6_watchdog(unsigned nmi_hz) | ||
359 | { | ||
360 | unsigned int perfctr_msr, evntsel_msr; | ||
361 | unsigned int evntsel; | ||
362 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
363 | |||
364 | perfctr_msr = wd_ops->perfctr; | ||
365 | evntsel_msr = wd_ops->evntsel; | ||
366 | |||
367 | /* KVM doesn't implement this MSR */ | ||
368 | if (wrmsr_safe(perfctr_msr, 0, 0) < 0) | ||
369 | return 0; | ||
370 | |||
371 | evntsel = P6_EVNTSEL_INT | ||
372 | | P6_EVNTSEL_OS | ||
373 | | P6_EVNTSEL_USR | ||
374 | | P6_NMI_EVENT; | ||
375 | |||
376 | /* setup the timer */ | ||
377 | wrmsr(evntsel_msr, evntsel, 0); | ||
378 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | ||
379 | write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz); | ||
380 | |||
381 | /* initialize the wd struct before enabling */ | ||
382 | wd->perfctr_msr = perfctr_msr; | ||
383 | wd->evntsel_msr = evntsel_msr; | ||
384 | wd->cccr_msr = 0; /* unused */ | ||
385 | |||
386 | /* ok, everything is initialized, announce that we're set */ | ||
387 | cpu_nmi_set_wd_enabled(); | ||
388 | |||
389 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
390 | evntsel |= P6_EVNTSEL0_ENABLE; | ||
391 | wrmsr(evntsel_msr, evntsel, 0); | ||
392 | |||
393 | return 1; | ||
394 | } | ||
395 | |||
396 | static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) | ||
397 | { | ||
398 | /* | ||
399 | * P6 based Pentium M need to re-unmask | ||
400 | * the apic vector but it doesn't hurt | ||
401 | * other P6 variant. | ||
402 | * ArchPerfom/Core Duo also needs this | ||
403 | */ | ||
404 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
405 | |||
406 | /* P6/ARCH_PERFMON has 32 bit counter write */ | ||
407 | write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz); | ||
408 | } | ||
409 | |||
410 | static const struct wd_ops p6_wd_ops = { | ||
411 | .reserve = single_msr_reserve, | ||
412 | .unreserve = single_msr_unreserve, | ||
413 | .setup = setup_p6_watchdog, | ||
414 | .rearm = p6_rearm, | ||
415 | .stop = single_msr_stop_watchdog, | ||
416 | .perfctr = MSR_P6_PERFCTR0, | ||
417 | .evntsel = MSR_P6_EVNTSEL0, | ||
418 | .checkbit = 1ULL << 39, | ||
419 | }; | ||
420 | |||
421 | /* | ||
422 | * Intel P4 performance counters. | ||
423 | * By far the most complicated of all. | ||
424 | */ | ||
425 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1 << 7) | ||
426 | #define P4_ESCR_EVENT_SELECT(N) ((N) << 25) | ||
427 | #define P4_ESCR_OS (1 << 3) | ||
428 | #define P4_ESCR_USR (1 << 2) | ||
429 | #define P4_CCCR_OVF_PMI0 (1 << 26) | ||
430 | #define P4_CCCR_OVF_PMI1 (1 << 27) | ||
431 | #define P4_CCCR_THRESHOLD(N) ((N) << 20) | ||
432 | #define P4_CCCR_COMPLEMENT (1 << 19) | ||
433 | #define P4_CCCR_COMPARE (1 << 18) | ||
434 | #define P4_CCCR_REQUIRED (3 << 16) | ||
435 | #define P4_CCCR_ESCR_SELECT(N) ((N) << 13) | ||
436 | #define P4_CCCR_ENABLE (1 << 12) | ||
437 | #define P4_CCCR_OVF (1 << 31) | ||
438 | |||
439 | #define P4_CONTROLS 18 | ||
440 | static unsigned int p4_controls[18] = { | ||
441 | MSR_P4_BPU_CCCR0, | ||
442 | MSR_P4_BPU_CCCR1, | ||
443 | MSR_P4_BPU_CCCR2, | ||
444 | MSR_P4_BPU_CCCR3, | ||
445 | MSR_P4_MS_CCCR0, | ||
446 | MSR_P4_MS_CCCR1, | ||
447 | MSR_P4_MS_CCCR2, | ||
448 | MSR_P4_MS_CCCR3, | ||
449 | MSR_P4_FLAME_CCCR0, | ||
450 | MSR_P4_FLAME_CCCR1, | ||
451 | MSR_P4_FLAME_CCCR2, | ||
452 | MSR_P4_FLAME_CCCR3, | ||
453 | MSR_P4_IQ_CCCR0, | ||
454 | MSR_P4_IQ_CCCR1, | ||
455 | MSR_P4_IQ_CCCR2, | ||
456 | MSR_P4_IQ_CCCR3, | ||
457 | MSR_P4_IQ_CCCR4, | ||
458 | MSR_P4_IQ_CCCR5, | ||
459 | }; | ||
460 | /* | ||
461 | * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | ||
462 | * CRU_ESCR0 (with any non-null event selector) through a complemented | ||
463 | * max threshold. [IA32-Vol3, Section 14.9.9] | ||
464 | */ | ||
465 | static int setup_p4_watchdog(unsigned nmi_hz) | ||
466 | { | ||
467 | unsigned int perfctr_msr, evntsel_msr, cccr_msr; | ||
468 | unsigned int evntsel, cccr_val; | ||
469 | unsigned int misc_enable, dummy; | ||
470 | unsigned int ht_num; | ||
471 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
472 | |||
473 | rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); | ||
474 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) | ||
475 | return 0; | ||
476 | |||
477 | #ifdef CONFIG_SMP | ||
478 | /* detect which hyperthread we are on */ | ||
479 | if (smp_num_siblings == 2) { | ||
480 | unsigned int ebx, apicid; | ||
481 | |||
482 | ebx = cpuid_ebx(1); | ||
483 | apicid = (ebx >> 24) & 0xff; | ||
484 | ht_num = apicid & 1; | ||
485 | } else | ||
486 | #endif | ||
487 | ht_num = 0; | ||
488 | |||
489 | /* | ||
490 | * performance counters are shared resources | ||
491 | * assign each hyperthread its own set | ||
492 | * (re-use the ESCR0 register, seems safe | ||
493 | * and keeps the cccr_val the same) | ||
494 | */ | ||
495 | if (!ht_num) { | ||
496 | /* logical cpu 0 */ | ||
497 | perfctr_msr = MSR_P4_IQ_PERFCTR0; | ||
498 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
499 | cccr_msr = MSR_P4_IQ_CCCR0; | ||
500 | cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); | ||
501 | |||
502 | /* | ||
503 | * If we're on the kdump kernel, or in some similar situation, we may | ||
504 | * still have other performance counter registers set to | ||
505 | * interrupt and they'll keep interrupting forever because | ||
506 | * of the P4_CCCR_OVF quirk. So we need to ACK all the | ||
507 | * pending interrupts and disable all the registers here, | ||
508 | * before reenabling the NMI delivery. Refer to p4_rearm() | ||
509 | * about the P4_CCCR_OVF quirk. | ||
510 | */ | ||
511 | if (reset_devices) { | ||
512 | unsigned int low, high; | ||
513 | int i; | ||
514 | |||
515 | for (i = 0; i < P4_CONTROLS; i++) { | ||
516 | rdmsr(p4_controls[i], low, high); | ||
517 | low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF); | ||
518 | wrmsr(p4_controls[i], low, high); | ||
519 | } | ||
520 | } | ||
521 | } else { | ||
522 | /* logical cpu 1 */ | ||
523 | perfctr_msr = MSR_P4_IQ_PERFCTR1; | ||
524 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
525 | cccr_msr = MSR_P4_IQ_CCCR1; | ||
526 | |||
527 | /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */ | ||
528 | if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4) | ||
529 | cccr_val = P4_CCCR_OVF_PMI0; | ||
530 | else | ||
531 | cccr_val = P4_CCCR_OVF_PMI1; | ||
532 | cccr_val |= P4_CCCR_ESCR_SELECT(4); | ||
533 | } | ||
534 | |||
535 | evntsel = P4_ESCR_EVENT_SELECT(0x3F) | ||
536 | | P4_ESCR_OS | ||
537 | | P4_ESCR_USR; | ||
538 | |||
539 | cccr_val |= P4_CCCR_THRESHOLD(15) | ||
540 | | P4_CCCR_COMPLEMENT | ||
541 | | P4_CCCR_COMPARE | ||
542 | | P4_CCCR_REQUIRED; | ||
543 | |||
544 | wrmsr(evntsel_msr, evntsel, 0); | ||
545 | wrmsr(cccr_msr, cccr_val, 0); | ||
546 | write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); | ||
547 | |||
548 | wd->perfctr_msr = perfctr_msr; | ||
549 | wd->evntsel_msr = evntsel_msr; | ||
550 | wd->cccr_msr = cccr_msr; | ||
551 | |||
552 | /* ok, everything is initialized, announce that we're set */ | ||
553 | cpu_nmi_set_wd_enabled(); | ||
554 | |||
555 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
556 | cccr_val |= P4_CCCR_ENABLE; | ||
557 | wrmsr(cccr_msr, cccr_val, 0); | ||
558 | return 1; | ||
559 | } | ||
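Spelled out, the "behave like a clock" trick referenced in the comment before this function rests on the three CCCR compare bits programmed above (this restates the flags already in the code, not new behaviour):

	/* COMPARE enables the threshold test; THRESHOLD(15) sets it; and
	 * COMPLEMENT inverts "count when events/cycle > 15" into "count
	 * when events/cycle <= 15". Nothing fires 16 times in one cycle,
	 * so the condition holds every cycle and IQ_COUNTER0 ticks once
	 * per cycle, i.e. it behaves as a clock. */
	cccr_val |= P4_CCCR_THRESHOLD(15)
		  | P4_CCCR_COMPLEMENT
		  | P4_CCCR_COMPARE
		  | P4_CCCR_REQUIRED;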
560 | |||
561 | static void stop_p4_watchdog(void) | ||
562 | { | ||
563 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
564 | wrmsr(wd->cccr_msr, 0, 0); | ||
565 | wrmsr(wd->evntsel_msr, 0, 0); | ||
566 | } | ||
567 | |||
568 | static int p4_reserve(void) | ||
569 | { | ||
570 | if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0)) | ||
571 | return 0; | ||
572 | #ifdef CONFIG_SMP | ||
573 | if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1)) | ||
574 | goto fail1; | ||
575 | #endif | ||
576 | if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0)) | ||
577 | goto fail2; | ||
578 | /* RED-PEN why is ESCR1 not reserved here? */ | ||
579 | return 1; | ||
580 | fail2: | ||
581 | #ifdef CONFIG_SMP | ||
582 | if (smp_num_siblings > 1) | ||
583 | release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); | ||
584 | fail1: | ||
585 | #endif | ||
586 | release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); | ||
587 | return 0; | ||
588 | } | ||
589 | |||
590 | static void p4_unreserve(void) | ||
591 | { | ||
592 | #ifdef CONFIG_SMP | ||
593 | if (smp_num_siblings > 1) | ||
594 | release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); | ||
595 | #endif | ||
596 | release_evntsel_nmi(MSR_P4_CRU_ESCR0); | ||
597 | release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); | ||
598 | } | ||
599 | |||
600 | static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) | ||
601 | { | ||
602 | unsigned dummy; | ||
603 | /* | ||
604 | * P4 quirks: | ||
605 | * - An overflowed perfctr will assert its interrupt | ||
606 | * until the OVF flag in its CCCR is cleared. | ||
607 | * - LVTPC is masked on interrupt and must be | ||
608 | * unmasked by the LVTPC handler. | ||
609 | */ | ||
610 | rdmsrl(wd->cccr_msr, dummy); | ||
611 | dummy &= ~P4_CCCR_OVF; | ||
612 | wrmsrl(wd->cccr_msr, dummy); | ||
613 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
614 | /* start the cycle over again */ | ||
615 | write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); | ||
616 | } | ||
617 | |||
618 | static const struct wd_ops p4_wd_ops = { | ||
619 | .reserve = p4_reserve, | ||
620 | .unreserve = p4_unreserve, | ||
621 | .setup = setup_p4_watchdog, | ||
622 | .rearm = p4_rearm, | ||
623 | .stop = stop_p4_watchdog, | ||
624 | /* RED-PEN this is wrong for the other sibling */ | ||
625 | .perfctr = MSR_P4_BPU_PERFCTR0, | ||
626 | .evntsel = MSR_P4_BSU_ESCR0, | ||
627 | .checkbit = 1ULL << 39, | ||
628 | }; | ||
629 | |||
630 | /* | ||
631 | * Watchdog using the Intel architected PerfMon. | ||
632 | * Used for Core2 and hopefully all future Intel CPUs. | ||
633 | */ | ||
634 | #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL | ||
635 | #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK | ||
636 | |||
637 | static struct wd_ops intel_arch_wd_ops; | ||
638 | |||
639 | static int setup_intel_arch_watchdog(unsigned nmi_hz) | ||
640 | { | ||
641 | unsigned int ebx; | ||
642 | union cpuid10_eax eax; | ||
643 | unsigned int unused; | ||
644 | unsigned int perfctr_msr, evntsel_msr; | ||
645 | unsigned int evntsel; | ||
646 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
647 | |||
648 | /* | ||
649 | * Check whether the Architectural PerfMon supports | ||
650 | * the Unhalted Core Cycles event. | ||
651 | * NOTE: a 0 in the corresponding ebx bit means the event is present. | ||
652 | */ | ||
653 | cpuid(10, &(eax.full), &ebx, &unused, &unused); | ||
654 | if ((eax.split.mask_length < | ||
655 | (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || | ||
656 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
657 | return 0; | ||
658 | |||
659 | perfctr_msr = wd_ops->perfctr; | ||
660 | evntsel_msr = wd_ops->evntsel; | ||
661 | |||
662 | wrmsrl(perfctr_msr, 0UL); | ||
663 | |||
664 | evntsel = ARCH_PERFMON_EVENTSEL_INT | ||
665 | | ARCH_PERFMON_EVENTSEL_OS | ||
666 | | ARCH_PERFMON_EVENTSEL_USR | ||
667 | | ARCH_PERFMON_NMI_EVENT_SEL | ||
668 | | ARCH_PERFMON_NMI_EVENT_UMASK; | ||
669 | |||
670 | /* setup the timer */ | ||
671 | wrmsr(evntsel_msr, evntsel, 0); | ||
672 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | ||
673 | write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); | ||
674 | |||
675 | wd->perfctr_msr = perfctr_msr; | ||
676 | wd->evntsel_msr = evntsel_msr; | ||
677 | wd->cccr_msr = 0; /* unused */ | ||
678 | |||
679 | /* ok, everything is initialized, announce that we're set */ | ||
680 | cpu_nmi_set_wd_enabled(); | ||
681 | |||
682 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
683 | evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE; | ||
684 | wrmsr(evntsel_msr, evntsel, 0); | ||
685 | intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); | ||
686 | return 1; | ||
687 | } | ||
688 | |||
689 | static struct wd_ops intel_arch_wd_ops __read_mostly = { | ||
690 | .reserve = single_msr_reserve, | ||
691 | .unreserve = single_msr_unreserve, | ||
692 | .setup = setup_intel_arch_watchdog, | ||
693 | .rearm = p6_rearm, | ||
694 | .stop = single_msr_stop_watchdog, | ||
695 | .perfctr = MSR_ARCH_PERFMON_PERFCTR1, | ||
696 | .evntsel = MSR_ARCH_PERFMON_EVENTSEL1, | ||
697 | }; | ||
698 | |||
699 | static void probe_nmi_watchdog(void) | ||
700 | { | ||
701 | switch (boot_cpu_data.x86_vendor) { | ||
702 | case X86_VENDOR_AMD: | ||
703 | if (boot_cpu_data.x86 == 6 || | ||
704 | (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15)) | ||
705 | wd_ops = &k7_wd_ops; | ||
706 | return; | ||
707 | case X86_VENDOR_INTEL: | ||
708 | /* Work around CPUs where perfctr1 doesn't have a working enable | ||
709 | * bit, as described in the following errata: | ||
710 | * AE49 Core Duo and Intel Core Solo 65 nm | ||
711 | * AN49 Intel Pentium Dual-Core | ||
712 | * AF49 Dual-Core Intel Xeon Processor LV | ||
713 | */ | ||
714 | if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) || | ||
715 | ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 && | ||
716 | boot_cpu_data.x86_mask == 4))) { | ||
717 | intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0; | ||
718 | intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0; | ||
719 | } | ||
720 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
721 | wd_ops = &intel_arch_wd_ops; | ||
722 | break; | ||
723 | } | ||
724 | switch (boot_cpu_data.x86) { | ||
725 | case 6: | ||
726 | if (boot_cpu_data.x86_model > 13) | ||
727 | return; | ||
728 | |||
729 | wd_ops = &p6_wd_ops; | ||
730 | break; | ||
731 | case 15: | ||
732 | wd_ops = &p4_wd_ops; | ||
733 | break; | ||
734 | default: | ||
735 | return; | ||
736 | } | ||
737 | break; | ||
738 | } | ||
739 | } | ||
740 | |||
741 | /* Interface to nmi.c */ | ||
742 | |||
743 | int lapic_watchdog_init(unsigned nmi_hz) | ||
744 | { | ||
745 | if (!wd_ops) { | ||
746 | probe_nmi_watchdog(); | ||
747 | if (!wd_ops) { | ||
748 | printk(KERN_INFO "NMI watchdog: CPU not supported\n"); | ||
749 | return -1; | ||
750 | } | ||
751 | |||
752 | if (!wd_ops->reserve()) { | ||
753 | printk(KERN_ERR | ||
754 | "NMI watchdog: cannot reserve perfctrs\n"); | ||
755 | return -1; | ||
756 | } | ||
757 | } | ||
758 | |||
759 | if (!(wd_ops->setup(nmi_hz))) { | ||
760 | printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n", | ||
761 | raw_smp_processor_id()); | ||
762 | return -1; | ||
763 | } | ||
764 | |||
765 | return 0; | ||
766 | } | ||
767 | |||
768 | void lapic_watchdog_stop(void) | ||
769 | { | ||
770 | if (wd_ops) | ||
771 | wd_ops->stop(); | ||
772 | } | ||
773 | |||
774 | unsigned lapic_adjust_nmi_hz(unsigned hz) | ||
775 | { | ||
776 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
777 | if (wd->perfctr_msr == MSR_P6_PERFCTR0 || | ||
778 | wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1) | ||
779 | hz = adjust_for_32bit_ctr(hz); | ||
780 | return hz; | ||
781 | } | ||
782 | |||
783 | int __kprobes lapic_wd_event(unsigned nmi_hz) | ||
784 | { | ||
785 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
786 | u64 ctr; | ||
787 | |||
788 | rdmsrl(wd->perfctr_msr, ctr); | ||
789 | if (ctr & wd_ops->checkbit) /* perfctr still running? */ | ||
790 | return 0; | ||
791 | |||
792 | wd_ops->rearm(wd, nmi_hz); | ||
793 | return 1; | ||
794 | } | ||
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 6e8752c1bd52..8474c998cbd4 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -175,21 +175,21 @@ static const struct stacktrace_ops print_trace_ops = { | |||
175 | 175 | ||
176 | void | 176 | void |
177 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | 177 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
178 | unsigned long *stack, unsigned long bp, char *log_lvl) | 178 | unsigned long *stack, char *log_lvl) |
179 | { | 179 | { |
180 | printk("%sCall Trace:\n", log_lvl); | 180 | printk("%sCall Trace:\n", log_lvl); |
181 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | 181 | dump_trace(task, regs, stack, &print_trace_ops, log_lvl); |
182 | } | 182 | } |
183 | 183 | ||
184 | void show_trace(struct task_struct *task, struct pt_regs *regs, | 184 | void show_trace(struct task_struct *task, struct pt_regs *regs, |
185 | unsigned long *stack, unsigned long bp) | 185 | unsigned long *stack) |
186 | { | 186 | { |
187 | show_trace_log_lvl(task, regs, stack, bp, ""); | 187 | show_trace_log_lvl(task, regs, stack, ""); |
188 | } | 188 | } |
189 | 189 | ||
190 | void show_stack(struct task_struct *task, unsigned long *sp) | 190 | void show_stack(struct task_struct *task, unsigned long *sp) |
191 | { | 191 | { |
192 | show_stack_log_lvl(task, NULL, sp, 0, ""); | 192 | show_stack_log_lvl(task, NULL, sp, ""); |
193 | } | 193 | } |
194 | 194 | ||
195 | /* | 195 | /* |
@@ -210,7 +210,7 @@ void dump_stack(void) | |||
210 | init_utsname()->release, | 210 | init_utsname()->release, |
211 | (int)strcspn(init_utsname()->version, " "), | 211 | (int)strcspn(init_utsname()->version, " "), |
212 | init_utsname()->version); | 212 | init_utsname()->version); |
213 | show_trace(NULL, NULL, &stack, bp); | 213 | show_trace(NULL, NULL, &stack); |
214 | } | 214 | } |
215 | EXPORT_SYMBOL(dump_stack); | 215 | EXPORT_SYMBOL(dump_stack); |
216 | 216 | ||
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 1bc7f75a5bda..74cc1eda384b 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
@@ -17,11 +17,12 @@ | |||
17 | #include <asm/stacktrace.h> | 17 | #include <asm/stacktrace.h> |
18 | 18 | ||
19 | 19 | ||
20 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | 20 | void dump_trace(struct task_struct *task, |
21 | unsigned long *stack, unsigned long bp, | 21 | struct pt_regs *regs, unsigned long *stack, |
22 | const struct stacktrace_ops *ops, void *data) | 22 | const struct stacktrace_ops *ops, void *data) |
23 | { | 23 | { |
24 | int graph = 0; | 24 | int graph = 0; |
25 | unsigned long bp; | ||
25 | 26 | ||
26 | if (!task) | 27 | if (!task) |
27 | task = current; | 28 | task = current; |
@@ -34,18 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
34 | stack = (unsigned long *)task->thread.sp; | 35 | stack = (unsigned long *)task->thread.sp; |
35 | } | 36 | } |
36 | 37 | ||
37 | #ifdef CONFIG_FRAME_POINTER | 38 | bp = stack_frame(task, regs); |
38 | if (!bp) { | ||
39 | if (task == current) { | ||
40 | /* Grab bp right from our regs */ | ||
41 | get_bp(bp); | ||
42 | } else { | ||
43 | /* bp is the last reg pushed by switch_to */ | ||
44 | bp = *(unsigned long *) task->thread.sp; | ||
45 | } | ||
46 | } | ||
47 | #endif | ||
48 | |||
49 | for (;;) { | 39 | for (;;) { |
50 | struct thread_info *context; | 40 | struct thread_info *context; |
51 | 41 | ||
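The #ifdef CONFIG_FRAME_POINTER block deleted below is what the new stack_frame() helper replaces. A sketch of such a helper, reconstructed from the removed lines (the real definition lives in asm/stacktrace.h and also provides a !CONFIG_FRAME_POINTER variant that simply returns 0):

	static inline unsigned long
	stack_frame(struct task_struct *task, struct pt_regs *regs)
	{
		unsigned long bp;

		if (regs)
			return regs->bp;	/* trap frame already carries bp */

		if (task == current) {
			get_bp(bp);		/* grab bp right from our regs */
			return bp;
		}

		/* bp is the last reg pushed by switch_to */
		return *(unsigned long *)task->thread.sp;
	}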
@@ -65,7 +55,7 @@ EXPORT_SYMBOL(dump_trace); | |||
65 | 55 | ||
66 | void | 56 | void |
67 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 57 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
68 | unsigned long *sp, unsigned long bp, char *log_lvl) | 58 | unsigned long *sp, char *log_lvl) |
69 | { | 59 | { |
70 | unsigned long *stack; | 60 | unsigned long *stack; |
71 | int i; | 61 | int i; |
@@ -87,7 +77,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
87 | touch_nmi_watchdog(); | 77 | touch_nmi_watchdog(); |
88 | } | 78 | } |
89 | printk(KERN_CONT "\n"); | 79 | printk(KERN_CONT "\n"); |
90 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 80 | show_trace_log_lvl(task, regs, sp, log_lvl); |
91 | } | 81 | } |
92 | 82 | ||
93 | 83 | ||
@@ -112,8 +102,7 @@ void show_registers(struct pt_regs *regs) | |||
112 | u8 *ip; | 102 | u8 *ip; |
113 | 103 | ||
114 | printk(KERN_EMERG "Stack:\n"); | 104 | printk(KERN_EMERG "Stack:\n"); |
115 | show_stack_log_lvl(NULL, regs, ®s->sp, | 105 | show_stack_log_lvl(NULL, regs, ®s->sp, KERN_EMERG); |
116 | 0, KERN_EMERG); | ||
117 | 106 | ||
118 | printk(KERN_EMERG "Code: "); | 107 | printk(KERN_EMERG "Code: "); |
119 | 108 | ||
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 6a340485249a..64101335de19 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -139,8 +139,8 @@ fixup_bp_irq_link(unsigned long bp, unsigned long *stack, | |||
139 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | 139 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack |
140 | */ | 140 | */ |
141 | 141 | ||
142 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | 142 | void dump_trace(struct task_struct *task, |
143 | unsigned long *stack, unsigned long bp, | 143 | struct pt_regs *regs, unsigned long *stack, |
144 | const struct stacktrace_ops *ops, void *data) | 144 | const struct stacktrace_ops *ops, void *data) |
145 | { | 145 | { |
146 | const unsigned cpu = get_cpu(); | 146 | const unsigned cpu = get_cpu(); |
@@ -149,6 +149,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
149 | unsigned used = 0; | 149 | unsigned used = 0; |
150 | struct thread_info *tinfo; | 150 | struct thread_info *tinfo; |
151 | int graph = 0; | 151 | int graph = 0; |
152 | unsigned long bp; | ||
152 | 153 | ||
153 | if (!task) | 154 | if (!task) |
154 | task = current; | 155 | task = current; |
@@ -160,18 +161,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
160 | stack = (unsigned long *)task->thread.sp; | 161 | stack = (unsigned long *)task->thread.sp; |
161 | } | 162 | } |
162 | 163 | ||
163 | #ifdef CONFIG_FRAME_POINTER | 164 | bp = stack_frame(task, regs); |
164 | if (!bp) { | ||
165 | if (task == current) { | ||
166 | /* Grab bp right from our regs */ | ||
167 | get_bp(bp); | ||
168 | } else { | ||
169 | /* bp is the last reg pushed by switch_to */ | ||
170 | bp = *(unsigned long *) task->thread.sp; | ||
171 | } | ||
172 | } | ||
173 | #endif | ||
174 | |||
175 | /* | 165 | /* |
176 | * Print function call entries in all stacks, starting at the | 166 | * Print function call entries in all stacks, starting at the |
177 | * current stack address. If the stacks consist of nested | 167 | * current stack address. If the stacks consist of nested |
@@ -235,7 +225,7 @@ EXPORT_SYMBOL(dump_trace); | |||
235 | 225 | ||
236 | void | 226 | void |
237 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 227 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
238 | unsigned long *sp, unsigned long bp, char *log_lvl) | 228 | unsigned long *sp, char *log_lvl) |
239 | { | 229 | { |
240 | unsigned long *irq_stack_end; | 230 | unsigned long *irq_stack_end; |
241 | unsigned long *irq_stack; | 231 | unsigned long *irq_stack; |
@@ -279,7 +269,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
279 | preempt_enable(); | 269 | preempt_enable(); |
280 | 270 | ||
281 | printk(KERN_CONT "\n"); | 271 | printk(KERN_CONT "\n"); |
282 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 272 | show_trace_log_lvl(task, regs, sp, log_lvl); |
283 | } | 273 | } |
284 | 274 | ||
285 | void show_registers(struct pt_regs *regs) | 275 | void show_registers(struct pt_regs *regs) |
@@ -308,7 +298,7 @@ void show_registers(struct pt_regs *regs) | |||
308 | 298 | ||
309 | printk(KERN_EMERG "Stack:\n"); | 299 | printk(KERN_EMERG "Stack:\n"); |
310 | show_stack_log_lvl(NULL, regs, (unsigned long *)sp, | 300 | show_stack_log_lvl(NULL, regs, (unsigned long *)sp, |
311 | regs->bp, KERN_EMERG); | 301 | KERN_EMERG); |
312 | 302 | ||
313 | printk(KERN_EMERG "Code: "); | 303 | printk(KERN_EMERG "Code: "); |
314 | 304 | ||
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 1cbd54c0df99..5940282bd2f9 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -1184,6 +1184,10 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op, | |||
1184 | { | 1184 | { |
1185 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | 1185 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); |
1186 | 1186 | ||
1187 | /* This is possible if op is under delayed unoptimization */ | ||
1188 | if (kprobe_disabled(&op->kp)) | ||
1189 | return; | ||
1190 | |||
1187 | preempt_disable(); | 1191 | preempt_disable(); |
1188 | if (kprobe_running()) { | 1192 | if (kprobe_running()) { |
1189 | kprobes_inc_nmissed_count(&op->kp); | 1193 | kprobes_inc_nmissed_count(&op->kp); |
@@ -1401,10 +1405,16 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) | |||
1401 | return 0; | 1405 | return 0; |
1402 | } | 1406 | } |
1403 | 1407 | ||
1404 | /* Replace a breakpoint (int3) with a relative jump. */ | 1408 | #define MAX_OPTIMIZE_PROBES 256 |
1405 | int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op) | 1409 | static struct text_poke_param *jump_poke_params; |
1410 | static struct jump_poke_buffer { | ||
1411 | u8 buf[RELATIVEJUMP_SIZE]; | ||
1412 | } *jump_poke_bufs; | ||
1413 | |||
1414 | static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm, | ||
1415 | u8 *insn_buf, | ||
1416 | struct optimized_kprobe *op) | ||
1406 | { | 1417 | { |
1407 | unsigned char jmp_code[RELATIVEJUMP_SIZE]; | ||
1408 | s32 rel = (s32)((long)op->optinsn.insn - | 1418 | s32 rel = (s32)((long)op->optinsn.insn - |
1409 | ((long)op->kp.addr + RELATIVEJUMP_SIZE)); | 1419 | ((long)op->kp.addr + RELATIVEJUMP_SIZE)); |
1410 | 1420 | ||
@@ -1412,16 +1422,79 @@ int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op) | |||
1412 | memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, | 1422 | memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, |
1413 | RELATIVE_ADDR_SIZE); | 1423 | RELATIVE_ADDR_SIZE); |
1414 | 1424 | ||
1415 | jmp_code[0] = RELATIVEJUMP_OPCODE; | 1425 | insn_buf[0] = RELATIVEJUMP_OPCODE; |
1416 | *(s32 *)(&jmp_code[1]) = rel; | 1426 | *(s32 *)(&insn_buf[1]) = rel; |
1427 | |||
1428 | tprm->addr = op->kp.addr; | ||
1429 | tprm->opcode = insn_buf; | ||
1430 | tprm->len = RELATIVEJUMP_SIZE; | ||
1431 | } | ||
1432 | |||
1433 | /* | ||
1434 | * Replace breakpoints (int3) with relative jumps. | ||
1435 | * Caller must call with locking kprobe_mutex and text_mutex. | ||
1436 | */ | ||
1437 | void __kprobes arch_optimize_kprobes(struct list_head *oplist) | ||
1438 | { | ||
1439 | struct optimized_kprobe *op, *tmp; | ||
1440 | int c = 0; | ||
1441 | |||
1442 | list_for_each_entry_safe(op, tmp, oplist, list) { | ||
1443 | WARN_ON(kprobe_disabled(&op->kp)); | ||
1444 | /* Setup param */ | ||
1445 | setup_optimize_kprobe(&jump_poke_params[c], | ||
1446 | jump_poke_bufs[c].buf, op); | ||
1447 | list_del_init(&op->list); | ||
1448 | if (++c >= MAX_OPTIMIZE_PROBES) | ||
1449 | break; | ||
1450 | } | ||
1417 | 1451 | ||
1418 | /* | 1452 | /* |
1419 | * text_poke_smp doesn't support NMI/MCE code modifying. | 1453 | * text_poke_smp doesn't support NMI/MCE code modifying. |
1420 | * However, since kprobes itself also doesn't support NMI/MCE | 1454 | * However, since kprobes itself also doesn't support NMI/MCE |
1421 | * code probing, it's not a problem. | 1455 | * code probing, it's not a problem. |
1422 | */ | 1456 | */ |
1423 | text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE); | 1457 | text_poke_smp_batch(jump_poke_params, c); |
1424 | return 0; | 1458 | } |
1459 | |||
1460 | static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm, | ||
1461 | u8 *insn_buf, | ||
1462 | struct optimized_kprobe *op) | ||
1463 | { | ||
1464 | /* Set int3 to first byte for kprobes */ | ||
1465 | insn_buf[0] = BREAKPOINT_INSTRUCTION; | ||
1466 | memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); | ||
1467 | |||
1468 | tprm->addr = op->kp.addr; | ||
1469 | tprm->opcode = insn_buf; | ||
1470 | tprm->len = RELATIVEJUMP_SIZE; | ||
1471 | } | ||
1472 | |||
1473 | /* | ||
1474 | * Recover original instructions and breakpoints from relative jumps. | ||
1475 | * Caller must hold kprobe_mutex. | ||
1476 | */ | ||
1477 | void __kprobes arch_unoptimize_kprobes(struct list_head *oplist, | ||
1478 | struct list_head *done_list) | ||
1479 | { | ||
1480 | struct optimized_kprobe *op, *tmp; | ||
1481 | int c = 0; | ||
1482 | |||
1483 | list_for_each_entry_safe(op, tmp, oplist, list) { | ||
1484 | /* Setup param */ | ||
1485 | setup_unoptimize_kprobe(&jump_poke_params[c], | ||
1486 | jump_poke_bufs[c].buf, op); | ||
1487 | list_move(&op->list, done_list); | ||
1488 | if (++c >= MAX_OPTIMIZE_PROBES) | ||
1489 | break; | ||
1490 | } | ||
1491 | |||
1492 | /* | ||
1493 | * text_poke_smp doesn't support NMI/MCE code modifying. | ||
1494 | * However, since kprobes itself also doesn't support NMI/MCE | ||
1495 | * code probing, it's not a problem. | ||
1496 | */ | ||
1497 | text_poke_smp_batch(jump_poke_params, c); | ||
1425 | } | 1498 | } |
1426 | 1499 | ||
1427 | /* Replace a relative jump with a breakpoint (int3). */ | 1500 | /* Replace a relative jump with a breakpoint (int3). */ |
@@ -1453,11 +1526,35 @@ static int __kprobes setup_detour_execution(struct kprobe *p, | |||
1453 | } | 1526 | } |
1454 | return 0; | 1527 | return 0; |
1455 | } | 1528 | } |
1529 | |||
1530 | static int __kprobes init_poke_params(void) | ||
1531 | { | ||
1532 | /* Allocate code buffer and parameter array */ | ||
1533 | jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) * | ||
1534 | MAX_OPTIMIZE_PROBES, GFP_KERNEL); | ||
1535 | if (!jump_poke_bufs) | ||
1536 | return -ENOMEM; | ||
1537 | |||
1538 | jump_poke_params = kmalloc(sizeof(struct text_poke_param) * | ||
1539 | MAX_OPTIMIZE_PROBES, GFP_KERNEL); | ||
1540 | if (!jump_poke_params) { | ||
1541 | kfree(jump_poke_bufs); | ||
1542 | jump_poke_bufs = NULL; | ||
1543 | return -ENOMEM; | ||
1544 | } | ||
1545 | |||
1546 | return 0; | ||
1547 | } | ||
1548 | #else /* !CONFIG_OPTPROBES */ | ||
1549 | static int __kprobes init_poke_params(void) | ||
1550 | { | ||
1551 | return 0; | ||
1552 | } | ||
1456 | #endif | 1553 | #endif |
1457 | 1554 | ||
1458 | int __init arch_init_kprobes(void) | 1555 | int __init arch_init_kprobes(void) |
1459 | { | 1556 | { |
1460 | return 0; | 1557 | return init_poke_params(); |
1461 | } | 1558 | } |
1462 | 1559 | ||
1463 | int __kprobes arch_trampoline_kprobe(struct kprobe *p) | 1560 | int __kprobes arch_trampoline_kprobe(struct kprobe *p) |
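
The point of the batch interface above is cost: each text_poke_smp() call is a stop_machine-style rendezvous of every CPU, so patching probes one at a time paid that penalty per probe. Queueing up to MAX_OPTIMIZE_PROBES text_poke_param entries (addr/opcode/len, as in the hunk) and flushing them with one text_poke_smp_batch() pays it once per batch. A minimal sketch of the pattern, assuming text_poke_smp_batch() is declared in asm/alternative.h as this series adds:

    #include <linux/kernel.h>
    #include <asm/alternative.h>

    #define NR_SITES 128

    /* Hypothetical patch sites; in the kprobes code above these come
     * from optimizing_list plus the preallocated jump_poke_bufs. */
    static struct text_poke_param params[NR_SITES];

    static void flush_pokes(int n)
    {
            /* One all-CPU rendezvous for the whole batch, instead of
             * one per probe as the old arch_optimize_kprobe() incurred. */
            text_poke_smp_batch(params, n);
    }
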
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 57d1868a86aa..96ed1aac543a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -91,8 +91,7 @@ void exit_thread(void) | |||
91 | void show_regs(struct pt_regs *regs) | 91 | void show_regs(struct pt_regs *regs) |
92 | { | 92 | { |
93 | show_registers(regs); | 93 | show_registers(regs); |
94 | show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), | 94 | show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs)); |
95 | regs->bp); | ||
96 | } | 95 | } |
97 | 96 | ||
98 | void show_regs_common(void) | 97 | void show_regs_common(void) |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 083e99d1b7df..68f61ac632e1 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -281,6 +281,13 @@ static void __cpuinit smp_callin(void) | |||
281 | */ | 281 | */ |
282 | smp_store_cpu_info(cpuid); | 282 | smp_store_cpu_info(cpuid); |
283 | 283 | ||
284 | /* | ||
285 | * This must be done before setting cpu_online_mask | ||
286 | * or calling notify_cpu_starting. | ||
287 | */ | ||
288 | set_cpu_sibling_map(raw_smp_processor_id()); | ||
289 | wmb(); | ||
290 | |||
284 | notify_cpu_starting(cpuid); | 291 | notify_cpu_starting(cpuid); |
285 | 292 | ||
286 | /* | 293 | /* |
@@ -316,16 +323,6 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
316 | */ | 323 | */ |
317 | check_tsc_sync_target(); | 324 | check_tsc_sync_target(); |
318 | 325 | ||
319 | if (nmi_watchdog == NMI_IO_APIC) { | ||
320 | legacy_pic->mask(0); | ||
321 | enable_NMI_through_LVT0(); | ||
322 | legacy_pic->unmask(0); | ||
323 | } | ||
324 | |||
325 | /* This must be done before setting cpu_online_mask */ | ||
326 | set_cpu_sibling_map(raw_smp_processor_id()); | ||
327 | wmb(); | ||
328 | |||
329 | /* | 326 | /* |
330 | * We need to hold call_lock, so there is no inconsistency | 327 | * We need to hold call_lock, so there is no inconsistency |
331 | * between the time smp_call_function() determines number of | 328 | * between the time smp_call_function() determines number of |
@@ -1061,8 +1058,6 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
1061 | printk(KERN_INFO "SMP mode deactivated.\n"); | 1058 | printk(KERN_INFO "SMP mode deactivated.\n"); |
1062 | smpboot_clear_io_apic(); | 1059 | smpboot_clear_io_apic(); |
1063 | 1060 | ||
1064 | localise_nmi_watchdog(); | ||
1065 | |||
1066 | connect_bsp_APIC(); | 1061 | connect_bsp_APIC(); |
1067 | setup_local_APIC(); | 1062 | setup_local_APIC(); |
1068 | end_local_APIC_setup(); | 1063 | end_local_APIC_setup(); |
@@ -1196,7 +1191,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) | |||
1196 | #ifdef CONFIG_X86_IO_APIC | 1191 | #ifdef CONFIG_X86_IO_APIC |
1197 | setup_ioapic_dest(); | 1192 | setup_ioapic_dest(); |
1198 | #endif | 1193 | #endif |
1199 | check_nmi_watchdog(); | ||
1200 | mtrr_aps_init(); | 1194 | mtrr_aps_init(); |
1201 | } | 1195 | } |
1202 | 1196 | ||
@@ -1341,8 +1335,6 @@ int native_cpu_disable(void) | |||
1341 | if (cpu == 0) | 1335 | if (cpu == 0) |
1342 | return -EBUSY; | 1336 | return -EBUSY; |
1343 | 1337 | ||
1344 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
1345 | stop_apic_nmi_watchdog(NULL); | ||
1346 | clear_local_APIC(); | 1338 | clear_local_APIC(); |
1347 | 1339 | ||
1348 | cpu_disable_common(); | 1340 | cpu_disable_common(); |
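
The sibling-map move matters because CPU_STARTING notifiers (which now include the perf-based lockup detector) may run topology-dependent code, so the map has to be published before notify_cpu_starting(). A condensed, editorial sketch of the resulting order in smp_callin(), not the full function body:

    static void smp_callin_order(int cpuid)
    {
            smp_store_cpu_info(cpuid);

            /* Must precede the cpu_online_mask update and
             * notify_cpu_starting(): CPU_STARTING callbacks may
             * consult the sibling/topology maps. */
            set_cpu_sibling_map(raw_smp_processor_id());
            wmb();

            notify_cpu_starting(cpuid);
    }
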
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index b53c525368a7..938c8e10a19a 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
@@ -73,22 +73,22 @@ static const struct stacktrace_ops save_stack_ops_nosched = { | |||
73 | */ | 73 | */ |
74 | void save_stack_trace(struct stack_trace *trace) | 74 | void save_stack_trace(struct stack_trace *trace) |
75 | { | 75 | { |
76 | dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace); | 76 | dump_trace(current, NULL, NULL, &save_stack_ops, trace); |
77 | if (trace->nr_entries < trace->max_entries) | 77 | if (trace->nr_entries < trace->max_entries) |
78 | trace->entries[trace->nr_entries++] = ULONG_MAX; | 78 | trace->entries[trace->nr_entries++] = ULONG_MAX; |
79 | } | 79 | } |
80 | EXPORT_SYMBOL_GPL(save_stack_trace); | 80 | EXPORT_SYMBOL_GPL(save_stack_trace); |
81 | 81 | ||
82 | void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp) | 82 | void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs) |
83 | { | 83 | { |
84 | dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace); | 84 | dump_trace(current, regs, NULL, &save_stack_ops, trace); |
85 | if (trace->nr_entries < trace->max_entries) | 85 | if (trace->nr_entries < trace->max_entries) |
86 | trace->entries[trace->nr_entries++] = ULONG_MAX; | 86 | trace->entries[trace->nr_entries++] = ULONG_MAX; |
87 | } | 87 | } |
88 | 88 | ||
89 | void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) | 89 | void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) |
90 | { | 90 | { |
91 | dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); | 91 | dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace); |
92 | if (trace->nr_entries < trace->max_entries) | 92 | if (trace->nr_entries < trace->max_entries) |
93 | trace->entries[trace->nr_entries++] = ULONG_MAX; | 93 | trace->entries[trace->nr_entries++] = ULONG_MAX; |
94 | } | 94 | } |
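
Passing the whole pt_regs instead of a bare frame pointer lets dump_trace() derive its own starting state, which is what makes the explicit bp argument removable everywhere above. A hypothetical caller of the renamed helper:

    #include <linux/kernel.h>
    #include <linux/stacktrace.h>

    static void capture_from_regs(struct pt_regs *regs)
    {
            unsigned long entries[16];
            struct stack_trace trace = {
                    .entries     = entries,
                    .max_entries = ARRAY_SIZE(entries),
                    .nr_entries  = 0,
                    .skip        = 0,
            };

            /* Replaces save_stack_trace_bp(&trace, regs->bp). */
            save_stack_trace_regs(&trace, regs);
    }
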
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index fb5cc5e14cfa..25a28a245937 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c | |||
@@ -22,10 +22,6 @@ | |||
22 | #include <asm/hpet.h> | 22 | #include <asm/hpet.h> |
23 | #include <asm/time.h> | 23 | #include <asm/time.h> |
24 | 24 | ||
25 | #if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) | ||
26 | int timer_ack; | ||
27 | #endif | ||
28 | |||
29 | #ifdef CONFIG_X86_64 | 25 | #ifdef CONFIG_X86_64 |
30 | volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; | 26 | volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; |
31 | #endif | 27 | #endif |
@@ -63,20 +59,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) | |||
63 | /* Keep nmi watchdog up to date */ | 59 | /* Keep nmi watchdog up to date */ |
64 | inc_irq_stat(irq0_irqs); | 60 | inc_irq_stat(irq0_irqs); |
65 | 61 | ||
66 | /* Optimized out for !IO_APIC and x86_64 */ | ||
67 | if (timer_ack) { | ||
68 | /* | ||
69 | * Subtle, when I/O APICs are used we have to ack timer IRQ | ||
70 | * manually to deassert NMI lines for the watchdog if run | ||
71 | * on an 82489DX-based system. | ||
72 | */ | ||
73 | raw_spin_lock(&i8259A_lock); | ||
74 | outb(0x0c, PIC_MASTER_OCW3); | ||
75 | /* Ack the IRQ; AEOI will end it automatically. */ | ||
76 | inb(PIC_MASTER_POLL); | ||
77 | raw_spin_unlock(&i8259A_lock); | ||
78 | } | ||
79 | |||
80 | global_clock_event->event_handler(global_clock_event); | 62 | global_clock_event->event_handler(global_clock_event); |
81 | 63 | ||
82 | /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ | 64 | /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index cb838ca42c96..bb6f04167361 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -83,6 +83,8 @@ EXPORT_SYMBOL_GPL(used_vectors); | |||
83 | 83 | ||
84 | static int ignore_nmis; | 84 | static int ignore_nmis; |
85 | 85 | ||
86 | int unknown_nmi_panic; | ||
87 | |||
86 | static inline void conditional_sti(struct pt_regs *regs) | 88 | static inline void conditional_sti(struct pt_regs *regs) |
87 | { | 89 | { |
88 | if (regs->flags & X86_EFLAGS_IF) | 90 | if (regs->flags & X86_EFLAGS_IF) |
@@ -300,6 +302,13 @@ gp_in_kernel: | |||
300 | die("general protection fault", regs, error_code); | 302 | die("general protection fault", regs, error_code); |
301 | } | 303 | } |
302 | 304 | ||
305 | static int __init setup_unknown_nmi_panic(char *str) | ||
306 | { | ||
307 | unknown_nmi_panic = 1; | ||
308 | return 1; | ||
309 | } | ||
310 | __setup("unknown_nmi_panic", setup_unknown_nmi_panic); | ||
311 | |||
303 | static notrace __kprobes void | 312 | static notrace __kprobes void |
304 | mem_parity_error(unsigned char reason, struct pt_regs *regs) | 313 | mem_parity_error(unsigned char reason, struct pt_regs *regs) |
305 | { | 314 | { |
@@ -371,7 +380,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) | |||
371 | reason, smp_processor_id()); | 380 | reason, smp_processor_id()); |
372 | 381 | ||
373 | printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); | 382 | printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); |
374 | if (panic_on_unrecovered_nmi) | 383 | if (unknown_nmi_panic || panic_on_unrecovered_nmi) |
375 | panic("NMI: Not continuing"); | 384 | panic("NMI: Not continuing"); |
376 | 385 | ||
377 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); | 386 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); |
@@ -397,20 +406,8 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) | |||
397 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) | 406 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) |
398 | == NOTIFY_STOP) | 407 | == NOTIFY_STOP) |
399 | return; | 408 | return; |
400 | |||
401 | #ifndef CONFIG_LOCKUP_DETECTOR | ||
402 | /* | ||
403 | * Ok, so this is none of the documented NMI sources, | ||
404 | * so it must be the NMI watchdog. | ||
405 | */ | ||
406 | if (nmi_watchdog_tick(regs, reason)) | ||
407 | return; | ||
408 | if (!do_nmi_callback(regs, cpu)) | ||
409 | #endif /* !CONFIG_LOCKUP_DETECTOR */ | ||
410 | unknown_nmi_error(reason, regs); | ||
411 | #else | ||
412 | unknown_nmi_error(reason, regs); | ||
413 | #endif | 409 | #endif |
410 | unknown_nmi_error(reason, regs); | ||
414 | 411 | ||
415 | return; | 412 | return; |
416 | } | 413 | } |
@@ -446,14 +443,12 @@ do_nmi(struct pt_regs *regs, long error_code) | |||
446 | 443 | ||
447 | void stop_nmi(void) | 444 | void stop_nmi(void) |
448 | { | 445 | { |
449 | acpi_nmi_disable(); | ||
450 | ignore_nmis++; | 446 | ignore_nmis++; |
451 | } | 447 | } |
452 | 448 | ||
453 | void restart_nmi(void) | 449 | void restart_nmi(void) |
454 | { | 450 | { |
455 | ignore_nmis--; | 451 | ignore_nmis--; |
456 | acpi_nmi_enable(); | ||
457 | } | 452 | } |
458 | 453 | ||
459 | /* May run on IST stack. */ | 454 | /* May run on IST stack. */ |
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c index af3b6c8a436f..704a37cedddb 100644 --- a/arch/x86/mm/kmemcheck/error.c +++ b/arch/x86/mm/kmemcheck/error.c | |||
@@ -185,7 +185,7 @@ void kmemcheck_error_save(enum kmemcheck_shadow state, | |||
185 | e->trace.entries = e->trace_entries; | 185 | e->trace.entries = e->trace_entries; |
186 | e->trace.max_entries = ARRAY_SIZE(e->trace_entries); | 186 | e->trace.max_entries = ARRAY_SIZE(e->trace_entries); |
187 | e->trace.skip = 0; | 187 | e->trace.skip = 0; |
188 | save_stack_trace_bp(&e->trace, regs->bp); | 188 | save_stack_trace_regs(&e->trace, regs); |
189 | 189 | ||
190 | /* Round address down to nearest 16 bytes */ | 190 | /* Round address down to nearest 16 bytes */ |
191 | shadow_copy = kmemcheck_shadow_lookup(address | 191 | shadow_copy = kmemcheck_shadow_lookup(address |
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 2d49d4e19a36..72cbec14d783 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c | |||
@@ -126,7 +126,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth) | |||
126 | if (!user_mode_vm(regs)) { | 126 | if (!user_mode_vm(regs)) { |
127 | unsigned long stack = kernel_stack_pointer(regs); | 127 | unsigned long stack = kernel_stack_pointer(regs); |
128 | if (depth) | 128 | if (depth) |
129 | dump_trace(NULL, regs, (unsigned long *)stack, 0, | 129 | dump_trace(NULL, regs, (unsigned long *)stack, |
130 | &backtrace_ops, &depth); | 130 | &backtrace_ops, &depth); |
131 | return; | 131 | return; |
132 | } | 132 | } |
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c index e3ecb71b5790..0636dd93cef8 100644 --- a/arch/x86/oprofile/nmi_timer_int.c +++ b/arch/x86/oprofile/nmi_timer_int.c | |||
@@ -58,9 +58,6 @@ static void timer_stop(void) | |||
58 | 58 | ||
59 | int __init op_nmi_timer_init(struct oprofile_operations *ops) | 59 | int __init op_nmi_timer_init(struct oprofile_operations *ops) |
60 | { | 60 | { |
61 | if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0)) | ||
62 | return -ENODEV; | ||
63 | |||
64 | ops->start = timer_start; | 61 | ops->start = timer_start; |
65 | ops->stop = timer_stop; | 62 | ops->stop = timer_stop; |
66 | ops->cpu_type = "timer"; | 63 | ops->cpu_type = "timer"; |
diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c index 660a2728908d..0cac7ec0d2ec 100644 --- a/drivers/acpi/acpica/nsinit.c +++ b/drivers/acpi/acpica/nsinit.c | |||
@@ -577,9 +577,7 @@ acpi_ns_init_one_device(acpi_handle obj_handle, | |||
577 | * as possible (without an NMI being received in the middle of | 577 | * as possible (without an NMI being received in the middle of |
578 | * this) - so disable NMIs and initialize the device: | 578 | * this) - so disable NMIs and initialize the device: |
579 | */ | 579 | */ |
580 | acpi_nmi_disable(); | ||
581 | status = acpi_ns_evaluate(info); | 580 | status = acpi_ns_evaluate(info); |
582 | acpi_nmi_enable(); | ||
583 | 581 | ||
584 | if (ACPI_SUCCESS(status)) { | 582 | if (ACPI_SUCCESS(status)) { |
585 | walk_info->num_INI++; | 583 | walk_info->num_INI++; |
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c index 3d77116e4634..c19f4a20794a 100644 --- a/drivers/watchdog/hpwdt.c +++ b/drivers/watchdog/hpwdt.c | |||
@@ -649,12 +649,7 @@ static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev) | |||
649 | * If nmi_watchdog is turned off then we can turn on | 649 | * If nmi_watchdog is turned off then we can turn on |
650 | * our nmi decoding capability. | 650 | * our nmi decoding capability. |
651 | */ | 651 | */ |
652 | if (!nmi_watchdog_active()) | 652 | hpwdt_nmi_decoding = 1; |
653 | hpwdt_nmi_decoding = 1; | ||
654 | else | ||
655 | dev_warn(&dev->dev, "NMI decoding is disabled. To enable this " | ||
656 | "functionality you must reboot with nmi_watchdog=0 " | ||
657 | "and load the hpwdt driver with priority=1.\n"); | ||
658 | } | 653 | } |
659 | #else | 654 | #else |
660 | static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev) | 655 | static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev) |
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 8beabb958f61..725bf6bd39f7 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h | |||
@@ -154,12 +154,14 @@ enum { | |||
154 | TRACE_EVENT_FL_ENABLED_BIT, | 154 | TRACE_EVENT_FL_ENABLED_BIT, |
155 | TRACE_EVENT_FL_FILTERED_BIT, | 155 | TRACE_EVENT_FL_FILTERED_BIT, |
156 | TRACE_EVENT_FL_RECORDED_CMD_BIT, | 156 | TRACE_EVENT_FL_RECORDED_CMD_BIT, |
157 | TRACE_EVENT_FL_CAP_ANY_BIT, | ||
157 | }; | 158 | }; |
158 | 159 | ||
159 | enum { | 160 | enum { |
160 | TRACE_EVENT_FL_ENABLED = (1 << TRACE_EVENT_FL_ENABLED_BIT), | 161 | TRACE_EVENT_FL_ENABLED = (1 << TRACE_EVENT_FL_ENABLED_BIT), |
161 | TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), | 162 | TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), |
162 | TRACE_EVENT_FL_RECORDED_CMD = (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT), | 163 | TRACE_EVENT_FL_RECORDED_CMD = (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT), |
164 | TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT), | ||
163 | }; | 165 | }; |
164 | 166 | ||
165 | struct ftrace_event_call { | 167 | struct ftrace_event_call { |
@@ -196,6 +198,14 @@ struct ftrace_event_call { | |||
196 | #endif | 198 | #endif |
197 | }; | 199 | }; |
198 | 200 | ||
201 | #define __TRACE_EVENT_FLAGS(name, value) \ | ||
202 | static int __init trace_init_flags_##name(void) \ | ||
203 | { \ | ||
204 | event_##name.flags = value; \ | ||
205 | return 0; \ | ||
206 | } \ | ||
207 | early_initcall(trace_init_flags_##name); | ||
208 | |||
199 | #define PERF_MAX_TRACE_SIZE 2048 | 209 | #define PERF_MAX_TRACE_SIZE 2048 |
200 | 210 | ||
201 | #define MAX_FILTER_PRED 32 | 211 | #define MAX_FILTER_PRED 32 |
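
For a hypothetical event "foo", __TRACE_EVENT_FLAGS(foo, TRACE_EVENT_FL_CAP_ANY) expands to nothing more than an early initcall stamping the flag onto the ftrace_event_call the TRACE_EVENT machinery already emits:

    /* Expansion of __TRACE_EVENT_FLAGS(foo, TRACE_EVENT_FL_CAP_ANY);
     * event_foo is the already-defined ftrace_event_call. */
    static int __init trace_init_flags_foo(void)
    {
            event_foo.flags = TRACE_EVENT_FL_CAP_ANY;
            return 0;
    }
    early_initcall(trace_init_flags_foo);
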
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index e7d1b2e0070d..b78edb58ee66 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h | |||
@@ -275,7 +275,9 @@ extern int arch_prepared_optinsn(struct arch_optimized_insn *optinsn); | |||
275 | extern int arch_check_optimized_kprobe(struct optimized_kprobe *op); | 275 | extern int arch_check_optimized_kprobe(struct optimized_kprobe *op); |
276 | extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op); | 276 | extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op); |
277 | extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op); | 277 | extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op); |
278 | extern int arch_optimize_kprobe(struct optimized_kprobe *op); | 278 | extern void arch_optimize_kprobes(struct list_head *oplist); |
279 | extern void arch_unoptimize_kprobes(struct list_head *oplist, | ||
280 | struct list_head *done_list); | ||
279 | extern void arch_unoptimize_kprobe(struct optimized_kprobe *op); | 281 | extern void arch_unoptimize_kprobe(struct optimized_kprobe *op); |
280 | extern kprobe_opcode_t *get_optinsn_slot(void); | 282 | extern kprobe_opcode_t *get_optinsn_slot(void); |
281 | extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty); | 283 | extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty); |
diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 06aab5eee134..17ccf44e7dcb 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h | |||
@@ -17,8 +17,6 @@ | |||
17 | #ifdef ARCH_HAS_NMI_WATCHDOG | 17 | #ifdef ARCH_HAS_NMI_WATCHDOG |
18 | #include <asm/nmi.h> | 18 | #include <asm/nmi.h> |
19 | extern void touch_nmi_watchdog(void); | 19 | extern void touch_nmi_watchdog(void); |
20 | extern void acpi_nmi_disable(void); | ||
21 | extern void acpi_nmi_enable(void); | ||
22 | #else | 20 | #else |
23 | #ifndef CONFIG_HARDLOCKUP_DETECTOR | 21 | #ifndef CONFIG_HARDLOCKUP_DETECTOR |
24 | static inline void touch_nmi_watchdog(void) | 22 | static inline void touch_nmi_watchdog(void) |
@@ -28,8 +26,6 @@ static inline void touch_nmi_watchdog(void) | |||
28 | #else | 26 | #else |
29 | extern void touch_nmi_watchdog(void); | 27 | extern void touch_nmi_watchdog(void); |
30 | #endif | 28 | #endif |
31 | static inline void acpi_nmi_disable(void) { } | ||
32 | static inline void acpi_nmi_enable(void) { } | ||
33 | #endif | 29 | #endif |
34 | 30 | ||
35 | /* | 31 | /* |
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4f1279e105ee..dda5b0a3ff60 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h | |||
@@ -215,8 +215,9 @@ struct perf_event_attr { | |||
215 | */ | 215 | */ |
216 | precise_ip : 2, /* skid constraint */ | 216 | precise_ip : 2, /* skid constraint */ |
217 | mmap_data : 1, /* non-exec mmap data */ | 217 | mmap_data : 1, /* non-exec mmap data */ |
218 | sample_id_all : 1, /* sample_type all events */ | ||
218 | 219 | ||
219 | __reserved_1 : 46; | 220 | __reserved_1 : 45; |
220 | 221 | ||
221 | union { | 222 | union { |
222 | __u32 wakeup_events; /* wakeup every n events */ | 223 | __u32 wakeup_events; /* wakeup every n events */ |
@@ -327,6 +328,15 @@ struct perf_event_header { | |||
327 | enum perf_event_type { | 328 | enum perf_event_type { |
328 | 329 | ||
329 | /* | 330 | /* |
331 | * If perf_event_attr.sample_id_all is set then all event types will | ||
332 | * carry the sample_type-selected identity fields describing where/when | ||
333 | * an event took place (TID, TIME, ID, CPU, STREAM_ID), as described | ||
334 | * for PERF_RECORD_SAMPLE below. They are stashed just after the | ||
335 | * perf_event_header and the fields already present for each existing | ||
336 | * event type, i.e. at the end of the payload. That way a newer perf.data | ||
337 | * file will be supported by older perf tools, with these new optional | ||
338 | * fields being ignored. | ||
339 | * | ||
330 | * The MMAP events record the PROT_EXEC mappings so that we can | 340 | * The MMAP events record the PROT_EXEC mappings so that we can |
331 | * correlate userspace IPs to code. They have the following structure: | 341 | * correlate userspace IPs to code. They have the following structure: |
332 | * | 342 | * |
@@ -578,6 +588,10 @@ struct perf_event; | |||
578 | struct pmu { | 588 | struct pmu { |
579 | struct list_head entry; | 589 | struct list_head entry; |
580 | 590 | ||
591 | struct device *dev; | ||
592 | char *name; | ||
593 | int type; | ||
594 | |||
581 | int * __percpu pmu_disable_count; | 595 | int * __percpu pmu_disable_count; |
582 | struct perf_cpu_context * __percpu pmu_cpu_context; | 596 | struct perf_cpu_context * __percpu pmu_cpu_context; |
583 | int task_ctx_nr; | 597 | int task_ctx_nr; |
@@ -758,6 +772,9 @@ struct perf_event { | |||
758 | u64 shadow_ctx_time; | 772 | u64 shadow_ctx_time; |
759 | 773 | ||
760 | struct perf_event_attr attr; | 774 | struct perf_event_attr attr; |
775 | u16 header_size; | ||
776 | u16 id_header_size; | ||
777 | u16 read_size; | ||
761 | struct hw_perf_event hw; | 778 | struct hw_perf_event hw; |
762 | 779 | ||
763 | struct perf_event_context *ctx; | 780 | struct perf_event_context *ctx; |
@@ -903,7 +920,7 @@ struct perf_output_handle { | |||
903 | 920 | ||
904 | #ifdef CONFIG_PERF_EVENTS | 921 | #ifdef CONFIG_PERF_EVENTS |
905 | 922 | ||
906 | extern int perf_pmu_register(struct pmu *pmu); | 923 | extern int perf_pmu_register(struct pmu *pmu, char *name, int type); |
907 | extern void perf_pmu_unregister(struct pmu *pmu); | 924 | extern void perf_pmu_unregister(struct pmu *pmu); |
908 | 925 | ||
909 | extern int perf_num_counters(void); | 926 | extern int perf_num_counters(void); |
@@ -970,6 +987,11 @@ extern int perf_event_overflow(struct perf_event *event, int nmi, | |||
970 | struct perf_sample_data *data, | 987 | struct perf_sample_data *data, |
971 | struct pt_regs *regs); | 988 | struct pt_regs *regs); |
972 | 989 | ||
990 | static inline bool is_sampling_event(struct perf_event *event) | ||
991 | { | ||
992 | return event->attr.sample_period != 0; | ||
993 | } | ||
994 | |||
973 | /* | 995 | /* |
974 | * Return 1 for a software event, 0 for a hardware event | 996 | * Return 1 for a software event, 0 for a hardware event |
975 | */ | 997 | */ |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 223874538b33..a99d735db3df 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -316,6 +316,7 @@ extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, | |||
316 | size_t *lenp, loff_t *ppos); | 316 | size_t *lenp, loff_t *ppos); |
317 | extern unsigned int softlockup_panic; | 317 | extern unsigned int softlockup_panic; |
318 | extern int softlockup_thresh; | 318 | extern int softlockup_thresh; |
319 | void lockup_detector_init(void); | ||
319 | #else | 320 | #else |
320 | static inline void touch_softlockup_watchdog(void) | 321 | static inline void touch_softlockup_watchdog(void) |
321 | { | 322 | { |
@@ -326,6 +327,9 @@ static inline void touch_softlockup_watchdog_sync(void) | |||
326 | static inline void touch_all_softlockup_watchdogs(void) | 327 | static inline void touch_all_softlockup_watchdogs(void) |
327 | { | 328 | { |
328 | } | 329 | } |
330 | static inline void lockup_detector_init(void) | ||
331 | { | ||
332 | } | ||
329 | #endif | 333 | #endif |
330 | 334 | ||
331 | #ifdef CONFIG_DETECT_HUNG_TASK | 335 | #ifdef CONFIG_DETECT_HUNG_TASK |
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 51efbef38fb0..25310f1d7f37 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define __LINUX_STACKTRACE_H | 2 | #define __LINUX_STACKTRACE_H |
3 | 3 | ||
4 | struct task_struct; | 4 | struct task_struct; |
5 | struct pt_regs; | ||
5 | 6 | ||
6 | #ifdef CONFIG_STACKTRACE | 7 | #ifdef CONFIG_STACKTRACE |
7 | struct task_struct; | 8 | struct task_struct; |
@@ -13,7 +14,8 @@ struct stack_trace { | |||
13 | }; | 14 | }; |
14 | 15 | ||
15 | extern void save_stack_trace(struct stack_trace *trace); | 16 | extern void save_stack_trace(struct stack_trace *trace); |
16 | extern void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp); | 17 | extern void save_stack_trace_regs(struct stack_trace *trace, |
18 | struct pt_regs *regs); | ||
17 | extern void save_stack_trace_tsk(struct task_struct *tsk, | 19 | extern void save_stack_trace_tsk(struct task_struct *tsk, |
18 | struct stack_trace *trace); | 20 | struct stack_trace *trace); |
19 | 21 | ||
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index cacc27a0e285..18cd0684fc4e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h | |||
@@ -127,8 +127,6 @@ extern struct trace_event_functions exit_syscall_print_funcs; | |||
127 | #define SYSCALL_TRACE_ENTER_EVENT(sname) \ | 127 | #define SYSCALL_TRACE_ENTER_EVENT(sname) \ |
128 | static struct syscall_metadata \ | 128 | static struct syscall_metadata \ |
129 | __attribute__((__aligned__(4))) __syscall_meta_##sname; \ | 129 | __attribute__((__aligned__(4))) __syscall_meta_##sname; \ |
130 | static struct ftrace_event_call \ | ||
131 | __attribute__((__aligned__(4))) event_enter_##sname; \ | ||
132 | static struct ftrace_event_call __used \ | 130 | static struct ftrace_event_call __used \ |
133 | __attribute__((__aligned__(4))) \ | 131 | __attribute__((__aligned__(4))) \ |
134 | __attribute__((section("_ftrace_events"))) \ | 132 | __attribute__((section("_ftrace_events"))) \ |
@@ -137,13 +135,12 @@ extern struct trace_event_functions exit_syscall_print_funcs; | |||
137 | .class = &event_class_syscall_enter, \ | 135 | .class = &event_class_syscall_enter, \ |
138 | .event.funcs = &enter_syscall_print_funcs, \ | 136 | .event.funcs = &enter_syscall_print_funcs, \ |
139 | .data = (void *)&__syscall_meta_##sname,\ | 137 | .data = (void *)&__syscall_meta_##sname,\ |
140 | } | 138 | }; \ |
139 | __TRACE_EVENT_FLAGS(enter_##sname, TRACE_EVENT_FL_CAP_ANY) | ||
141 | 140 | ||
142 | #define SYSCALL_TRACE_EXIT_EVENT(sname) \ | 141 | #define SYSCALL_TRACE_EXIT_EVENT(sname) \ |
143 | static struct syscall_metadata \ | 142 | static struct syscall_metadata \ |
144 | __attribute__((__aligned__(4))) __syscall_meta_##sname; \ | 143 | __attribute__((__aligned__(4))) __syscall_meta_##sname; \ |
145 | static struct ftrace_event_call \ | ||
146 | __attribute__((__aligned__(4))) event_exit_##sname; \ | ||
147 | static struct ftrace_event_call __used \ | 144 | static struct ftrace_event_call __used \ |
148 | __attribute__((__aligned__(4))) \ | 145 | __attribute__((__aligned__(4))) \ |
149 | __attribute__((section("_ftrace_events"))) \ | 146 | __attribute__((section("_ftrace_events"))) \ |
@@ -152,7 +149,8 @@ extern struct trace_event_functions exit_syscall_print_funcs; | |||
152 | .class = &event_class_syscall_exit, \ | 149 | .class = &event_class_syscall_exit, \ |
153 | .event.funcs = &exit_syscall_print_funcs, \ | 150 | .event.funcs = &exit_syscall_print_funcs, \ |
154 | .data = (void *)&__syscall_meta_##sname,\ | 151 | .data = (void *)&__syscall_meta_##sname,\ |
155 | } | 152 | }; \ |
153 | __TRACE_EVENT_FLAGS(exit_##sname, TRACE_EVENT_FL_CAP_ANY) | ||
156 | 154 | ||
157 | #define SYSCALL_METADATA(sname, nb) \ | 155 | #define SYSCALL_METADATA(sname, nb) \ |
158 | SYSCALL_TRACE_ENTER_EVENT(sname); \ | 156 | SYSCALL_TRACE_ENTER_EVENT(sname); \ |
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index a4a90b6726ce..5a6074fcd81d 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h | |||
@@ -234,6 +234,8 @@ do_trace: \ | |||
234 | PARAMS(void *__data, proto), \ | 234 | PARAMS(void *__data, proto), \ |
235 | PARAMS(__data, args)) | 235 | PARAMS(__data, args)) |
236 | 236 | ||
237 | #define TRACE_EVENT_FLAGS(event, flag) | ||
238 | |||
237 | #endif /* DECLARE_TRACE */ | 239 | #endif /* DECLARE_TRACE */ |
238 | 240 | ||
239 | #ifndef TRACE_EVENT | 241 | #ifndef TRACE_EVENT |
@@ -354,4 +356,6 @@ do_trace: \ | |||
354 | assign, print, reg, unreg) \ | 356 | assign, print, reg, unreg) \ |
355 | DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) | 357 | DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) |
356 | 358 | ||
359 | #define TRACE_EVENT_FLAGS(event, flag) | ||
360 | |||
357 | #endif /* ifdef TRACE_EVENT (see note above) */ | 361 | #endif /* ifdef TRACE_EVENT (see note above) */ |
diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h index fb726ac7caee..5a4c04a75b3d 100644 --- a/include/trace/events/syscalls.h +++ b/include/trace/events/syscalls.h | |||
@@ -40,6 +40,8 @@ TRACE_EVENT_FN(sys_enter, | |||
40 | syscall_regfunc, syscall_unregfunc | 40 | syscall_regfunc, syscall_unregfunc |
41 | ); | 41 | ); |
42 | 42 | ||
43 | TRACE_EVENT_FLAGS(sys_enter, TRACE_EVENT_FL_CAP_ANY) | ||
44 | |||
43 | TRACE_EVENT_FN(sys_exit, | 45 | TRACE_EVENT_FN(sys_exit, |
44 | 46 | ||
45 | TP_PROTO(struct pt_regs *regs, long ret), | 47 | TP_PROTO(struct pt_regs *regs, long ret), |
@@ -62,6 +64,8 @@ TRACE_EVENT_FN(sys_exit, | |||
62 | syscall_regfunc, syscall_unregfunc | 64 | syscall_regfunc, syscall_unregfunc |
63 | ); | 65 | ); |
64 | 66 | ||
67 | TRACE_EVENT_FLAGS(sys_exit, TRACE_EVENT_FL_CAP_ANY) | ||
68 | |||
65 | #endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */ | 69 | #endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */ |
66 | 70 | ||
67 | #endif /* _TRACE_EVENTS_SYSCALLS_H */ | 71 | #endif /* _TRACE_EVENTS_SYSCALLS_H */ |
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index a9377c0083ad..e718a917d897 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h | |||
@@ -82,6 +82,10 @@ | |||
82 | TRACE_EVENT(name, PARAMS(proto), PARAMS(args), \ | 82 | TRACE_EVENT(name, PARAMS(proto), PARAMS(args), \ |
83 | PARAMS(tstruct), PARAMS(assign), PARAMS(print)) \ | 83 | PARAMS(tstruct), PARAMS(assign), PARAMS(print)) \ |
84 | 84 | ||
85 | #undef TRACE_EVENT_FLAGS | ||
86 | #define TRACE_EVENT_FLAGS(name, value) \ | ||
87 | __TRACE_EVENT_FLAGS(name, value) | ||
88 | |||
85 | #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) | 89 | #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) |
86 | 90 | ||
87 | 91 | ||
@@ -129,6 +133,9 @@ | |||
129 | #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ | 133 | #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ |
130 | DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) | 134 | DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) |
131 | 135 | ||
136 | #undef TRACE_EVENT_FLAGS | ||
137 | #define TRACE_EVENT_FLAGS(event, flag) | ||
138 | |||
132 | #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) | 139 | #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) |
133 | 140 | ||
134 | /* | 141 | /* |
diff --git a/init/main.c b/init/main.c index 8646401f7a0e..ea51770c0170 100644 --- a/init/main.c +++ b/init/main.c | |||
@@ -67,6 +67,7 @@ | |||
67 | #include <linux/sfi.h> | 67 | #include <linux/sfi.h> |
68 | #include <linux/shmem_fs.h> | 68 | #include <linux/shmem_fs.h> |
69 | #include <linux/slab.h> | 69 | #include <linux/slab.h> |
70 | #include <linux/perf_event.h> | ||
70 | 71 | ||
71 | #include <asm/io.h> | 72 | #include <asm/io.h> |
72 | #include <asm/bugs.h> | 73 | #include <asm/bugs.h> |
@@ -603,6 +604,8 @@ asmlinkage void __init start_kernel(void) | |||
603 | "enabled *very* early, fixing it\n"); | 604 | "enabled *very* early, fixing it\n"); |
604 | local_irq_disable(); | 605 | local_irq_disable(); |
605 | } | 606 | } |
607 | idr_init_cache(); | ||
608 | perf_event_init(); | ||
606 | rcu_init(); | 609 | rcu_init(); |
607 | radix_tree_init(); | 610 | radix_tree_init(); |
608 | /* init some links before init_ISA_irqs() */ | 611 | /* init some links before init_ISA_irqs() */ |
@@ -658,7 +661,6 @@ asmlinkage void __init start_kernel(void) | |||
658 | enable_debug_pagealloc(); | 661 | enable_debug_pagealloc(); |
659 | kmemleak_init(); | 662 | kmemleak_init(); |
660 | debug_objects_mem_init(); | 663 | debug_objects_mem_init(); |
661 | idr_init_cache(); | ||
662 | setup_per_cpu_pageset(); | 664 | setup_per_cpu_pageset(); |
663 | numa_policy_init(); | 665 | numa_policy_init(); |
664 | if (late_time_init) | 666 | if (late_time_init) |
@@ -882,6 +884,7 @@ static int __init kernel_init(void * unused) | |||
882 | smp_prepare_cpus(setup_max_cpus); | 884 | smp_prepare_cpus(setup_max_cpus); |
883 | 885 | ||
884 | do_pre_smp_initcalls(); | 886 | do_pre_smp_initcalls(); |
887 | lockup_detector_init(); | ||
885 | 888 | ||
886 | smp_init(); | 889 | smp_init(); |
887 | sched_init_smp(); | 890 | sched_init_smp(); |
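
perf_event_init() moving this early is what forces idr_init_cache() ahead of it: dynamic pmu types are handed out from an idr, whose slab caches must exist first. A sketch of the dependency, assuming the idr_pre_get()/idr_get_new() allocation API of this era and a hypothetical pmu_idr:

    #include <linux/idr.h>

    struct pmu;
    static struct idr pmu_idr;  /* hypothetical name for illustration */

    static int alloc_pmu_type(struct pmu *pmu)
    {
            int id;

            /* Fails unless idr_init_cache() already ran in
             * start_kernel(). */
            if (!idr_pre_get(&pmu_idr, GFP_KERNEL))
                    return -ENOMEM;
            if (idr_get_new(&pmu_idr, pmu, &id))
                    return -ENOSPC;
            return id;
    }
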
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index e5325825aeb6..086adf25a55e 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c | |||
@@ -641,7 +641,7 @@ int __init init_hw_breakpoint(void) | |||
641 | 641 | ||
642 | constraints_initialized = 1; | 642 | constraints_initialized = 1; |
643 | 643 | ||
644 | perf_pmu_register(&perf_breakpoint); | 644 | perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT); |
645 | 645 | ||
646 | return register_die_notifier(&hw_breakpoint_exceptions_nb); | 646 | return register_die_notifier(&hw_breakpoint_exceptions_nb); |
647 | 647 | ||
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 9737a76e106f..7663e5df0e6f 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
@@ -354,13 +354,20 @@ static inline int kprobe_aggrprobe(struct kprobe *p) | |||
354 | return p->pre_handler == aggr_pre_handler; | 354 | return p->pre_handler == aggr_pre_handler; |
355 | } | 355 | } |
356 | 356 | ||
357 | /* Return true(!0) if the kprobe is unused */ | ||
358 | static inline int kprobe_unused(struct kprobe *p) | ||
359 | { | ||
360 | return kprobe_aggrprobe(p) && kprobe_disabled(p) && | ||
361 | list_empty(&p->list); | ||
362 | } | ||
363 | |||
357 | /* | 364 | /* |
358 | * Keep all fields in the kprobe consistent | 365 | * Keep all fields in the kprobe consistent |
359 | */ | 366 | */ |
360 | static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p) | 367 | static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p) |
361 | { | 368 | { |
362 | memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t)); | 369 | memcpy(&p->opcode, &ap->opcode, sizeof(kprobe_opcode_t)); |
363 | memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn)); | 370 | memcpy(&p->ainsn, &ap->ainsn, sizeof(struct arch_specific_insn)); |
364 | } | 371 | } |
365 | 372 | ||
366 | #ifdef CONFIG_OPTPROBES | 373 | #ifdef CONFIG_OPTPROBES |
@@ -384,6 +391,17 @@ void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs) | |||
384 | } | 391 | } |
385 | } | 392 | } |
386 | 393 | ||
394 | /* Free optimized instructions and optimized_kprobe */ | ||
395 | static __kprobes void free_aggr_kprobe(struct kprobe *p) | ||
396 | { | ||
397 | struct optimized_kprobe *op; | ||
398 | |||
399 | op = container_of(p, struct optimized_kprobe, kp); | ||
400 | arch_remove_optimized_kprobe(op); | ||
401 | arch_remove_kprobe(p); | ||
402 | kfree(op); | ||
403 | } | ||
404 | |||
387 | /* Return true(!0) if the kprobe is ready for optimization. */ | 405 | /* Return true(!0) if the kprobe is ready for optimization. */ |
388 | static inline int kprobe_optready(struct kprobe *p) | 406 | static inline int kprobe_optready(struct kprobe *p) |
389 | { | 407 | { |
@@ -397,6 +415,33 @@ static inline int kprobe_optready(struct kprobe *p) | |||
397 | return 0; | 415 | return 0; |
398 | } | 416 | } |
399 | 417 | ||
418 | /* Return true(!0) if the kprobe is disarmed. Note: p must be on hash list */ | ||
419 | static inline int kprobe_disarmed(struct kprobe *p) | ||
420 | { | ||
421 | struct optimized_kprobe *op; | ||
422 | |||
423 | /* If kprobe is not aggr/opt probe, just return kprobe is disabled */ | ||
424 | if (!kprobe_aggrprobe(p)) | ||
425 | return kprobe_disabled(p); | ||
426 | |||
427 | op = container_of(p, struct optimized_kprobe, kp); | ||
428 | |||
429 | return kprobe_disabled(p) && list_empty(&op->list); | ||
430 | } | ||
431 | |||
432 | /* Return true(!0) if the probe is queued on (un)optimizing lists */ | ||
433 | static int __kprobes kprobe_queued(struct kprobe *p) | ||
434 | { | ||
435 | struct optimized_kprobe *op; | ||
436 | |||
437 | if (kprobe_aggrprobe(p)) { | ||
438 | op = container_of(p, struct optimized_kprobe, kp); | ||
439 | if (!list_empty(&op->list)) | ||
440 | return 1; | ||
441 | } | ||
442 | return 0; | ||
443 | } | ||
444 | |||
400 | /* | 445 | /* |
401 | * Return an optimized kprobe whose optimizing code replaces | 446 | * Return an optimized kprobe whose optimizing code replaces |
402 | * instructions including addr (exclude breakpoint). | 447 | * instructions including addr (exclude breakpoint). |
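
Taken together, the new predicates carve up the aggrprobe life cycle: disabled means the handlers must not fire, queued means a transition is still pending on one of the work lists, disarmed means the transition finished and the text is back to a plain breakpoint, and unused marks a disabled aggrprobe with no children. An editorial predicate combining them (invariants hedged; the real code reaches this via free_list in the optimizer):

    /* Sketch: when is it safe to hand an aggrprobe to reclaim? */
    static bool reclaimable(struct kprobe *p)
    {
            return kprobe_aggrprobe(p) &&
                   kprobe_disarmed(p) &&  /* no pending (un)optimization */
                   kprobe_unused(p);      /* disabled, no child probes */
    }
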
@@ -422,30 +467,23 @@ static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr) | |||
422 | 467 | ||
423 | /* Optimization staging list, protected by kprobe_mutex */ | 468 | /* Optimization staging list, protected by kprobe_mutex */ |
424 | static LIST_HEAD(optimizing_list); | 469 | static LIST_HEAD(optimizing_list); |
470 | static LIST_HEAD(unoptimizing_list); | ||
425 | 471 | ||
426 | static void kprobe_optimizer(struct work_struct *work); | 472 | static void kprobe_optimizer(struct work_struct *work); |
427 | static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer); | 473 | static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer); |
474 | static DECLARE_COMPLETION(optimizer_comp); | ||
428 | #define OPTIMIZE_DELAY 5 | 475 | #define OPTIMIZE_DELAY 5 |
429 | 476 | ||
430 | /* Kprobe jump optimizer */ | 477 | /* |
431 | static __kprobes void kprobe_optimizer(struct work_struct *work) | 478 | * Optimize (replace a breakpoint with a jump) kprobes listed on |
479 | * optimizing_list. | ||
480 | */ | ||
481 | static __kprobes void do_optimize_kprobes(void) | ||
432 | { | 482 | { |
433 | struct optimized_kprobe *op, *tmp; | 483 | /* Optimization is never done while kprobes are disarmed */ |
434 | 484 | if (kprobes_all_disarmed || !kprobes_allow_optimization || | |
435 | /* Lock modules while optimizing kprobes */ | 485 | list_empty(&optimizing_list)) |
436 | mutex_lock(&module_mutex); | 486 | return; |
437 | mutex_lock(&kprobe_mutex); | ||
438 | if (kprobes_all_disarmed || !kprobes_allow_optimization) | ||
439 | goto end; | ||
440 | |||
441 | /* | ||
442 | * Wait for quiesence period to ensure all running interrupts | ||
443 | * are done. Because optprobe may modify multiple instructions | ||
444 | * there is a chance that Nth instruction is interrupted. In that | ||
445 | * case, running interrupt can return to 2nd-Nth byte of jump | ||
446 | * instruction. This wait is for avoiding it. | ||
447 | */ | ||
448 | synchronize_sched(); | ||
449 | 487 | ||
450 | /* | 488 | /* |
451 | * The optimization/unoptimization refers online_cpus via | 489 | * The optimization/unoptimization refers online_cpus via |
@@ -459,17 +497,111 @@ static __kprobes void kprobe_optimizer(struct work_struct *work) | |||
459 | */ | 497 | */ |
460 | get_online_cpus(); | 498 | get_online_cpus(); |
461 | mutex_lock(&text_mutex); | 499 | mutex_lock(&text_mutex); |
462 | list_for_each_entry_safe(op, tmp, &optimizing_list, list) { | 500 | arch_optimize_kprobes(&optimizing_list); |
463 | WARN_ON(kprobe_disabled(&op->kp)); | 501 | mutex_unlock(&text_mutex); |
464 | if (arch_optimize_kprobe(op) < 0) | 502 | put_online_cpus(); |
465 | op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; | 503 | } |
466 | list_del_init(&op->list); | 504 | |
505 | /* | ||
506 | * Unoptimize (replace a jump with a breakpoint and remove the breakpoint | ||
507 | * if need) kprobes listed on unoptimizing_list. | ||
508 | */ | ||
509 | static __kprobes void do_unoptimize_kprobes(struct list_head *free_list) | ||
510 | { | ||
511 | struct optimized_kprobe *op, *tmp; | ||
512 | |||
513 | /* Unlike optimization, unoptimization must run even when disarmed */ | ||
514 | if (list_empty(&unoptimizing_list)) | ||
515 | return; | ||
516 | |||
517 | /* Ditto to do_optimize_kprobes */ | ||
518 | get_online_cpus(); | ||
519 | mutex_lock(&text_mutex); | ||
520 | arch_unoptimize_kprobes(&unoptimizing_list, free_list); | ||
521 | /* Loop free_list for disarming */ | ||
522 | list_for_each_entry_safe(op, tmp, free_list, list) { | ||
523 | /* Disarm probes if marked disabled */ | ||
524 | if (kprobe_disabled(&op->kp)) | ||
525 | arch_disarm_kprobe(&op->kp); | ||
526 | if (kprobe_unused(&op->kp)) { | ||
527 | /* | ||
528 | * Remove unused probes from hash list. After waiting | ||
529 | * for synchronization, these probes are reclaimed. | ||
530 | * (reclaiming is done by do_free_cleaned_kprobes.) | ||
531 | */ | ||
532 | hlist_del_rcu(&op->kp.hlist); | ||
533 | } else | ||
534 | list_del_init(&op->list); | ||
467 | } | 535 | } |
468 | mutex_unlock(&text_mutex); | 536 | mutex_unlock(&text_mutex); |
469 | put_online_cpus(); | 537 | put_online_cpus(); |
470 | end: | 538 | } |
539 | |||
540 | /* Reclaim all kprobes on the free_list */ | ||
541 | static __kprobes void do_free_cleaned_kprobes(struct list_head *free_list) | ||
542 | { | ||
543 | struct optimized_kprobe *op, *tmp; | ||
544 | |||
545 | list_for_each_entry_safe(op, tmp, free_list, list) { | ||
546 | BUG_ON(!kprobe_unused(&op->kp)); | ||
547 | list_del_init(&op->list); | ||
548 | free_aggr_kprobe(&op->kp); | ||
549 | } | ||
550 | } | ||
551 | |||
552 | /* Start the optimizer after OPTIMIZE_DELAY has passed */ | ||
553 | static __kprobes void kick_kprobe_optimizer(void) | ||
554 | { | ||
555 | if (!delayed_work_pending(&optimizing_work)) | ||
556 | schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY); | ||
557 | } | ||
558 | |||
559 | /* Kprobe jump optimizer */ | ||
560 | static __kprobes void kprobe_optimizer(struct work_struct *work) | ||
561 | { | ||
562 | LIST_HEAD(free_list); | ||
563 | |||
564 | /* Lock modules while optimizing kprobes */ | ||
565 | mutex_lock(&module_mutex); | ||
566 | mutex_lock(&kprobe_mutex); | ||
567 | |||
568 | /* | ||
569 | * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed) | ||
570 | * kprobes before waiting for quiesence period. | ||
571 | */ | ||
572 | do_unoptimize_kprobes(&free_list); | ||
573 | |||
574 | /* | ||
575 | * Step 2: Wait for a quiescence period to ensure all running | ||
576 | * interrupts are done. Because an optprobe may modify multiple | ||
577 | * instructions, there is a chance that an interrupt arrives while | ||
578 | * the Nth instruction runs; that interrupt could then return into | ||
579 | * the 2nd-Nth byte of the jump instruction. This wait avoids that. | ||
580 | */ | ||
581 | synchronize_sched(); | ||
582 | |||
583 | /* Step 3: Optimize kprobes after the quiescence period */ | ||
584 | do_optimize_kprobes(); | ||
585 | |||
586 | /* Step 4: Free cleaned kprobes after the quiescence period */ | ||
587 | do_free_cleaned_kprobes(&free_list); | ||
588 | |||
471 | mutex_unlock(&kprobe_mutex); | 589 | mutex_unlock(&kprobe_mutex); |
472 | mutex_unlock(&module_mutex); | 590 | mutex_unlock(&module_mutex); |
591 | |||
592 | /* Step 5: Kick optimizer again if needed */ | ||
593 | if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list)) | ||
594 | kick_kprobe_optimizer(); | ||
595 | else | ||
596 | /* Wake up all waiters */ | ||
597 | complete_all(&optimizer_comp); | ||
598 | } | ||
599 | |||
600 | /* Wait for optimization and unoptimization to complete */ | ||
601 | static __kprobes void wait_for_kprobe_optimizer(void) | ||
602 | { | ||
603 | if (delayed_work_pending(&optimizing_work)) | ||
604 | wait_for_completion(&optimizer_comp); | ||
473 | } | 605 | } |
474 | 606 | ||
475 | /* Optimize kprobe if p is ready to be optimized */ | 607 | /* Optimize kprobe if p is ready to be optimized */ |
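
The worker now runs a fixed pipeline: unoptimize and collect (step 1), one synchronize_sched() quiescence period shared by everything collected (step 2), batch-optimize (step 3), reclaim (step 4), then either re-kick itself or complete optimizer_comp (step 5). A hypothetical caller that needs the instruction stream settled can therefore do:

    /* Sketch: force any queued (un)optimization work to finish before
     * proceeding (e.g. before tearing down what a probe points at). */
    static void settle_kprobe_text(void)
    {
            mutex_lock(&kprobe_mutex);
            kick_kprobe_optimizer();     /* ensure a pass is scheduled */
            mutex_unlock(&kprobe_mutex);

            wait_for_kprobe_optimizer(); /* blocks on optimizer_comp */
    }
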
@@ -495,42 +627,99 @@ static __kprobes void optimize_kprobe(struct kprobe *p) | |||
495 | /* Check if it is already optimized. */ | 627 | /* Check if it is already optimized. */ |
496 | if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) | 628 | if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) |
497 | return; | 629 | return; |
498 | |||
499 | op->kp.flags |= KPROBE_FLAG_OPTIMIZED; | 630 | op->kp.flags |= KPROBE_FLAG_OPTIMIZED; |
500 | list_add(&op->list, &optimizing_list); | 631 | |
501 | if (!delayed_work_pending(&optimizing_work)) | 632 | if (!list_empty(&op->list)) |
502 | schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY); | 633 | /* This is under unoptimizing. Just dequeue the probe */ |
634 | list_del_init(&op->list); | ||
635 | else { | ||
636 | list_add(&op->list, &optimizing_list); | ||
637 | kick_kprobe_optimizer(); | ||
638 | } | ||
639 | } | ||
640 | |||
641 | /* Shortcut for forced, immediate unoptimizing */ | ||
642 | static __kprobes void force_unoptimize_kprobe(struct optimized_kprobe *op) | ||
643 | { | ||
644 | get_online_cpus(); | ||
645 | arch_unoptimize_kprobe(op); | ||
646 | put_online_cpus(); | ||
647 | if (kprobe_disabled(&op->kp)) | ||
648 | arch_disarm_kprobe(&op->kp); | ||
503 | } | 649 | } |
504 | 650 | ||
505 | /* Unoptimize a kprobe if p is optimized */ | 651 | /* Unoptimize a kprobe if p is optimized */ |
506 | static __kprobes void unoptimize_kprobe(struct kprobe *p) | 652 | static __kprobes void unoptimize_kprobe(struct kprobe *p, bool force) |
507 | { | 653 | { |
508 | struct optimized_kprobe *op; | 654 | struct optimized_kprobe *op; |
509 | 655 | ||
510 | if ((p->flags & KPROBE_FLAG_OPTIMIZED) && kprobe_aggrprobe(p)) { | 656 | if (!kprobe_aggrprobe(p) || kprobe_disarmed(p)) |
511 | op = container_of(p, struct optimized_kprobe, kp); | 657 | return; /* This is not an optprobe nor optimized */ |
512 | if (!list_empty(&op->list)) | 658 | |
513 | /* Dequeue from the optimization queue */ | 659 | op = container_of(p, struct optimized_kprobe, kp); |
660 | if (!kprobe_optimized(p)) { | ||
661 | /* Unoptimized or unoptimizing case */ | ||
662 | if (force && !list_empty(&op->list)) { | ||
663 | /* | ||
664 | * Only if this is unoptimizing kprobe and forced, | ||
665 | * forcibly unoptimize it. (No need to unoptimize | ||
666 | * unoptimized kprobe again :) | ||
667 | */ | ||
514 | list_del_init(&op->list); | 668 | list_del_init(&op->list); |
515 | else | 669 | force_unoptimize_kprobe(op); |
516 | /* Replace jump with break */ | 670 | } |
517 | arch_unoptimize_kprobe(op); | 671 | return; |
518 | op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; | 672 | } |
673 | |||
674 | op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; | ||
675 | if (!list_empty(&op->list)) { | ||
676 | /* Dequeue from the optimization queue */ | ||
677 | list_del_init(&op->list); | ||
678 | return; | ||
679 | } | ||
680 | /* Optimized kprobe case */ | ||
681 | if (force) | ||
682 | /* Forcibly update the code: this is a special case */ | ||
683 | force_unoptimize_kprobe(op); | ||
684 | else { | ||
685 | list_add(&op->list, &unoptimizing_list); | ||
686 | kick_kprobe_optimizer(); | ||
519 | } | 687 | } |
520 | } | 688 | } |
521 | 689 | ||
690 | /* Cancel pending unoptimizing so the probe can be reused */ | ||
691 | static void reuse_unused_kprobe(struct kprobe *ap) | ||
692 | { | ||
693 | struct optimized_kprobe *op; | ||
694 | |||
695 | BUG_ON(!kprobe_unused(ap)); | ||
696 | /* | ||
697 | * An unused kprobe MUST be in the middle of delayed unoptimizing | ||
698 | * (meaning a relative jump is still installed) and disabled. | ||
699 | */ | ||
700 | op = container_of(ap, struct optimized_kprobe, kp); | ||
701 | if (unlikely(list_empty(&op->list))) | ||
702 | printk(KERN_WARNING "Warning: found a stray unused " | ||
703 | "aggrprobe@%p\n", ap->addr); | ||
704 | /* Enable the probe again */ | ||
705 | ap->flags &= ~KPROBE_FLAG_DISABLED; | ||
706 | /* Optimize it again (remove from op->list) */ | ||
707 | BUG_ON(!kprobe_optready(ap)); | ||
708 | optimize_kprobe(ap); | ||
709 | } | ||
710 | |||
522 | /* Remove optimized instructions */ | 711 | /* Remove optimized instructions */ |
523 | static void __kprobes kill_optimized_kprobe(struct kprobe *p) | 712 | static void __kprobes kill_optimized_kprobe(struct kprobe *p) |
524 | { | 713 | { |
525 | struct optimized_kprobe *op; | 714 | struct optimized_kprobe *op; |
526 | 715 | ||
527 | op = container_of(p, struct optimized_kprobe, kp); | 716 | op = container_of(p, struct optimized_kprobe, kp); |
528 | if (!list_empty(&op->list)) { | 717 | if (!list_empty(&op->list)) |
529 | /* Dequeue from the optimization queue */ | 718 | /* Dequeue from the (un)optimization queue */ |
530 | list_del_init(&op->list); | 719 | list_del_init(&op->list); |
531 | op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; | 720 | |
532 | } | 721 | op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; |
533 | /* Don't unoptimize, because the target code will be freed. */ | 722 | /* Don't touch the code, because it is already freed. */ |
534 | arch_remove_optimized_kprobe(op); | 723 | arch_remove_optimized_kprobe(op); |
535 | } | 724 | } |
536 | 725 | ||
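
unoptimize_kprobe() thus has two exits: the lazy path queues the probe on unoptimizing_list and lets the next optimizer pass restore the breakpoint after a shared quiescence period, while force short-circuits through a per-probe stop_machine when the slot must be reusable immediately (the __arm_kprobe() collision case below). The forced path, condensed from force_unoptimize_kprobe() above:

    static void unoptimize_now(struct optimized_kprobe *op)
    {
            get_online_cpus();           /* avoid text_mutex/hotplug ABBA */
            arch_unoptimize_kprobe(op);  /* jump -> int3 */
            put_online_cpus();

            if (kprobe_disabled(&op->kp))
                    arch_disarm_kprobe(&op->kp);
    }
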
@@ -543,16 +732,6 @@ static __kprobes void prepare_optimized_kprobe(struct kprobe *p) | |||
543 | arch_prepare_optimized_kprobe(op); | 732 | arch_prepare_optimized_kprobe(op); |
544 | } | 733 | } |
545 | 734 | ||
546 | /* Free optimized instructions and optimized_kprobe */ | ||
547 | static __kprobes void free_aggr_kprobe(struct kprobe *p) | ||
548 | { | ||
549 | struct optimized_kprobe *op; | ||
550 | |||
551 | op = container_of(p, struct optimized_kprobe, kp); | ||
552 | arch_remove_optimized_kprobe(op); | ||
553 | kfree(op); | ||
554 | } | ||
555 | |||
556 | /* Allocate new optimized_kprobe and try to prepare optimized instructions */ | 735 | /* Allocate new optimized_kprobe and try to prepare optimized instructions */ |
557 | static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p) | 736 | static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p) |
558 | { | 737 | { |
@@ -587,7 +766,8 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p) | |||
587 | op = container_of(ap, struct optimized_kprobe, kp); | 766 | op = container_of(ap, struct optimized_kprobe, kp); |
588 | if (!arch_prepared_optinsn(&op->optinsn)) { | 767 | if (!arch_prepared_optinsn(&op->optinsn)) { |
589 | /* If failed to setup optimizing, fallback to kprobe */ | 768 | /* If failed to setup optimizing, fallback to kprobe */ |
590 | free_aggr_kprobe(ap); | 769 | arch_remove_optimized_kprobe(op); |
770 | kfree(op); | ||
591 | return; | 771 | return; |
592 | } | 772 | } |
593 | 773 | ||
@@ -631,21 +811,16 @@ static void __kprobes unoptimize_all_kprobes(void) | |||
631 | return; | 811 | return; |
632 | 812 | ||
633 | kprobes_allow_optimization = false; | 813 | kprobes_allow_optimization = false; |
634 | printk(KERN_INFO "Kprobes globally unoptimized\n"); | ||
635 | get_online_cpus(); /* For avoiding text_mutex deadlock */ | ||
636 | mutex_lock(&text_mutex); | ||
637 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { | 814 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { |
638 | head = &kprobe_table[i]; | 815 | head = &kprobe_table[i]; |
639 | hlist_for_each_entry_rcu(p, node, head, hlist) { | 816 | hlist_for_each_entry_rcu(p, node, head, hlist) { |
640 | if (!kprobe_disabled(p)) | 817 | if (!kprobe_disabled(p)) |
641 | unoptimize_kprobe(p); | 818 | unoptimize_kprobe(p, false); |
642 | } | 819 | } |
643 | } | 820 | } |
644 | 821 | /* Wait for unoptimization to complete */ |
645 | mutex_unlock(&text_mutex); | 822 | wait_for_kprobe_optimizer(); |
646 | put_online_cpus(); | 823 | printk(KERN_INFO "Kprobes globally unoptimized\n"); |
647 | /* Allow all currently running kprobes to complete */ | ||
648 | synchronize_sched(); | ||
649 | } | 824 | } |
650 | 825 | ||
651 | int sysctl_kprobes_optimization; | 826 | int sysctl_kprobes_optimization; |
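
For the global sysctl this changes the shape of the slow path: disabling optimization no longer patches text inline under text_mutex with CPUs pinned, it queues every probe for the lazy pass and then blocks in wait_for_kprobe_optimizer(). A sketch of the handler's core, assuming kprobe_mutex is taken as in the existing proc handler and that optimize_all_kprobes() is its existing counterpart:

    static void set_kprobe_optimization(bool enable)
    {
            mutex_lock(&kprobe_mutex);
            if (enable)
                    optimize_all_kprobes();
            else
                    unoptimize_all_kprobes(); /* queues, then waits */
            mutex_unlock(&kprobe_mutex);
    }
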
@@ -669,44 +844,60 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write, | |||
669 | } | 844 | } |
670 | #endif /* CONFIG_SYSCTL */ | 845 | #endif /* CONFIG_SYSCTL */ |
671 | 846 | ||
847 | /* Put a breakpoint for a probe. Must be called with text_mutex locked */ | ||
672 | static void __kprobes __arm_kprobe(struct kprobe *p) | 848 | static void __kprobes __arm_kprobe(struct kprobe *p) |
673 | { | 849 | { |
674 | struct kprobe *old_p; | 850 | struct kprobe *_p; |
675 | 851 | ||
676 | /* Check collision with other optimized kprobes */ | 852 | /* Check collision with other optimized kprobes */ |
677 | old_p = get_optimized_kprobe((unsigned long)p->addr); | 853 | _p = get_optimized_kprobe((unsigned long)p->addr); |
678 | if (unlikely(old_p)) | 854 | if (unlikely(_p)) |
679 | unoptimize_kprobe(old_p); /* Fallback to unoptimized kprobe */ | 855 | /* Fallback to unoptimized kprobe */ |
856 | unoptimize_kprobe(_p, true); | ||
680 | 857 | ||
681 | arch_arm_kprobe(p); | 858 | arch_arm_kprobe(p); |
682 | optimize_kprobe(p); /* Try to optimize (add kprobe to a list) */ | 859 | optimize_kprobe(p); /* Try to optimize (add kprobe to a list) */ |
683 | } | 860 | } |
684 | 861 | ||
685 | static void __kprobes __disarm_kprobe(struct kprobe *p) | 862 | /* Remove the breakpoint of a probe. Must be called with text_mutex locked */ |
863 | static void __kprobes __disarm_kprobe(struct kprobe *p, bool reopt) | ||
686 | { | 864 | { |
687 | struct kprobe *old_p; | 865 | struct kprobe *_p; |
688 | 866 | ||
689 | unoptimize_kprobe(p); /* Try to unoptimize */ | 867 | unoptimize_kprobe(p, false); /* Try to unoptimize */ |
690 | arch_disarm_kprobe(p); | ||
691 | 868 | ||
692 | /* If another kprobe was blocked, optimize it. */ | 869 | if (!kprobe_queued(p)) { |
693 | old_p = get_optimized_kprobe((unsigned long)p->addr); | 870 | arch_disarm_kprobe(p); |
694 | if (unlikely(old_p)) | 871 | /* If another kprobe was blocked, optimize it. */ |
695 | optimize_kprobe(old_p); | 872 | _p = get_optimized_kprobe((unsigned long)p->addr); |
873 | if (unlikely(_p) && reopt) | ||
874 | optimize_kprobe(_p); | ||
875 | } | ||
876 | /* TODO: reoptimize other probes after unoptimizing this one */ | ||
696 | } | 877 | } |
697 | 878 | ||
698 | #else /* !CONFIG_OPTPROBES */ | 879 | #else /* !CONFIG_OPTPROBES */ |
699 | 880 | ||
700 | #define optimize_kprobe(p) do {} while (0) | 881 | #define optimize_kprobe(p) do {} while (0) |
701 | #define unoptimize_kprobe(p) do {} while (0) | 882 | #define unoptimize_kprobe(p, f) do {} while (0) |
702 | #define kill_optimized_kprobe(p) do {} while (0) | 883 | #define kill_optimized_kprobe(p) do {} while (0) |
703 | #define prepare_optimized_kprobe(p) do {} while (0) | 884 | #define prepare_optimized_kprobe(p) do {} while (0) |
704 | #define try_to_optimize_kprobe(p) do {} while (0) | 885 | #define try_to_optimize_kprobe(p) do {} while (0) |
705 | #define __arm_kprobe(p) arch_arm_kprobe(p) | 886 | #define __arm_kprobe(p) arch_arm_kprobe(p) |
706 | #define __disarm_kprobe(p) arch_disarm_kprobe(p) | 887 | #define __disarm_kprobe(p, o) arch_disarm_kprobe(p) |
888 | #define kprobe_disarmed(p) kprobe_disabled(p) | ||
889 | #define wait_for_kprobe_optimizer() do {} while (0) | ||
890 | |||
891 | /* Without optimization, there should be no unused kprobes to reuse */ | ||
892 | static void reuse_unused_kprobe(struct kprobe *ap) | ||
893 | { | ||
894 | printk(KERN_ERR "Error: There should be no unused kprobe here.\n"); | ||
895 | BUG_ON(kprobe_unused(ap)); | ||
896 | } | ||
707 | 897 | ||
708 | static __kprobes void free_aggr_kprobe(struct kprobe *p) | 898 | static __kprobes void free_aggr_kprobe(struct kprobe *p) |
709 | { | 899 | { |
900 | arch_remove_kprobe(p); | ||
710 | kfree(p); | 901 | kfree(p); |
711 | } | 902 | } |
712 | 903 | ||
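Note that the stub block above maps kprobe_disarmed() straight to kprobe_disabled() when CONFIG_OPTPROBES is off. With optimization enabled the two differ: a probe can already be disabled while its jump or breakpoint is still queued for delayed unoptimization. A plausible sketch of the optimized-side predicate (not shown in this section; the op->list test is an assumption based on how the queues are used in this patch):

	static int kprobe_disarmed(struct kprobe *p)
	{
		struct optimized_kprobe *op;

		/* Plain kprobes are disarmed exactly when disabled. */
		if (!kprobe_aggrprobe(p))
			return kprobe_disabled(p);

		op = container_of(p, struct optimized_kprobe, kp);
		/* Disarmed = disabled and no longer on an (un)optimizing list. */
		return kprobe_disabled(p) && list_empty(&op->list);
	}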
@@ -732,11 +923,10 @@ static void __kprobes arm_kprobe(struct kprobe *kp) | |||
732 | /* Disarm a kprobe with text_mutex */ | 923 | /* Disarm a kprobe with text_mutex */ |
733 | static void __kprobes disarm_kprobe(struct kprobe *kp) | 924 | static void __kprobes disarm_kprobe(struct kprobe *kp) |
734 | { | 925 | { |
735 | get_online_cpus(); /* For avoiding text_mutex deadlock */ | 926 | /* Ditto */ |
736 | mutex_lock(&text_mutex); | 927 | mutex_lock(&text_mutex); |
737 | __disarm_kprobe(kp); | 928 | __disarm_kprobe(kp, true); |
738 | mutex_unlock(&text_mutex); | 929 | mutex_unlock(&text_mutex); |
739 | put_online_cpus(); | ||
740 | } | 930 | } |
741 | 931 | ||
742 | /* | 932 | /* |
@@ -942,7 +1132,7 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p) | |||
942 | BUG_ON(kprobe_gone(ap) || kprobe_gone(p)); | 1132 | BUG_ON(kprobe_gone(ap) || kprobe_gone(p)); |
943 | 1133 | ||
944 | if (p->break_handler || p->post_handler) | 1134 | if (p->break_handler || p->post_handler) |
945 | unoptimize_kprobe(ap); /* Fall back to normal kprobe */ | 1135 | unoptimize_kprobe(ap, true); /* Fall back to normal kprobe */ |
946 | 1136 | ||
947 | if (p->break_handler) { | 1137 | if (p->break_handler) { |
948 | if (ap->break_handler) | 1138 | if (ap->break_handler) |
@@ -993,19 +1183,21 @@ static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p) | |||
993 | * This is the second or subsequent kprobe at the address - handle | 1183 | * This is the second or subsequent kprobe at the address - handle |
994 | * the intricacies | 1184 | * the intricacies |
995 | */ | 1185 | */ |
996 | static int __kprobes register_aggr_kprobe(struct kprobe *old_p, | 1186 | static int __kprobes register_aggr_kprobe(struct kprobe *orig_p, |
997 | struct kprobe *p) | 1187 | struct kprobe *p) |
998 | { | 1188 | { |
999 | int ret = 0; | 1189 | int ret = 0; |
1000 | struct kprobe *ap = old_p; | 1190 | struct kprobe *ap = orig_p; |
1001 | 1191 | ||
1002 | if (!kprobe_aggrprobe(old_p)) { | 1192 | if (!kprobe_aggrprobe(orig_p)) { |
1003 | /* If old_p is not an aggr_kprobe, create new aggr_kprobe. */ | 1193 | /* If orig_p is not an aggr_kprobe, create new aggr_kprobe. */ |
1004 | ap = alloc_aggr_kprobe(old_p); | 1194 | ap = alloc_aggr_kprobe(orig_p); |
1005 | if (!ap) | 1195 | if (!ap) |
1006 | return -ENOMEM; | 1196 | return -ENOMEM; |
1007 | init_aggr_kprobe(ap, old_p); | 1197 | init_aggr_kprobe(ap, orig_p); |
1008 | } | 1198 | } else if (kprobe_unused(ap)) |
1199 | /* This probe is going to die. Rescue it */ | ||
1200 | reuse_unused_kprobe(ap); | ||
1009 | 1201 | ||
1010 | if (kprobe_gone(ap)) { | 1202 | if (kprobe_gone(ap)) { |
1011 | /* | 1203 | /* |
@@ -1039,23 +1231,6 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, | |||
1039 | return add_new_kprobe(ap, p); | 1231 | return add_new_kprobe(ap, p); |
1040 | } | 1232 | } |
1041 | 1233 | ||
1042 | /* Try to disable aggr_kprobe, and return 1 if succeeded.*/ | ||
1043 | static int __kprobes try_to_disable_aggr_kprobe(struct kprobe *p) | ||
1044 | { | ||
1045 | struct kprobe *kp; | ||
1046 | |||
1047 | list_for_each_entry_rcu(kp, &p->list, list) { | ||
1048 | if (!kprobe_disabled(kp)) | ||
1049 | /* | ||
1050 | * There is an active probe on the list. | ||
1051 | * We can't disable aggr_kprobe. | ||
1052 | */ | ||
1053 | return 0; | ||
1054 | } | ||
1055 | p->flags |= KPROBE_FLAG_DISABLED; | ||
1056 | return 1; | ||
1057 | } | ||
1058 | |||
1059 | static int __kprobes in_kprobes_functions(unsigned long addr) | 1234 | static int __kprobes in_kprobes_functions(unsigned long addr) |
1060 | { | 1235 | { |
1061 | struct kprobe_blackpoint *kb; | 1236 | struct kprobe_blackpoint *kb; |
@@ -1098,34 +1273,33 @@ static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p) | |||
1098 | /* Check passed kprobe is valid and return kprobe in kprobe_table. */ | 1273 | /* Check passed kprobe is valid and return kprobe in kprobe_table. */ |
1099 | static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p) | 1274 | static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p) |
1100 | { | 1275 | { |
1101 | struct kprobe *old_p, *list_p; | 1276 | struct kprobe *ap, *list_p; |
1102 | 1277 | ||
1103 | old_p = get_kprobe(p->addr); | 1278 | ap = get_kprobe(p->addr); |
1104 | if (unlikely(!old_p)) | 1279 | if (unlikely(!ap)) |
1105 | return NULL; | 1280 | return NULL; |
1106 | 1281 | ||
1107 | if (p != old_p) { | 1282 | if (p != ap) { |
1108 | list_for_each_entry_rcu(list_p, &old_p->list, list) | 1283 | list_for_each_entry_rcu(list_p, &ap->list, list) |
1109 | if (list_p == p) | 1284 | if (list_p == p) |
1110 | /* kprobe p is a valid probe */ | 1285 | /* kprobe p is a valid probe */ |
1111 | goto valid; | 1286 | goto valid; |
1112 | return NULL; | 1287 | return NULL; |
1113 | } | 1288 | } |
1114 | valid: | 1289 | valid: |
1115 | return old_p; | 1290 | return ap; |
1116 | } | 1291 | } |
1117 | 1292 | ||
1118 | /* Return error if the kprobe is being re-registered */ | 1293 | /* Return error if the kprobe is being re-registered */ |
1119 | static inline int check_kprobe_rereg(struct kprobe *p) | 1294 | static inline int check_kprobe_rereg(struct kprobe *p) |
1120 | { | 1295 | { |
1121 | int ret = 0; | 1296 | int ret = 0; |
1122 | struct kprobe *old_p; | ||
1123 | 1297 | ||
1124 | mutex_lock(&kprobe_mutex); | 1298 | mutex_lock(&kprobe_mutex); |
1125 | old_p = __get_valid_kprobe(p); | 1299 | if (__get_valid_kprobe(p)) |
1126 | if (old_p) | ||
1127 | ret = -EINVAL; | 1300 | ret = -EINVAL; |
1128 | mutex_unlock(&kprobe_mutex); | 1301 | mutex_unlock(&kprobe_mutex); |
1302 | |||
1129 | return ret; | 1303 | return ret; |
1130 | } | 1304 | } |
1131 | 1305 | ||
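check_kprobe_rereg() now simply tests for presence in the hash table. The observable behavior for callers (symbol and handler names below are illustrative, not from this patch):

	static struct kprobe kp = {
		.symbol_name = "do_fork",	/* illustrative target */
		.pre_handler = my_pre_handler,	/* hypothetical handler */
	};

	register_kprobe(&kp);	/* returns 0 on success */
	register_kprobe(&kp);	/* returns -EINVAL: this object is already registered */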
@@ -1229,67 +1403,121 @@ fail_with_jump_label: | |||
1229 | } | 1403 | } |
1230 | EXPORT_SYMBOL_GPL(register_kprobe); | 1404 | EXPORT_SYMBOL_GPL(register_kprobe); |
1231 | 1405 | ||
1406 | /* Check if all probes on the aggrprobe are disabled */ | ||
1407 | static int __kprobes aggr_kprobe_disabled(struct kprobe *ap) | ||
1408 | { | ||
1409 | struct kprobe *kp; | ||
1410 | |||
1411 | list_for_each_entry_rcu(kp, &ap->list, list) | ||
1412 | if (!kprobe_disabled(kp)) | ||
1413 | /* | ||
1414 | * There is an active probe on the list. | ||
1415 | * We can't disable this ap. | ||
1416 | */ | ||
1417 | return 0; | ||
1418 | |||
1419 | return 1; | ||
1420 | } | ||
1421 | |||
1422 | /* Disable one kprobe: must be called with kprobe_mutex held */ | ||
1423 | static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p) | ||
1424 | { | ||
1425 | struct kprobe *orig_p; | ||
1426 | |||
1427 | /* Get an original kprobe for return */ | ||
1428 | orig_p = __get_valid_kprobe(p); | ||
1429 | if (unlikely(orig_p == NULL)) | ||
1430 | return NULL; | ||
1431 | |||
1432 | if (!kprobe_disabled(p)) { | ||
1433 | /* Disable probe if it is a child probe */ | ||
1434 | if (p != orig_p) | ||
1435 | p->flags |= KPROBE_FLAG_DISABLED; | ||
1436 | |||
1437 | /* Try to disarm and disable this/parent probe */ | ||
1438 | if (p == orig_p || aggr_kprobe_disabled(orig_p)) { | ||
1439 | disarm_kprobe(orig_p); | ||
1440 | orig_p->flags |= KPROBE_FLAG_DISABLED; | ||
1441 | } | ||
1442 | } | ||
1443 | |||
1444 | return orig_p; | ||
1445 | } | ||
1446 | |||
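With __disable_kprobe() in place, disable_kprobe() and __unregister_kprobe_top() below share one disarm path instead of duplicating the aggrprobe bookkeeping. The module-facing API is unchanged; a usage sketch, continuing the kp example above:

	disable_kprobe(&kp);	/* handlers stop firing; breakpoint removed
				 * via __disable_kprobe() -> disarm_kprobe() */
	enable_kprobe(&kp);	/* re-arms (and possibly re-optimizes) */
	unregister_kprobe(&kp);	/* now also funnels through __disable_kprobe() */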
1232 | /* | 1447 | /* |
1233 | * Unregister a kprobe without a scheduler synchronization. | 1448 | * Unregister a kprobe without a scheduler synchronization. |
1234 | */ | 1449 | */ |
1235 | static int __kprobes __unregister_kprobe_top(struct kprobe *p) | 1450 | static int __kprobes __unregister_kprobe_top(struct kprobe *p) |
1236 | { | 1451 | { |
1237 | struct kprobe *old_p, *list_p; | 1452 | struct kprobe *ap, *list_p; |
1238 | 1453 | ||
1239 | old_p = __get_valid_kprobe(p); | 1454 | /* Disable kprobe. This will disarm it if needed. */ |
1240 | if (old_p == NULL) | 1455 | ap = __disable_kprobe(p); |
1456 | if (ap == NULL) | ||
1241 | return -EINVAL; | 1457 | return -EINVAL; |
1242 | 1458 | ||
1243 | if (old_p == p || | 1459 | if (ap == p) |
1244 | (kprobe_aggrprobe(old_p) && | ||
1245 | list_is_singular(&old_p->list))) { | ||
1246 | /* | 1460 | /* |
1247 | * Only probe on the hash list. Disarm only if kprobes are | 1461 | * This probe is an independent (and non-optimized) kprobe |
1248 | * enabled and not gone - otherwise, the breakpoint would | 1462 | * (not an aggrprobe). Remove from the hash list. |
1249 | * already have been removed. We save on flushing icache. | ||
1250 | */ | 1463 | */ |
1251 | if (!kprobes_all_disarmed && !kprobe_disabled(old_p)) | 1464 | goto disarmed; |
1252 | disarm_kprobe(old_p); | 1465 | |
1253 | hlist_del_rcu(&old_p->hlist); | 1466 | /* The following code expects this probe to be an aggrprobe */ |
1254 | } else { | 1467 | WARN_ON(!kprobe_aggrprobe(ap)); |
1468 | |||
1469 | if (list_is_singular(&ap->list) && kprobe_disarmed(ap)) | ||
1470 | /* | ||
1471 | * !disarmed can happen if the probe is still queued for | ||
1472 | * delayed unoptimization. | ||
1473 | */ | ||
1474 | goto disarmed; | ||
1475 | else { | ||
1476 | /* If the probe being disabled has special handlers, update the aggrprobe */ | ||
1255 | if (p->break_handler && !kprobe_gone(p)) | 1477 | if (p->break_handler && !kprobe_gone(p)) |
1256 | old_p->break_handler = NULL; | 1478 | ap->break_handler = NULL; |
1257 | if (p->post_handler && !kprobe_gone(p)) { | 1479 | if (p->post_handler && !kprobe_gone(p)) { |
1258 | list_for_each_entry_rcu(list_p, &old_p->list, list) { | 1480 | list_for_each_entry_rcu(list_p, &ap->list, list) { |
1259 | if ((list_p != p) && (list_p->post_handler)) | 1481 | if ((list_p != p) && (list_p->post_handler)) |
1260 | goto noclean; | 1482 | goto noclean; |
1261 | } | 1483 | } |
1262 | old_p->post_handler = NULL; | 1484 | ap->post_handler = NULL; |
1263 | } | 1485 | } |
1264 | noclean: | 1486 | noclean: |
1487 | /* | ||
1488 | * Remove from the aggrprobe: this path will do nothing in | ||
1489 | * __unregister_kprobe_bottom(). | ||
1490 | */ | ||
1265 | list_del_rcu(&p->list); | 1491 | list_del_rcu(&p->list); |
1266 | if (!kprobe_disabled(old_p)) { | 1492 | if (!kprobe_disabled(ap) && !kprobes_all_disarmed) |
1267 | try_to_disable_aggr_kprobe(old_p); | 1493 | /* |
1268 | if (!kprobes_all_disarmed) { | 1494 | * Try to optimize this probe again, because post |
1269 | if (kprobe_disabled(old_p)) | 1495 | * handler may have been changed. |
1270 | disarm_kprobe(old_p); | 1496 | */ |
1271 | else | 1497 | optimize_kprobe(ap); |
1272 | /* Try to optimize this probe again */ | ||
1273 | optimize_kprobe(old_p); | ||
1274 | } | ||
1275 | } | ||
1276 | } | 1498 | } |
1277 | return 0; | 1499 | return 0; |
1500 | |||
1501 | disarmed: | ||
1502 | BUG_ON(!kprobe_disarmed(ap)); | ||
1503 | hlist_del_rcu(&ap->hlist); | ||
1504 | return 0; | ||
1278 | } | 1505 | } |
1279 | 1506 | ||
1280 | static void __kprobes __unregister_kprobe_bottom(struct kprobe *p) | 1507 | static void __kprobes __unregister_kprobe_bottom(struct kprobe *p) |
1281 | { | 1508 | { |
1282 | struct kprobe *old_p; | 1509 | struct kprobe *ap; |
1283 | 1510 | ||
1284 | if (list_empty(&p->list)) | 1511 | if (list_empty(&p->list)) |
1512 | /* This is an independent kprobe */ | ||
1285 | arch_remove_kprobe(p); | 1513 | arch_remove_kprobe(p); |
1286 | else if (list_is_singular(&p->list)) { | 1514 | else if (list_is_singular(&p->list)) { |
1287 | /* "p" is the last child of an aggr_kprobe */ | 1515 | /* This is the last child of an aggrprobe */ |
1288 | old_p = list_entry(p->list.next, struct kprobe, list); | 1516 | ap = list_entry(p->list.next, struct kprobe, list); |
1289 | list_del(&p->list); | 1517 | list_del(&p->list); |
1290 | arch_remove_kprobe(old_p); | 1518 | free_aggr_kprobe(ap); |
1291 | free_aggr_kprobe(old_p); | ||
1292 | } | 1519 | } |
1520 | /* Otherwise, do nothing. */ | ||
1293 | } | 1521 | } |
1294 | 1522 | ||
1295 | int __kprobes register_kprobes(struct kprobe **kps, int num) | 1523 | int __kprobes register_kprobes(struct kprobe **kps, int num) |
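The top/bottom split of unregistration lets one scheduler synchronization cover a whole batch of removals. A sketch of the caller pattern, assuming it is unchanged by this patch:

	void unregister_kprobes(struct kprobe **kps, int num)
	{
		int i;

		mutex_lock(&kprobe_mutex);
		for (i = 0; i < num; i++)
			if (__unregister_kprobe_top(kps[i]) < 0)
				kps[i]->addr = NULL;
		mutex_unlock(&kprobe_mutex);

		synchronize_sched();	/* let in-flight handlers finish */
		for (i = 0; i < num; i++)
			if (kps[i]->addr)
				__unregister_kprobe_bottom(kps[i]);
	}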
@@ -1607,29 +1835,13 @@ static void __kprobes kill_kprobe(struct kprobe *p) | |||
1607 | int __kprobes disable_kprobe(struct kprobe *kp) | 1835 | int __kprobes disable_kprobe(struct kprobe *kp) |
1608 | { | 1836 | { |
1609 | int ret = 0; | 1837 | int ret = 0; |
1610 | struct kprobe *p; | ||
1611 | 1838 | ||
1612 | mutex_lock(&kprobe_mutex); | 1839 | mutex_lock(&kprobe_mutex); |
1613 | 1840 | ||
1614 | /* Check whether specified probe is valid. */ | 1841 | /* Disable this kprobe */ |
1615 | p = __get_valid_kprobe(kp); | 1842 | if (__disable_kprobe(kp) == NULL) |
1616 | if (unlikely(p == NULL)) { | ||
1617 | ret = -EINVAL; | 1843 | ret = -EINVAL; |
1618 | goto out; | ||
1619 | } | ||
1620 | 1844 | ||
1621 | /* If the probe is already disabled (or gone), just return */ | ||
1622 | if (kprobe_disabled(kp)) | ||
1623 | goto out; | ||
1624 | |||
1625 | kp->flags |= KPROBE_FLAG_DISABLED; | ||
1626 | if (p != kp) | ||
1627 | /* When kp != p, p is always enabled. */ | ||
1628 | try_to_disable_aggr_kprobe(p); | ||
1629 | |||
1630 | if (!kprobes_all_disarmed && kprobe_disabled(p)) | ||
1631 | disarm_kprobe(p); | ||
1632 | out: | ||
1633 | mutex_unlock(&kprobe_mutex); | 1845 | mutex_unlock(&kprobe_mutex); |
1634 | return ret; | 1846 | return ret; |
1635 | } | 1847 | } |
@@ -1927,36 +2139,27 @@ static void __kprobes disarm_all_kprobes(void) | |||
1927 | mutex_lock(&kprobe_mutex); | 2139 | mutex_lock(&kprobe_mutex); |
1928 | 2140 | ||
1929 | /* If kprobes are already disarmed, just return */ | 2141 | /* If kprobes are already disarmed, just return */ |
1930 | if (kprobes_all_disarmed) | 2142 | if (kprobes_all_disarmed) { |
1931 | goto already_disabled; | 2143 | mutex_unlock(&kprobe_mutex); |
2144 | return; | ||
2145 | } | ||
1932 | 2146 | ||
1933 | kprobes_all_disarmed = true; | 2147 | kprobes_all_disarmed = true; |
1934 | printk(KERN_INFO "Kprobes globally disabled\n"); | 2148 | printk(KERN_INFO "Kprobes globally disabled\n"); |
1935 | 2149 | ||
1936 | /* | ||
1937 | * Here we call get_online_cpus() for avoiding text_mutex deadlock, | ||
1938 | * because disarming may also unoptimize kprobes. | ||
1939 | */ | ||
1940 | get_online_cpus(); | ||
1941 | mutex_lock(&text_mutex); | 2150 | mutex_lock(&text_mutex); |
1942 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { | 2151 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { |
1943 | head = &kprobe_table[i]; | 2152 | head = &kprobe_table[i]; |
1944 | hlist_for_each_entry_rcu(p, node, head, hlist) { | 2153 | hlist_for_each_entry_rcu(p, node, head, hlist) { |
1945 | if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) | 2154 | if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) |
1946 | __disarm_kprobe(p); | 2155 | __disarm_kprobe(p, false); |
1947 | } | 2156 | } |
1948 | } | 2157 | } |
1949 | |||
1950 | mutex_unlock(&text_mutex); | 2158 | mutex_unlock(&text_mutex); |
1951 | put_online_cpus(); | ||
1952 | mutex_unlock(&kprobe_mutex); | 2159 | mutex_unlock(&kprobe_mutex); |
1953 | /* Allow all currently running kprobes to complete */ | ||
1954 | synchronize_sched(); | ||
1955 | return; | ||
1956 | 2160 | ||
1957 | already_disabled: | 2161 | /* Wait for the optimizer to finish disarming all kprobes */ |
1958 | mutex_unlock(&kprobe_mutex); | 2162 | wait_for_kprobe_optimizer(); |
1959 | return; | ||
1960 | } | 2163 | } |
1961 | 2164 | ||
1962 | /* | 2165 | /* |
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 2870feee81dd..11847bf1e8cc 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/mm.h> | 13 | #include <linux/mm.h> |
14 | #include <linux/cpu.h> | 14 | #include <linux/cpu.h> |
15 | #include <linux/smp.h> | 15 | #include <linux/smp.h> |
16 | #include <linux/idr.h> | ||
16 | #include <linux/file.h> | 17 | #include <linux/file.h> |
17 | #include <linux/poll.h> | 18 | #include <linux/poll.h> |
18 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
@@ -21,7 +22,9 @@ | |||
21 | #include <linux/dcache.h> | 22 | #include <linux/dcache.h> |
22 | #include <linux/percpu.h> | 23 | #include <linux/percpu.h> |
23 | #include <linux/ptrace.h> | 24 | #include <linux/ptrace.h> |
25 | #include <linux/reboot.h> | ||
24 | #include <linux/vmstat.h> | 26 | #include <linux/vmstat.h> |
27 | #include <linux/device.h> | ||
25 | #include <linux/vmalloc.h> | 28 | #include <linux/vmalloc.h> |
26 | #include <linux/hardirq.h> | 29 | #include <linux/hardirq.h> |
27 | #include <linux/rculist.h> | 30 | #include <linux/rculist.h> |
@@ -133,6 +136,28 @@ static void unclone_ctx(struct perf_event_context *ctx) | |||
133 | } | 136 | } |
134 | } | 137 | } |
135 | 138 | ||
139 | static u32 perf_event_pid(struct perf_event *event, struct task_struct *p) | ||
140 | { | ||
141 | /* | ||
142 | * only top level events have the pid namespace they were created in | ||
143 | */ | ||
144 | if (event->parent) | ||
145 | event = event->parent; | ||
146 | |||
147 | return task_tgid_nr_ns(p, event->ns); | ||
148 | } | ||
149 | |||
150 | static u32 perf_event_tid(struct perf_event *event, struct task_struct *p) | ||
151 | { | ||
152 | /* | ||
153 | * only top level events have the pid namespace they were created in | ||
154 | */ | ||
155 | if (event->parent) | ||
156 | event = event->parent; | ||
157 | |||
158 | return task_pid_nr_ns(p, event->ns); | ||
159 | } | ||
160 | |||
136 | /* | 161 | /* |
137 | * If we inherit events we want to return the parent event id | 162 | * If we inherit events we want to return the parent event id |
138 | * to userspace. | 163 | * to userspace. |
@@ -312,9 +337,84 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
312 | ctx->nr_stat++; | 337 | ctx->nr_stat++; |
313 | } | 338 | } |
314 | 339 | ||
340 | /* | ||
341 | * Called at perf_event creation and when events are attached/detached from a | ||
342 | * group. | ||
343 | */ | ||
344 | static void perf_event__read_size(struct perf_event *event) | ||
345 | { | ||
346 | int entry = sizeof(u64); /* value */ | ||
347 | int size = 0; | ||
348 | int nr = 1; | ||
349 | |||
350 | if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) | ||
351 | size += sizeof(u64); | ||
352 | |||
353 | if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) | ||
354 | size += sizeof(u64); | ||
355 | |||
356 | if (event->attr.read_format & PERF_FORMAT_ID) | ||
357 | entry += sizeof(u64); | ||
358 | |||
359 | if (event->attr.read_format & PERF_FORMAT_GROUP) { | ||
360 | nr += event->group_leader->nr_siblings; | ||
361 | size += sizeof(u64); | ||
362 | } | ||
363 | |||
364 | size += entry * nr; | ||
365 | event->read_size = size; | ||
366 | } | ||
367 | |||
368 | static void perf_event__header_size(struct perf_event *event) | ||
369 | { | ||
370 | struct perf_sample_data *data; | ||
371 | u64 sample_type = event->attr.sample_type; | ||
372 | u16 size = 0; | ||
373 | |||
374 | perf_event__read_size(event); | ||
375 | |||
376 | if (sample_type & PERF_SAMPLE_IP) | ||
377 | size += sizeof(data->ip); | ||
378 | |||
379 | if (sample_type & PERF_SAMPLE_ADDR) | ||
380 | size += sizeof(data->addr); | ||
381 | |||
382 | if (sample_type & PERF_SAMPLE_PERIOD) | ||
383 | size += sizeof(data->period); | ||
384 | |||
385 | if (sample_type & PERF_SAMPLE_READ) | ||
386 | size += event->read_size; | ||
387 | |||
388 | event->header_size = size; | ||
389 | } | ||
390 | |||
391 | static void perf_event__id_header_size(struct perf_event *event) | ||
392 | { | ||
393 | struct perf_sample_data *data; | ||
394 | u64 sample_type = event->attr.sample_type; | ||
395 | u16 size = 0; | ||
396 | |||
397 | if (sample_type & PERF_SAMPLE_TID) | ||
398 | size += sizeof(data->tid_entry); | ||
399 | |||
400 | if (sample_type & PERF_SAMPLE_TIME) | ||
401 | size += sizeof(data->time); | ||
402 | |||
403 | if (sample_type & PERF_SAMPLE_ID) | ||
404 | size += sizeof(data->id); | ||
405 | |||
406 | if (sample_type & PERF_SAMPLE_STREAM_ID) | ||
407 | size += sizeof(data->stream_id); | ||
408 | |||
409 | if (sample_type & PERF_SAMPLE_CPU) | ||
410 | size += sizeof(data->cpu_entry); | ||
411 | |||
412 | event->id_header_size = size; | ||
413 | } | ||
414 | |||
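A worked example of the sizes precalculated above; the arithmetic follows directly from the two functions:

	/*
	 * perf_event__read_size() with read_format =
	 * PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_ID (no FORMAT_GROUP):
	 *	entry = 8 (value) + 8 (id)	= 16
	 *	size  = 8 (time_enabled)	=  8
	 *	nr    = 1
	 *	read_size = 8 + 16 * 1		= 24 bytes
	 *
	 * perf_event__id_header_size() with sample_type =
	 * PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_CPU:
	 *	id_header_size = 8 (tid_entry) + 8 (time) + 8 (cpu_entry)
	 *		       = 24 bytes
	 */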
315 | static void perf_group_attach(struct perf_event *event) | 415 | static void perf_group_attach(struct perf_event *event) |
316 | { | 416 | { |
317 | struct perf_event *group_leader = event->group_leader; | 417 | struct perf_event *group_leader = event->group_leader, *pos; |
318 | 418 | ||
319 | /* | 419 | /* |
320 | * We can have double attach due to group movement in perf_event_open. | 420 | * We can have double attach due to group movement in perf_event_open. |
@@ -333,6 +433,11 @@ static void perf_group_attach(struct perf_event *event) | |||
333 | 433 | ||
334 | list_add_tail(&event->group_entry, &group_leader->sibling_list); | 434 | list_add_tail(&event->group_entry, &group_leader->sibling_list); |
335 | group_leader->nr_siblings++; | 435 | group_leader->nr_siblings++; |
436 | |||
437 | perf_event__header_size(group_leader); | ||
438 | |||
439 | list_for_each_entry(pos, &group_leader->sibling_list, group_entry) | ||
440 | perf_event__header_size(pos); | ||
336 | } | 441 | } |
337 | 442 | ||
338 | /* | 443 | /* |
@@ -391,7 +496,7 @@ static void perf_group_detach(struct perf_event *event) | |||
391 | if (event->group_leader != event) { | 496 | if (event->group_leader != event) { |
392 | list_del_init(&event->group_entry); | 497 | list_del_init(&event->group_entry); |
393 | event->group_leader->nr_siblings--; | 498 | event->group_leader->nr_siblings--; |
394 | return; | 499 | goto out; |
395 | } | 500 | } |
396 | 501 | ||
397 | if (!list_empty(&event->group_entry)) | 502 | if (!list_empty(&event->group_entry)) |
@@ -410,6 +515,12 @@ static void perf_group_detach(struct perf_event *event) | |||
410 | /* Inherit group flags from the previous leader */ | 515 | /* Inherit group flags from the previous leader */ |
411 | sibling->group_flags = event->group_flags; | 516 | sibling->group_flags = event->group_flags; |
412 | } | 517 | } |
518 | |||
519 | out: | ||
520 | perf_event__header_size(event->group_leader); | ||
521 | |||
522 | list_for_each_entry(tmp, &event->group_leader->sibling_list, group_entry) | ||
523 | perf_event__header_size(tmp); | ||
413 | } | 524 | } |
414 | 525 | ||
415 | static inline int | 526 | static inline int |
@@ -1073,7 +1184,7 @@ static int perf_event_refresh(struct perf_event *event, int refresh) | |||
1073 | /* | 1184 | /* |
1074 | * not supported on inherited events | 1185 | * not supported on inherited events |
1075 | */ | 1186 | */ |
1076 | if (event->attr.inherit) | 1187 | if (event->attr.inherit || !is_sampling_event(event)) |
1077 | return -EINVAL; | 1188 | return -EINVAL; |
1078 | 1189 | ||
1079 | atomic_add(refresh, &event->event_limit); | 1190 | atomic_add(refresh, &event->event_limit); |
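perf_event_refresh() (and several call sites below) now uses is_sampling_event() rather than open-coding the sample_period test. The helper is introduced elsewhere in this series; it amounts to:

	static inline bool is_sampling_event(struct perf_event *event)
	{
		return event->attr.sample_period != 0;
	}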
@@ -2289,31 +2400,6 @@ static int perf_release(struct inode *inode, struct file *file) | |||
2289 | return perf_event_release_kernel(event); | 2400 | return perf_event_release_kernel(event); |
2290 | } | 2401 | } |
2291 | 2402 | ||
2292 | static int perf_event_read_size(struct perf_event *event) | ||
2293 | { | ||
2294 | int entry = sizeof(u64); /* value */ | ||
2295 | int size = 0; | ||
2296 | int nr = 1; | ||
2297 | |||
2298 | if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) | ||
2299 | size += sizeof(u64); | ||
2300 | |||
2301 | if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) | ||
2302 | size += sizeof(u64); | ||
2303 | |||
2304 | if (event->attr.read_format & PERF_FORMAT_ID) | ||
2305 | entry += sizeof(u64); | ||
2306 | |||
2307 | if (event->attr.read_format & PERF_FORMAT_GROUP) { | ||
2308 | nr += event->group_leader->nr_siblings; | ||
2309 | size += sizeof(u64); | ||
2310 | } | ||
2311 | |||
2312 | size += entry * nr; | ||
2313 | |||
2314 | return size; | ||
2315 | } | ||
2316 | |||
2317 | u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) | 2403 | u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) |
2318 | { | 2404 | { |
2319 | struct perf_event *child; | 2405 | struct perf_event *child; |
@@ -2428,7 +2514,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count) | |||
2428 | if (event->state == PERF_EVENT_STATE_ERROR) | 2514 | if (event->state == PERF_EVENT_STATE_ERROR) |
2429 | return 0; | 2515 | return 0; |
2430 | 2516 | ||
2431 | if (count < perf_event_read_size(event)) | 2517 | if (count < event->read_size) |
2432 | return -ENOSPC; | 2518 | return -ENOSPC; |
2433 | 2519 | ||
2434 | WARN_ON_ONCE(event->ctx->parent_ctx); | 2520 | WARN_ON_ONCE(event->ctx->parent_ctx); |
@@ -2514,7 +2600,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) | |||
2514 | int ret = 0; | 2600 | int ret = 0; |
2515 | u64 value; | 2601 | u64 value; |
2516 | 2602 | ||
2517 | if (!event->attr.sample_period) | 2603 | if (!is_sampling_event(event)) |
2518 | return -EINVAL; | 2604 | return -EINVAL; |
2519 | 2605 | ||
2520 | if (copy_from_user(&value, arg, sizeof(value))) | 2606 | if (copy_from_user(&value, arg, sizeof(value))) |
@@ -3305,6 +3391,73 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle, | |||
3305 | } while (len); | 3391 | } while (len); |
3306 | } | 3392 | } |
3307 | 3393 | ||
3394 | static void __perf_event_header__init_id(struct perf_event_header *header, | ||
3395 | struct perf_sample_data *data, | ||
3396 | struct perf_event *event) | ||
3397 | { | ||
3398 | u64 sample_type = event->attr.sample_type; | ||
3399 | |||
3400 | data->type = sample_type; | ||
3401 | header->size += event->id_header_size; | ||
3402 | |||
3403 | if (sample_type & PERF_SAMPLE_TID) { | ||
3404 | /* namespace issues */ | ||
3405 | data->tid_entry.pid = perf_event_pid(event, current); | ||
3406 | data->tid_entry.tid = perf_event_tid(event, current); | ||
3407 | } | ||
3408 | |||
3409 | if (sample_type & PERF_SAMPLE_TIME) | ||
3410 | data->time = perf_clock(); | ||
3411 | |||
3412 | if (sample_type & PERF_SAMPLE_ID) | ||
3413 | data->id = primary_event_id(event); | ||
3414 | |||
3415 | if (sample_type & PERF_SAMPLE_STREAM_ID) | ||
3416 | data->stream_id = event->id; | ||
3417 | |||
3418 | if (sample_type & PERF_SAMPLE_CPU) { | ||
3419 | data->cpu_entry.cpu = raw_smp_processor_id(); | ||
3420 | data->cpu_entry.reserved = 0; | ||
3421 | } | ||
3422 | } | ||
3423 | |||
3424 | static void perf_event_header__init_id(struct perf_event_header *header, | ||
3425 | struct perf_sample_data *data, | ||
3426 | struct perf_event *event) | ||
3427 | { | ||
3428 | if (event->attr.sample_id_all) | ||
3429 | __perf_event_header__init_id(header, data, event); | ||
3430 | } | ||
3431 | |||
3432 | static void __perf_event__output_id_sample(struct perf_output_handle *handle, | ||
3433 | struct perf_sample_data *data) | ||
3434 | { | ||
3435 | u64 sample_type = data->type; | ||
3436 | |||
3437 | if (sample_type & PERF_SAMPLE_TID) | ||
3438 | perf_output_put(handle, data->tid_entry); | ||
3439 | |||
3440 | if (sample_type & PERF_SAMPLE_TIME) | ||
3441 | perf_output_put(handle, data->time); | ||
3442 | |||
3443 | if (sample_type & PERF_SAMPLE_ID) | ||
3444 | perf_output_put(handle, data->id); | ||
3445 | |||
3446 | if (sample_type & PERF_SAMPLE_STREAM_ID) | ||
3447 | perf_output_put(handle, data->stream_id); | ||
3448 | |||
3449 | if (sample_type & PERF_SAMPLE_CPU) | ||
3450 | perf_output_put(handle, data->cpu_entry); | ||
3451 | } | ||
3452 | |||
3453 | static void perf_event__output_id_sample(struct perf_event *event, | ||
3454 | struct perf_output_handle *handle, | ||
3455 | struct perf_sample_data *sample) | ||
3456 | { | ||
3457 | if (event->attr.sample_id_all) | ||
3458 | __perf_event__output_id_sample(handle, sample); | ||
3459 | } | ||
3460 | |||
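The output helper above must emit fields in exactly the order the init helper filled them, since tools parse the trailing block positionally. With attr.sample_id_all set, every non-sample record is followed by (layout sketch; only the fields selected in attr.sample_type appear):

	/*
	 *	{ u32 pid, tid;  }	if PERF_SAMPLE_TID
	 *	{ u64 time;      }	if PERF_SAMPLE_TIME
	 *	{ u64 id;        }	if PERF_SAMPLE_ID
	 *	{ u64 stream_id; }	if PERF_SAMPLE_STREAM_ID
	 *	{ u32 cpu, res;  }	if PERF_SAMPLE_CPU
	 */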
3308 | int perf_output_begin(struct perf_output_handle *handle, | 3461 | int perf_output_begin(struct perf_output_handle *handle, |
3309 | struct perf_event *event, unsigned int size, | 3462 | struct perf_event *event, unsigned int size, |
3310 | int nmi, int sample) | 3463 | int nmi, int sample) |
@@ -3312,6 +3465,7 @@ int perf_output_begin(struct perf_output_handle *handle, | |||
3312 | struct perf_buffer *buffer; | 3465 | struct perf_buffer *buffer; |
3313 | unsigned long tail, offset, head; | 3466 | unsigned long tail, offset, head; |
3314 | int have_lost; | 3467 | int have_lost; |
3468 | struct perf_sample_data sample_data; | ||
3315 | struct { | 3469 | struct { |
3316 | struct perf_event_header header; | 3470 | struct perf_event_header header; |
3317 | u64 id; | 3471 | u64 id; |
@@ -3338,8 +3492,12 @@ int perf_output_begin(struct perf_output_handle *handle, | |||
3338 | goto out; | 3492 | goto out; |
3339 | 3493 | ||
3340 | have_lost = local_read(&buffer->lost); | 3494 | have_lost = local_read(&buffer->lost); |
3341 | if (have_lost) | 3495 | if (have_lost) { |
3342 | size += sizeof(lost_event); | 3496 | lost_event.header.size = sizeof(lost_event); |
3497 | perf_event_header__init_id(&lost_event.header, &sample_data, | ||
3498 | event); | ||
3499 | size += lost_event.header.size; | ||
3500 | } | ||
3343 | 3501 | ||
3344 | perf_output_get_handle(handle); | 3502 | perf_output_get_handle(handle); |
3345 | 3503 | ||
@@ -3370,11 +3528,11 @@ int perf_output_begin(struct perf_output_handle *handle, | |||
3370 | if (have_lost) { | 3528 | if (have_lost) { |
3371 | lost_event.header.type = PERF_RECORD_LOST; | 3529 | lost_event.header.type = PERF_RECORD_LOST; |
3372 | lost_event.header.misc = 0; | 3530 | lost_event.header.misc = 0; |
3373 | lost_event.header.size = sizeof(lost_event); | ||
3374 | lost_event.id = event->id; | 3531 | lost_event.id = event->id; |
3375 | lost_event.lost = local_xchg(&buffer->lost, 0); | 3532 | lost_event.lost = local_xchg(&buffer->lost, 0); |
3376 | 3533 | ||
3377 | perf_output_put(handle, lost_event); | 3534 | perf_output_put(handle, lost_event); |
3535 | perf_event__output_id_sample(event, handle, &sample_data); | ||
3378 | } | 3536 | } |
3379 | 3537 | ||
3380 | return 0; | 3538 | return 0; |
@@ -3407,28 +3565,6 @@ void perf_output_end(struct perf_output_handle *handle) | |||
3407 | rcu_read_unlock(); | 3565 | rcu_read_unlock(); |
3408 | } | 3566 | } |
3409 | 3567 | ||
3410 | static u32 perf_event_pid(struct perf_event *event, struct task_struct *p) | ||
3411 | { | ||
3412 | /* | ||
3413 | * only top level events have the pid namespace they were created in | ||
3414 | */ | ||
3415 | if (event->parent) | ||
3416 | event = event->parent; | ||
3417 | |||
3418 | return task_tgid_nr_ns(p, event->ns); | ||
3419 | } | ||
3420 | |||
3421 | static u32 perf_event_tid(struct perf_event *event, struct task_struct *p) | ||
3422 | { | ||
3423 | /* | ||
3424 | * only top level events have the pid namespace they were created in | ||
3425 | */ | ||
3426 | if (event->parent) | ||
3427 | event = event->parent; | ||
3428 | |||
3429 | return task_pid_nr_ns(p, event->ns); | ||
3430 | } | ||
3431 | |||
3432 | static void perf_output_read_one(struct perf_output_handle *handle, | 3568 | static void perf_output_read_one(struct perf_output_handle *handle, |
3433 | struct perf_event *event, | 3569 | struct perf_event *event, |
3434 | u64 enabled, u64 running) | 3570 | u64 enabled, u64 running) |
@@ -3603,61 +3739,16 @@ void perf_prepare_sample(struct perf_event_header *header, | |||
3603 | { | 3739 | { |
3604 | u64 sample_type = event->attr.sample_type; | 3740 | u64 sample_type = event->attr.sample_type; |
3605 | 3741 | ||
3606 | data->type = sample_type; | ||
3607 | |||
3608 | header->type = PERF_RECORD_SAMPLE; | 3742 | header->type = PERF_RECORD_SAMPLE; |
3609 | header->size = sizeof(*header); | 3743 | header->size = sizeof(*header) + event->header_size; |
3610 | 3744 | ||
3611 | header->misc = 0; | 3745 | header->misc = 0; |
3612 | header->misc |= perf_misc_flags(regs); | 3746 | header->misc |= perf_misc_flags(regs); |
3613 | 3747 | ||
3614 | if (sample_type & PERF_SAMPLE_IP) { | 3748 | __perf_event_header__init_id(header, data, event); |
3615 | data->ip = perf_instruction_pointer(regs); | ||
3616 | |||
3617 | header->size += sizeof(data->ip); | ||
3618 | } | ||
3619 | |||
3620 | if (sample_type & PERF_SAMPLE_TID) { | ||
3621 | /* namespace issues */ | ||
3622 | data->tid_entry.pid = perf_event_pid(event, current); | ||
3623 | data->tid_entry.tid = perf_event_tid(event, current); | ||
3624 | |||
3625 | header->size += sizeof(data->tid_entry); | ||
3626 | } | ||
3627 | |||
3628 | if (sample_type & PERF_SAMPLE_TIME) { | ||
3629 | data->time = perf_clock(); | ||
3630 | |||
3631 | header->size += sizeof(data->time); | ||
3632 | } | ||
3633 | |||
3634 | if (sample_type & PERF_SAMPLE_ADDR) | ||
3635 | header->size += sizeof(data->addr); | ||
3636 | |||
3637 | if (sample_type & PERF_SAMPLE_ID) { | ||
3638 | data->id = primary_event_id(event); | ||
3639 | |||
3640 | header->size += sizeof(data->id); | ||
3641 | } | ||
3642 | |||
3643 | if (sample_type & PERF_SAMPLE_STREAM_ID) { | ||
3644 | data->stream_id = event->id; | ||
3645 | |||
3646 | header->size += sizeof(data->stream_id); | ||
3647 | } | ||
3648 | |||
3649 | if (sample_type & PERF_SAMPLE_CPU) { | ||
3650 | data->cpu_entry.cpu = raw_smp_processor_id(); | ||
3651 | data->cpu_entry.reserved = 0; | ||
3652 | |||
3653 | header->size += sizeof(data->cpu_entry); | ||
3654 | } | ||
3655 | |||
3656 | if (sample_type & PERF_SAMPLE_PERIOD) | ||
3657 | header->size += sizeof(data->period); | ||
3658 | 3749 | ||
3659 | if (sample_type & PERF_SAMPLE_READ) | 3750 | if (sample_type & PERF_SAMPLE_IP) |
3660 | header->size += perf_event_read_size(event); | 3751 | data->ip = perf_instruction_pointer(regs); |
3661 | 3752 | ||
3662 | if (sample_type & PERF_SAMPLE_CALLCHAIN) { | 3753 | if (sample_type & PERF_SAMPLE_CALLCHAIN) { |
3663 | int size = 1; | 3754 | int size = 1; |
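perf_prepare_sample() now starts from the precalculated event->header_size and only adds the variable-size parts (such as the callchain) per sample. A worked example:

	/*
	 * sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_PERIOD:
	 *	header_size  = 8 (ip) + 8 (tid_entry) + 8 (period) = 24
	 *	header->size = sizeof(*header) (8) + 24 = 32 bytes,
	 *	computed once at event creation instead of per sample.
	 */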
@@ -3722,23 +3813,26 @@ perf_event_read_event(struct perf_event *event, | |||
3722 | struct task_struct *task) | 3813 | struct task_struct *task) |
3723 | { | 3814 | { |
3724 | struct perf_output_handle handle; | 3815 | struct perf_output_handle handle; |
3816 | struct perf_sample_data sample; | ||
3725 | struct perf_read_event read_event = { | 3817 | struct perf_read_event read_event = { |
3726 | .header = { | 3818 | .header = { |
3727 | .type = PERF_RECORD_READ, | 3819 | .type = PERF_RECORD_READ, |
3728 | .misc = 0, | 3820 | .misc = 0, |
3729 | .size = sizeof(read_event) + perf_event_read_size(event), | 3821 | .size = sizeof(read_event) + event->read_size, |
3730 | }, | 3822 | }, |
3731 | .pid = perf_event_pid(event, task), | 3823 | .pid = perf_event_pid(event, task), |
3732 | .tid = perf_event_tid(event, task), | 3824 | .tid = perf_event_tid(event, task), |
3733 | }; | 3825 | }; |
3734 | int ret; | 3826 | int ret; |
3735 | 3827 | ||
3828 | perf_event_header__init_id(&read_event.header, &sample, event); | ||
3736 | ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0); | 3829 | ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0); |
3737 | if (ret) | 3830 | if (ret) |
3738 | return; | 3831 | return; |
3739 | 3832 | ||
3740 | perf_output_put(&handle, read_event); | 3833 | perf_output_put(&handle, read_event); |
3741 | perf_output_read(&handle, event); | 3834 | perf_output_read(&handle, event); |
3835 | perf_event__output_id_sample(event, &handle, &sample); | ||
3742 | 3836 | ||
3743 | perf_output_end(&handle); | 3837 | perf_output_end(&handle); |
3744 | } | 3838 | } |
@@ -3768,14 +3862,16 @@ static void perf_event_task_output(struct perf_event *event, | |||
3768 | struct perf_task_event *task_event) | 3862 | struct perf_task_event *task_event) |
3769 | { | 3863 | { |
3770 | struct perf_output_handle handle; | 3864 | struct perf_output_handle handle; |
3865 | struct perf_sample_data sample; | ||
3771 | struct task_struct *task = task_event->task; | 3866 | struct task_struct *task = task_event->task; |
3772 | int size, ret; | 3867 | int ret, size = task_event->event_id.header.size; |
3773 | 3868 | ||
3774 | size = task_event->event_id.header.size; | 3869 | perf_event_header__init_id(&task_event->event_id.header, &sample, event); |
3775 | ret = perf_output_begin(&handle, event, size, 0, 0); | ||
3776 | 3870 | ||
3871 | ret = perf_output_begin(&handle, event, | ||
3872 | task_event->event_id.header.size, 0, 0); | ||
3777 | if (ret) | 3873 | if (ret) |
3778 | return; | 3874 | goto out; |
3779 | 3875 | ||
3780 | task_event->event_id.pid = perf_event_pid(event, task); | 3876 | task_event->event_id.pid = perf_event_pid(event, task); |
3781 | task_event->event_id.ppid = perf_event_pid(event, current); | 3877 | task_event->event_id.ppid = perf_event_pid(event, current); |
@@ -3785,7 +3881,11 @@ static void perf_event_task_output(struct perf_event *event, | |||
3785 | 3881 | ||
3786 | perf_output_put(&handle, task_event->event_id); | 3882 | perf_output_put(&handle, task_event->event_id); |
3787 | 3883 | ||
3884 | perf_event__output_id_sample(event, &handle, &sample); | ||
3885 | |||
3788 | perf_output_end(&handle); | 3886 | perf_output_end(&handle); |
3887 | out: | ||
3888 | task_event->event_id.header.size = size; | ||
3789 | } | 3889 | } |
3790 | 3890 | ||
3791 | static int perf_event_task_match(struct perf_event *event) | 3891 | static int perf_event_task_match(struct perf_event *event) |
@@ -3900,11 +4000,16 @@ static void perf_event_comm_output(struct perf_event *event, | |||
3900 | struct perf_comm_event *comm_event) | 4000 | struct perf_comm_event *comm_event) |
3901 | { | 4001 | { |
3902 | struct perf_output_handle handle; | 4002 | struct perf_output_handle handle; |
4003 | struct perf_sample_data sample; | ||
3903 | int size = comm_event->event_id.header.size; | 4004 | int size = comm_event->event_id.header.size; |
3904 | int ret = perf_output_begin(&handle, event, size, 0, 0); | 4005 | int ret; |
4006 | |||
4007 | perf_event_header__init_id(&comm_event->event_id.header, &sample, event); | ||
4008 | ret = perf_output_begin(&handle, event, | ||
4009 | comm_event->event_id.header.size, 0, 0); | ||
3905 | 4010 | ||
3906 | if (ret) | 4011 | if (ret) |
3907 | return; | 4012 | goto out; |
3908 | 4013 | ||
3909 | comm_event->event_id.pid = perf_event_pid(event, comm_event->task); | 4014 | comm_event->event_id.pid = perf_event_pid(event, comm_event->task); |
3910 | comm_event->event_id.tid = perf_event_tid(event, comm_event->task); | 4015 | comm_event->event_id.tid = perf_event_tid(event, comm_event->task); |
@@ -3912,7 +4017,12 @@ static void perf_event_comm_output(struct perf_event *event, | |||
3912 | perf_output_put(&handle, comm_event->event_id); | 4017 | perf_output_put(&handle, comm_event->event_id); |
3913 | perf_output_copy(&handle, comm_event->comm, | 4018 | perf_output_copy(&handle, comm_event->comm, |
3914 | comm_event->comm_size); | 4019 | comm_event->comm_size); |
4020 | |||
4021 | perf_event__output_id_sample(event, &handle, &sample); | ||
4022 | |||
3915 | perf_output_end(&handle); | 4023 | perf_output_end(&handle); |
4024 | out: | ||
4025 | comm_event->event_id.header.size = size; | ||
3916 | } | 4026 | } |
3917 | 4027 | ||
3918 | static int perf_event_comm_match(struct perf_event *event) | 4028 | static int perf_event_comm_match(struct perf_event *event) |
@@ -3957,7 +4067,6 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
3957 | comm_event->comm_size = size; | 4067 | comm_event->comm_size = size; |
3958 | 4068 | ||
3959 | comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; | 4069 | comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; |
3960 | |||
3961 | rcu_read_lock(); | 4070 | rcu_read_lock(); |
3962 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 4071 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
3963 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 4072 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
@@ -4038,11 +4147,15 @@ static void perf_event_mmap_output(struct perf_event *event, | |||
4038 | struct perf_mmap_event *mmap_event) | 4147 | struct perf_mmap_event *mmap_event) |
4039 | { | 4148 | { |
4040 | struct perf_output_handle handle; | 4149 | struct perf_output_handle handle; |
4150 | struct perf_sample_data sample; | ||
4041 | int size = mmap_event->event_id.header.size; | 4151 | int size = mmap_event->event_id.header.size; |
4042 | int ret = perf_output_begin(&handle, event, size, 0, 0); | 4152 | int ret; |
4043 | 4153 | ||
4154 | perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); | ||
4155 | ret = perf_output_begin(&handle, event, | ||
4156 | mmap_event->event_id.header.size, 0, 0); | ||
4044 | if (ret) | 4157 | if (ret) |
4045 | return; | 4158 | goto out; |
4046 | 4159 | ||
4047 | mmap_event->event_id.pid = perf_event_pid(event, current); | 4160 | mmap_event->event_id.pid = perf_event_pid(event, current); |
4048 | mmap_event->event_id.tid = perf_event_tid(event, current); | 4161 | mmap_event->event_id.tid = perf_event_tid(event, current); |
@@ -4050,7 +4163,12 @@ static void perf_event_mmap_output(struct perf_event *event, | |||
4050 | perf_output_put(&handle, mmap_event->event_id); | 4163 | perf_output_put(&handle, mmap_event->event_id); |
4051 | perf_output_copy(&handle, mmap_event->file_name, | 4164 | perf_output_copy(&handle, mmap_event->file_name, |
4052 | mmap_event->file_size); | 4165 | mmap_event->file_size); |
4166 | |||
4167 | perf_event__output_id_sample(event, &handle, &sample); | ||
4168 | |||
4053 | perf_output_end(&handle); | 4169 | perf_output_end(&handle); |
4170 | out: | ||
4171 | mmap_event->event_id.header.size = size; | ||
4054 | } | 4172 | } |
4055 | 4173 | ||
4056 | static int perf_event_mmap_match(struct perf_event *event, | 4174 | static int perf_event_mmap_match(struct perf_event *event, |
@@ -4205,6 +4323,7 @@ void perf_event_mmap(struct vm_area_struct *vma) | |||
4205 | static void perf_log_throttle(struct perf_event *event, int enable) | 4323 | static void perf_log_throttle(struct perf_event *event, int enable) |
4206 | { | 4324 | { |
4207 | struct perf_output_handle handle; | 4325 | struct perf_output_handle handle; |
4326 | struct perf_sample_data sample; | ||
4208 | int ret; | 4327 | int ret; |
4209 | 4328 | ||
4210 | struct { | 4329 | struct { |
@@ -4226,11 +4345,15 @@ static void perf_log_throttle(struct perf_event *event, int enable) | |||
4226 | if (enable) | 4345 | if (enable) |
4227 | throttle_event.header.type = PERF_RECORD_UNTHROTTLE; | 4346 | throttle_event.header.type = PERF_RECORD_UNTHROTTLE; |
4228 | 4347 | ||
4229 | ret = perf_output_begin(&handle, event, sizeof(throttle_event), 1, 0); | 4348 | perf_event_header__init_id(&throttle_event.header, &sample, event); |
4349 | |||
4350 | ret = perf_output_begin(&handle, event, | ||
4351 | throttle_event.header.size, 1, 0); | ||
4230 | if (ret) | 4352 | if (ret) |
4231 | return; | 4353 | return; |
4232 | 4354 | ||
4233 | perf_output_put(&handle, throttle_event); | 4355 | perf_output_put(&handle, throttle_event); |
4356 | perf_event__output_id_sample(event, &handle, &sample); | ||
4234 | perf_output_end(&handle); | 4357 | perf_output_end(&handle); |
4235 | } | 4358 | } |
4236 | 4359 | ||
@@ -4246,6 +4369,13 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, | |||
4246 | struct hw_perf_event *hwc = &event->hw; | 4369 | struct hw_perf_event *hwc = &event->hw; |
4247 | int ret = 0; | 4370 | int ret = 0; |
4248 | 4371 | ||
4372 | /* | ||
4373 | * Non-sampling counters might still use the PMI to fold short | ||
4374 | * hardware counters; ignore those. | ||
4375 | */ | ||
4376 | if (unlikely(!is_sampling_event(event))) | ||
4377 | return 0; | ||
4378 | |||
4249 | if (!throttle) { | 4379 | if (!throttle) { |
4250 | hwc->interrupts++; | 4380 | hwc->interrupts++; |
4251 | } else { | 4381 | } else { |
@@ -4391,7 +4521,7 @@ static void perf_swevent_event(struct perf_event *event, u64 nr, | |||
4391 | if (!regs) | 4521 | if (!regs) |
4392 | return; | 4522 | return; |
4393 | 4523 | ||
4394 | if (!hwc->sample_period) | 4524 | if (!is_sampling_event(event)) |
4395 | return; | 4525 | return; |
4396 | 4526 | ||
4397 | if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) | 4527 | if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) |
@@ -4554,7 +4684,7 @@ static int perf_swevent_add(struct perf_event *event, int flags) | |||
4554 | struct hw_perf_event *hwc = &event->hw; | 4684 | struct hw_perf_event *hwc = &event->hw; |
4555 | struct hlist_head *head; | 4685 | struct hlist_head *head; |
4556 | 4686 | ||
4557 | if (hwc->sample_period) { | 4687 | if (is_sampling_event(event)) { |
4558 | hwc->last_period = hwc->sample_period; | 4688 | hwc->last_period = hwc->sample_period; |
4559 | perf_swevent_set_period(event); | 4689 | perf_swevent_set_period(event); |
4560 | } | 4690 | } |
@@ -4811,15 +4941,6 @@ static int perf_tp_event_init(struct perf_event *event) | |||
4811 | if (event->attr.type != PERF_TYPE_TRACEPOINT) | 4941 | if (event->attr.type != PERF_TYPE_TRACEPOINT) |
4812 | return -ENOENT; | 4942 | return -ENOENT; |
4813 | 4943 | ||
4814 | /* | ||
4815 | * Raw tracepoint data is a severe data leak, only allow root to | ||
4816 | * have these. | ||
4817 | */ | ||
4818 | if ((event->attr.sample_type & PERF_SAMPLE_RAW) && | ||
4819 | perf_paranoid_tracepoint_raw() && | ||
4820 | !capable(CAP_SYS_ADMIN)) | ||
4821 | return -EPERM; | ||
4822 | |||
4823 | err = perf_trace_init(event); | 4944 | err = perf_trace_init(event); |
4824 | if (err) | 4945 | if (err) |
4825 | return err; | 4946 | return err; |
@@ -4842,7 +4963,7 @@ static struct pmu perf_tracepoint = { | |||
4842 | 4963 | ||
4843 | static inline void perf_tp_register(void) | 4964 | static inline void perf_tp_register(void) |
4844 | { | 4965 | { |
4845 | perf_pmu_register(&perf_tracepoint); | 4966 | perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT); |
4846 | } | 4967 | } |
4847 | 4968 | ||
4848 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) | 4969 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) |
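Every PMU registration now carries a name and a type. Fixed types keep their PERF_TYPE_* value; passing -1 requests a dynamic id from the idr, and a NULL name skips both the idr and the sysfs node entirely (the type then stays -1 and the PMU is matched via the fallback list walk in perf_init_event()):

	perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE);
	perf_pmu_register(&perf_cpu_clock, NULL, -1);	/* anonymous, no sysfs node */
	perf_pmu_register(&my_uncore_pmu, "my_uncore", -1);	/* hypothetical driver;
							 * gets a type > PERF_TYPE_MAX */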
@@ -4932,31 +5053,33 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | |||
4932 | static void perf_swevent_start_hrtimer(struct perf_event *event) | 5053 | static void perf_swevent_start_hrtimer(struct perf_event *event) |
4933 | { | 5054 | { |
4934 | struct hw_perf_event *hwc = &event->hw; | 5055 | struct hw_perf_event *hwc = &event->hw; |
5056 | s64 period; | ||
5057 | |||
5058 | if (!is_sampling_event(event)) | ||
5059 | return; | ||
4935 | 5060 | ||
4936 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 5061 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
4937 | hwc->hrtimer.function = perf_swevent_hrtimer; | 5062 | hwc->hrtimer.function = perf_swevent_hrtimer; |
4938 | if (hwc->sample_period) { | ||
4939 | s64 period = local64_read(&hwc->period_left); | ||
4940 | 5063 | ||
4941 | if (period) { | 5064 | period = local64_read(&hwc->period_left); |
4942 | if (period < 0) | 5065 | if (period) { |
4943 | period = 10000; | 5066 | if (period < 0) |
5067 | period = 10000; | ||
4944 | 5068 | ||
4945 | local64_set(&hwc->period_left, 0); | 5069 | local64_set(&hwc->period_left, 0); |
4946 | } else { | 5070 | } else { |
4947 | period = max_t(u64, 10000, hwc->sample_period); | 5071 | period = max_t(u64, 10000, hwc->sample_period); |
4948 | } | 5072 | } |
4949 | __hrtimer_start_range_ns(&hwc->hrtimer, | 5073 | __hrtimer_start_range_ns(&hwc->hrtimer, |
4950 | ns_to_ktime(period), 0, | 5074 | ns_to_ktime(period), 0, |
4951 | HRTIMER_MODE_REL_PINNED, 0); | 5075 | HRTIMER_MODE_REL_PINNED, 0); |
4952 | } | ||
4953 | } | 5076 | } |
4954 | 5077 | ||
4955 | static void perf_swevent_cancel_hrtimer(struct perf_event *event) | 5078 | static void perf_swevent_cancel_hrtimer(struct perf_event *event) |
4956 | { | 5079 | { |
4957 | struct hw_perf_event *hwc = &event->hw; | 5080 | struct hw_perf_event *hwc = &event->hw; |
4958 | 5081 | ||
4959 | if (hwc->sample_period) { | 5082 | if (is_sampling_event(event)) { |
4960 | ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); | 5083 | ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); |
4961 | local64_set(&hwc->period_left, ktime_to_ns(remaining)); | 5084 | local64_set(&hwc->period_left, ktime_to_ns(remaining)); |
4962 | 5085 | ||
@@ -5184,8 +5307,61 @@ static void free_pmu_context(struct pmu *pmu) | |||
5184 | out: | 5307 | out: |
5185 | mutex_unlock(&pmus_lock); | 5308 | mutex_unlock(&pmus_lock); |
5186 | } | 5309 | } |
5310 | static struct idr pmu_idr; | ||
5311 | |||
5312 | static ssize_t | ||
5313 | type_show(struct device *dev, struct device_attribute *attr, char *page) | ||
5314 | { | ||
5315 | struct pmu *pmu = dev_get_drvdata(dev); | ||
5316 | |||
5317 | return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type); | ||
5318 | } | ||
5319 | |||
5320 | static struct device_attribute pmu_dev_attrs[] = { | ||
5321 | __ATTR_RO(type), | ||
5322 | __ATTR_NULL, | ||
5323 | }; | ||
5324 | |||
5325 | static int pmu_bus_running; | ||
5326 | static struct bus_type pmu_bus = { | ||
5327 | .name = "event_source", | ||
5328 | .dev_attrs = pmu_dev_attrs, | ||
5329 | }; | ||
5330 | |||
5331 | static void pmu_dev_release(struct device *dev) | ||
5332 | { | ||
5333 | kfree(dev); | ||
5334 | } | ||
5335 | |||
5336 | static int pmu_dev_alloc(struct pmu *pmu) | ||
5337 | { | ||
5338 | int ret = -ENOMEM; | ||
5339 | |||
5340 | pmu->dev = kzalloc(sizeof(struct device), GFP_KERNEL); | ||
5341 | if (!pmu->dev) | ||
5342 | goto out; | ||
5343 | |||
5344 | device_initialize(pmu->dev); | ||
5345 | ret = dev_set_name(pmu->dev, "%s", pmu->name); | ||
5346 | if (ret) | ||
5347 | goto free_dev; | ||
5348 | |||
5349 | dev_set_drvdata(pmu->dev, pmu); | ||
5350 | pmu->dev->bus = &pmu_bus; | ||
5351 | pmu->dev->release = pmu_dev_release; | ||
5352 | ret = device_add(pmu->dev); | ||
5353 | if (ret) | ||
5354 | goto free_dev; | ||
5355 | |||
5356 | out: | ||
5357 | return ret; | ||
5358 | |||
5359 | free_dev: | ||
5360 | put_device(pmu->dev); | ||
5361 | goto out; | ||
5362 | } | ||
5187 | 5363 | ||
5188 | int perf_pmu_register(struct pmu *pmu) | 5364 | int perf_pmu_register(struct pmu *pmu, char *name, int type) |
5189 | { | 5365 | { |
5190 | int cpu, ret; | 5366 | int cpu, ret; |
5191 | 5367 | ||
@@ -5195,13 +5371,38 @@ int perf_pmu_register(struct pmu *pmu) | |||
5195 | if (!pmu->pmu_disable_count) | 5371 | if (!pmu->pmu_disable_count) |
5196 | goto unlock; | 5372 | goto unlock; |
5197 | 5373 | ||
5374 | pmu->type = -1; | ||
5375 | if (!name) | ||
5376 | goto skip_type; | ||
5377 | pmu->name = name; | ||
5378 | |||
5379 | if (type < 0) { | ||
5380 | int err = idr_pre_get(&pmu_idr, GFP_KERNEL); | ||
5381 | if (!err) | ||
5382 | goto free_pdc; | ||
5383 | |||
5384 | err = idr_get_new_above(&pmu_idr, pmu, PERF_TYPE_MAX, &type); | ||
5385 | if (err) { | ||
5386 | ret = err; | ||
5387 | goto free_pdc; | ||
5388 | } | ||
5389 | } | ||
5390 | pmu->type = type; | ||
5391 | |||
5392 | if (pmu_bus_running) { | ||
5393 | ret = pmu_dev_alloc(pmu); | ||
5394 | if (ret) | ||
5395 | goto free_idr; | ||
5396 | } | ||
5397 | |||
5398 | skip_type: | ||
5198 | pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr); | 5399 | pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr); |
5199 | if (pmu->pmu_cpu_context) | 5400 | if (pmu->pmu_cpu_context) |
5200 | goto got_cpu_context; | 5401 | goto got_cpu_context; |
5201 | 5402 | ||
5202 | pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context); | 5403 | pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context); |
5203 | if (!pmu->pmu_cpu_context) | 5404 | if (!pmu->pmu_cpu_context) |
5204 | goto free_pdc; | 5405 | goto free_dev; |
5205 | 5406 | ||
5206 | for_each_possible_cpu(cpu) { | 5407 | for_each_possible_cpu(cpu) { |
5207 | struct perf_cpu_context *cpuctx; | 5408 | struct perf_cpu_context *cpuctx; |
@@ -5245,6 +5446,14 @@ unlock: | |||
5245 | 5446 | ||
5246 | return ret; | 5447 | return ret; |
5247 | 5448 | ||
5449 | free_dev: | ||
5450 | device_del(pmu->dev); | ||
5451 | put_device(pmu->dev); | ||
5452 | |||
5453 | free_idr: | ||
5454 | if (pmu->type >= PERF_TYPE_MAX) | ||
5455 | idr_remove(&pmu_idr, pmu->type); | ||
5456 | |||
5248 | free_pdc: | 5457 | free_pdc: |
5249 | free_percpu(pmu->pmu_disable_count); | 5458 | free_percpu(pmu->pmu_disable_count); |
5250 | goto unlock; | 5459 | goto unlock; |
@@ -5264,6 +5473,10 @@ void perf_pmu_unregister(struct pmu *pmu) | |||
5264 | synchronize_rcu(); | 5473 | synchronize_rcu(); |
5265 | 5474 | ||
5266 | free_percpu(pmu->pmu_disable_count); | 5475 | free_percpu(pmu->pmu_disable_count); |
5476 | if (pmu->type >= PERF_TYPE_MAX) | ||
5477 | idr_remove(&pmu_idr, pmu->type); | ||
5478 | device_del(pmu->dev); | ||
5479 | put_device(pmu->dev); | ||
5267 | free_pmu_context(pmu); | 5480 | free_pmu_context(pmu); |
5268 | } | 5481 | } |
5269 | 5482 | ||
@@ -5273,6 +5486,13 @@ struct pmu *perf_init_event(struct perf_event *event) | |||
5273 | int idx; | 5486 | int idx; |
5274 | 5487 | ||
5275 | idx = srcu_read_lock(&pmus_srcu); | 5488 | idx = srcu_read_lock(&pmus_srcu); |
5489 | |||
5490 | rcu_read_lock(); | ||
5491 | pmu = idr_find(&pmu_idr, event->attr.type); | ||
5492 | rcu_read_unlock(); | ||
5493 | if (pmu) | ||
5494 | goto unlock; | ||
5495 | |||
5276 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 5496 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
5277 | int ret = pmu->event_init(event); | 5497 | int ret = pmu->event_init(event); |
5278 | if (!ret) | 5498 | if (!ret) |
@@ -5738,6 +5958,12 @@ SYSCALL_DEFINE5(perf_event_open, | |||
5738 | mutex_unlock(¤t->perf_event_mutex); | 5958 | mutex_unlock(¤t->perf_event_mutex); |
5739 | 5959 | ||
5740 | /* | 5960 | /* |
5961 | * Precalculate sample_data sizes | ||
5962 | */ | ||
5963 | perf_event__header_size(event); | ||
5964 | perf_event__id_header_size(event); | ||
5965 | |||
5966 | /* | ||
5741 | * Drop the reference on the group_event after placing the | 5967 | * Drop the reference on the group_event after placing the |
5742 | * new event on the sibling_list. This ensures destruction | 5968 | * new event on the sibling_list. This ensures destruction |
5743 | * of the group leader will find the pointer to itself in | 5969 | * of the group leader will find the pointer to itself in |
@@ -6090,6 +6316,12 @@ inherit_event(struct perf_event *parent_event, | |||
6090 | child_event->overflow_handler = parent_event->overflow_handler; | 6316 | child_event->overflow_handler = parent_event->overflow_handler; |
6091 | 6317 | ||
6092 | /* | 6318 | /* |
6319 | * Precalculate sample_data sizes | ||
6320 | */ | ||
6321 | perf_event__header_size(child_event); | ||
6322 | perf_event__id_header_size(child_event); | ||
6323 | |||
6324 | /* | ||
6093 | * Link it up in the child's context: | 6325 | * Link it up in the child's context: |
6094 | */ | 6326 | */ |
6095 | raw_spin_lock_irqsave(&child_ctx->lock, flags); | 6327 | raw_spin_lock_irqsave(&child_ctx->lock, flags); |
@@ -6320,7 +6552,7 @@ static void __cpuinit perf_event_init_cpu(int cpu) | |||
6320 | mutex_unlock(&swhash->hlist_mutex); | 6552 | mutex_unlock(&swhash->hlist_mutex); |
6321 | } | 6553 | } |
6322 | 6554 | ||
6323 | #ifdef CONFIG_HOTPLUG_CPU | 6555 | #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC |
6324 | static void perf_pmu_rotate_stop(struct pmu *pmu) | 6556 | static void perf_pmu_rotate_stop(struct pmu *pmu) |
6325 | { | 6557 | { |
6326 | struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); | 6558 | struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); |
@@ -6374,6 +6606,26 @@ static void perf_event_exit_cpu(int cpu) | |||
6374 | static inline void perf_event_exit_cpu(int cpu) { } | 6606 | static inline void perf_event_exit_cpu(int cpu) { } |
6375 | #endif | 6607 | #endif |
6376 | 6608 | ||
6609 | static int | ||
6610 | perf_reboot(struct notifier_block *notifier, unsigned long val, void *v) | ||
6611 | { | ||
6612 | int cpu; | ||
6613 | |||
6614 | for_each_online_cpu(cpu) | ||
6615 | perf_event_exit_cpu(cpu); | ||
6616 | |||
6617 | return NOTIFY_OK; | ||
6618 | } | ||
6619 | |||
6620 | /* | ||
6621 | * Run the perf reboot notifier at the very last possible moment so that | ||
6622 | * the generic watchdog code runs as long as possible. | ||
6623 | */ | ||
6624 | static struct notifier_block perf_reboot_notifier = { | ||
6625 | .notifier_call = perf_reboot, | ||
6626 | .priority = INT_MIN, | ||
6627 | }; | ||
6628 | |||
6377 | static int __cpuinit | 6629 | static int __cpuinit |
6378 | perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) | 6630 | perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) |
6379 | { | 6631 | { |
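
Reboot notifier chains run in descending priority order, so the INT_MIN priority makes perf_reboot() the very last callback, keeping the NMI watchdog armed for as much of the shutdown path as possible (and, together with the CONFIG_KEXEC change above, quiescing the PMU before a kexec). For reference, a self-contained example of the same register_reboot_notifier() pattern; the module and all of its names are hypothetical, not part of this patch:

    #include <linux/module.h>
    #include <linux/notifier.h>
    #include <linux/reboot.h>

    /* Hypothetical module demonstrating the reboot-notifier pattern. */
    static int example_reboot(struct notifier_block *nb, unsigned long action,
                              void *data)
    {
            pr_info("example: shutting down (action=%lu)\n", action);
            return NOTIFY_OK;
    }

    static struct notifier_block example_reboot_nb = {
            .notifier_call  = example_reboot,
            .priority       = 0,    /* the patch uses INT_MIN to run dead last */
    };

    static int __init example_init(void)
    {
            return register_reboot_notifier(&example_reboot_nb);
    }

    static void __exit example_exit(void)
    {
            unregister_reboot_notifier(&example_reboot_nb);
    }

    module_init(example_init);
    module_exit(example_exit);
    MODULE_LICENSE("GPL");
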
@@ -6402,14 +6654,45 @@ void __init perf_event_init(void) | |||
6402 | { | 6654 | { |
6403 | int ret; | 6655 | int ret; |
6404 | 6656 | ||
6657 | idr_init(&pmu_idr); | ||
6658 | |||
6405 | perf_event_init_all_cpus(); | 6659 | perf_event_init_all_cpus(); |
6406 | init_srcu_struct(&pmus_srcu); | 6660 | init_srcu_struct(&pmus_srcu); |
6407 | perf_pmu_register(&perf_swevent); | 6661 | perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE); |
6408 | perf_pmu_register(&perf_cpu_clock); | 6662 | perf_pmu_register(&perf_cpu_clock, NULL, -1); |
6409 | perf_pmu_register(&perf_task_clock); | 6663 | perf_pmu_register(&perf_task_clock, NULL, -1); |
6410 | perf_tp_register(); | 6664 | perf_tp_register(); |
6411 | perf_cpu_notifier(perf_cpu_notify); | 6665 | perf_cpu_notifier(perf_cpu_notify); |
6666 | register_reboot_notifier(&perf_reboot_notifier); | ||
6412 | 6667 | ||
6413 | ret = init_hw_breakpoint(); | 6668 | ret = init_hw_breakpoint(); |
6414 | WARN(ret, "hw_breakpoint initialization failed with: %d", ret); | 6669 | WARN(ret, "hw_breakpoint initialization failed with: %d", ret); |
6415 | } | 6670 | } |
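
perf_pmu_register() now takes a name and a requested type, as the calls above show: a fixed PERF_TYPE_* value claims an architected slot ("software"), a NULL name with -1 keeps a PMU anonymous, and a real name with -1 asks for a dynamically allocated type. A sketch of how a driver-side PMU might use the new signature; the uncore_example name and all of its callbacks are hypothetical, assumed to be defined elsewhere in such a driver:

    /*
     * Hypothetical driver PMU using the three-argument registration.
     * Passing -1 requests a dynamic type id; "uncore_example" is the
     * name userspace would see once the sysfs bus is registered.
     */
    static struct pmu uncore_example_pmu = {
            .event_init     = uncore_example_event_init,
            .add            = uncore_example_add,
            .del            = uncore_example_del,
            .start          = uncore_example_start,
            .stop           = uncore_example_stop,
            .read           = uncore_example_read,
    };

    static int __init uncore_example_init(void)
    {
            return perf_pmu_register(&uncore_example_pmu, "uncore_example", -1);
    }

    static void __exit uncore_example_exit(void)
    {
            /* drops the idr entry and sysfs device, as shown earlier */
            perf_pmu_unregister(&uncore_example_pmu);
    }
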
6671 | |||
6672 | static int __init perf_event_sysfs_init(void) | ||
6673 | { | ||
6674 | struct pmu *pmu; | ||
6675 | int ret; | ||
6676 | |||
6677 | mutex_lock(&pmus_lock); | ||
6678 | |||
6679 | ret = bus_register(&pmu_bus); | ||
6680 | if (ret) | ||
6681 | goto unlock; | ||
6682 | |||
6683 | list_for_each_entry(pmu, &pmus, entry) { | ||
6684 | if (!pmu->name || pmu->type < 0) | ||
6685 | continue; | ||
6686 | |||
6687 | ret = pmu_dev_alloc(pmu); | ||
6688 | WARN(ret, "Failed to register pmu: %s, reason %d\n", pmu->name, ret); | ||
6689 | } | ||
6690 | pmu_bus_running = 1; | ||
6691 | ret = 0; | ||
6692 | |||
6693 | unlock: | ||
6694 | mutex_unlock(&pmus_lock); | ||
6695 | |||
6696 | return ret; | ||
6697 | } | ||
6698 | device_initcall(perf_event_sysfs_init); | ||
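
Once the bus is registered, every named PMU appears as a device under /sys/bus/event_source/devices/<name>, and in this series its (possibly dynamic) id is readable from a 'type' attribute, so userspace can stop hard-coding PERF_TYPE_* constants. A userspace sketch of that flow, with error handling kept minimal; passing "software" and a PERF_COUNT_SW_* config would exercise the PMU registered above, but the function itself is illustrative:

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/perf_event.h>

    /* Sketch: resolve a PMU's type id from sysfs, then open an event on it. */
    static int open_event_on(const char *pmu_name, unsigned long long config)
    {
            struct perf_event_attr attr;
            char path[256];
            FILE *f;
            int type;

            snprintf(path, sizeof(path),
                     "/sys/bus/event_source/devices/%s/type", pmu_name);
            f = fopen(path, "r");
            if (!f)
                    return -1;
            if (fscanf(f, "%d", &type) != 1) {
                    fclose(f);
                    return -1;
            }
            fclose(f);

            memset(&attr, 0, sizeof(attr));
            attr.size   = sizeof(attr);
            attr.type   = type;             /* dynamic or fixed, no hard-coding */
            attr.config = config;

            /* monitor the calling thread on any CPU */
            return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }
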
diff --git a/kernel/sched.c b/kernel/sched.c index 297d1a0eedb0..c68cead94dd7 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -8293,8 +8293,6 @@ void __init sched_init(void) | |||
8293 | zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); | 8293 | zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); |
8294 | #endif /* SMP */ | 8294 | #endif /* SMP */ |
8295 | 8295 | ||
8296 | perf_event_init(); | ||
8297 | |||
8298 | scheduler_running = 1; | 8296 | scheduler_running = 1; |
8299 | } | 8297 | } |
8300 | 8298 | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 5abfa1518554..46404414d8a7 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -745,21 +745,21 @@ static struct ctl_table kern_table[] = { | |||
745 | .extra1 = &zero, | 745 | .extra1 = &zero, |
746 | .extra2 = &one, | 746 | .extra2 = &one, |
747 | }, | 747 | }, |
748 | #endif | ||
749 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR) | ||
750 | { | 748 | { |
751 | .procname = "unknown_nmi_panic", | 749 | .procname = "nmi_watchdog", |
752 | .data = &unknown_nmi_panic, | 750 | .data = &watchdog_enabled, |
753 | .maxlen = sizeof (int), | 751 | .maxlen = sizeof (int), |
754 | .mode = 0644, | 752 | .mode = 0644, |
755 | .proc_handler = proc_dointvec, | 753 | .proc_handler = proc_dowatchdog_enabled, |
756 | }, | 754 | }, |
755 | #endif | ||
756 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) | ||
757 | { | 757 | { |
758 | .procname = "nmi_watchdog", | 758 | .procname = "unknown_nmi_panic", |
759 | .data = &nmi_watchdog_enabled, | 759 | .data = &unknown_nmi_panic, |
760 | .maxlen = sizeof (int), | 760 | .maxlen = sizeof (int), |
761 | .mode = 0644, | 761 | .mode = 0644, |
762 | .proc_handler = proc_nmi_enabled, | 762 | .proc_handler = proc_dointvec, |
763 | }, | 763 | }, |
764 | #endif | 764 | #endif |
765 | #if defined(CONFIG_X86) | 765 | #if defined(CONFIG_X86) |
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 1357c5786064..4b2545a136ff 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c | |||
@@ -136,7 +136,6 @@ static const struct bin_table bin_kern_table[] = { | |||
136 | { CTL_INT, KERN_IA64_UNALIGNED, "ignore-unaligned-usertrap" }, | 136 | { CTL_INT, KERN_IA64_UNALIGNED, "ignore-unaligned-usertrap" }, |
137 | { CTL_INT, KERN_COMPAT_LOG, "compat-log" }, | 137 | { CTL_INT, KERN_COMPAT_LOG, "compat-log" }, |
138 | { CTL_INT, KERN_MAX_LOCK_DEPTH, "max_lock_depth" }, | 138 | { CTL_INT, KERN_MAX_LOCK_DEPTH, "max_lock_depth" }, |
139 | { CTL_INT, KERN_NMI_WATCHDOG, "nmi_watchdog" }, | ||
140 | { CTL_INT, KERN_PANIC_ON_NMI, "panic_on_unrecovered_nmi" }, | 139 | { CTL_INT, KERN_PANIC_ON_NMI, "panic_on_unrecovered_nmi" }, |
141 | {} | 140 | {} |
142 | }; | 141 | }; |
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 39c059ca670e..19a359d5e6d5 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c | |||
@@ -21,17 +21,46 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)]) | |||
21 | /* Count the events in use (per event id, not per instance) */ | 21 | /* Count the events in use (per event id, not per instance) */ |
22 | static int total_ref_count; | 22 | static int total_ref_count; |
23 | 23 | ||
24 | static int perf_trace_event_perm(struct ftrace_event_call *tp_event, | ||
25 | struct perf_event *p_event) | ||
26 | { | ||
27 | /* No tracing, just counting, so no obvious leak */ | ||
28 | if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW)) | ||
29 | return 0; | ||
30 | |||
31 | /* Some events are ok to be traced by non-root users... */ | ||
32 | if (p_event->attach_state == PERF_ATTACH_TASK) { | ||
33 | if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY) | ||
34 | return 0; | ||
35 | } | ||
36 | |||
37 | /* | ||
38 | * ...otherwise raw tracepoint data can be a severe data leak, | ||
39 | * only allow root to have these. | ||
40 | */ | ||
41 | if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) | ||
42 | return -EPERM; | ||
43 | |||
44 | return 0; | ||
45 | } | ||
46 | |||
24 | static int perf_trace_event_init(struct ftrace_event_call *tp_event, | 47 | static int perf_trace_event_init(struct ftrace_event_call *tp_event, |
25 | struct perf_event *p_event) | 48 | struct perf_event *p_event) |
26 | { | 49 | { |
27 | struct hlist_head __percpu *list; | 50 | struct hlist_head __percpu *list; |
28 | int ret = -ENOMEM; | 51 | int ret; |
29 | int cpu; | 52 | int cpu; |
30 | 53 | ||
54 | ret = perf_trace_event_perm(tp_event, p_event); | ||
55 | if (ret) | ||
56 | return ret; | ||
57 | |||
31 | p_event->tp_event = tp_event; | 58 | p_event->tp_event = tp_event; |
32 | if (tp_event->perf_refcount++ > 0) | 59 | if (tp_event->perf_refcount++ > 0) |
33 | return 0; | 60 | return 0; |
34 | 61 | ||
62 | ret = -ENOMEM; | ||
63 | |||
35 | list = alloc_percpu(struct hlist_head); | 64 | list = alloc_percpu(struct hlist_head); |
36 | if (!list) | 65 | if (!list) |
37 | goto fail; | 66 | goto fail; |
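
The net effect of perf_trace_event_perm() is that unprivileged users may still count tracepoint events; only requests for the raw payload (PERF_SAMPLE_RAW) are confined to CAP_SYS_ADMIN, subject to the perf paranoia setting, with a carve-out for per-task events on tracepoints flagged TRACE_EVENT_FL_CAP_ANY. A userspace sketch of the still-permitted counting case; the tracepoint id is assumed to have been read beforehand from debugfs (e.g. /sys/kernel/debug/tracing/events/sched/sched_switch/id):

    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/perf_event.h>

    /*
     * Sketch: count a tracepoint without PERF_SAMPLE_RAW.  Since no raw
     * payload is requested, perf_trace_event_perm() lets a non-root user
     * open this; adding PERF_SAMPLE_RAW to sample_type would trip the
     * -EPERM path above unless the caller has CAP_SYS_ADMIN.
     */
    static int count_tracepoint(unsigned long long tracepoint_id)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size   = sizeof(attr);
            attr.type   = PERF_TYPE_TRACEPOINT;
            attr.config = tracepoint_id;
            /* sample_type deliberately left without PERF_SAMPLE_RAW */

            return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }
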
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 6e3c41a4024c..eb17e143b5da 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -57,6 +57,8 @@ static int __init hardlockup_panic_setup(char *str) | |||
57 | { | 57 | { |
58 | if (!strncmp(str, "panic", 5)) | 58 | if (!strncmp(str, "panic", 5)) |
59 | hardlockup_panic = 1; | 59 | hardlockup_panic = 1; |
60 | else if (!strncmp(str, "0", 1)) | ||
61 | no_watchdog = 1; | ||
60 | return 1; | 62 | return 1; |
61 | } | 63 | } |
62 | __setup("nmi_watchdog=", hardlockup_panic_setup); | 64 | __setup("nmi_watchdog=", hardlockup_panic_setup); |
@@ -547,13 +549,13 @@ static struct notifier_block __cpuinitdata cpu_nfb = { | |||
547 | .notifier_call = cpu_callback | 549 | .notifier_call = cpu_callback |
548 | }; | 550 | }; |
549 | 551 | ||
550 | static int __init spawn_watchdog_task(void) | 552 | void __init lockup_detector_init(void) |
551 | { | 553 | { |
552 | void *cpu = (void *)(long)smp_processor_id(); | 554 | void *cpu = (void *)(long)smp_processor_id(); |
553 | int err; | 555 | int err; |
554 | 556 | ||
555 | if (no_watchdog) | 557 | if (no_watchdog) |
556 | return 0; | 558 | return; |
557 | 559 | ||
558 | err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); | 560 | err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); |
559 | WARN_ON(notifier_to_errno(err)); | 561 | WARN_ON(notifier_to_errno(err)); |
@@ -561,6 +563,5 @@ static int __init spawn_watchdog_task(void) | |||
561 | cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); | 563 | cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); |
562 | register_cpu_notifier(&cpu_nfb); | 564 | register_cpu_notifier(&cpu_nfb); |
563 | 565 | ||
564 | return 0; | 566 | return; |
565 | } | 567 | } |
566 | early_initcall(spawn_watchdog_task); | ||
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index b2c63309a651..6f5a498608b2 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt | |||
@@ -24,12 +24,47 @@ OPTIONS | |||
24 | --input=:: | 24 | --input=:: |
25 | Input file name. (default: perf.data) | 25 | Input file name. (default: perf.data) |
26 | 26 | ||
27 | -d:: | ||
28 | --dsos=<dso[,dso...]>:: | ||
29 | Only consider symbols in these dsos. | ||
30 | -s:: | ||
31 | --symbol=<symbol>:: | ||
32 | Symbol to annotate. | ||
33 | |||
34 | -f:: | ||
35 | --force:: | ||
36 | Don't complain, do it. | ||
37 | |||
38 | -v:: | ||
39 | --verbose:: | ||
40 | Be more verbose. (Show symbol address, etc) | ||
41 | |||
42 | -D:: | ||
43 | --dump-raw-trace:: | ||
44 | Dump raw trace in ASCII. | ||
45 | |||
46 | -k:: | ||
47 | --vmlinux=<file>:: | ||
48 | vmlinux pathname. | ||
49 | |||
50 | -m:: | ||
51 | --modules:: | ||
52 | Load module symbols. WARNING: use only with -k and LIVE kernel. | ||
53 | |||
54 | -l:: | ||
55 | --print-line:: | ||
56 | Print matching source lines (may be slow). | ||
57 | |||
58 | -P:: | ||
59 | --full-paths:: | ||
60 | Don't shorten the displayed pathnames. | ||
61 | |||
27 | --stdio:: Use the stdio interface. | 62 | --stdio:: Use the stdio interface. |
28 | 63 | ||
29 | --tui:: Use the TUI interface. Use of --tui requires a tty; if one is not | 64 | --tui:: Use the TUI interface. Use of --tui requires a tty; if one is not |
30 | present, as when piping to other commands, the stdio interface is | 65 | present, as when piping to other commands, the stdio interface is |
31 | used. This interface starts by centering on the line with the most | 66 | used. This interface starts by centering on the line with the most |
32 | samples; TAB/UNTAB cycles thru the lines with more samples. | 67 | samples; TAB/UNTAB cycles through the lines with more samples. |
33 | 68 | ||
34 | SEE ALSO | 69 | SEE ALSO |
35 | -------- | 70 | -------- |
diff --git a/tools/perf/Documentation/perf-buildid-list.txt b/tools/perf/Documentation/perf-buildid-list.txt index 01b642c0bf8f..5eaac6f26d51 100644 --- a/tools/perf/Documentation/perf-buildid-list.txt +++ b/tools/perf/Documentation/perf-buildid-list.txt | |||
@@ -18,6 +18,9 @@ perf report. | |||
18 | 18 | ||
19 | OPTIONS | 19 | OPTIONS |
20 | ------- | 20 | ------- |
21 | -H:: | ||
22 | --with-hits:: | ||
23 | Show only DSOs with hits. | ||
21 | -i:: | 24 | -i:: |
22 | --input=:: | 25 | --input=:: |
23 | Input file name. (default: perf.data) | 26 | Input file name. (default: perf.data) |
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index 20d97d84ea1c..6a9ec2b35310 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt | |||
@@ -19,6 +19,18 @@ If no parameters are passed it will assume perf.data.old and perf.data. | |||
19 | 19 | ||
20 | OPTIONS | 20 | OPTIONS |
21 | ------- | 21 | ------- |
22 | -M:: | ||
23 | --displacement:: | ||
24 | Show position displacement relative to baseline. | ||
25 | |||
26 | -D:: | ||
27 | --dump-raw-trace:: | ||
28 | Dump raw trace in ASCII. | ||
29 | |||
30 | -m:: | ||
31 | --modules:: | ||
32 | Load module symbols. WARNING: use only with -k and LIVE kernel. | ||
33 | |||
22 | -d:: | 34 | -d:: |
23 | --dsos=:: | 35 | --dsos=:: |
24 | Only consider symbols in these dsos. CSV that understands | 36 | Only consider symbols in these dsos. CSV that understands |
@@ -42,7 +54,7 @@ OPTIONS | |||
42 | --field-separator=:: | 54 | --field-separator=:: |
43 | 55 | ||
44 | Use a special separator character and don't pad with spaces, replacing | 56 | Use a special separator character and don't pad with spaces, replacing |
45 | all occurances of this separator in symbol names (and other output) | 57 | all occurrences of this separator in symbol names (and other output) |
46 | with a '.' character, so that it is the only invalid separator. | 58 | with a '.' character, so that it is the only invalid separator. |
47 | 59 | ||
48 | -v:: | 60 | -v:: |
@@ -50,6 +62,11 @@ OPTIONS | |||
50 | Be verbose, for instance, show the raw counts in addition to the | 62 | Be verbose, for instance, show the raw counts in addition to the |
51 | diff. | 63 | diff. |
52 | 64 | ||
65 | -f:: | ||
66 | --force:: | ||
67 | Don't complain, do it. | ||
68 | |||
69 | |||
53 | SEE ALSO | 70 | SEE ALSO |
54 | -------- | 71 | -------- |
55 | linkperf:perf-record[1] | 72 | linkperf:perf-record[1] |
diff --git a/tools/perf/Documentation/perf-kvm.txt b/tools/perf/Documentation/perf-kvm.txt index d004e19fe6d6..dd84cb2f0a88 100644 --- a/tools/perf/Documentation/perf-kvm.txt +++ b/tools/perf/Documentation/perf-kvm.txt | |||
@@ -22,7 +22,7 @@ There are a couple of variants of perf kvm: | |||
22 | a performance counter profile of guest os in realtime | 22 | a performance counter profile of guest os in realtime |
23 | of an arbitrary workload. | 23 | of an arbitrary workload. |
24 | 24 | ||
25 | 'perf kvm record <command>' to record the performance couinter profile | 25 | 'perf kvm record <command>' to record the performance counter profile |
26 | of an arbitrary workload and save it into a perf data file. If both | 26 | of an arbitrary workload and save it into a perf data file. If both |
27 | --host and --guest are input, the perf data file name is perf.data.kvm. | 27 | --host and --guest are input, the perf data file name is perf.data.kvm. |
28 | If there is no --host but --guest, the file name is perf.data.guest. | 28 | If there is no --host but --guest, the file name is perf.data.guest. |
@@ -40,6 +40,12 @@ There are a couple of variants of perf kvm: | |||
40 | 40 | ||
41 | OPTIONS | 41 | OPTIONS |
42 | ------- | 42 | ------- |
43 | -i:: | ||
44 | --input=:: | ||
45 | Input file name. | ||
46 | -o:: | ||
47 | --output:: | ||
48 | Output file name. | ||
43 | --host=:: | 49 | --host=:: |
44 | Collect host side performance profile. | 50 | Collect host side performance profile. |
45 | --guest=:: | 51 | --guest=:: |
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt index b317102138c8..921de259ea10 100644 --- a/tools/perf/Documentation/perf-lock.txt +++ b/tools/perf/Documentation/perf-lock.txt | |||
@@ -24,6 +24,21 @@ and statistics with this 'perf lock' command. | |||
24 | 24 | ||
25 | 'perf lock report' reports statistical data. | 25 | 'perf lock report' reports statistical data. |
26 | 26 | ||
27 | OPTIONS | ||
28 | ------- | ||
29 | |||
30 | -i:: | ||
31 | --input=<file>:: | ||
32 | Input file name. | ||
33 | |||
34 | -v:: | ||
35 | --verbose:: | ||
36 | Be more verbose (show symbol address, etc). | ||
37 | |||
38 | -D:: | ||
39 | --dump-raw-trace:: | ||
40 | Dump raw trace in ASCII. | ||
41 | |||
27 | SEE ALSO | 42 | SEE ALSO |
28 | -------- | 43 | -------- |
29 | linkperf:perf[1] | 44 | linkperf:perf[1] |
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index 62de1b7f4e76..4e2323276984 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt | |||
@@ -115,7 +115,7 @@ Each probe argument follows below syntax. | |||
115 | 115 | ||
116 | LINE SYNTAX | 116 | LINE SYNTAX |
117 | ----------- | 117 | ----------- |
118 | Line range is descripted by following syntax. | 118 | Line range is described by the following syntax. |
119 | 119 | ||
120 | "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]" | 120 | "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]" |
121 | 121 | ||
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index a91f9f9e6e5c..52462ae26455 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt | |||
@@ -39,15 +39,24 @@ OPTIONS | |||
39 | be passed as follows: '\mem:addr[:[r][w][x]]'. | 39 | be passed as follows: '\mem:addr[:[r][w][x]]'. |
40 | If you want to profile read-write accesses at 0x1000, just set | 40 | If you want to profile read-write accesses at 0x1000, just set |
41 | 'mem:0x1000:rw'. | 41 | 'mem:0x1000:rw'. |
42 | |||
43 | --filter=<filter>:: | ||
44 | Event filter. | ||
45 | |||
42 | -a:: | 46 | -a:: |
43 | System-wide collection. | 47 | --all-cpus:: |
48 | System-wide collection from all CPUs. | ||
44 | 49 | ||
45 | -l:: | 50 | -l:: |
46 | Scale counter values. | 51 | Scale counter values. |
47 | 52 | ||
48 | -p:: | 53 | -p:: |
49 | --pid=:: | 54 | --pid=:: |
50 | Record events on existing pid. | 55 | Record events on existing process ID. |
56 | |||
57 | -t:: | ||
58 | --tid=:: | ||
59 | Record events on existing thread ID. | ||
51 | 60 | ||
52 | -r:: | 61 | -r:: |
53 | --realtime=:: | 62 | --realtime=:: |
@@ -99,6 +108,11 @@ OPTIONS | |||
99 | --data:: | 108 | --data:: |
100 | Sample addresses. | 109 | Sample addresses. |
101 | 110 | ||
111 | -T:: | ||
112 | --timestamp:: | ||
113 | Sample timestamps. Use it with 'perf report -D' to see the timestamps, | ||
114 | for instance. | ||
115 | |||
102 | -n:: | 116 | -n:: |
103 | --no-samples:: | 117 | --no-samples:: |
104 | Don't sample. | 118 | Don't sample. |
@@ -109,8 +123,8 @@ Collect raw sample records from all opened counters (default for tracepoint coun | |||
109 | 123 | ||
110 | -C:: | 124 | -C:: |
111 | --cpu:: | 125 | --cpu:: |
112 | Collect samples only on the list of cpus provided. Multiple CPUs can be provided as a | 126 | Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a |
113 | comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. | 127 | comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. |
114 | In per-thread mode with inheritance mode on (default), samples are captured only when | 128 | In per-thread mode with inheritance mode on (default), samples are captured only when |
115 | the thread executes on the designated CPUs. Default is to monitor all CPUs. | 129 | the thread executes on the designated CPUs. Default is to monitor all CPUs. |
116 | 130 | ||
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 12052c9ed0ba..fefea77ec6e9 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt | |||
@@ -20,6 +20,11 @@ OPTIONS | |||
20 | -i:: | 20 | -i:: |
21 | --input=:: | 21 | --input=:: |
22 | Input file name. (default: perf.data) | 22 | Input file name. (default: perf.data) |
23 | |||
24 | -v:: | ||
25 | --verbose:: | ||
26 | Be more verbose. (show symbol address, etc) | ||
27 | |||
23 | -d:: | 28 | -d:: |
24 | --dsos=:: | 29 | --dsos=:: |
25 | Only consider symbols in these dsos. CSV that understands | 30 | Only consider symbols in these dsos. CSV that understands |
@@ -27,6 +32,10 @@ OPTIONS | |||
27 | -n:: | 32 | -n:: |
28 | --show-nr-samples:: | 33 | --show-nr-samples:: |
29 | Show the number of samples for each symbol | 34 | Show the number of samples for each symbol |
35 | |||
36 | --showcpuutilization:: | ||
37 | Show sample percentage for different cpu modes. | ||
38 | |||
30 | -T:: | 39 | -T:: |
31 | --threads:: | 40 | --threads:: |
32 | Show per-thread event counters | 41 | Show per-thread event counters |
@@ -39,12 +48,24 @@ OPTIONS | |||
39 | Only consider these symbols. CSV that understands | 48 | Only consider these symbols. CSV that understands |
40 | file://filename entries. | 49 | file://filename entries. |
41 | 50 | ||
51 | -U:: | ||
52 | --hide-unresolved:: | ||
53 | Only display entries resolved to a symbol. | ||
54 | |||
42 | -s:: | 55 | -s:: |
43 | --sort=:: | 56 | --sort=:: |
44 | Sort by key(s): pid, comm, dso, symbol, parent. | 57 | Sort by key(s): pid, comm, dso, symbol, parent. |
45 | 58 | ||
59 | -p:: | ||
60 | --parent=<regex>:: | ||
61 | regex filter to identify parent, see: '--sort parent' | ||
62 | |||
63 | -x:: | ||
64 | --exclude-other:: | ||
65 | Only display entries with parent-match. | ||
66 | |||
46 | -w:: | 67 | -w:: |
47 | --field-width=:: | 68 | --column-widths=<width[,width...]>:: |
48 | Force each column width to the provided list, for large terminal | 69 | Force each column width to the provided list, for large terminal |
49 | readability. | 70 | readability. |
50 | 71 | ||
@@ -52,19 +73,26 @@ OPTIONS | |||
52 | --field-separator=:: | 73 | --field-separator=:: |
53 | 74 | ||
54 | Use a special separator character and don't pad with spaces, replacing | 75 | Use a special separator character and don't pad with spaces, replacing |
55 | all occurances of this separator in symbol names (and other output) | 76 | all occurrences of this separator in symbol names (and other output) |
56 | with a '.' character, so that it is the only invalid separator. | 77 | with a '.' character, so that it is the only invalid separator. |
57 | 78 | ||
79 | -D:: | ||
80 | --dump-raw-trace:: | ||
81 | Dump raw trace in ASCII. | ||
82 | |||
58 | -g [type,min]:: | 83 | -g [type,min]:: |
59 | --call-graph:: | 84 | --call-graph:: |
60 | Display callchains using type and min percent threshold. | 85 | Display call chains using type and min percent threshold. |
61 | type can be either: | 86 | type can be either: |
62 | - flat: single column, linear exposure of callchains. | 87 | - flat: single column, linear exposure of call chains. |
63 | - graph: use a graph tree, displaying absolute overhead rates. | 88 | - graph: use a graph tree, displaying absolute overhead rates. |
64 | - fractal: like graph, but displays relative rates. Each branch of | 89 | - fractal: like graph, but displays relative rates. Each branch of |
65 | the tree is considered a new profiled object. + | 90 | the tree is considered a new profiled object. + |
66 | Default: fractal,0.5. | 91 | Default: fractal,0.5. |
67 | 92 | ||
93 | --pretty=<key>:: | ||
94 | Pretty printing style. key: normal, raw | ||
95 | |||
68 | --stdio:: Use the stdio interface. | 96 | --stdio:: Use the stdio interface. |
69 | 97 | ||
70 | --tui:: Use the TUI interface, which is integrated with annotate and allows | 98 | --tui:: Use the TUI interface, which is integrated with annotate and allows |
@@ -72,6 +100,22 @@ OPTIONS | |||
72 | requires a tty, if one is not present, as when piping to other | 100 | requires a tty, if one is not present, as when piping to other |
73 | commands, the stdio interface is used. | 101 | commands, the stdio interface is used. |
74 | 102 | ||
103 | -k:: | ||
104 | --vmlinux=<file>:: | ||
105 | vmlinux pathname | ||
106 | |||
107 | --kallsyms=<file>:: | ||
108 | kallsyms pathname | ||
109 | |||
110 | -m:: | ||
111 | --modules:: | ||
112 | Load module symbols. WARNING: This should only be used with -k and | ||
113 | a LIVE kernel. | ||
114 | |||
115 | -f:: | ||
116 | --force:: | ||
117 | Don't complain, do it. | ||
118 | |||
75 | SEE ALSO | 119 | SEE ALSO |
76 | -------- | 120 | -------- |
77 | linkperf:perf-stat[1] | 121 | linkperf:perf-stat[1] |
diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index 8417644a6166..46822d5fde1c 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt | |||
@@ -8,11 +8,11 @@ perf-sched - Tool to trace/measure scheduler properties (latencies) | |||
8 | SYNOPSIS | 8 | SYNOPSIS |
9 | -------- | 9 | -------- |
10 | [verse] | 10 | [verse] |
11 | 'perf sched' {record|latency|replay|trace} | 11 | 'perf sched' {record|latency|map|replay|trace} |
12 | 12 | ||
13 | DESCRIPTION | 13 | DESCRIPTION |
14 | ----------- | 14 | ----------- |
15 | There are four variants of perf sched: | 15 | There are five variants of perf sched: |
16 | 16 | ||
17 | 'perf sched record <command>' to record the scheduling events | 17 | 'perf sched record <command>' to record the scheduling events |
18 | of an arbitrary workload. | 18 | of an arbitrary workload. |
@@ -30,8 +30,22 @@ There are four variants of perf sched: | |||
30 | of the workload as it occurred when it was recorded - and can repeat | 30 | of the workload as it occurred when it was recorded - and can repeat |
31 | it a number of times, measuring its performance.) | 31 | it a number of times, measuring its performance.) |
32 | 32 | ||
33 | 'perf sched map' to print a textual context-switching outline of | ||
34 | the workload captured via 'perf sched record'. Columns stand for | ||
35 | individual CPUs, and the two-letter shortcuts stand for tasks that | ||
36 | are running on a CPU. A '*' denotes the CPU that had the event, and | ||
37 | a dot signals an idle CPU. | ||
38 | |||
33 | OPTIONS | 39 | OPTIONS |
34 | ------- | 40 | ------- |
41 | -i:: | ||
42 | --input=<file>:: | ||
43 | Input file name. (default: perf.data) | ||
44 | |||
45 | -v:: | ||
46 | --verbose:: | ||
47 | Be more verbose. (show symbol address, etc) | ||
48 | |||
35 | -D:: | 49 | -D:: |
36 | --dump-raw-trace=:: | 50 | --dump-raw-trace=:: |
37 | Display verbose dump of the sched data. | 51 | Display verbose dump of the sched data. |
diff --git a/tools/perf/Documentation/perf-trace-perl.txt b/tools/perf/Documentation/perf-script-perl.txt index ee6525ee6d69..5bb41e55a3ac 100644 --- a/tools/perf/Documentation/perf-trace-perl.txt +++ b/tools/perf/Documentation/perf-script-perl.txt | |||
@@ -1,19 +1,19 @@ | |||
1 | perf-trace-perl(1) | 1 | perf-script-perl(1) |
2 | ================== | 2 | =================== |
3 | 3 | ||
4 | NAME | 4 | NAME |
5 | ---- | 5 | ---- |
6 | perf-trace-perl - Process trace data with a Perl script | 6 | perf-script-perl - Process trace data with a Perl script |
7 | 7 | ||
8 | SYNOPSIS | 8 | SYNOPSIS |
9 | -------- | 9 | -------- |
10 | [verse] | 10 | [verse] |
11 | 'perf trace' [-s [Perl]:script[.pl] ] | 11 | 'perf script' [-s [Perl]:script[.pl] ] |
12 | 12 | ||
13 | DESCRIPTION | 13 | DESCRIPTION |
14 | ----------- | 14 | ----------- |
15 | 15 | ||
16 | This perf trace option is used to process perf trace data using perf's | 16 | This perf script option is used to process trace data using perf's |
17 | built-in Perl interpreter. It reads and processes the input file and | 17 | built-in Perl interpreter. It reads and processes the input file and |
18 | displays the results of the trace analysis implemented in the given | 18 | displays the results of the trace analysis implemented in the given |
19 | Perl script, if any. | 19 | Perl script, if any. |
@@ -21,7 +21,7 @@ Perl script, if any. | |||
21 | STARTER SCRIPTS | 21 | STARTER SCRIPTS |
22 | --------------- | 22 | --------------- |
23 | 23 | ||
24 | You can avoid reading the rest of this document by running 'perf trace | 24 | You can avoid reading the rest of this document by running 'perf script |
25 | -g perl' in the same directory as an existing perf.data trace file. | 25 | -g perl' in the same directory as an existing perf.data trace file. |
26 | That will generate a starter script containing a handler for each of | 26 | That will generate a starter script containing a handler for each of |
27 | the event types in the trace file; it simply prints every available | 27 | the event types in the trace file; it simply prints every available |
@@ -30,13 +30,13 @@ field for each event in the trace file. | |||
30 | You can also look at the existing scripts in | 30 | You can also look at the existing scripts in |
31 | ~/libexec/perf-core/scripts/perl for typical examples showing how to | 31 | ~/libexec/perf-core/scripts/perl for typical examples showing how to |
32 | do basic things like aggregate event data, print results, etc. Also, | 32 | do basic things like aggregate event data, print results, etc. Also, |
33 | the check-perf-trace.pl script, while not interesting for its results, | 33 | the check-perf-script.pl script, while not interesting for its results, |
34 | attempts to exercise all of the main scripting features. | 34 | attempts to exercise all of the main scripting features. |
35 | 35 | ||
36 | EVENT HANDLERS | 36 | EVENT HANDLERS |
37 | -------------- | 37 | -------------- |
38 | 38 | ||
39 | When perf trace is invoked using a trace script, a user-defined | 39 | When perf script is invoked using a trace script, a user-defined |
40 | 'handler function' is called for each event in the trace. If there's | 40 | 'handler function' is called for each event in the trace. If there's |
41 | no handler function defined for a given event type, the event is | 41 | no handler function defined for a given event type, the event is |
42 | ignored (or passed to a 'trace_handled' function, see below) and the | 42 | ignored (or passed to a 'trace_handled' function, see below) and the |
@@ -112,13 +112,13 @@ write a useful trace script. The sections below cover the rest. | |||
112 | SCRIPT LAYOUT | 112 | SCRIPT LAYOUT |
113 | ------------- | 113 | ------------- |
114 | 114 | ||
115 | Every perf trace Perl script should start by setting up a Perl module | 115 | Every perf script Perl script should start by setting up a Perl module |
116 | search path and 'use'ing a few support modules (see module | 116 | search path and 'use'ing a few support modules (see module |
117 | descriptions below): | 117 | descriptions below): |
118 | 118 | ||
119 | ---- | 119 | ---- |
120 | use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib"; | 120 | use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/perf-script-Util/lib"; |
121 | use lib "./Perf-Trace-Util/lib"; | 121 | use lib "./perf-script-Util/lib"; |
122 | use Perf::Trace::Core; | 122 | use Perf::Trace::Core; |
123 | use Perf::Trace::Context; | 123 | use Perf::Trace::Context; |
124 | use Perf::Trace::Util; | 124 | use Perf::Trace::Util; |
@@ -162,7 +162,7 @@ sub trace_unhandled | |||
162 | ---- | 162 | ---- |
163 | 163 | ||
164 | The remaining sections provide descriptions of each of the available | 164 | The remaining sections provide descriptions of each of the available |
165 | built-in perf trace Perl modules and their associated functions. | 165 | built-in perf script Perl modules and their associated functions. |
166 | 166 | ||
167 | AVAILABLE MODULES AND FUNCTIONS | 167 | AVAILABLE MODULES AND FUNCTIONS |
168 | ------------------------------- | 168 | ------------------------------- |
@@ -170,7 +170,7 @@ AVAILABLE MODULES AND FUNCTIONS | |||
170 | The following sections describe the functions and variables available | 170 | The following sections describe the functions and variables available |
171 | via the various Perf::Trace::* Perl modules. To use the functions and | 171 | via the various Perf::Trace::* Perl modules. To use the functions and |
172 | variables from the given module, add the corresponding 'use | 172 | variables from the given module, add the corresponding 'use |
173 | Perf::Trace::XXX' line to your perf trace script. | 173 | Perf::Trace::XXX' line to your script. |
174 | 174 | ||
175 | Perf::Trace::Core Module | 175 | Perf::Trace::Core Module |
176 | ~~~~~~~~~~~~~~~~~~~~~~~~ | 176 | ~~~~~~~~~~~~~~~~~~~~~~~~ |
@@ -204,7 +204,7 @@ argument. | |||
204 | Perf::Trace::Util Module | 204 | Perf::Trace::Util Module |
205 | ~~~~~~~~~~~~~~~~~~~~~~~~ | 205 | ~~~~~~~~~~~~~~~~~~~~~~~~ |
206 | 206 | ||
207 | Various utility functions for use with perf trace: | 207 | Various utility functions for use with perf script: |
208 | 208 | ||
209 | nsecs($secs, $nsecs) - returns total nsecs given secs/nsecs pair | 209 | nsecs($secs, $nsecs) - returns total nsecs given secs/nsecs pair |
210 | nsecs_secs($nsecs) - returns whole secs portion given nsecs | 210 | nsecs_secs($nsecs) - returns whole secs portion given nsecs |
@@ -214,4 +214,4 @@ Various utility functions for use with perf trace: | |||
214 | 214 | ||
215 | SEE ALSO | 215 | SEE ALSO |
216 | -------- | 216 | -------- |
217 | linkperf:perf-trace[1] | 217 | linkperf:perf-script[1] |
diff --git a/tools/perf/Documentation/perf-trace-python.txt b/tools/perf/Documentation/perf-script-python.txt index 693be804dd3d..36b38277422c 100644 --- a/tools/perf/Documentation/perf-trace-python.txt +++ b/tools/perf/Documentation/perf-script-python.txt | |||
@@ -1,19 +1,19 @@ | |||
1 | perf-trace-python(1) | 1 | perf-script-python(1) |
2 | ==================== | 2 | ===================== |
3 | 3 | ||
4 | NAME | 4 | NAME |
5 | ---- | 5 | ---- |
6 | perf-trace-python - Process trace data with a Python script | 6 | perf-script-python - Process trace data with a Python script |
7 | 7 | ||
8 | SYNOPSIS | 8 | SYNOPSIS |
9 | -------- | 9 | -------- |
10 | [verse] | 10 | [verse] |
11 | 'perf trace' [-s [Python]:script[.py] ] | 11 | 'perf script' [-s [Python]:script[.py] ] |
12 | 12 | ||
13 | DESCRIPTION | 13 | DESCRIPTION |
14 | ----------- | 14 | ----------- |
15 | 15 | ||
16 | This perf trace option is used to process perf trace data using perf's | 16 | This perf script option is used to process trace data using perf's |
17 | built-in Python interpreter. It reads and processes the input file and | 17 | built-in Python interpreter. It reads and processes the input file and |
18 | displays the results of the trace analysis implemented in the given | 18 | displays the results of the trace analysis implemented in the given |
19 | Python script, if any. | 19 | Python script, if any. |
@@ -23,15 +23,15 @@ A QUICK EXAMPLE | |||
23 | 23 | ||
24 | This section shows the process, start to finish, of creating a working | 24 | This section shows the process, start to finish, of creating a working |
25 | Python script that aggregates and extracts useful information from a | 25 | Python script that aggregates and extracts useful information from a |
26 | raw perf trace stream. You can avoid reading the rest of this | 26 | raw trace stream. You can avoid reading the rest of this |
27 | document if an example is enough for you; the rest of the document | 27 | document if an example is enough for you; the rest of the document |
28 | provides more details on each step and lists the library functions | 28 | provides more details on each step and lists the library functions |
29 | available to script writers. | 29 | available to script writers. |
30 | 30 | ||
31 | This example actually details the steps that were used to create the | 31 | This example actually details the steps that were used to create the |
32 | 'syscall-counts' script you see when you list the available perf trace | 32 | 'syscall-counts' script you see when you list the available perf script |
33 | scripts via 'perf trace -l'. As such, this script also shows how to | 33 | scripts via 'perf script -l'. As such, this script also shows how to |
34 | integrate your script into the list of general-purpose 'perf trace' | 34 | integrate your script into the list of general-purpose 'perf script' |
35 | scripts listed by that command. | 35 | scripts listed by that command. |
36 | 36 | ||
37 | The syscall-counts script is a simple script, but demonstrates all the | 37 | The syscall-counts script is a simple script, but demonstrates all the |
@@ -105,31 +105,31 @@ That single stream will be recorded in a file in the current directory | |||
105 | called perf.data. | 105 | called perf.data. |
106 | 106 | ||
107 | Once we have a perf.data file containing our data, we can use the -g | 107 | Once we have a perf.data file containing our data, we can use the -g |
108 | 'perf trace' option to generate a Python script that will contain a | 108 | 'perf script' option to generate a Python script that will contain a |
109 | callback handler for each event type found in the perf.data trace | 109 | callback handler for each event type found in the perf.data trace |
110 | stream (for more details, see the STARTER SCRIPTS section). | 110 | stream (for more details, see the STARTER SCRIPTS section). |
111 | 111 | ||
112 | ---- | 112 | ---- |
113 | # perf trace -g python | 113 | # perf script -g python |
114 | generated Python script: perf-trace.py | 114 | generated Python script: perf-script.py |
115 | 115 | ||
116 | The output file, also created in the current directory, is named | 116 | The output file, also created in the current directory, is named |
117 | perf-trace.py. Here's the file in its entirety: | 117 | perf-script.py. Here's the file in its entirety: |
118 | 118 | ||
119 | # perf trace event handlers, generated by perf trace -g python | 119 | # perf script event handlers, generated by perf script -g python |
120 | # Licensed under the terms of the GNU GPL License version 2 | 120 | # Licensed under the terms of the GNU GPL License version 2 |
121 | 121 | ||
122 | # The common_* event handler fields are the most useful fields common to | 122 | # The common_* event handler fields are the most useful fields common to |
123 | # all events. They don't necessarily correspond to the 'common_*' fields | 123 | # all events. They don't necessarily correspond to the 'common_*' fields |
124 | # in the format files. Those fields not available as handler params can | 124 | # in the format files. Those fields not available as handler params can |
125 | # be retrieved using Python functions of the form common_*(context). | 125 | # be retrieved using Python functions of the form common_*(context). |
126 | # See the perf-trace-python Documentation for the list of available functions. | 126 | # See the perf-script-python Documentation for the list of available functions. |
127 | 127 | ||
128 | import os | 128 | import os |
129 | import sys | 129 | import sys |
130 | 130 | ||
131 | sys.path.append(os.environ['PERF_EXEC_PATH'] + \ | 131 | sys.path.append(os.environ['PERF_EXEC_PATH'] + \ |
132 | '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') | 132 | '/scripts/python/perf-script-Util/lib/Perf/Trace') |
133 | 133 | ||
134 | from perf_trace_context import * | 134 | from perf_trace_context import * |
135 | from Core import * | 135 | from Core import * |
@@ -160,7 +160,7 @@ def print_header(event_name, cpu, secs, nsecs, pid, comm): | |||
160 | ---- | 160 | ---- |
161 | 161 | ||
162 | At the top is a comment block followed by some import statements and a | 162 | At the top is a comment block followed by some import statements and a |
163 | path append which every perf trace script should include. | 163 | path append which every perf script Python script should include. |
164 | 164 | ||
165 | Following that are a couple generated functions, trace_begin() and | 165 | Following that are a couple generated functions, trace_begin() and |
166 | trace_end(), which are called at the beginning and the end of the | 166 | trace_end(), which are called at the beginning and the end of the |
@@ -189,8 +189,8 @@ simply a utility function used for that purpose. Let's rename the | |||
189 | script and run it to see the default output: | 189 | script and run it to see the default output: |
190 | 190 | ||
191 | ---- | 191 | ---- |
192 | # mv perf-trace.py syscall-counts.py | 192 | # mv perf-script.py syscall-counts.py |
193 | # perf trace -s syscall-counts.py | 193 | # perf script -s syscall-counts.py |
194 | 194 | ||
195 | raw_syscalls__sys_enter 1 00840.847582083 7506 perf id=1, args= | 195 | raw_syscalls__sys_enter 1 00840.847582083 7506 perf id=1, args= |
196 | raw_syscalls__sys_enter 1 00840.847595764 7506 perf id=1, args= | 196 | raw_syscalls__sys_enter 1 00840.847595764 7506 perf id=1, args= |
@@ -216,7 +216,7 @@ import os | |||
216 | import sys | 216 | import sys |
217 | 217 | ||
218 | sys.path.append(os.environ['PERF_EXEC_PATH'] + \ | 218 | sys.path.append(os.environ['PERF_EXEC_PATH'] + \ |
219 | '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') | 219 | '/scripts/python/perf-script-Util/lib/Perf/Trace') |
220 | 220 | ||
221 | from perf_trace_context import * | 221 | from perf_trace_context import * |
222 | from Core import * | 222 | from Core import * |
@@ -279,7 +279,7 @@ import os | |||
279 | import sys | 279 | import sys |
280 | 280 | ||
281 | sys.path.append(os.environ['PERF_EXEC_PATH'] + \ | 281 | sys.path.append(os.environ['PERF_EXEC_PATH'] + \ |
282 | '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') | 282 | '/scripts/python/perf-script-Util/lib/Perf/Trace') |
283 | 283 | ||
284 | from perf_trace_context import * | 284 | from perf_trace_context import * |
285 | from Core import * | 285 | from Core import * |
@@ -315,7 +315,7 @@ def print_syscall_totals(): | |||
315 | 315 | ||
316 | The script can be run just as before: | 316 | The script can be run just as before: |
317 | 317 | ||
318 | # perf trace -s syscall-counts.py | 318 | # perf script -s syscall-counts.py |
319 | 319 | ||
320 | So those are the essential steps in writing and running a script. The | 320 | So those are the essential steps in writing and running a script. The |
321 | process can be generalized to any tracepoint or set of tracepoints | 321 | process can be generalized to any tracepoint or set of tracepoints |
@@ -324,17 +324,17 @@ interested in by looking at the list of available events shown by | |||
324 | 'perf list' and/or look in /sys/kernel/debug/tracing events for | 324 | 'perf list' and/or look in /sys/kernel/debug/tracing events for |
325 | detailed event and field info, record the corresponding trace data | 325 | detailed event and field info, record the corresponding trace data |
326 | using 'perf record', passing it the list of interesting events, | 326 | using 'perf record', passing it the list of interesting events, |
327 | generate a skeleton script using 'perf trace -g python' and modify the | 327 | generate a skeleton script using 'perf script -g python' and modify the |
328 | code to aggregate and display it for your particular needs. | 328 | code to aggregate and display it for your particular needs. |
329 | 329 | ||
330 | After you've done that you may end up with a general-purpose script | 330 | After you've done that you may end up with a general-purpose script |
331 | that you want to keep around and have available for future use. By | 331 | that you want to keep around and have available for future use. By |
332 | writing a couple of very simple shell scripts and putting them in the | 332 | writing a couple of very simple shell scripts and putting them in the |
333 | right place, you can have your script listed alongside the other | 333 | right place, you can have your script listed alongside the other |
334 | scripts listed by the 'perf trace -l' command e.g.: | 334 | scripts listed by the 'perf script -l' command e.g.: |
335 | 335 | ||
336 | ---- | 336 | ---- |
337 | root@tropicana:~# perf trace -l | 337 | root@tropicana:~# perf script -l |
338 | List of available trace scripts: | 338 | List of available trace scripts: |
339 | workqueue-stats workqueue stats (ins/exe/create/destroy) | 339 | workqueue-stats workqueue stats (ins/exe/create/destroy) |
340 | wakeup-latency system-wide min/max/avg wakeup latency | 340 | wakeup-latency system-wide min/max/avg wakeup latency |
@@ -365,14 +365,14 @@ perf record -a -e raw_syscalls:sys_enter | |||
365 | The 'report' script is also a shell script with the same base name as | 365 | The 'report' script is also a shell script with the same base name as |
366 | your script, but with -report appended. It should also be located in | 366 | your script, but with -report appended. It should also be located in |
367 | the perf/scripts/python/bin directory. In that script, you write the | 367 | the perf/scripts/python/bin directory. In that script, you write the |
368 | 'perf trace -s' command-line needed for running your script: | 368 | 'perf script -s' command-line needed for running your script: |
369 | 369 | ||
370 | ---- | 370 | ---- |
371 | # cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-report | 371 | # cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-report |
372 | 372 | ||
373 | #!/bin/bash | 373 | #!/bin/bash |
374 | # description: system-wide syscall counts | 374 | # description: system-wide syscall counts |
375 | perf trace -s ~/libexec/perf-core/scripts/python/syscall-counts.py | 375 | perf script -s ~/libexec/perf-core/scripts/python/syscall-counts.py |
376 | ---- | 376 | ---- |
377 | 377 | ||
378 | Note that the location of the Python script given in the shell script | 378 | Note that the location of the Python script given in the shell script |
@@ -390,17 +390,17 @@ total 32 | |||
390 | drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 . | 390 | drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 . |
391 | drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 .. | 391 | drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 .. |
392 | drwxr-xr-x 2 trz trz 4096 2010-01-26 22:29 bin | 392 | drwxr-xr-x 2 trz trz 4096 2010-01-26 22:29 bin |
393 | -rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-trace.py | 393 | -rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-script.py |
394 | drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 Perf-Trace-Util | 394 | drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 perf-script-Util |
395 | -rw-r--r-- 1 trz trz 1462 2010-01-26 22:30 syscall-counts.py | 395 | -rw-r--r-- 1 trz trz 1462 2010-01-26 22:30 syscall-counts.py |
396 | ---- | 396 | ---- |
397 | 397 | ||
398 | Once you've done that (don't forget to do a new 'make install', | 398 | Once you've done that (don't forget to do a new 'make install', |
399 | otherwise your script won't show up at run-time), 'perf trace -l' | 399 | otherwise your script won't show up at run-time), 'perf script -l' |
400 | should show a new entry for your script: | 400 | should show a new entry for your script: |
401 | 401 | ||
402 | ---- | 402 | ---- |
403 | root@tropicana:~# perf trace -l | 403 | root@tropicana:~# perf script -l |
404 | List of available trace scripts: | 404 | List of available trace scripts: |
405 | workqueue-stats workqueue stats (ins/exe/create/destroy) | 405 | workqueue-stats workqueue stats (ins/exe/create/destroy) |
406 | wakeup-latency system-wide min/max/avg wakeup latency | 406 | wakeup-latency system-wide min/max/avg wakeup latency |
@@ -409,19 +409,19 @@ List of available trace scripts: | |||
409 | syscall-counts system-wide syscall counts | 409 | syscall-counts system-wide syscall counts |
410 | ---- | 410 | ---- |
411 | 411 | ||
412 | You can now perform the record step via 'perf trace record': | 412 | You can now perform the record step via 'perf script record': |
413 | 413 | ||
414 | # perf trace record syscall-counts | 414 | # perf script record syscall-counts |
415 | 415 | ||
416 | and display the output using 'perf trace report': | 416 | and display the output using 'perf script report': |
417 | 417 | ||
418 | # perf trace report syscall-counts | 418 | # perf script report syscall-counts |
419 | 419 | ||
420 | STARTER SCRIPTS | 420 | STARTER SCRIPTS |
421 | --------------- | 421 | --------------- |
422 | 422 | ||
423 | You can quickly get started writing a script for a particular set of | 423 | You can quickly get started writing a script for a particular set of |
424 | trace data by generating a skeleton script using 'perf trace -g | 424 | trace data by generating a skeleton script using 'perf script -g |
425 | python' in the same directory as an existing perf.data trace file. | 425 | python' in the same directory as an existing perf.data trace file. |
426 | That will generate a starter script containing a handler for each of | 426 | That will generate a starter script containing a handler for each of |
427 | the event types in the trace file; it simply prints every available | 427 | the event types in the trace file; it simply prints every available |
@@ -430,13 +430,13 @@ field for each event in the trace file. | |||
430 | You can also look at the existing scripts in | 430 | You can also look at the existing scripts in |
431 | ~/libexec/perf-core/scripts/python for typical examples showing how to | 431 | ~/libexec/perf-core/scripts/python for typical examples showing how to |
432 | do basic things like aggregate event data, print results, etc. Also, | 432 | do basic things like aggregate event data, print results, etc. Also, |
433 | the check-perf-trace.py script, while not interesting for its results, | 433 | the check-perf-script.py script, while not interesting for its results, |
434 | attempts to exercise all of the main scripting features. | 434 | attempts to exercise all of the main scripting features. |
435 | 435 | ||
436 | EVENT HANDLERS | 436 | EVENT HANDLERS |
437 | -------------- | 437 | -------------- |
438 | 438 | ||
439 | When perf trace is invoked using a trace script, a user-defined | 439 | When perf script is invoked using a trace script, a user-defined |
440 | 'handler function' is called for each event in the trace. If there's | 440 | 'handler function' is called for each event in the trace. If there's |
441 | no handler function defined for a given event type, the event is | 441 | no handler function defined for a given event type, the event is |
442 | ignored (or passed to a 'trace_handled' function, see below) and the | 442 | ignored (or passed to a 'trace_handled' function, see below) and the |
@@ -510,7 +510,7 @@ write a useful trace script. The sections below cover the rest. | |||
510 | SCRIPT LAYOUT | 510 | SCRIPT LAYOUT |
511 | ------------- | 511 | ------------- |
512 | 512 | ||
513 | Every perf trace Python script should start by setting up a Python | 513 | Every perf script Python script should start by setting up a Python |
514 | module search path and 'import'ing a few support modules (see module | 514 | module search path and 'import'ing a few support modules (see module |
515 | descriptions below): | 515 | descriptions below): |
516 | 516 | ||
@@ -519,7 +519,7 @@ descriptions below): | |||
519 | import sys | 519 | import sys |
520 | 520 | ||
521 | sys.path.append(os.environ['PERF_EXEC_PATH'] + \ | 521 | sys.path.append(os.environ['PERF_EXEC_PATH'] + \ |
522 | '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') | 522 | '/scripts/python/perf-script-Util/lib/Perf/Trace') |
523 | 523 | ||
524 | from perf_trace_context import * | 524 | from perf_trace_context import * |
525 | from Core import * | 525 | from Core import * |
@@ -559,15 +559,15 @@ def trace_unhandled(event_name, context, common_cpu, common_secs, | |||
559 | ---- | 559 | ---- |
560 | 560 | ||
561 | The remaining sections provide descriptions of each of the available | 561 | The remaining sections provide descriptions of each of the available |
562 | built-in perf trace Python modules and their associated functions. | 562 | built-in perf script Python modules and their associated functions. |
563 | 563 | ||
564 | AVAILABLE MODULES AND FUNCTIONS | 564 | AVAILABLE MODULES AND FUNCTIONS |
565 | ------------------------------- | 565 | ------------------------------- |
566 | 566 | ||
567 | The following sections describe the functions and variables available | 567 | The following sections describe the functions and variables available |
568 | via the various perf trace Python modules. To use the functions and | 568 | via the various perf script Python modules. To use the functions and |
569 | variables from the given module, add the corresponding 'from XXXX | 569 | variables from the given module, add the corresponding 'from XXXX |
570 | import' line to your perf trace script. | 570 | import' line to your script. |
571 | 571 | ||
572 | Core.py Module | 572 | Core.py Module |
573 | ~~~~~~~~~~~~~~ | 573 | ~~~~~~~~~~~~~~ |
@@ -610,7 +610,7 @@ argument. | |||
610 | Util.py Module | 610 | Util.py Module |
611 | ~~~~~~~~~~~~~~ | 611 | ~~~~~~~~~~~~~~ |
612 | 612 | ||
613 | Various utility functions for use with perf trace: | 613 | Various utility functions for use with perf script: |
614 | 614 | ||
615 | nsecs(secs, nsecs) - returns total nsecs given secs/nsecs pair | 615 | nsecs(secs, nsecs) - returns total nsecs given secs/nsecs pair |
616 | nsecs_secs(nsecs) - returns whole secs portion given nsecs | 616 | nsecs_secs(nsecs) - returns whole secs portion given nsecs |
@@ -620,4 +620,4 @@ Various utility functions for use with perf trace: | |||
620 | 620 | ||
621 | SEE ALSO | 621 | SEE ALSO |
622 | -------- | 622 | -------- |
623 | linkperf:perf-trace[1] | 623 | linkperf:perf-script[1] |
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-script.txt index 26aff6bf9e50..29ad94293cd2 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-script.txt | |||
@@ -1,71 +1,71 @@ | |||
1 | perf-trace(1) | 1 | perf-script(1) |
2 | ============= | 2 | ============== |
3 | 3 | ||
4 | NAME | 4 | NAME |
5 | ---- | 5 | ---- |
6 | perf-trace - Read perf.data (created by perf record) and display trace output | 6 | perf-script - Read perf.data (created by perf record) and display trace output |
7 | 7 | ||
8 | SYNOPSIS | 8 | SYNOPSIS |
9 | -------- | 9 | -------- |
10 | [verse] | 10 | [verse] |
11 | 'perf trace' [<options>] | 11 | 'perf script' [<options>] |
12 | 'perf trace' [<options>] record <script> [<record-options>] <command> | 12 | 'perf script' [<options>] record <script> [<record-options>] <command> |
13 | 'perf trace' [<options>] report <script> [script-args] | 13 | 'perf script' [<options>] report <script> [script-args] |
14 | 'perf trace' [<options>] <script> <required-script-args> [<record-options>] <command> | 14 | 'perf script' [<options>] <script> <required-script-args> [<record-options>] <command> |
15 | 'perf trace' [<options>] <top-script> [script-args] | 15 | 'perf script' [<options>] <top-script> [script-args] |
16 | 16 | ||
17 | DESCRIPTION | 17 | DESCRIPTION |
18 | ----------- | 18 | ----------- |
19 | This command reads the input file and displays the trace recorded. | 19 | This command reads the input file and displays the trace recorded. |
20 | 20 | ||
21 | There are several variants of perf trace: | 21 | There are several variants of perf script: |
22 | 22 | ||
23 | 'perf trace' to see a detailed trace of the workload that was | 23 | 'perf script' to see a detailed trace of the workload that was |
24 | recorded. | 24 | recorded. |
25 | 25 | ||
26 | You can also run a set of pre-canned scripts that aggregate and | 26 | You can also run a set of pre-canned scripts that aggregate and |
27 | summarize the raw trace data in various ways (the list of scripts is | 27 | summarize the raw trace data in various ways (the list of scripts is |
28 | available via 'perf trace -l'). The following variants allow you to | 28 | available via 'perf script -l'). The following variants allow you to |
29 | record and run those scripts: | 29 | record and run those scripts: |
30 | 30 | ||
31 | 'perf trace record <script> <command>' to record the events required | 31 | 'perf script record <script> <command>' to record the events required |
32 | for 'perf trace report'. <script> is the name displayed in the | 32 | for 'perf script report'. <script> is the name displayed in the |
33 | output of 'perf trace --list' i.e. the actual script name minus any | 33 | output of 'perf script --list' i.e. the actual script name minus any |
34 | language extension. If <command> is not specified, the events are | 34 | language extension. If <command> is not specified, the events are |
35 | recorded using the -a (system-wide) 'perf record' option. | 35 | recorded using the -a (system-wide) 'perf record' option. |
36 | 36 | ||
37 | 'perf trace report <script> [args]' to run and display the results | 37 | 'perf script report <script> [args]' to run and display the results |
38 | of <script>. <script> is the name displayed in the output of 'perf | 38 | of <script>. <script> is the name displayed in the output of 'perf |
39 | trace --list' i.e. the actual script name minus any language | 39 | script --list' i.e. the actual script name minus any language |
40 | extension. The perf.data output from a previous run of 'perf trace | 40 | extension. The perf.data output from a previous run of 'perf script |
41 | record <script>' is used and should be present for this command to | 41 | record <script>' is used and should be present for this command to |
42 | succeed. [args] refers to the (mainly optional) args expected by | 42 | succeed. [args] refers to the (mainly optional) args expected by |
43 | the script. | 43 | the script. |
44 | 44 | ||
45 | 'perf trace <script> <required-script-args> <command>' to both | 45 | 'perf script <script> <required-script-args> <command>' to both |
46 | record the events required for <script> and to run the <script> | 46 | record the events required for <script> and to run the <script> |
47 | using 'live-mode' i.e. without writing anything to disk. <script> | 47 | using 'live-mode' i.e. without writing anything to disk. <script> |
48 | is the name displayed in the output of 'perf trace --list' i.e. the | 48 | is the name displayed in the output of 'perf script --list' i.e. the |
49 | actual script name minus any language extension. If <command> is | 49 | actual script name minus any language extension. If <command> is |
50 | not specified, the events are recorded using the -a (system-wide) | 50 | not specified, the events are recorded using the -a (system-wide) |
51 | 'perf record' option. If <script> has any required args, they | 51 | 'perf record' option. If <script> has any required args, they |
52 | should be specified before <command>. This mode doesn't allow for | 52 | should be specified before <command>. This mode doesn't allow for |
53 | optional script args to be specified; if optional script args are | 53 | optional script args to be specified; if optional script args are |
54 | desired, they can be specified using separate 'perf trace record' | 54 | desired, they can be specified using separate 'perf script record' |
55 | and 'perf trace report' commands, with the stdout of the record step | 55 | and 'perf script report' commands, with the stdout of the record step |
56 | piped to the stdin of the report script, using the '-o -' and '-i -' | 56 | piped to the stdin of the report script, using the '-o -' and '-i -' |
57 | options of the corresponding commands. | 57 | options of the corresponding commands. |
58 | 58 | ||
59 | 'perf trace <top-script>' to both record the events required for | 59 | 'perf script <top-script>' to both record the events required for |
60 | <top-script> and to run the <top-script> using 'live-mode' | 60 | <top-script> and to run the <top-script> using 'live-mode' |
61 | i.e. without writing anything to disk. <top-script> is the name | 61 | i.e. without writing anything to disk. <top-script> is the name |
62 | displayed in the output of 'perf trace --list' i.e. the actual | 62 | displayed in the output of 'perf script --list' i.e. the actual |
63 | script name minus any language extension; a <top-script> is defined | 63 | script name minus any language extension; a <top-script> is defined |
64 | as any script name ending with the string 'top'. | 64 | as any script name ending with the string 'top'. |
65 | 65 | ||
66 | [<record-options>] can be passed to the record steps of 'perf trace | 66 | [<record-options>] can be passed to the record steps of 'perf script |
67 | record' and 'live-mode' variants; this isn't possible however for | 67 | record' and 'live-mode' variants; this isn't possible however for |
68 | <top-script> 'live-mode' or 'perf trace report' variants. | 68 | <top-script> 'live-mode' or 'perf script report' variants. |
69 | 69 | ||
70 | See the 'SEE ALSO' section for links to language-specific | 70 | See the 'SEE ALSO' section for links to language-specific |
71 | information on how to write and run your own trace scripts. | 71 | information on how to write and run your own trace scripts. |
@@ -76,7 +76,7 @@ OPTIONS | |||
76 | Any command you can specify in a shell. | 76 | Any command you can specify in a shell. |
77 | 77 | ||
78 | -D:: | 78 | -D:: |
79 | --dump-raw-trace=:: | 79 | --dump-raw-trace=:: |
80 | Display verbose dump of the trace data. | 80 | Display verbose dump of the trace data. |
81 | 81 | ||
82 | -L:: | 82 | -L:: |
@@ -95,7 +95,7 @@ OPTIONS | |||
95 | 95 | ||
96 | -g:: | 96 | -g:: |
97 | --gen-script=:: | 97 | --gen-script=:: |
98 | Generate perf-trace.[ext] starter script for given language, | 98 | Generate perf-script.[ext] starter script for given language, |
99 | using current perf.data. | 99 | using current perf.data. |
100 | 100 | ||
101 | -a:: | 101 | -a:: |
@@ -104,8 +104,15 @@ OPTIONS | |||
104 | normally don't - this option allows the latter to be run in | 104 | normally don't - this option allows the latter to be run in |
105 | system-wide mode. | 105 | system-wide mode. |
106 | 106 | ||
107 | -i:: | ||
108 | --input=:: | ||
109 | Input file name. | ||
110 | |||
111 | -d:: | ||
112 | --debug-mode:: | ||
113 | Do various checks like sample ordering and lost events. | ||
107 | 114 | ||
108 | SEE ALSO | 115 | SEE ALSO |
109 | -------- | 116 | -------- |
110 | linkperf:perf-record[1], linkperf:perf-trace-perl[1], | 117 | linkperf:perf-record[1], linkperf:perf-script-perl[1], |
111 | linkperf:perf-trace-python[1] | 118 | linkperf:perf-script-python[1] |
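
The rename is mechanical: the same builtin is reached under its new name, and the 'perf trace' call sites elsewhere in the tree (perf sched, perf lock) are updated further down to dispatch to cmd_script(). perf routes subcommands through a small name/handler table; a minimal sketch of that dispatch pattern (a simplified stand-in, not the exact perf.c sources):

    #include <stdio.h>
    #include <string.h>

    /* Simplified command table, modeled loosely on perf's cmd_struct;
     * the handler below is illustrative, not the real builtin. */
    struct cmd_struct {
        const char *cmd;
        int (*fn)(int argc, const char **argv);
    };

    static int cmd_script(int argc, const char **argv)
    {
        (void)argc; (void)argv;
        printf("running the script (formerly trace) builtin\n");
        return 0;
    }

    static struct cmd_struct commands[] = {
        { "script", cmd_script },   /* was "trace" before the rename */
    };

    int main(int argc, const char **argv)
    {
        unsigned int i;

        if (argc < 2)
            return 1;
        for (i = 0; i < sizeof(commands) / sizeof(commands[0]); i++)
            if (!strcmp(argv[1], commands[i].cmd))
                return commands[i].fn(argc - 1, argv + 1);
        fprintf(stderr, "unknown command: %s\n", argv[1]);
        return 1;
    }
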
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 4b3a2d46b437..b6da7affbbee 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt | |||
@@ -8,8 +8,8 @@ perf-stat - Run a command and gather performance counter statistics | |||
8 | SYNOPSIS | 8 | SYNOPSIS |
9 | -------- | 9 | -------- |
10 | [verse] | 10 | [verse] |
11 | 'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] <command> | 11 | 'perf stat' [-e <EVENT> | --event=EVENT] [-a] <command> |
12 | 'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] -- <command> [<options>] | 12 | 'perf stat' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>] |
13 | 13 | ||
14 | DESCRIPTION | 14 | DESCRIPTION |
15 | ----------- | 15 | ----------- |
@@ -35,24 +35,54 @@ OPTIONS | |||
35 | child tasks do not inherit counters | 35 | child tasks do not inherit counters |
36 | -p:: | 36 | -p:: |
37 | --pid=<pid>:: | 37 | --pid=<pid>:: |
38 | stat events on existing pid | 38 | stat events on existing process id |
39 | |||
40 | -t:: | ||
41 | --tid=<tid>:: | ||
42 | stat events on existing thread id | ||
43 | |||
39 | 44 | ||
40 | -a:: | 45 | -a:: |
41 | system-wide collection | 46 | --all-cpus:: |
47 | system-wide collection from all CPUs | ||
42 | 48 | ||
43 | -c:: | 49 | -c:: |
44 | scale counter values | 50 | --scale:: |
51 | scale/normalize counter values | ||
52 | |||
53 | -r:: | ||
54 | --repeat=<n>:: | ||
55 | repeat command and print average + stddev (max: 100) | ||
45 | 56 | ||
46 | -B:: | 57 | -B:: |
58 | --big-num:: | ||
47 | print large numbers with thousands' separators according to locale | 59 | print large numbers with thousands' separators according to locale |
48 | 60 | ||
49 | -C:: | 61 | -C:: |
50 | --cpu=:: | 62 | --cpu=:: |
51 | Count only on the list of cpus provided. Multiple CPUs can be provided as a | 63 | Count only on the list of CPUs provided. Multiple CPUs can be provided as a |
52 | comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. | 64 | comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. |
53 | In per-thread mode, this option is ignored. The -a option is still necessary | 65 | In per-thread mode, this option is ignored. The -a option is still necessary |
54 | to activate system-wide monitoring. Default is to count on all CPUs. | 66 | to activate system-wide monitoring. Default is to count on all CPUs. |
55 | 67 | ||
68 | -A:: | ||
69 | --no-aggr:: | ||
70 | Do not aggregate counts across all monitored CPUs in system-wide mode (-a). | ||
71 | This option is only valid in system-wide mode. | ||
72 | |||
73 | -n:: | ||
74 | --null:: | ||
75 | null run - don't start any counters | ||
76 | |||
77 | -v:: | ||
78 | --verbose:: | ||
79 | be more verbose (show counter open errors, etc) | ||
80 | |||
81 | -x SEP:: | ||
82 | --field-separator SEP:: | ||
83 | print counts using a CSV-style output to make it easy to import directly into | ||
84 | spreadsheets. Columns are separated by the string specified in SEP. | ||
85 | |||
56 | EXAMPLES | 86 | EXAMPLES |
57 | -------- | 87 | -------- |
58 | 88 | ||
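
The new -r/--repeat option prints the average plus standard deviation over n runs. A minimal sketch of the usual online accumulation behind such a report (an assumed shape, not lifted from builtin-stat.c); compile with -lm:

    #include <math.h>
    #include <stdio.h>

    /* Welford's online algorithm: accumulate counter values over
     * repeated runs and report mean +- stddev, as perf stat -r does. */
    struct stats {
        double n, mean, M2;
    };

    static void update_stats(struct stats *s, double val)
    {
        double delta;

        s->n += 1.0;
        delta = val - s->mean;
        s->mean += delta / s->n;
        s->M2 += delta * (val - s->mean);
    }

    static double stddev_stats(const struct stats *s)
    {
        return s->n > 1.0 ? sqrt(s->M2 / (s->n - 1.0)) : 0.0;
    }

    int main(void)
    {
        static const double runs[] = { 102.0, 98.5, 101.2, 99.3, 100.0 };
        struct stats s = { 0.0, 0.0, 0.0 };
        unsigned int i;

        for (i = 0; i < sizeof(runs) / sizeof(runs[0]); i++)
            update_stats(&s, runs[i]);

        printf("%f +- %f\n", s.mean, stddev_stats(&s));
        return 0;
    }
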
diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt index 1c4b5f5b7f71..2c3b462f64b0 100644 --- a/tools/perf/Documentation/perf-test.txt +++ b/tools/perf/Documentation/perf-test.txt | |||
@@ -12,7 +12,7 @@ SYNOPSIS | |||
12 | 12 | ||
13 | DESCRIPTION | 13 | DESCRIPTION |
14 | ----------- | 14 | ----------- |
15 | This command does assorted sanity tests, initially thru linked routines but | 15 | This command does assorted sanity tests, initially through linked routines but |
16 | also will look for a directory with more tests in the form of scripts. | 16 | also will look for a directory with more tests in the form of scripts. |
17 | 17 | ||
18 | OPTIONS | 18 | OPTIONS |
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 1f9687663f2a..f6eb1cdafb77 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt | |||
@@ -12,7 +12,7 @@ SYNOPSIS | |||
12 | 12 | ||
13 | DESCRIPTION | 13 | DESCRIPTION |
14 | ----------- | 14 | ----------- |
15 | This command generates and displays a performance counter profile in realtime. | 15 | This command generates and displays a performance counter profile in real time. |
16 | 16 | ||
17 | 17 | ||
18 | OPTIONS | 18 | OPTIONS |
@@ -27,8 +27,8 @@ OPTIONS | |||
27 | 27 | ||
28 | -C <cpu-list>:: | 28 | -C <cpu-list>:: |
29 | --cpu=<cpu>:: | 29 | --cpu=<cpu>:: |
30 | Monitor only on the list of cpus provided. Multiple CPUs can be provided as a | 30 | Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a |
31 | comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. | 31 | comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. |
32 | Default is to monitor all CPUS. | 32 | Default is to monitor all CPUs. |
33 | 33 | ||
34 | -d <seconds>:: | 34 | -d <seconds>:: |
@@ -50,6 +50,10 @@ Default is to monitor all CPUS. | |||
50 | --count-filter=<count>:: | 50 | --count-filter=<count>:: |
51 | Only display functions with more events than this. | 51 | Only display functions with more events than this. |
52 | 52 | ||
53 | -g:: | ||
54 | --group:: | ||
55 | Put the counters into a counter group. | ||
56 | |||
53 | -F <freq>:: | 57 | -F <freq>:: |
54 | --freq=<freq>:: | 58 | --freq=<freq>:: |
55 | Profile at this frequency. | 59 | Profile at this frequency. |
@@ -68,7 +72,11 @@ Default is to monitor all CPUS. | |||
68 | 72 | ||
69 | -p <pid>:: | 73 | -p <pid>:: |
70 | --pid=<pid>:: | 74 | --pid=<pid>:: |
71 | Profile events on existing pid. | 75 | Profile events on existing Process ID. |
76 | |||
77 | -t <tid>:: | ||
78 | --tid=<tid>:: | ||
79 | Profile events on existing thread ID. | ||
72 | 80 | ||
73 | -r <priority>:: | 81 | -r <priority>:: |
74 | --realtime=<priority>:: | 82 | --realtime=<priority>:: |
@@ -78,6 +86,18 @@ Default is to monitor all CPUS. | |||
78 | --sym-annotate=<symbol>:: | 86 | --sym-annotate=<symbol>:: |
79 | Annotate this symbol. | 87 | Annotate this symbol. |
80 | 88 | ||
89 | -K:: | ||
90 | --hide_kernel_symbols:: | ||
91 | Hide kernel symbols. | ||
92 | |||
93 | -U:: | ||
94 | --hide_user_symbols:: | ||
95 | Hide user symbols. | ||
96 | |||
97 | -D:: | ||
98 | --dump-symtab:: | ||
99 | Dump the symbol table used for profiling. | ||
100 | |||
81 | -v:: | 101 | -v:: |
82 | --verbose:: | 102 | --verbose:: |
83 | Be more verbose (show counter open errors, etc). | 103 | Be more verbose (show counter open errors, etc). |
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 8c7fc0c8f0b8..c12659d8cb26 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST | |||
@@ -7,6 +7,7 @@ include/linux/stringify.h | |||
7 | lib/rbtree.c | 7 | lib/rbtree.c |
8 | include/linux/swab.h | 8 | include/linux/swab.h |
9 | arch/*/include/asm/unistd*.h | 9 | arch/*/include/asm/unistd*.h |
10 | arch/*/lib/memcpy*.S | ||
10 | include/linux/poison.h | 11 | include/linux/poison.h |
11 | include/linux/magic.h | 12 | include/linux/magic.h |
12 | include/linux/hw_breakpoint.h | 13 | include/linux/hw_breakpoint.h |
diff --git a/tools/perf/Makefile b/tools/perf/Makefile index d1db0f676a4b..ac6692cf5508 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile | |||
@@ -185,7 +185,10 @@ ifeq ($(ARCH),i386) | |||
185 | ARCH := x86 | 185 | ARCH := x86 |
186 | endif | 186 | endif |
187 | ifeq ($(ARCH),x86_64) | 187 | ifeq ($(ARCH),x86_64) |
188 | RAW_ARCH := x86_64 | ||
188 | ARCH := x86 | 189 | ARCH := x86 |
190 | ARCH_CFLAGS := -DARCH_X86_64 | ||
191 | ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S | ||
189 | endif | 192 | endif |
190 | 193 | ||
191 | # CFLAGS and LDFLAGS are for the users to override from the command line. | 194 | # CFLAGS and LDFLAGS are for the users to override from the command line. |
@@ -375,6 +378,7 @@ LIB_H += util/include/linux/prefetch.h | |||
375 | LIB_H += util/include/linux/rbtree.h | 378 | LIB_H += util/include/linux/rbtree.h |
376 | LIB_H += util/include/linux/string.h | 379 | LIB_H += util/include/linux/string.h |
377 | LIB_H += util/include/linux/types.h | 380 | LIB_H += util/include/linux/types.h |
381 | LIB_H += util/include/linux/linkage.h | ||
378 | LIB_H += util/include/asm/asm-offsets.h | 382 | LIB_H += util/include/asm/asm-offsets.h |
379 | LIB_H += util/include/asm/bug.h | 383 | LIB_H += util/include/asm/bug.h |
380 | LIB_H += util/include/asm/byteorder.h | 384 | LIB_H += util/include/asm/byteorder.h |
@@ -383,6 +387,8 @@ LIB_H += util/include/asm/swab.h | |||
383 | LIB_H += util/include/asm/system.h | 387 | LIB_H += util/include/asm/system.h |
384 | LIB_H += util/include/asm/uaccess.h | 388 | LIB_H += util/include/asm/uaccess.h |
385 | LIB_H += util/include/dwarf-regs.h | 389 | LIB_H += util/include/dwarf-regs.h |
390 | LIB_H += util/include/asm/dwarf2.h | ||
391 | LIB_H += util/include/asm/cpufeature.h | ||
386 | LIB_H += perf.h | 392 | LIB_H += perf.h |
387 | LIB_H += util/cache.h | 393 | LIB_H += util/cache.h |
388 | LIB_H += util/callchain.h | 394 | LIB_H += util/callchain.h |
@@ -417,6 +423,7 @@ LIB_H += util/probe-finder.h | |||
417 | LIB_H += util/probe-event.h | 423 | LIB_H += util/probe-event.h |
418 | LIB_H += util/pstack.h | 424 | LIB_H += util/pstack.h |
419 | LIB_H += util/cpumap.h | 425 | LIB_H += util/cpumap.h |
426 | LIB_H += $(ARCH_INCLUDE) | ||
420 | 427 | ||
421 | LIB_OBJS += $(OUTPUT)util/abspath.o | 428 | LIB_OBJS += $(OUTPUT)util/abspath.o |
422 | LIB_OBJS += $(OUTPUT)util/alias.o | 429 | LIB_OBJS += $(OUTPUT)util/alias.o |
@@ -472,6 +479,9 @@ BUILTIN_OBJS += $(OUTPUT)builtin-bench.o | |||
472 | # Benchmark modules | 479 | # Benchmark modules |
473 | BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o | 480 | BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o |
474 | BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o | 481 | BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o |
482 | ifeq ($(RAW_ARCH),x86_64) | ||
483 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o | ||
484 | endif | ||
475 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o | 485 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o |
476 | 486 | ||
477 | BUILTIN_OBJS += $(OUTPUT)builtin-diff.o | 487 | BUILTIN_OBJS += $(OUTPUT)builtin-diff.o |
@@ -485,7 +495,7 @@ BUILTIN_OBJS += $(OUTPUT)builtin-report.o | |||
485 | BUILTIN_OBJS += $(OUTPUT)builtin-stat.o | 495 | BUILTIN_OBJS += $(OUTPUT)builtin-stat.o |
486 | BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o | 496 | BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o |
487 | BUILTIN_OBJS += $(OUTPUT)builtin-top.o | 497 | BUILTIN_OBJS += $(OUTPUT)builtin-top.o |
488 | BUILTIN_OBJS += $(OUTPUT)builtin-trace.o | 498 | BUILTIN_OBJS += $(OUTPUT)builtin-script.o |
489 | BUILTIN_OBJS += $(OUTPUT)builtin-probe.o | 499 | BUILTIN_OBJS += $(OUTPUT)builtin-probe.o |
490 | BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o | 500 | BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o |
491 | BUILTIN_OBJS += $(OUTPUT)builtin-lock.o | 501 | BUILTIN_OBJS += $(OUTPUT)builtin-lock.o |
@@ -507,7 +517,7 @@ PERFLIBS = $(LIB_FILE) | |||
507 | -include config.mak | 517 | -include config.mak |
508 | 518 | ||
509 | ifndef NO_DWARF | 519 | ifndef NO_DWARF |
510 | FLAGS_DWARF=$(ALL_CFLAGS) -I/usr/include/elfutils -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS) | 520 | FLAGS_DWARF=$(ALL_CFLAGS) -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS) |
511 | ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF)),y) | 521 | ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF)),y) |
512 | msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev); | 522 | msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev); |
513 | NO_DWARF := 1 | 523 | NO_DWARF := 1 |
@@ -554,7 +564,7 @@ ifndef NO_DWARF | |||
554 | ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined) | 564 | ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined) |
555 | msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled); | 565 | msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled); |
556 | else | 566 | else |
557 | BASIC_CFLAGS += -I/usr/include/elfutils -DDWARF_SUPPORT | 567 | BASIC_CFLAGS += -DDWARF_SUPPORT |
558 | EXTLIBS += -lelf -ldw | 568 | EXTLIBS += -lelf -ldw |
559 | LIB_OBJS += $(OUTPUT)util/probe-finder.o | 569 | LIB_OBJS += $(OUTPUT)util/probe-finder.o |
560 | endif # PERF_HAVE_DWARF_REGS | 570 | endif # PERF_HAVE_DWARF_REGS |
@@ -891,13 +901,14 @@ prefix_SQ = $(subst ','\'',$(prefix)) | |||
891 | SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH)) | 901 | SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH)) |
892 | PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH)) | 902 | PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH)) |
893 | 903 | ||
894 | LIBS = $(PERFLIBS) $(EXTLIBS) | 904 | LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive $(EXTLIBS) |
895 | 905 | ||
896 | BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \ | 906 | BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \ |
897 | $(COMPAT_CFLAGS) | 907 | $(COMPAT_CFLAGS) |
898 | LIB_OBJS += $(COMPAT_OBJS) | 908 | LIB_OBJS += $(COMPAT_OBJS) |
899 | 909 | ||
900 | ALL_CFLAGS += $(BASIC_CFLAGS) | 910 | ALL_CFLAGS += $(BASIC_CFLAGS) |
911 | ALL_CFLAGS += $(ARCH_CFLAGS) | ||
901 | ALL_LDFLAGS += $(BASIC_LDFLAGS) | 912 | ALL_LDFLAGS += $(BASIC_LDFLAGS) |
902 | 913 | ||
903 | export TAR INSTALL DESTDIR SHELL_PATH | 914 | export TAR INSTALL DESTDIR SHELL_PATH |
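
The LIBS change wraps the perf static library in -Wl,--whole-archive: a normal archive link only pulls in members that satisfy an outstanding undefined symbol, so archive members whose symbols are reached only indirectly at runtime (presumably the motivation here, e.g. for the scripting engines) would be silently dropped. A tiny C demonstration of the failure mode being avoided; the constructor stands in for such an archive member:

    #include <stdio.h>

    /* Stand-in for an archive member whose only entry point is a
     * load-time constructor: nothing references this symbol, so a
     * plain static-archive link would skip the member entirely and
     * the message would never print; --whole-archive keeps it. */
    __attribute__((constructor))
    static void register_me(void)
    {
        printf("registered before main()\n");
    }

    int main(void)
    {
        printf("main()\n");
        return 0;
    }
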
diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h new file mode 100644 index 000000000000..a72e36cb5394 --- /dev/null +++ b/tools/perf/bench/mem-memcpy-arch.h | |||
@@ -0,0 +1,12 @@ | |||
1 | |||
2 | #ifdef ARCH_X86_64 | ||
3 | |||
4 | #define MEMCPY_FN(fn, name, desc) \ | ||
5 | extern void *fn(void *, const void *, size_t); | ||
6 | |||
7 | #include "mem-memcpy-x86-64-asm-def.h" | ||
8 | |||
9 | #undef MEMCPY_FN | ||
10 | |||
11 | #endif | ||
12 | |||
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h new file mode 100644 index 000000000000..d588b87696fc --- /dev/null +++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h | |||
@@ -0,0 +1,4 @@ | |||
1 | |||
2 | MEMCPY_FN(__memcpy, | ||
3 | "x86-64-unrolled", | ||
4 | "unrolled memcpy() in arch/x86/lib/memcpy_64.S") | ||
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S new file mode 100644 index 000000000000..a57b66e853c2 --- /dev/null +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S | |||
@@ -0,0 +1,2 @@ | |||
1 | |||
2 | #include "../../../arch/x86/lib/memcpy_64.S" | ||
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index 38dae7465142..db82021f4b91 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include "../util/parse-options.h" | 12 | #include "../util/parse-options.h" |
13 | #include "../util/header.h" | 13 | #include "../util/header.h" |
14 | #include "bench.h" | 14 | #include "bench.h" |
15 | #include "mem-memcpy-arch.h" | ||
15 | 16 | ||
16 | #include <stdio.h> | 17 | #include <stdio.h> |
17 | #include <stdlib.h> | 18 | #include <stdlib.h> |
@@ -23,8 +24,10 @@ | |||
23 | 24 | ||
24 | static const char *length_str = "1MB"; | 25 | static const char *length_str = "1MB"; |
25 | static const char *routine = "default"; | 26 | static const char *routine = "default"; |
26 | static bool use_clock = false; | 27 | static bool use_clock; |
27 | static int clock_fd; | 28 | static int clock_fd; |
29 | static bool only_prefault; | ||
30 | static bool no_prefault; | ||
28 | 31 | ||
29 | static const struct option options[] = { | 32 | static const struct option options[] = { |
30 | OPT_STRING('l', "length", &length_str, "1MB", | 33 | OPT_STRING('l', "length", &length_str, "1MB", |
@@ -34,19 +37,33 @@ static const struct option options[] = { | |||
34 | "Specify routine to copy"), | 37 | "Specify routine to copy"), |
35 | OPT_BOOLEAN('c', "clock", &use_clock, | 38 | OPT_BOOLEAN('c', "clock", &use_clock, |
36 | "Use CPU clock for measuring"), | 39 | "Use CPU clock for measuring"), |
40 | OPT_BOOLEAN('o', "only-prefault", &only_prefault, | ||
41 | "Show only the result with page faults before memcpy()"), | ||
42 | OPT_BOOLEAN('n', "no-prefault", &no_prefault, | ||
43 | "Show only the result without page faults before memcpy()"), | ||
37 | OPT_END() | 44 | OPT_END() |
38 | }; | 45 | }; |
39 | 46 | ||
47 | typedef void *(*memcpy_t)(void *, const void *, size_t); | ||
48 | |||
40 | struct routine { | 49 | struct routine { |
41 | const char *name; | 50 | const char *name; |
42 | const char *desc; | 51 | const char *desc; |
43 | void * (*fn)(void *dst, const void *src, size_t len); | 52 | memcpy_t fn; |
44 | }; | 53 | }; |
45 | 54 | ||
46 | struct routine routines[] = { | 55 | struct routine routines[] = { |
47 | { "default", | 56 | { "default", |
48 | "Default memcpy() provided by glibc", | 57 | "Default memcpy() provided by glibc", |
49 | memcpy }, | 58 | memcpy }, |
59 | #ifdef ARCH_X86_64 | ||
60 | |||
61 | #define MEMCPY_FN(fn, name, desc) { name, desc, fn }, | ||
62 | #include "mem-memcpy-x86-64-asm-def.h" | ||
63 | #undef MEMCPY_FN | ||
64 | |||
65 | #endif | ||
66 | |||
50 | { NULL, | 67 | { NULL, |
51 | NULL, | 68 | NULL, |
52 | NULL } | 69 | NULL } |
@@ -89,29 +106,98 @@ static double timeval2double(struct timeval *ts) | |||
89 | (double)ts->tv_usec / (double)1000000; | 106 | (double)ts->tv_usec / (double)1000000; |
90 | } | 107 | } |
91 | 108 | ||
109 | static void alloc_mem(void **dst, void **src, size_t length) | ||
110 | { | ||
111 | *dst = zalloc(length); | ||
112 | if (!*dst) | ||
113 | die("memory allocation failed - maybe length is too large?\n"); | ||
114 | |||
115 | *src = zalloc(length); | ||
116 | if (!*src) | ||
117 | die("memory allocation failed - maybe length is too large?\n"); | ||
118 | } | ||
119 | |||
120 | static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault) | ||
121 | { | ||
122 | u64 clock_start = 0ULL, clock_end = 0ULL; | ||
123 | void *src = NULL, *dst = NULL; | ||
124 | |||
125 | alloc_mem(&dst, &src, len); | ||
126 | |||
127 | if (prefault) | ||
128 | fn(dst, src, len); | ||
129 | |||
130 | clock_start = get_clock(); | ||
131 | fn(dst, src, len); | ||
132 | clock_end = get_clock(); | ||
133 | |||
134 | free(src); | ||
135 | free(dst); | ||
136 | return clock_end - clock_start; | ||
137 | } | ||
138 | |||
139 | static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) | ||
140 | { | ||
141 | struct timeval tv_start, tv_end, tv_diff; | ||
142 | void *src = NULL, *dst = NULL; | ||
143 | |||
144 | alloc_mem(&dst, &src, len); | ||
145 | |||
146 | if (prefault) | ||
147 | fn(dst, src, len); | ||
148 | |||
149 | BUG_ON(gettimeofday(&tv_start, NULL)); | ||
150 | fn(dst, src, len); | ||
151 | BUG_ON(gettimeofday(&tv_end, NULL)); | ||
152 | |||
153 | timersub(&tv_end, &tv_start, &tv_diff); | ||
154 | |||
155 | free(src); | ||
156 | free(dst); | ||
157 | return (double)((double)len / timeval2double(&tv_diff)); | ||
158 | } | ||
159 | |||
160 | #define pf (no_prefault ? 0 : 1) | ||
161 | |||
162 | #define print_bps(x) do { \ | ||
163 | if (x < K) \ | ||
164 | printf(" %14lf B/Sec", x); \ | ||
165 | else if (x < K * K) \ | ||
166 | printf(" %14lfd KB/Sec", x / K); \ | ||
167 | else if (x < K * K * K) \ | ||
168 | printf(" %14lf MB/Sec", x / K / K); \ | ||
169 | else \ | ||
170 | printf(" %14lf GB/Sec", x / K / K / K); \ | ||
171 | } while (0) | ||
172 | |||
92 | int bench_mem_memcpy(int argc, const char **argv, | 173 | int bench_mem_memcpy(int argc, const char **argv, |
93 | const char *prefix __used) | 174 | const char *prefix __used) |
94 | { | 175 | { |
95 | int i; | 176 | int i; |
96 | void *dst, *src; | 177 | size_t len; |
97 | size_t length; | 178 | double result_bps[2]; |
98 | double bps = 0.0; | 179 | u64 result_clock[2]; |
99 | struct timeval tv_start, tv_end, tv_diff; | ||
100 | u64 clock_start, clock_end, clock_diff; | ||
101 | 180 | ||
102 | clock_start = clock_end = clock_diff = 0ULL; | ||
103 | argc = parse_options(argc, argv, options, | 181 | argc = parse_options(argc, argv, options, |
104 | bench_mem_memcpy_usage, 0); | 182 | bench_mem_memcpy_usage, 0); |
105 | 183 | ||
106 | tv_diff.tv_sec = 0; | 184 | if (use_clock) |
107 | tv_diff.tv_usec = 0; | 185 | init_clock(); |
108 | length = (size_t)perf_atoll((char *)length_str); | 186 | |
187 | len = (size_t)perf_atoll((char *)length_str); | ||
109 | 188 | ||
110 | if ((s64)length <= 0) { | 189 | result_clock[0] = result_clock[1] = 0ULL; |
190 | result_bps[0] = result_bps[1] = 0.0; | ||
191 | |||
192 | if ((s64)len <= 0) { | ||
111 | fprintf(stderr, "Invalid length:%s\n", length_str); | 193 | fprintf(stderr, "Invalid length:%s\n", length_str); |
112 | return 1; | 194 | return 1; |
113 | } | 195 | } |
114 | 196 | ||
197 | /* specifying both is the same as specifying neither */ | ||
198 | if (only_prefault && no_prefault) | ||
199 | only_prefault = no_prefault = false; | ||
200 | |||
115 | for (i = 0; routines[i].name; i++) { | 201 | for (i = 0; routines[i].name; i++) { |
116 | if (!strcmp(routines[i].name, routine)) | 202 | if (!strcmp(routines[i].name, routine)) |
117 | break; | 203 | break; |
@@ -126,61 +212,80 @@ int bench_mem_memcpy(int argc, const char **argv, | |||
126 | return 1; | 212 | return 1; |
127 | } | 213 | } |
128 | 214 | ||
129 | dst = zalloc(length); | 215 | if (bench_format == BENCH_FORMAT_DEFAULT) |
130 | if (!dst) | 216 | printf("# Copying %s Bytes ...\n\n", length_str); |
131 | die("memory allocation failed - maybe length is too large?\n"); | ||
132 | |||
133 | src = zalloc(length); | ||
134 | if (!src) | ||
135 | die("memory allocation failed - maybe length is too large?\n"); | ||
136 | |||
137 | if (bench_format == BENCH_FORMAT_DEFAULT) { | ||
138 | printf("# Copying %s Bytes from %p to %p ...\n\n", | ||
139 | length_str, src, dst); | ||
140 | } | ||
141 | |||
142 | if (use_clock) { | ||
143 | init_clock(); | ||
144 | clock_start = get_clock(); | ||
145 | } else { | ||
146 | BUG_ON(gettimeofday(&tv_start, NULL)); | ||
147 | } | ||
148 | |||
149 | routines[i].fn(dst, src, length); | ||
150 | 217 | ||
151 | if (use_clock) { | 218 | if (!only_prefault && !no_prefault) { |
152 | clock_end = get_clock(); | 219 | /* show both of results */ |
153 | clock_diff = clock_end - clock_start; | 220 | if (use_clock) { |
221 | result_clock[0] = | ||
222 | do_memcpy_clock(routines[i].fn, len, false); | ||
223 | result_clock[1] = | ||
224 | do_memcpy_clock(routines[i].fn, len, true); | ||
225 | } else { | ||
226 | result_bps[0] = | ||
227 | do_memcpy_gettimeofday(routines[i].fn, | ||
228 | len, false); | ||
229 | result_bps[1] = | ||
230 | do_memcpy_gettimeofday(routines[i].fn, | ||
231 | len, true); | ||
232 | } | ||
154 | } else { | 233 | } else { |
155 | BUG_ON(gettimeofday(&tv_end, NULL)); | 234 | if (use_clock) { |
156 | timersub(&tv_end, &tv_start, &tv_diff); | 235 | result_clock[pf] = |
157 | bps = (double)((double)length / timeval2double(&tv_diff)); | 236 | do_memcpy_clock(routines[i].fn, |
237 | len, only_prefault); | ||
238 | } else { | ||
239 | result_bps[pf] = | ||
240 | do_memcpy_gettimeofday(routines[i].fn, | ||
241 | len, only_prefault); | ||
242 | } | ||
158 | } | 243 | } |
159 | 244 | ||
160 | switch (bench_format) { | 245 | switch (bench_format) { |
161 | case BENCH_FORMAT_DEFAULT: | 246 | case BENCH_FORMAT_DEFAULT: |
162 | if (use_clock) { | 247 | if (!only_prefault && !no_prefault) { |
163 | printf(" %14lf Clock/Byte\n", | 248 | if (use_clock) { |
164 | (double)clock_diff / (double)length); | 249 | printf(" %14lf Clock/Byte\n", |
165 | } else { | 250 | (double)result_clock[0] |
166 | if (bps < K) | 251 | / (double)len); |
167 | printf(" %14lf B/Sec\n", bps); | 252 | printf(" %14lf Clock/Byte (with prefault)\n", |
168 | else if (bps < K * K) | 253 | (double)result_clock[1] |
169 | printf(" %14lfd KB/Sec\n", bps / 1024); | 254 | / (double)len); |
170 | else if (bps < K * K * K) | 255 | } else { |
171 | printf(" %14lf MB/Sec\n", bps / 1024 / 1024); | 256 | print_bps(result_bps[0]); |
172 | else { | 257 | printf("\n"); |
173 | printf(" %14lf GB/Sec\n", | 258 | print_bps(result_bps[1]); |
174 | bps / 1024 / 1024 / 1024); | 259 | printf(" (with prefault)\n"); |
175 | } | 260 | } |
261 | } else { | ||
262 | if (use_clock) { | ||
263 | printf(" %14lf Clock/Byte", | ||
264 | (double)result_clock[pf] | ||
265 | / (double)len); | ||
266 | } else | ||
267 | print_bps(result_bps[pf]); | ||
268 | |||
269 | printf("%s\n", only_prefault ? " (with prefault)" : ""); | ||
176 | } | 270 | } |
177 | break; | 271 | break; |
178 | case BENCH_FORMAT_SIMPLE: | 272 | case BENCH_FORMAT_SIMPLE: |
179 | if (use_clock) { | 273 | if (!only_prefault && !no_prefault) { |
180 | printf("%14lf\n", | 274 | if (use_clock) { |
181 | (double)clock_diff / (double)length); | 275 | printf("%lf %lf\n", |
182 | } else | 276 | (double)result_clock[0] / (double)len, |
183 | printf("%lf\n", bps); | 277 | (double)result_clock[1] / (double)len); |
278 | } else { | ||
279 | printf("%lf %lf\n", | ||
280 | result_bps[0], result_bps[1]); | ||
281 | } | ||
282 | } else { | ||
283 | if (use_clock) { | ||
284 | printf("%lf\n", (double)result_clock[pf] | ||
285 | / (double)len); | ||
286 | } else | ||
287 | printf("%lf\n", result_bps[pf]); | ||
288 | } | ||
184 | break; | 289 | break; |
185 | default: | 290 | default: |
186 | /* reaching this means there's some disaster: */ | 291 | /* reaching this means there's some disaster: */ |
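
The prefault split exists because copying into freshly zalloc()'d buffers charges the first-touch page faults to memcpy(); one warm-up copy before the timed pass removes them, and reporting both numbers exposes each cost. A stand-alone sketch of the effect (buffer size and timing scheme are illustrative, not the bench code above):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/time.h>

    /* Time one memcpy() with and without prefaulting the buffers. */
    static double time_copy(size_t len, int prefault)
    {
        struct timeval a, b;
        void *dst = calloc(1, len), *src = calloc(1, len);

        if (!dst || !src)
            exit(1);
        if (prefault)                   /* warm-up copy populates the pages */
            memcpy(dst, src, len);

        gettimeofday(&a, NULL);
        memcpy(dst, src, len);
        gettimeofday(&b, NULL);

        free(dst);
        free(src);
        return (b.tv_sec - a.tv_sec) + (b.tv_usec - a.tv_usec) / 1e6;
    }

    int main(void)
    {
        size_t len = 16 * 1024 * 1024;

        printf("cold:     %f s\n", time_copy(len, 0));
        printf("prefault: %f s\n", time_copy(len, 1));
        return 0;
    }
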
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 6d5604d8df95..569a2761b90a 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c | |||
@@ -58,12 +58,12 @@ static int hists__add_entry(struct hists *self, struct addr_location *al) | |||
58 | return hist_entry__inc_addr_samples(he, al->addr); | 58 | return hist_entry__inc_addr_samples(he, al->addr); |
59 | } | 59 | } |
60 | 60 | ||
61 | static int process_sample_event(event_t *event, struct perf_session *session) | 61 | static int process_sample_event(event_t *event, struct sample_data *sample, |
62 | struct perf_session *session) | ||
62 | { | 63 | { |
63 | struct addr_location al; | 64 | struct addr_location al; |
64 | struct sample_data data; | ||
65 | 65 | ||
66 | if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) { | 66 | if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) { |
67 | pr_warning("problem processing %d event, skipping it.\n", | 67 | pr_warning("problem processing %d event, skipping it.\n", |
68 | event->header.type); | 68 | event->header.type); |
69 | return -1; | 69 | return -1; |
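
This hunk shows the theme of the series: the process-event callbacks across the builtins change from (event, session) to (event, sample, session). The session layer parses each sample once and every handler receives the parsed fields; previously builtin-annotate.c passed a never-filled sample_data down, and kmem, lock and sched each re-ran event__parse_sample() themselves. A sketch of the parse-once dispatch shape, with simplified stand-in types:

    #include <stdio.h>

    /* Simplified stand-ins for perf's event_t / sample_data / session;
     * only the parse-once-then-dispatch shape is the point. */
    struct sample_data { unsigned long long ip, period; int pid, tid, cpu; };
    struct event { int type; unsigned char payload[64]; };
    struct session;

    typedef int (*event_op)(struct event *ev, struct sample_data *sample,
                            struct session *s);

    static void parse_sample(struct event *ev, struct sample_data *sample)
    {
        /* In perf this is event__parse_sample(), driven by sample_type. */
        sample->period = 1;
        sample->cpu = -1;
        (void)ev;
    }

    static int dispatch(struct event *ev, struct session *s, event_op sample_op)
    {
        struct sample_data sample = { 0 };

        parse_sample(ev, &sample);      /* parsed once, shared by handlers */
        return sample_op(ev, &sample, s);
    }

    static int my_sample_handler(struct event *ev, struct sample_data *sample,
                                 struct session *s)
    {
        (void)ev; (void)s;
        printf("sample: period=%llu cpu=%d\n", sample->period, sample->cpu);
        return 0;
    }

    int main(void)
    {
        struct event ev = { 0, { 0 } };

        return dispatch(&ev, NULL, my_sample_handler);
    }
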
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index fca1d4402910..5e1a043aae03 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c | |||
@@ -30,12 +30,13 @@ static int hists__add_entry(struct hists *self, | |||
30 | return -ENOMEM; | 30 | return -ENOMEM; |
31 | } | 31 | } |
32 | 32 | ||
33 | static int diff__process_sample_event(event_t *event, struct perf_session *session) | 33 | static int diff__process_sample_event(event_t *event, |
34 | struct sample_data *sample, | ||
35 | struct perf_session *session) | ||
34 | { | 36 | { |
35 | struct addr_location al; | 37 | struct addr_location al; |
36 | struct sample_data data = { .period = 1, }; | ||
37 | 38 | ||
38 | if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) { | 39 | if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) { |
39 | pr_warning("problem processing %d event, skipping it.\n", | 40 | pr_warning("problem processing %d event, skipping it.\n", |
40 | event->header.type); | 41 | event->header.type); |
41 | return -1; | 42 | return -1; |
@@ -44,12 +45,12 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi | |||
44 | if (al.filtered || al.sym == NULL) | 45 | if (al.filtered || al.sym == NULL) |
45 | return 0; | 46 | return 0; |
46 | 47 | ||
47 | if (hists__add_entry(&session->hists, &al, data.period)) { | 48 | if (hists__add_entry(&session->hists, &al, sample->period)) { |
48 | pr_warning("problem incrementing symbol period, skipping event\n"); | 49 | pr_warning("problem incrementing symbol period, skipping event\n"); |
49 | return -1; | 50 | return -1; |
50 | } | 51 | } |
51 | 52 | ||
52 | session->hists.stats.total_period += data.period; | 53 | session->hists.stats.total_period += sample->period; |
53 | return 0; | 54 | return 0; |
54 | } | 55 | } |
55 | 56 | ||
@@ -173,7 +174,7 @@ static const char * const diff_usage[] = { | |||
173 | static const struct option options[] = { | 174 | static const struct option options[] = { |
174 | OPT_INCR('v', "verbose", &verbose, | 175 | OPT_INCR('v', "verbose", &verbose, |
175 | "be more verbose (show symbol address, etc)"), | 176 | "be more verbose (show symbol address, etc)"), |
176 | OPT_BOOLEAN('m', "displacement", &show_displacement, | 177 | OPT_BOOLEAN('M', "displacement", &show_displacement, |
177 | "Show position displacement relative to baseline"), | 178 | "Show position displacement relative to baseline"), |
178 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, | 179 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, |
179 | "dump raw trace in ASCII"), | 180 | "dump raw trace in ASCII"), |
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 8e3e47b064ce..4b66b8579410 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c | |||
@@ -16,8 +16,8 @@ | |||
16 | static char const *input_name = "-"; | 16 | static char const *input_name = "-"; |
17 | static bool inject_build_ids; | 17 | static bool inject_build_ids; |
18 | 18 | ||
19 | static int event__repipe(event_t *event __used, | 19 | static int event__repipe_synth(event_t *event, |
20 | struct perf_session *session __used) | 20 | struct perf_session *session __used) |
21 | { | 21 | { |
22 | uint32_t size; | 22 | uint32_t size; |
23 | void *buf = event; | 23 | void *buf = event; |
@@ -36,22 +36,30 @@ static int event__repipe(event_t *event __used, | |||
36 | return 0; | 36 | return 0; |
37 | } | 37 | } |
38 | 38 | ||
39 | static int event__repipe_mmap(event_t *self, struct perf_session *session) | 39 | static int event__repipe(event_t *event, struct sample_data *sample __used, |
40 | struct perf_session *session) | ||
41 | { | ||
42 | return event__repipe_synth(event, session); | ||
43 | } | ||
44 | |||
45 | static int event__repipe_mmap(event_t *self, struct sample_data *sample, | ||
46 | struct perf_session *session) | ||
40 | { | 47 | { |
41 | int err; | 48 | int err; |
42 | 49 | ||
43 | err = event__process_mmap(self, session); | 50 | err = event__process_mmap(self, sample, session); |
44 | event__repipe(self, session); | 51 | event__repipe(self, sample, session); |
45 | 52 | ||
46 | return err; | 53 | return err; |
47 | } | 54 | } |
48 | 55 | ||
49 | static int event__repipe_task(event_t *self, struct perf_session *session) | 56 | static int event__repipe_task(event_t *self, struct sample_data *sample, |
57 | struct perf_session *session) | ||
50 | { | 58 | { |
51 | int err; | 59 | int err; |
52 | 60 | ||
53 | err = event__process_task(self, session); | 61 | err = event__process_task(self, sample, session); |
54 | event__repipe(self, session); | 62 | event__repipe(self, sample, session); |
55 | 63 | ||
56 | return err; | 64 | return err; |
57 | } | 65 | } |
@@ -61,7 +69,7 @@ static int event__repipe_tracing_data(event_t *self, | |||
61 | { | 69 | { |
62 | int err; | 70 | int err; |
63 | 71 | ||
64 | event__repipe(self, session); | 72 | event__repipe_synth(self, session); |
65 | err = event__process_tracing_data(self, session); | 73 | err = event__process_tracing_data(self, session); |
66 | 74 | ||
67 | return err; | 75 | return err; |
@@ -111,7 +119,8 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session) | |||
111 | return 0; | 119 | return 0; |
112 | } | 120 | } |
113 | 121 | ||
114 | static int event__inject_buildid(event_t *event, struct perf_session *session) | 122 | static int event__inject_buildid(event_t *event, struct sample_data *sample, |
123 | struct perf_session *session) | ||
115 | { | 124 | { |
116 | struct addr_location al; | 125 | struct addr_location al; |
117 | struct thread *thread; | 126 | struct thread *thread; |
@@ -146,7 +155,7 @@ static int event__inject_buildid(event_t *event, struct perf_session *session) | |||
146 | } | 155 | } |
147 | 156 | ||
148 | repipe: | 157 | repipe: |
149 | event__repipe(event, session); | 158 | event__repipe(event, sample, session); |
150 | return 0; | 159 | return 0; |
151 | } | 160 | } |
152 | 161 | ||
@@ -160,10 +169,10 @@ struct perf_event_ops inject_ops = { | |||
160 | .read = event__repipe, | 169 | .read = event__repipe, |
161 | .throttle = event__repipe, | 170 | .throttle = event__repipe, |
162 | .unthrottle = event__repipe, | 171 | .unthrottle = event__repipe, |
163 | .attr = event__repipe, | 172 | .attr = event__repipe_synth, |
164 | .event_type = event__repipe, | 173 | .event_type = event__repipe_synth, |
165 | .tracing_data = event__repipe, | 174 | .tracing_data = event__repipe_synth, |
166 | .build_id = event__repipe, | 175 | .build_id = event__repipe_synth, |
167 | }; | 176 | }; |
168 | 177 | ||
169 | extern volatile int session_done; | 178 | extern volatile int session_done; |
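
perf inject repipes everything it reads back to stdout, injecting build-ids on the way. Under the new signature only sample-bearing callbacks take a sample_data; synthesized header records (attr, event_type, tracing_data, build_id) keep the bare two-argument form, hence the split into event__repipe_synth() plus a thin event__repipe() adapter. The wrapper-over-core shape in miniature (types simplified, not the perf sources):

    #include <stdio.h>
    #include <unistd.h>

    struct sample_data;
    struct session;
    struct event { unsigned int type, size; };

    /* Core: write the raw event bytes back out (binary, as inject
     * does in pipe mode). */
    static int repipe_synth(struct event *ev, struct session *s)
    {
        (void)s;
        return write(STDOUT_FILENO, ev, ev->size) == (ssize_t)ev->size ? 0 : -1;
    }

    /* Adapter matching the new sample-bearing callback signature;
     * the sample is unused because repiping is byte-for-byte. */
    static int repipe(struct event *ev, struct sample_data *sample,
                      struct session *s)
    {
        (void)sample;
        return repipe_synth(ev, s);
    }

    int main(void)
    {
        struct event ev = { 1, sizeof(struct event) };

        return repipe(&ev, NULL, NULL);
    }
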
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 31f60a2535e0..c9620ff6496f 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c | |||
@@ -304,22 +304,11 @@ process_raw_event(event_t *raw_event __used, void *data, | |||
304 | } | 304 | } |
305 | } | 305 | } |
306 | 306 | ||
307 | static int process_sample_event(event_t *event, struct perf_session *session) | 307 | static int process_sample_event(event_t *event, struct sample_data *sample, |
308 | struct perf_session *session) | ||
308 | { | 309 | { |
309 | struct sample_data data; | 310 | struct thread *thread = perf_session__findnew(session, event->ip.pid); |
310 | struct thread *thread; | ||
311 | 311 | ||
312 | memset(&data, 0, sizeof(data)); | ||
313 | data.time = -1; | ||
314 | data.cpu = -1; | ||
315 | data.period = 1; | ||
316 | |||
317 | event__parse_sample(event, session->sample_type, &data); | ||
318 | |||
319 | dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc, | ||
320 | data.pid, data.tid, data.ip, data.period); | ||
321 | |||
322 | thread = perf_session__findnew(session, event->ip.pid); | ||
323 | if (thread == NULL) { | 312 | if (thread == NULL) { |
324 | pr_debug("problem processing %d event, skipping it.\n", | 313 | pr_debug("problem processing %d event, skipping it.\n", |
325 | event->header.type); | 314 | event->header.type); |
@@ -328,8 +317,8 @@ static int process_sample_event(event_t *event, struct perf_session *session) | |||
328 | 317 | ||
329 | dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); | 318 | dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); |
330 | 319 | ||
331 | process_raw_event(event, data.raw_data, data.cpu, | 320 | process_raw_event(event, sample->raw_data, sample->cpu, |
332 | data.time, thread); | 321 | sample->time, thread); |
333 | 322 | ||
334 | return 0; | 323 | return 0; |
335 | } | 324 | } |
@@ -747,6 +736,9 @@ static int __cmd_record(int argc, const char **argv) | |||
747 | rec_argc = ARRAY_SIZE(record_args) + argc - 1; | 736 | rec_argc = ARRAY_SIZE(record_args) + argc - 1; |
748 | rec_argv = calloc(rec_argc + 1, sizeof(char *)); | 737 | rec_argv = calloc(rec_argc + 1, sizeof(char *)); |
749 | 738 | ||
739 | if (rec_argv == NULL) | ||
740 | return -ENOMEM; | ||
741 | |||
750 | for (i = 0; i < ARRAY_SIZE(record_args); i++) | 742 | for (i = 0; i < ARRAY_SIZE(record_args); i++) |
751 | rec_argv[i] = strdup(record_args[i]); | 743 | rec_argv[i] = strdup(record_args[i]); |
752 | 744 | ||
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 821c1586a22b..b41b4492b1cc 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c | |||
@@ -834,22 +834,18 @@ static void dump_info(void) | |||
834 | die("Unknown type of information\n"); | 834 | die("Unknown type of information\n"); |
835 | } | 835 | } |
836 | 836 | ||
837 | static int process_sample_event(event_t *self, struct perf_session *s) | 837 | static int process_sample_event(event_t *self, struct sample_data *sample, |
838 | struct perf_session *s) | ||
838 | { | 839 | { |
839 | struct sample_data data; | 840 | struct thread *thread = perf_session__findnew(s, sample->tid); |
840 | struct thread *thread; | ||
841 | 841 | ||
842 | bzero(&data, sizeof(data)); | ||
843 | event__parse_sample(self, s->sample_type, &data); | ||
844 | |||
845 | thread = perf_session__findnew(s, data.tid); | ||
846 | if (thread == NULL) { | 842 | if (thread == NULL) { |
847 | pr_debug("problem processing %d event, skipping it.\n", | 843 | pr_debug("problem processing %d event, skipping it.\n", |
848 | self->header.type); | 844 | self->header.type); |
849 | return -1; | 845 | return -1; |
850 | } | 846 | } |
851 | 847 | ||
852 | process_raw_event(data.raw_data, data.cpu, data.time, thread); | 848 | process_raw_event(sample->raw_data, sample->cpu, sample->time, thread); |
853 | 849 | ||
854 | return 0; | 850 | return 0; |
855 | } | 851 | } |
@@ -947,6 +943,9 @@ static int __cmd_record(int argc, const char **argv) | |||
947 | rec_argc = ARRAY_SIZE(record_args) + argc - 1; | 943 | rec_argc = ARRAY_SIZE(record_args) + argc - 1; |
948 | rec_argv = calloc(rec_argc + 1, sizeof(char *)); | 944 | rec_argv = calloc(rec_argc + 1, sizeof(char *)); |
949 | 945 | ||
946 | if (rec_argv == NULL) | ||
947 | return -ENOMEM; | ||
948 | |||
950 | for (i = 0; i < ARRAY_SIZE(record_args); i++) | 949 | for (i = 0; i < ARRAY_SIZE(record_args); i++) |
951 | rec_argv[i] = strdup(record_args[i]); | 950 | rec_argv[i] = strdup(record_args[i]); |
952 | 951 | ||
@@ -982,9 +981,9 @@ int cmd_lock(int argc, const char **argv, const char *prefix __used) | |||
982 | usage_with_options(report_usage, report_options); | 981 | usage_with_options(report_usage, report_options); |
983 | } | 982 | } |
984 | __cmd_report(); | 983 | __cmd_report(); |
985 | } else if (!strcmp(argv[0], "trace")) { | 984 | } else if (!strcmp(argv[0], "script")) { |
986 | /* Aliased to 'perf trace' */ | 985 | /* Aliased to 'perf script' */ |
987 | return cmd_trace(argc, argv, prefix); | 986 | return cmd_script(argc, argv, prefix); |
988 | } else if (!strcmp(argv[0], "info")) { | 987 | } else if (!strcmp(argv[0], "info")) { |
989 | if (argc) { | 988 | if (argc) { |
990 | argc = parse_options(argc, argv, | 989 | argc = parse_options(argc, argv, |
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 564491fa18b2..e9be6ae87a27 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c | |||
@@ -36,6 +36,7 @@ static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; | |||
36 | 36 | ||
37 | static u64 user_interval = ULLONG_MAX; | 37 | static u64 user_interval = ULLONG_MAX; |
38 | static u64 default_interval = 0; | 38 | static u64 default_interval = 0; |
39 | static u64 sample_type; | ||
39 | 40 | ||
40 | static int nr_cpus = 0; | 41 | static int nr_cpus = 0; |
41 | static unsigned int page_size; | 42 | static unsigned int page_size; |
@@ -48,6 +49,7 @@ static const char *output_name = "perf.data"; | |||
48 | static int group = 0; | 49 | static int group = 0; |
49 | static int realtime_prio = 0; | 50 | static int realtime_prio = 0; |
50 | static bool raw_samples = false; | 51 | static bool raw_samples = false; |
52 | static bool sample_id_all_avail = true; | ||
51 | static bool system_wide = false; | 53 | static bool system_wide = false; |
52 | static pid_t target_pid = -1; | 54 | static pid_t target_pid = -1; |
53 | static pid_t target_tid = -1; | 55 | static pid_t target_tid = -1; |
@@ -60,7 +62,9 @@ static bool call_graph = false; | |||
60 | static bool inherit_stat = false; | 62 | static bool inherit_stat = false; |
61 | static bool no_samples = false; | 63 | static bool no_samples = false; |
62 | static bool sample_address = false; | 64 | static bool sample_address = false; |
65 | static bool sample_time = false; | ||
63 | static bool no_buildid = false; | 66 | static bool no_buildid = false; |
67 | static bool no_buildid_cache = false; | ||
64 | 68 | ||
65 | static long samples = 0; | 69 | static long samples = 0; |
66 | static u64 bytes_written = 0; | 70 | static u64 bytes_written = 0; |
@@ -128,6 +132,7 @@ static void write_output(void *buf, size_t size) | |||
128 | } | 132 | } |
129 | 133 | ||
130 | static int process_synthesized_event(event_t *event, | 134 | static int process_synthesized_event(event_t *event, |
135 | struct sample_data *sample __used, | ||
131 | struct perf_session *self __used) | 136 | struct perf_session *self __used) |
132 | { | 137 | { |
133 | write_output(event, event->header.size); | 138 | write_output(event, event->header.size); |
@@ -280,12 +285,18 @@ static void create_counter(int counter, int cpu) | |||
280 | if (system_wide) | 285 | if (system_wide) |
281 | attr->sample_type |= PERF_SAMPLE_CPU; | 286 | attr->sample_type |= PERF_SAMPLE_CPU; |
282 | 287 | ||
288 | if (sample_time) | ||
289 | attr->sample_type |= PERF_SAMPLE_TIME; | ||
290 | |||
283 | if (raw_samples) { | 291 | if (raw_samples) { |
284 | attr->sample_type |= PERF_SAMPLE_TIME; | 292 | attr->sample_type |= PERF_SAMPLE_TIME; |
285 | attr->sample_type |= PERF_SAMPLE_RAW; | 293 | attr->sample_type |= PERF_SAMPLE_RAW; |
286 | attr->sample_type |= PERF_SAMPLE_CPU; | 294 | attr->sample_type |= PERF_SAMPLE_CPU; |
287 | } | 295 | } |
288 | 296 | ||
297 | if (!sample_type) | ||
298 | sample_type = attr->sample_type; | ||
299 | |||
289 | attr->mmap = track; | 300 | attr->mmap = track; |
290 | attr->comm = track; | 301 | attr->comm = track; |
291 | attr->inherit = !no_inherit; | 302 | attr->inherit = !no_inherit; |
@@ -293,6 +304,8 @@ static void create_counter(int counter, int cpu) | |||
293 | attr->disabled = 1; | 304 | attr->disabled = 1; |
294 | attr->enable_on_exec = 1; | 305 | attr->enable_on_exec = 1; |
295 | } | 306 | } |
307 | retry_sample_id: | ||
308 | attr->sample_id_all = sample_id_all_avail ? 1 : 0; | ||
296 | 309 | ||
297 | for (thread_index = 0; thread_index < thread_num; thread_index++) { | 310 | for (thread_index = 0; thread_index < thread_num; thread_index++) { |
298 | try_again: | 311 | try_again: |
@@ -309,6 +322,12 @@ try_again: | |||
309 | else if (err == ENODEV && cpu_list) { | 322 | else if (err == ENODEV && cpu_list) { |
310 | die("No such device - did you specify" | 323 | die("No such device - did you specify" |
311 | " an out-of-range profile CPU?\n"); | 324 | " an out-of-range profile CPU?\n"); |
325 | } else if (err == EINVAL && sample_id_all_avail) { | ||
326 | /* | ||
327 | * Old kernel, no attr->sample_id_all field | ||
328 | */ | ||
329 | sample_id_all_avail = false; | ||
330 | goto retry_sample_id; | ||
312 | } | 331 | } |
313 | 332 | ||
314 | /* | 333 | /* |
@@ -326,7 +345,7 @@ try_again: | |||
326 | goto try_again; | 345 | goto try_again; |
327 | } | 346 | } |
328 | printf("\n"); | 347 | printf("\n"); |
329 | error("perfcounter syscall returned with %d (%s)\n", | 348 | error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n", |
330 | fd[nr_cpu][counter][thread_index], strerror(err)); | 349 | fd[nr_cpu][counter][thread_index], strerror(err)); |
331 | 350 | ||
332 | #if defined(__i386__) || defined(__x86_64__) | 351 | #if defined(__i386__) || defined(__x86_64__) |
@@ -437,7 +456,8 @@ static void atexit_header(void) | |||
437 | if (!pipe_output) { | 456 | if (!pipe_output) { |
438 | session->header.data_size += bytes_written; | 457 | session->header.data_size += bytes_written; |
439 | 458 | ||
440 | process_buildids(); | 459 | if (!no_buildid) |
460 | process_buildids(); | ||
441 | perf_header__write(&session->header, output, true); | 461 | perf_header__write(&session->header, output, true); |
442 | perf_session__delete(session); | 462 | perf_session__delete(session); |
443 | symbol__exit(); | 463 | symbol__exit(); |
@@ -558,6 +578,9 @@ static int __cmd_record(int argc, const char **argv) | |||
558 | return -1; | 578 | return -1; |
559 | } | 579 | } |
560 | 580 | ||
581 | if (!no_buildid) | ||
582 | perf_header__set_feat(&session->header, HEADER_BUILD_ID); | ||
583 | |||
561 | if (!file_new) { | 584 | if (!file_new) { |
562 | err = perf_header__read(session, output); | 585 | err = perf_header__read(session, output); |
563 | if (err < 0) | 586 | if (err < 0) |
@@ -639,6 +662,8 @@ static int __cmd_record(int argc, const char **argv) | |||
639 | open_counters(cpumap[i]); | 662 | open_counters(cpumap[i]); |
640 | } | 663 | } |
641 | 664 | ||
665 | perf_session__set_sample_type(session, sample_type); | ||
666 | |||
642 | if (pipe_output) { | 667 | if (pipe_output) { |
643 | err = perf_header__write_pipe(output); | 668 | err = perf_header__write_pipe(output); |
644 | if (err < 0) | 669 | if (err < 0) |
@@ -651,6 +676,8 @@ static int __cmd_record(int argc, const char **argv) | |||
651 | 676 | ||
652 | post_processing_offset = lseek(output, 0, SEEK_CUR); | 677 | post_processing_offset = lseek(output, 0, SEEK_CUR); |
653 | 678 | ||
679 | perf_session__set_sample_id_all(session, sample_id_all_avail); | ||
680 | |||
654 | if (pipe_output) { | 681 | if (pipe_output) { |
655 | err = event__synthesize_attrs(&session->header, | 682 | err = event__synthesize_attrs(&session->header, |
656 | process_synthesized_event, | 683 | process_synthesized_event, |
@@ -831,10 +858,13 @@ const struct option record_options[] = { | |||
831 | "per thread counts"), | 858 | "per thread counts"), |
832 | OPT_BOOLEAN('d', "data", &sample_address, | 859 | OPT_BOOLEAN('d', "data", &sample_address, |
833 | "Sample addresses"), | 860 | "Sample addresses"), |
861 | OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"), | ||
834 | OPT_BOOLEAN('n', "no-samples", &no_samples, | 862 | OPT_BOOLEAN('n', "no-samples", &no_samples, |
835 | "don't sample"), | 863 | "don't sample"), |
836 | OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid, | 864 | OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache, |
837 | "do not update the buildid cache"), | 865 | "do not update the buildid cache"), |
866 | OPT_BOOLEAN('B', "no-buildid", &no_buildid, | ||
867 | "do not collect buildids in perf.data"), | ||
838 | OPT_END() | 868 | OPT_END() |
839 | }; | 869 | }; |
840 | 870 | ||
@@ -859,7 +889,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) | |||
859 | } | 889 | } |
860 | 890 | ||
861 | symbol__init(); | 891 | symbol__init(); |
862 | if (no_buildid) | 892 | |
893 | if (no_buildid_cache || no_buildid) | ||
863 | disable_buildid_cache(); | 894 | disable_buildid_cache(); |
864 | 895 | ||
865 | if (!nr_counters) { | 896 | if (!nr_counters) { |
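
retry_sample_id is a runtime feature probe: record first requests sample_id_all (per-event ids on non-sample records), and when an older kernel rejects the unknown attr bit with EINVAL it clears the bit and retries, remembering the outcome for the session via perf_session__set_sample_id_all(). The probe-and-fall-back pattern against the raw syscall, sketched (requires headers new enough to know the sample_id_all bit; error handling trimmed):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/types.h>
    #include <sys/syscall.h>
    #include <linux/perf_event.h>

    static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
                               int cpu, int group_fd, unsigned long flags)
    {
        return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
    }

    int main(void)
    {
        struct perf_event_attr attr;
        int sample_id_all = 1;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_SOFTWARE;
        attr.config = PERF_COUNT_SW_CPU_CLOCK;
    retry:
        attr.sample_id_all = sample_id_all;
        fd = perf_event_open(&attr, 0, -1, -1, 0);
        if (fd < 0 && errno == EINVAL && sample_id_all) {
            sample_id_all = 0;      /* old kernel: clear the bit, retry */
            goto retry;
        }
        printf("fd=%d sample_id_all=%d\n", fd, sample_id_all);
        if (fd >= 0)
            close(fd);
        return 0;
    }
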
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 5de405d45230..b6a2a899aa8f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c | |||
@@ -150,13 +150,13 @@ static int add_event_total(struct perf_session *session, | |||
150 | return 0; | 150 | return 0; |
151 | } | 151 | } |
152 | 152 | ||
153 | static int process_sample_event(event_t *event, struct perf_session *session) | 153 | static int process_sample_event(event_t *event, struct sample_data *sample, |
154 | struct perf_session *session) | ||
154 | { | 155 | { |
155 | struct sample_data data = { .period = 1, }; | ||
156 | struct addr_location al; | 156 | struct addr_location al; |
157 | struct perf_event_attr *attr; | 157 | struct perf_event_attr *attr; |
158 | 158 | ||
159 | if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) { | 159 | if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) { |
160 | fprintf(stderr, "problem processing %d event, skipping it.\n", | 160 | fprintf(stderr, "problem processing %d event, skipping it.\n", |
161 | event->header.type); | 161 | event->header.type); |
162 | return -1; | 162 | return -1; |
@@ -165,14 +165,14 @@ static int process_sample_event(event_t *event, struct perf_session *session) | |||
165 | if (al.filtered || (hide_unresolved && al.sym == NULL)) | 165 | if (al.filtered || (hide_unresolved && al.sym == NULL)) |
166 | return 0; | 166 | return 0; |
167 | 167 | ||
168 | if (perf_session__add_hist_entry(session, &al, &data)) { | 168 | if (perf_session__add_hist_entry(session, &al, sample)) { |
169 | pr_debug("problem incrementing symbol period, skipping event\n"); | 169 | pr_debug("problem incrementing symbol period, skipping event\n"); |
170 | return -1; | 170 | return -1; |
171 | } | 171 | } |
172 | 172 | ||
173 | attr = perf_header__find_attr(data.id, &session->header); | 173 | attr = perf_header__find_attr(sample->id, &session->header); |
174 | 174 | ||
175 | if (add_event_total(session, &data, attr)) { | 175 | if (add_event_total(session, sample, attr)) { |
176 | pr_debug("problem adding event period\n"); | 176 | pr_debug("problem adding event period\n"); |
177 | return -1; | 177 | return -1; |
178 | } | 178 | } |
@@ -180,7 +180,8 @@ static int process_sample_event(event_t *event, struct perf_session *session) | |||
180 | return 0; | 180 | return 0; |
181 | } | 181 | } |
182 | 182 | ||
183 | static int process_read_event(event_t *event, struct perf_session *session __used) | 183 | static int process_read_event(event_t *event, struct sample_data *sample __used, |
184 | struct perf_session *session __used) | ||
184 | { | 185 | { |
185 | struct perf_event_attr *attr; | 186 | struct perf_event_attr *attr; |
186 | 187 | ||
@@ -442,6 +443,8 @@ static const struct option options[] = { | |||
442 | "dump raw trace in ASCII"), | 443 | "dump raw trace in ASCII"), |
443 | OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, | 444 | OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, |
444 | "file", "vmlinux pathname"), | 445 | "file", "vmlinux pathname"), |
446 | OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, | ||
447 | "file", "kallsyms pathname"), | ||
445 | OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), | 448 | OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), |
446 | OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, | 449 | OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, |
447 | "load module symbols - WARNING: use only with -k and LIVE kernel"), | 450 | "load module symbols - WARNING: use only with -k and LIVE kernel"), |
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 55f3b5dcc731..c7753940aea0 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c | |||
@@ -1606,25 +1606,15 @@ process_raw_event(event_t *raw_event __used, struct perf_session *session, | |||
1606 | process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread); | 1606 | process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread); |
1607 | } | 1607 | } |
1608 | 1608 | ||
1609 | static int process_sample_event(event_t *event, struct perf_session *session) | 1609 | static int process_sample_event(event_t *event, struct sample_data *sample, |
1610 | struct perf_session *session) | ||
1610 | { | 1611 | { |
1611 | struct sample_data data; | ||
1612 | struct thread *thread; | 1612 | struct thread *thread; |
1613 | 1613 | ||
1614 | if (!(session->sample_type & PERF_SAMPLE_RAW)) | 1614 | if (!(session->sample_type & PERF_SAMPLE_RAW)) |
1615 | return 0; | 1615 | return 0; |
1616 | 1616 | ||
1617 | memset(&data, 0, sizeof(data)); | 1617 | thread = perf_session__findnew(session, sample->pid); |
1618 | data.time = -1; | ||
1619 | data.cpu = -1; | ||
1620 | data.period = -1; | ||
1621 | |||
1622 | event__parse_sample(event, session->sample_type, &data); | ||
1623 | |||
1624 | dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc, | ||
1625 | data.pid, data.tid, data.ip, data.period); | ||
1626 | |||
1627 | thread = perf_session__findnew(session, data.pid); | ||
1628 | if (thread == NULL) { | 1618 | if (thread == NULL) { |
1629 | pr_debug("problem processing %d event, skipping it.\n", | 1619 | pr_debug("problem processing %d event, skipping it.\n", |
1630 | event->header.type); | 1620 | event->header.type); |
@@ -1633,10 +1623,11 @@ static int process_sample_event(event_t *event, struct perf_session *session) | |||
1633 | 1623 | ||
1634 | dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); | 1624 | dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); |
1635 | 1625 | ||
1636 | if (profile_cpu != -1 && profile_cpu != (int)data.cpu) | 1626 | if (profile_cpu != -1 && profile_cpu != (int)sample->cpu) |
1637 | return 0; | 1627 | return 0; |
1638 | 1628 | ||
1639 | process_raw_event(event, session, data.raw_data, data.cpu, data.time, thread); | 1629 | process_raw_event(event, session, sample->raw_data, sample->cpu, |
1630 | sample->time, thread); | ||
1640 | 1631 | ||
1641 | return 0; | 1632 | return 0; |
1642 | } | 1633 | } |
@@ -1869,6 +1860,9 @@ static int __cmd_record(int argc, const char **argv) | |||
1869 | rec_argc = ARRAY_SIZE(record_args) + argc - 1; | 1860 | rec_argc = ARRAY_SIZE(record_args) + argc - 1; |
1870 | rec_argv = calloc(rec_argc + 1, sizeof(char *)); | 1861 | rec_argv = calloc(rec_argc + 1, sizeof(char *)); |
1871 | 1862 | ||
1863 | if (rec_argv == NULL) | ||
1864 | return -ENOMEM; | ||
1865 | |||
1872 | for (i = 0; i < ARRAY_SIZE(record_args); i++) | 1866 | for (i = 0; i < ARRAY_SIZE(record_args); i++) |
1873 | rec_argv[i] = strdup(record_args[i]); | 1867 | rec_argv[i] = strdup(record_args[i]); |
1874 | 1868 | ||
@@ -1888,10 +1882,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used) | |||
1888 | usage_with_options(sched_usage, sched_options); | 1882 | usage_with_options(sched_usage, sched_options); |
1889 | 1883 | ||
1890 | /* | 1884 | /* |
1891 | * Aliased to 'perf trace' for now: | 1885 | * Aliased to 'perf script' for now: |
1892 | */ | 1886 | */ |
1893 | if (!strcmp(argv[0], "trace")) | 1887 | if (!strcmp(argv[0], "script")) |
1894 | return cmd_trace(argc, argv, prefix); | 1888 | return cmd_script(argc, argv, prefix); |
1895 | 1889 | ||
1896 | symbol__init(); | 1890 | symbol__init(); |
1897 | if (!strncmp(argv[0], "rec", 3)) { | 1891 | if (!strncmp(argv[0], "rec", 3)) { |
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-script.c index 86cfe3800e6b..54f1ea808db5 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-script.c | |||
@@ -56,29 +56,18 @@ static void setup_scripting(void) | |||
56 | 56 | ||
57 | static int cleanup_scripting(void) | 57 | static int cleanup_scripting(void) |
58 | { | 58 | { |
59 | pr_debug("\nperf trace script stopped\n"); | 59 | pr_debug("\nperf script stopped\n"); |
60 | 60 | ||
61 | return scripting_ops->stop_script(); | 61 | return scripting_ops->stop_script(); |
62 | } | 62 | } |
63 | 63 | ||
64 | static char const *input_name = "perf.data"; | 64 | static char const *input_name = "perf.data"; |
65 | 65 | ||
66 | static int process_sample_event(event_t *event, struct perf_session *session) | 66 | static int process_sample_event(event_t *event, struct sample_data *sample, |
67 | struct perf_session *session) | ||
67 | { | 68 | { |
68 | struct sample_data data; | 69 | struct thread *thread = perf_session__findnew(session, event->ip.pid); |
69 | struct thread *thread; | ||
70 | 70 | ||
71 | memset(&data, 0, sizeof(data)); | ||
72 | data.time = -1; | ||
73 | data.cpu = -1; | ||
74 | data.period = 1; | ||
75 | |||
76 | event__parse_sample(event, session->sample_type, &data); | ||
77 | |||
78 | dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc, | ||
79 | data.pid, data.tid, data.ip, data.period); | ||
80 | |||
81 | thread = perf_session__findnew(session, event->ip.pid); | ||
82 | if (thread == NULL) { | 71 | if (thread == NULL) { |
83 | pr_debug("problem processing %d event, skipping it.\n", | 72 | pr_debug("problem processing %d event, skipping it.\n", |
84 | event->header.type); | 73 | event->header.type); |
@@ -87,13 +76,13 @@ static int process_sample_event(event_t *event, struct perf_session *session) | |||
87 | 76 | ||
88 | if (session->sample_type & PERF_SAMPLE_RAW) { | 77 | if (session->sample_type & PERF_SAMPLE_RAW) { |
89 | if (debug_mode) { | 78 | if (debug_mode) { |
90 | if (data.time < last_timestamp) { | 79 | if (sample->time < last_timestamp) { |
91 | pr_err("Samples misordered, previous: %llu " | 80 | pr_err("Samples misordered, previous: %llu " |
92 | "this: %llu\n", last_timestamp, | 81 | "this: %llu\n", last_timestamp, |
93 | data.time); | 82 | sample->time); |
94 | nr_unordered++; | 83 | nr_unordered++; |
95 | } | 84 | } |
96 | last_timestamp = data.time; | 85 | last_timestamp = sample->time; |
97 | return 0; | 86 | return 0; |
98 | } | 87 | } |
99 | /* | 88 | /* |
@@ -101,18 +90,19 @@ static int process_sample_event(event_t *event, struct perf_session *session) | |||
101 | * field, although it should be the same as this perf | 90 | * field, although it should be the same as this perf |
102 | * event pid | 91 | * event pid |
103 | */ | 92 | */ |
104 | scripting_ops->process_event(data.cpu, data.raw_data, | 93 | scripting_ops->process_event(sample->cpu, sample->raw_data, |
105 | data.raw_size, | 94 | sample->raw_size, |
106 | data.time, thread->comm); | 95 | sample->time, thread->comm); |
107 | } | 96 | } |
108 | 97 | ||
109 | session->hists.stats.total_period += data.period; | 98 | session->hists.stats.total_period += sample->period; |
110 | return 0; | 99 | return 0; |
111 | } | 100 | } |
112 | 101 | ||
113 | static u64 nr_lost; | 102 | static u64 nr_lost; |
114 | 103 | ||
115 | static int process_lost_event(event_t *event, struct perf_session *session __used) | 104 | static int process_lost_event(event_t *event, struct sample_data *sample __used, |
105 | struct perf_session *session __used) | ||
116 | { | 106 | { |
117 | nr_lost += event->lost.lost; | 107 | nr_lost += event->lost.lost; |
118 | 108 | ||
@@ -137,7 +127,7 @@ static void sig_handler(int sig __unused) | |||
137 | session_done = 1; | 127 | session_done = 1; |
138 | } | 128 | } |
139 | 129 | ||
140 | static int __cmd_trace(struct perf_session *session) | 130 | static int __cmd_script(struct perf_session *session) |
141 | { | 131 | { |
142 | int ret; | 132 | int ret; |
143 | 133 | ||
@@ -247,7 +237,7 @@ static void list_available_languages(void) | |||
247 | 237 | ||
248 | fprintf(stderr, "\n"); | 238 | fprintf(stderr, "\n"); |
249 | fprintf(stderr, "Scripting language extensions (used in " | 239 | fprintf(stderr, "Scripting language extensions (used in " |
250 | "perf trace -s [spec:]script.[spec]):\n\n"); | 240 | "perf script -s [spec:]script.[spec]):\n\n"); |
251 | 241 | ||
252 | list_for_each_entry(s, &script_specs, node) | 242 | list_for_each_entry(s, &script_specs, node) |
253 | fprintf(stderr, " %-42s [%s]\n", s->spec, s->ops->name); | 243 | fprintf(stderr, " %-42s [%s]\n", s->spec, s->ops->name); |
@@ -301,17 +291,34 @@ static int parse_scriptname(const struct option *opt __used, | |||
301 | return 0; | 291 | return 0; |
302 | } | 292 | } |
303 | 293 | ||
304 | #define for_each_lang(scripts_dir, lang_dirent, lang_next) \ | 294 | /* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */ |
295 | static int is_directory(const char *base_path, const struct dirent *dent) | ||
296 | { | ||
297 | char path[PATH_MAX]; | ||
298 | struct stat st; | ||
299 | |||
300 | sprintf(path, "%s/%s", base_path, dent->d_name); | ||
301 | if (stat(path, &st)) | ||
302 | return 0; | ||
303 | |||
304 | return S_ISDIR(st.st_mode); | ||
305 | } | ||
306 | |||
307 | #define for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next)\ | ||
305 | while (!readdir_r(scripts_dir, &lang_dirent, &lang_next) && \ | 308 | while (!readdir_r(scripts_dir, &lang_dirent, &lang_next) && \ |
306 | lang_next) \ | 309 | lang_next) \ |
307 | if (lang_dirent.d_type == DT_DIR && \ | 310 | if ((lang_dirent.d_type == DT_DIR || \ |
311 | (lang_dirent.d_type == DT_UNKNOWN && \ | ||
312 | is_directory(scripts_path, &lang_dirent))) && \ | ||
308 | (strcmp(lang_dirent.d_name, ".")) && \ | 313 | (strcmp(lang_dirent.d_name, ".")) && \ |
309 | (strcmp(lang_dirent.d_name, ".."))) | 314 | (strcmp(lang_dirent.d_name, ".."))) |
310 | 315 | ||
311 | #define for_each_script(lang_dir, script_dirent, script_next) \ | 316 | #define for_each_script(lang_path, lang_dir, script_dirent, script_next)\ |
312 | while (!readdir_r(lang_dir, &script_dirent, &script_next) && \ | 317 | while (!readdir_r(lang_dir, &script_dirent, &script_next) && \ |
313 | script_next) \ | 318 | script_next) \ |
314 | if (script_dirent.d_type != DT_DIR) | 319 | if (script_dirent.d_type != DT_DIR && \ |
320 | (script_dirent.d_type != DT_UNKNOWN || \ | ||
321 | !is_directory(lang_path, &script_dirent))) | ||
315 | 322 | ||
316 | 323 | ||
317 | #define RECORD_SUFFIX "-record" | 324 | #define RECORD_SUFFIX "-record" |
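Some filesystems never fill in dirent->d_type and report DT_UNKNOWN for every entry, which is why the two iteration macros above grow a stat(2) fallback through is_directory(). The same pattern in isolation, as a self-contained sketch:

	#include <dirent.h>
	#include <limits.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/stat.h>

	/* Print the subdirectories of 'base', tolerating DT_UNKNOWN. */
	static void list_subdirs(const char *base)
	{
		char path[PATH_MAX];
		struct dirent *dent;
		struct stat st;
		DIR *dir = opendir(base);

		if (dir == NULL)
			return;

		while ((dent = readdir(dir)) != NULL) {
			int isdir = dent->d_type == DT_DIR;

			if (dent->d_type == DT_UNKNOWN) {
				/* d_type not supported: ask the inode */
				snprintf(path, sizeof(path), "%s/%s",
					 base, dent->d_name);
				isdir = !stat(path, &st) && S_ISDIR(st.st_mode);
			}
			if (isdir && strcmp(dent->d_name, ".") &&
			    strcmp(dent->d_name, ".."))
				printf("%s\n", dent->d_name);
		}
		closedir(dir);
	}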
@@ -380,10 +387,10 @@ out_delete_desc: | |||
380 | return NULL; | 387 | return NULL; |
381 | } | 388 | } |
382 | 389 | ||
383 | static char *ends_with(char *str, const char *suffix) | 390 | static const char *ends_with(const char *str, const char *suffix) |
384 | { | 391 | { |
385 | size_t suffix_len = strlen(suffix); | 392 | size_t suffix_len = strlen(suffix); |
386 | char *p = str; | 393 | const char *p = str; |
387 | 394 | ||
388 | if (strlen(str) > suffix_len) { | 395 | if (strlen(str) > suffix_len) { |
389 | p = str + strlen(str) - suffix_len; | 396 | p = str + strlen(str) - suffix_len; |
@@ -466,16 +473,16 @@ static int list_available_scripts(const struct option *opt __used, | |||
466 | if (!scripts_dir) | 473 | if (!scripts_dir) |
467 | return -1; | 474 | return -1; |
468 | 475 | ||
469 | for_each_lang(scripts_dir, lang_dirent, lang_next) { | 476 | for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) { |
470 | snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, | 477 | snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, |
471 | lang_dirent.d_name); | 478 | lang_dirent.d_name); |
472 | lang_dir = opendir(lang_path); | 479 | lang_dir = opendir(lang_path); |
473 | if (!lang_dir) | 480 | if (!lang_dir) |
474 | continue; | 481 | continue; |
475 | 482 | ||
476 | for_each_script(lang_dir, script_dirent, script_next) { | 483 | for_each_script(lang_path, lang_dir, script_dirent, script_next) { |
477 | script_root = strdup(script_dirent.d_name); | 484 | script_root = strdup(script_dirent.d_name); |
478 | str = ends_with(script_root, REPORT_SUFFIX); | 485 | str = (char *)ends_with(script_root, REPORT_SUFFIX); |
479 | if (str) { | 486 | if (str) { |
480 | *str = '\0'; | 487 | *str = '\0'; |
481 | desc = script_desc__findnew(script_root); | 488 | desc = script_desc__findnew(script_root); |
@@ -514,16 +521,16 @@ static char *get_script_path(const char *script_root, const char *suffix) | |||
514 | if (!scripts_dir) | 521 | if (!scripts_dir) |
515 | return NULL; | 522 | return NULL; |
516 | 523 | ||
517 | for_each_lang(scripts_dir, lang_dirent, lang_next) { | 524 | for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) { |
518 | snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, | 525 | snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, |
519 | lang_dirent.d_name); | 526 | lang_dirent.d_name); |
520 | lang_dir = opendir(lang_path); | 527 | lang_dir = opendir(lang_path); |
521 | if (!lang_dir) | 528 | if (!lang_dir) |
522 | continue; | 529 | continue; |
523 | 530 | ||
524 | for_each_script(lang_dir, script_dirent, script_next) { | 531 | for_each_script(lang_path, lang_dir, script_dirent, script_next) { |
525 | __script_root = strdup(script_dirent.d_name); | 532 | __script_root = strdup(script_dirent.d_name); |
526 | str = ends_with(__script_root, suffix); | 533 | str = (char *)ends_with(__script_root, suffix); |
527 | if (str) { | 534 | if (str) { |
528 | *str = '\0'; | 535 | *str = '\0'; |
529 | if (strcmp(__script_root, script_root)) | 536 | if (strcmp(__script_root, script_root)) |
@@ -543,7 +550,7 @@ static char *get_script_path(const char *script_root, const char *suffix) | |||
543 | 550 | ||
544 | static bool is_top_script(const char *script_path) | 551 | static bool is_top_script(const char *script_path) |
545 | { | 552 | { |
546 | return ends_with((char *)script_path, "top") == NULL ? false : true; | 553 | return ends_with(script_path, "top") == NULL ? false : true; |
547 | } | 554 | } |
548 | 555 | ||
549 | static int has_required_arg(char *script_path) | 556 | static int has_required_arg(char *script_path) |
@@ -569,12 +576,12 @@ out: | |||
569 | return n_args; | 576 | return n_args; |
570 | } | 577 | } |
571 | 578 | ||
572 | static const char * const trace_usage[] = { | 579 | static const char * const script_usage[] = { |
573 | "perf trace [<options>]", | 580 | "perf script [<options>]", |
574 | "perf trace [<options>] record <script> [<record-options>] <command>", | 581 | "perf script [<options>] record <script> [<record-options>] <command>", |
575 | "perf trace [<options>] report <script> [script-args]", | 582 | "perf script [<options>] report <script> [script-args]", |
576 | "perf trace [<options>] <script> [<record-options>] <command>", | 583 | "perf script [<options>] <script> [<record-options>] <command>", |
577 | "perf trace [<options>] <top-script> [script-args]", | 584 | "perf script [<options>] <top-script> [script-args]", |
578 | NULL | 585 | NULL |
579 | }; | 586 | }; |
580 | 587 | ||
@@ -591,7 +598,7 @@ static const struct option options[] = { | |||
591 | "script file name (lang:script name, script name, or *)", | 598 | "script file name (lang:script name, script name, or *)", |
592 | parse_scriptname), | 599 | parse_scriptname), |
593 | OPT_STRING('g', "gen-script", &generate_script_lang, "lang", | 600 | OPT_STRING('g', "gen-script", &generate_script_lang, "lang", |
594 | "generate perf-trace.xx script in specified language"), | 601 | "generate perf-script.xx script in specified language"), |
595 | OPT_STRING('i', "input", &input_name, "file", | 602 | OPT_STRING('i', "input", &input_name, "file", |
596 | "input file name"), | 603 | "input file name"), |
597 | OPT_BOOLEAN('d', "debug-mode", &debug_mode, | 604 | OPT_BOOLEAN('d', "debug-mode", &debug_mode, |
@@ -614,7 +621,7 @@ static bool have_cmd(int argc, const char **argv) | |||
614 | return argc != 0; | 621 | return argc != 0; |
615 | } | 622 | } |
616 | 623 | ||
617 | int cmd_trace(int argc, const char **argv, const char *prefix __used) | 624 | int cmd_script(int argc, const char **argv, const char *prefix __used) |
618 | { | 625 | { |
619 | char *rec_script_path = NULL; | 626 | char *rec_script_path = NULL; |
620 | char *rep_script_path = NULL; | 627 | char *rep_script_path = NULL; |
@@ -626,7 +633,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used) | |||
626 | 633 | ||
627 | setup_scripting(); | 634 | setup_scripting(); |
628 | 635 | ||
629 | argc = parse_options(argc, argv, options, trace_usage, | 636 | argc = parse_options(argc, argv, options, script_usage, |
630 | PARSE_OPT_STOP_AT_NON_OPTION); | 637 | PARSE_OPT_STOP_AT_NON_OPTION); |
631 | 638 | ||
632 | if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) { | 639 | if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) { |
@@ -640,7 +647,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used) | |||
640 | if (!rep_script_path) { | 647 | if (!rep_script_path) { |
641 | fprintf(stderr, | 648 | fprintf(stderr, |
642 | "Please specify a valid report script" | 649 | "Please specify a valid report script" |
643 | "(see 'perf trace -l' for listing)\n"); | 650 | "(see 'perf script -l' for listing)\n"); |
644 | return -1; | 651 | return -1; |
645 | } | 652 | } |
646 | } | 653 | } |
@@ -658,8 +665,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used) | |||
658 | 665 | ||
659 | if (!rec_script_path && !rep_script_path) { | 666 | if (!rec_script_path && !rep_script_path) { |
660 | fprintf(stderr, " Couldn't find script %s\n\n See perf" | 667 | fprintf(stderr, " Couldn't find script %s\n\n See perf" |
661 | " trace -l for available scripts.\n", argv[0]); | 668 | " script -l for available scripts.\n", argv[0]); |
662 | usage_with_options(trace_usage, options); | 669 | usage_with_options(script_usage, options); |
663 | } | 670 | } |
664 | 671 | ||
665 | if (is_top_script(argv[0])) { | 672 | if (is_top_script(argv[0])) { |
@@ -671,9 +678,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used) | |||
671 | rec_args = (argc - 1) - rep_args; | 678 | rec_args = (argc - 1) - rep_args; |
672 | if (rec_args < 0) { | 679 | if (rec_args < 0) { |
673 | fprintf(stderr, " %s script requires options." | 680 | fprintf(stderr, " %s script requires options." |
674 | "\n\n See perf trace -l for available " | 681 | "\n\n See perf script -l for available " |
675 | "scripts and options.\n", argv[0]); | 682 | "scripts and options.\n", argv[0]); |
676 | usage_with_options(trace_usage, options); | 683 | usage_with_options(script_usage, options); |
677 | } | 684 | } |
678 | } | 685 | } |
679 | 686 | ||
@@ -806,7 +813,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used) | |||
806 | return -1; | 813 | return -1; |
807 | } | 814 | } |
808 | 815 | ||
809 | err = scripting_ops->generate_script("perf-trace"); | 816 | err = scripting_ops->generate_script("perf-script"); |
810 | goto out; | 817 | goto out; |
811 | } | 818 | } |
812 | 819 | ||
@@ -814,10 +821,10 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used) | |||
814 | err = scripting_ops->start_script(script_name, argc, argv); | 821 | err = scripting_ops->start_script(script_name, argc, argv); |
815 | if (err) | 822 | if (err) |
816 | goto out; | 823 | goto out; |
817 | pr_debug("perf trace started with script %s\n\n", script_name); | 824 | pr_debug("perf script started with script %s\n\n", script_name); |
818 | } | 825 | } |
819 | 826 | ||
820 | err = __cmd_trace(session); | 827 | err = __cmd_script(session); |
821 | 828 | ||
822 | perf_session__delete(session); | 829 | perf_session__delete(session); |
823 | cleanup_scripting(); | 830 | cleanup_scripting(); |
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a6b4d44f9502..7ff746da7e6c 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -52,6 +52,8 @@ | |||
52 | #include <math.h> | 52 | #include <math.h> |
53 | #include <locale.h> | 53 | #include <locale.h> |
54 | 54 | ||
55 | #define DEFAULT_SEPARATOR " " | ||
56 | |||
55 | static struct perf_event_attr default_attrs[] = { | 57 | static struct perf_event_attr default_attrs[] = { |
56 | 58 | ||
57 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, | 59 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, |
@@ -75,20 +77,30 @@ static int run_idx = 0; | |||
75 | static int run_count = 1; | 77 | static int run_count = 1; |
76 | static bool no_inherit = false; | 78 | static bool no_inherit = false; |
77 | static bool scale = true; | 79 | static bool scale = true; |
80 | static bool no_aggr = false; | ||
78 | static pid_t target_pid = -1; | 81 | static pid_t target_pid = -1; |
79 | static pid_t target_tid = -1; | 82 | static pid_t target_tid = -1; |
80 | static pid_t *all_tids = NULL; | 83 | static pid_t *all_tids = NULL; |
81 | static int thread_num = 0; | 84 | static int thread_num = 0; |
82 | static pid_t child_pid = -1; | 85 | static pid_t child_pid = -1; |
83 | static bool null_run = false; | 86 | static bool null_run = false; |
84 | static bool big_num = false; | 87 | static bool big_num = true; |
88 | static int big_num_opt = -1; | ||
85 | static const char *cpu_list; | 89 | static const char *cpu_list; |
90 | static const char *csv_sep = NULL; | ||
91 | static bool csv_output = false; | ||
86 | 92 | ||
87 | 93 | ||
88 | static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; | 94 | static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; |
89 | 95 | ||
90 | static int event_scaled[MAX_COUNTERS]; | 96 | static int event_scaled[MAX_COUNTERS]; |
91 | 97 | ||
98 | static struct { | ||
99 | u64 val; | ||
100 | u64 ena; | ||
101 | u64 run; | ||
102 | } cpu_counts[MAX_NR_CPUS][MAX_COUNTERS]; | ||
103 | |||
92 | static volatile int done = 0; | 104 | static volatile int done = 0; |
93 | 105 | ||
94 | struct stats | 106 | struct stats |
@@ -136,19 +148,19 @@ static double stddev_stats(struct stats *stats) | |||
136 | } | 148 | } |
137 | 149 | ||
138 | struct stats event_res_stats[MAX_COUNTERS][3]; | 150 | struct stats event_res_stats[MAX_COUNTERS][3]; |
139 | struct stats runtime_nsecs_stats; | 151 | struct stats runtime_nsecs_stats[MAX_NR_CPUS]; |
152 | struct stats runtime_cycles_stats[MAX_NR_CPUS]; | ||
153 | struct stats runtime_branches_stats[MAX_NR_CPUS]; | ||
140 | struct stats walltime_nsecs_stats; | 154 | struct stats walltime_nsecs_stats; |
141 | struct stats runtime_cycles_stats; | ||
142 | struct stats runtime_branches_stats; | ||
143 | 155 | ||
144 | #define MATCH_EVENT(t, c, counter) \ | 156 | #define MATCH_EVENT(t, c, counter) \ |
145 | (attrs[counter].type == PERF_TYPE_##t && \ | 157 | (attrs[counter].type == PERF_TYPE_##t && \ |
146 | attrs[counter].config == PERF_COUNT_##c) | 158 | attrs[counter].config == PERF_COUNT_##c) |
147 | 159 | ||
148 | #define ERR_PERF_OPEN \ | 160 | #define ERR_PERF_OPEN \ |
149 | "Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n" | 161 | "counter %d, sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information." |
150 | 162 | ||
151 | static int create_perf_stat_counter(int counter) | 163 | static int create_perf_stat_counter(int counter, bool *perm_err) |
152 | { | 164 | { |
153 | struct perf_event_attr *attr = attrs + counter; | 165 | struct perf_event_attr *attr = attrs + counter; |
154 | int thread; | 166 | int thread; |
@@ -164,11 +176,14 @@ static int create_perf_stat_counter(int counter) | |||
164 | for (cpu = 0; cpu < nr_cpus; cpu++) { | 176 | for (cpu = 0; cpu < nr_cpus; cpu++) { |
165 | fd[cpu][counter][0] = sys_perf_event_open(attr, | 177 | fd[cpu][counter][0] = sys_perf_event_open(attr, |
166 | -1, cpumap[cpu], -1, 0); | 178 | -1, cpumap[cpu], -1, 0); |
167 | if (fd[cpu][counter][0] < 0) | 179 | if (fd[cpu][counter][0] < 0) { |
168 | pr_debug(ERR_PERF_OPEN, counter, | 180 | if (errno == EPERM || errno == EACCES) |
181 | *perm_err = true; | ||
182 | error(ERR_PERF_OPEN, counter, | ||
169 | fd[cpu][counter][0], strerror(errno)); | 183 | fd[cpu][counter][0], strerror(errno)); |
170 | else | 184 | } else { |
171 | ++ncreated; | 185 | ++ncreated; |
186 | } | ||
172 | } | 187 | } |
173 | } else { | 188 | } else { |
174 | attr->inherit = !no_inherit; | 189 | attr->inherit = !no_inherit; |
@@ -179,12 +194,15 @@ static int create_perf_stat_counter(int counter) | |||
179 | for (thread = 0; thread < thread_num; thread++) { | 194 | for (thread = 0; thread < thread_num; thread++) { |
180 | fd[0][counter][thread] = sys_perf_event_open(attr, | 195 | fd[0][counter][thread] = sys_perf_event_open(attr, |
181 | all_tids[thread], -1, -1, 0); | 196 | all_tids[thread], -1, -1, 0); |
182 | if (fd[0][counter][thread] < 0) | 197 | if (fd[0][counter][thread] < 0) { |
183 | pr_debug(ERR_PERF_OPEN, counter, | 198 | if (errno == EPERM || errno == EACCES) |
199 | *perm_err = true; | ||
200 | error(ERR_PERF_OPEN, counter, | ||
184 | fd[0][counter][thread], | 201 | fd[0][counter][thread], |
185 | strerror(errno)); | 202 | strerror(errno)); |
186 | else | 203 | } else { |
187 | ++ncreated; | 204 | ++ncreated; |
205 | } | ||
188 | } | 206 | } |
189 | } | 207 | } |
190 | 208 | ||
@@ -205,8 +223,9 @@ static inline int nsec_counter(int counter) | |||
205 | 223 | ||
206 | /* | 224 | /* |
207 | * Read out the results of a single counter: | 225 | * Read out the results of a single counter: |
226 | * aggregate counts across CPUs in system-wide mode | ||
208 | */ | 227 | */ |
209 | static void read_counter(int counter) | 228 | static void read_counter_aggr(int counter) |
210 | { | 229 | { |
211 | u64 count[3], single_count[3]; | 230 | u64 count[3], single_count[3]; |
212 | int cpu; | 231 | int cpu; |
@@ -264,11 +283,58 @@ static void read_counter(int counter) | |||
264 | * Save the full runtime - to allow normalization during printout: | 283 | * Save the full runtime - to allow normalization during printout: |
265 | */ | 284 | */ |
266 | if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) | 285 | if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) |
267 | update_stats(&runtime_nsecs_stats, count[0]); | 286 | update_stats(&runtime_nsecs_stats[0], count[0]); |
268 | if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) | 287 | if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) |
269 | update_stats(&runtime_cycles_stats, count[0]); | 288 | update_stats(&runtime_cycles_stats[0], count[0]); |
270 | if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter)) | 289 | if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter)) |
271 | update_stats(&runtime_branches_stats, count[0]); | 290 | update_stats(&runtime_branches_stats[0], count[0]); |
291 | } | ||
292 | |||
293 | /* | ||
294 | * Read out the results of a single counter: | ||
295 | * do not aggregate counts across CPUs in system-wide mode | ||
296 | */ | ||
297 | static void read_counter(int counter) | ||
298 | { | ||
299 | u64 count[3]; | ||
300 | int cpu; | ||
301 | size_t res, nv; | ||
302 | |||
303 | count[0] = count[1] = count[2] = 0; | ||
304 | |||
305 | nv = scale ? 3 : 1; | ||
306 | |||
307 | for (cpu = 0; cpu < nr_cpus; cpu++) { | ||
308 | |||
309 | if (fd[cpu][counter][0] < 0) | ||
310 | continue; | ||
311 | |||
312 | res = read(fd[cpu][counter][0], count, nv * sizeof(u64)); | ||
313 | |||
314 | assert(res == nv * sizeof(u64)); | ||
315 | |||
316 | close(fd[cpu][counter][0]); | ||
317 | fd[cpu][counter][0] = -1; | ||
318 | |||
319 | if (scale) { | ||
320 | if (count[2] == 0) { | ||
321 | count[0] = 0; | ||
322 | } else if (count[2] < count[1]) { | ||
323 | count[0] = (unsigned long long) | ||
324 | ((double)count[0] * count[1] / count[2] + 0.5); | ||
325 | } | ||
326 | } | ||
327 | cpu_counts[cpu][counter].val = count[0]; /* scaled count */ | ||
328 | cpu_counts[cpu][counter].ena = count[1]; | ||
329 | cpu_counts[cpu][counter].run = count[2]; | ||
330 | |||
331 | if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) | ||
332 | update_stats(&runtime_nsecs_stats[cpu], count[0]); | ||
333 | if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) | ||
334 | update_stats(&runtime_cycles_stats[cpu], count[0]); | ||
335 | if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter)) | ||
336 | update_stats(&runtime_branches_stats[cpu], count[0]); | ||
337 | } | ||
272 | } | 338 | } |
273 | 339 | ||
274 | static int run_perf_stat(int argc __used, const char **argv) | 340 | static int run_perf_stat(int argc __used, const char **argv) |
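The scaling block in read_counter() is the standard extrapolation for multiplexed counters: the kernel reports the raw value plus time-enabled and time-running, and the tool estimates the full-period count as val * enabled / running, rounded to the nearest integer via the +0.5. With an assumed raw count of 1,000,000, enabled 10 ms and running 5 ms, the estimate is 2,000,000. That step in isolation:

	#include <stdint.h>

	/* Sketch: extrapolate a counter that only ran part of the time.
	 * val = raw count, ena = time enabled, run = time running. */
	static uint64_t scale_count(uint64_t val, uint64_t ena, uint64_t run)
	{
		if (run == 0)
			return 0;	/* never scheduled on the PMU */
		if (run < ena)		/* multiplexed: extrapolate + round */
			return (uint64_t)((double)val * ena / run + 0.5);
		return val;		/* counted for the whole window */
	}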
@@ -277,6 +343,7 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
277 | int status = 0; | 343 | int status = 0; |
278 | int counter, ncreated = 0; | 344 | int counter, ncreated = 0; |
279 | int child_ready_pipe[2], go_pipe[2]; | 345 | int child_ready_pipe[2], go_pipe[2]; |
346 | bool perm_err = false; | ||
280 | const bool forks = (argc > 0); | 347 | const bool forks = (argc > 0); |
281 | char buf; | 348 | char buf; |
282 | 349 | ||
@@ -335,12 +402,15 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
335 | } | 402 | } |
336 | 403 | ||
337 | for (counter = 0; counter < nr_counters; counter++) | 404 | for (counter = 0; counter < nr_counters; counter++) |
338 | ncreated += create_perf_stat_counter(counter); | 405 | ncreated += create_perf_stat_counter(counter, &perm_err); |
339 | 406 | ||
340 | if (ncreated == 0) { | 407 | if (ncreated < nr_counters) { |
341 | pr_err("No permission to collect %sstats.\n" | 408 | if (perm_err) |
342 | "Consider tweaking /proc/sys/kernel/perf_event_paranoid.\n", | 409 | error("You may not have permission to collect %sstats.\n" |
343 | system_wide ? "system-wide " : ""); | 410 | "\t Consider tweaking" |
411 | " /proc/sys/kernel/perf_event_paranoid or running as root.", | ||
412 | system_wide ? "system-wide " : ""); | ||
413 | die("Not all events could be opened.\n"); | ||
344 | if (child_pid != -1) | 414 | if (child_pid != -1) |
345 | kill(child_pid, SIGTERM); | 415 | kill(child_pid, SIGTERM); |
346 | return -1; | 416 | return -1; |
@@ -362,9 +432,13 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
362 | 432 | ||
363 | update_stats(&walltime_nsecs_stats, t1 - t0); | 433 | update_stats(&walltime_nsecs_stats, t1 - t0); |
364 | 434 | ||
365 | for (counter = 0; counter < nr_counters; counter++) | 435 | if (no_aggr) { |
366 | read_counter(counter); | 436 | for (counter = 0; counter < nr_counters; counter++) |
367 | 437 | read_counter(counter); | |
438 | } else { | ||
439 | for (counter = 0; counter < nr_counters; counter++) | ||
440 | read_counter_aggr(counter); | ||
441 | } | ||
368 | return WEXITSTATUS(status); | 442 | return WEXITSTATUS(status); |
369 | } | 443 | } |
370 | 444 | ||
@@ -377,11 +451,21 @@ static void print_noise(int counter, double avg) | |||
377 | 100 * stddev_stats(&event_res_stats[counter][0]) / avg); | 451 | 100 * stddev_stats(&event_res_stats[counter][0]) / avg); |
378 | } | 452 | } |
379 | 453 | ||
380 | static void nsec_printout(int counter, double avg) | 454 | static void nsec_printout(int cpu, int counter, double avg) |
381 | { | 455 | { |
382 | double msecs = avg / 1e6; | 456 | double msecs = avg / 1e6; |
457 | char cpustr[16] = { '\0', }; | ||
458 | const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-24s"; | ||
459 | |||
460 | if (no_aggr) | ||
461 | sprintf(cpustr, "CPU%*d%s", | ||
462 | csv_output ? 0 : -4, | ||
463 | cpumap[cpu], csv_sep); | ||
383 | 464 | ||
384 | fprintf(stderr, " %18.6f %-24s", msecs, event_name(counter)); | 465 | fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(counter)); |
466 | |||
467 | if (csv_output) | ||
468 | return; | ||
385 | 469 | ||
386 | if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) { | 470 | if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) { |
387 | fprintf(stderr, " # %10.3f CPUs ", | 471 | fprintf(stderr, " # %10.3f CPUs ", |
@@ -389,33 +473,49 @@ static void nsec_printout(int counter, double avg) | |||
389 | } | 473 | } |
390 | } | 474 | } |
391 | 475 | ||
392 | static void abs_printout(int counter, double avg) | 476 | static void abs_printout(int cpu, int counter, double avg) |
393 | { | 477 | { |
394 | double total, ratio = 0.0; | 478 | double total, ratio = 0.0; |
479 | char cpustr[16] = { '\0', }; | ||
480 | const char *fmt; | ||
481 | |||
482 | if (csv_output) | ||
483 | fmt = "%s%.0f%s%s"; | ||
484 | else if (big_num) | ||
485 | fmt = "%s%'18.0f%s%-24s"; | ||
486 | else | ||
487 | fmt = "%s%18.0f%s%-24s"; | ||
395 | 488 | ||
396 | if (big_num) | 489 | if (no_aggr) |
397 | fprintf(stderr, " %'18.0f %-24s", avg, event_name(counter)); | 490 | sprintf(cpustr, "CPU%*d%s", |
491 | csv_output ? 0 : -4, | ||
492 | cpumap[cpu], csv_sep); | ||
398 | else | 493 | else |
399 | fprintf(stderr, " %18.0f %-24s", avg, event_name(counter)); | 494 | cpu = 0; |
495 | |||
496 | fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(counter)); | ||
497 | |||
498 | if (csv_output) | ||
499 | return; | ||
400 | 500 | ||
401 | if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) { | 501 | if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) { |
402 | total = avg_stats(&runtime_cycles_stats); | 502 | total = avg_stats(&runtime_cycles_stats[cpu]); |
403 | 503 | ||
404 | if (total) | 504 | if (total) |
405 | ratio = avg / total; | 505 | ratio = avg / total; |
406 | 506 | ||
407 | fprintf(stderr, " # %10.3f IPC ", ratio); | 507 | fprintf(stderr, " # %10.3f IPC ", ratio); |
408 | } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) && | 508 | } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) && |
409 | runtime_branches_stats.n != 0) { | 509 | runtime_branches_stats[cpu].n != 0) { |
410 | total = avg_stats(&runtime_branches_stats); | 510 | total = avg_stats(&runtime_branches_stats[cpu]); |
411 | 511 | ||
412 | if (total) | 512 | if (total) |
413 | ratio = avg * 100 / total; | 513 | ratio = avg * 100 / total; |
414 | 514 | ||
415 | fprintf(stderr, " # %10.3f %% ", ratio); | 515 | fprintf(stderr, " # %10.3f %% ", ratio); |
416 | 516 | ||
417 | } else if (runtime_nsecs_stats.n != 0) { | 517 | } else if (runtime_nsecs_stats[cpu].n != 0) { |
418 | total = avg_stats(&runtime_nsecs_stats); | 518 | total = avg_stats(&runtime_nsecs_stats[cpu]); |
419 | 519 | ||
420 | if (total) | 520 | if (total) |
421 | ratio = 1000.0 * avg / total; | 521 | ratio = 1000.0 * avg / total; |
@@ -426,22 +526,29 @@ static void abs_printout(int counter, double avg) | |||
426 | 526 | ||
427 | /* | 527 | /* |
428 | * Print out the results of a single counter: | 528 | * Print out the results of a single counter: |
529 | * aggregated counts in system-wide mode | ||
429 | */ | 530 | */ |
430 | static void print_counter(int counter) | 531 | static void print_counter_aggr(int counter) |
431 | { | 532 | { |
432 | double avg = avg_stats(&event_res_stats[counter][0]); | 533 | double avg = avg_stats(&event_res_stats[counter][0]); |
433 | int scaled = event_scaled[counter]; | 534 | int scaled = event_scaled[counter]; |
434 | 535 | ||
435 | if (scaled == -1) { | 536 | if (scaled == -1) { |
436 | fprintf(stderr, " %18s %-24s\n", | 537 | fprintf(stderr, "%*s%s%-24s\n", |
437 | "<not counted>", event_name(counter)); | 538 | csv_output ? 0 : 18, |
539 | "<not counted>", csv_sep, event_name(counter)); | ||
438 | return; | 540 | return; |
439 | } | 541 | } |
440 | 542 | ||
441 | if (nsec_counter(counter)) | 543 | if (nsec_counter(counter)) |
442 | nsec_printout(counter, avg); | 544 | nsec_printout(-1, counter, avg); |
443 | else | 545 | else |
444 | abs_printout(counter, avg); | 546 | abs_printout(-1, counter, avg); |
547 | |||
548 | if (csv_output) { | ||
549 | fputc('\n', stderr); | ||
550 | return; | ||
551 | } | ||
445 | 552 | ||
446 | print_noise(counter, avg); | 553 | print_noise(counter, avg); |
447 | 554 | ||
@@ -458,40 +565,91 @@ static void print_counter(int counter) | |||
458 | fprintf(stderr, "\n"); | 565 | fprintf(stderr, "\n"); |
459 | } | 566 | } |
460 | 567 | ||
568 | /* | ||
569 | * Print out the results of a single counter: | ||
570 | * does not use aggregated counts across CPUs in system-wide mode | ||
571 | */ | ||
572 | static void print_counter(int counter) | ||
573 | { | ||
574 | u64 ena, run, val; | ||
575 | int cpu; | ||
576 | |||
577 | for (cpu = 0; cpu < nr_cpus; cpu++) { | ||
578 | val = cpu_counts[cpu][counter].val; | ||
579 | ena = cpu_counts[cpu][counter].ena; | ||
580 | run = cpu_counts[cpu][counter].run; | ||
581 | if (run == 0 || ena == 0) { | ||
582 | fprintf(stderr, "CPU%*d%s%*s%s%-24s", | ||
583 | csv_output ? 0 : -4, | ||
584 | cpumap[cpu], csv_sep, | ||
585 | csv_output ? 0 : 18, | ||
586 | "<not counted>", csv_sep, | ||
587 | event_name(counter)); | ||
588 | |||
589 | fprintf(stderr, "\n"); | ||
590 | continue; | ||
591 | } | ||
592 | |||
593 | if (nsec_counter(counter)) | ||
594 | nsec_printout(cpu, counter, val); | ||
595 | else | ||
596 | abs_printout(cpu, counter, val); | ||
597 | |||
598 | if (!csv_output) { | ||
599 | print_noise(counter, 1.0); | ||
600 | |||
601 | if (run != ena) { | ||
602 | fprintf(stderr, " (scaled from %.2f%%)", | ||
603 | 100.0 * run / ena); | ||
604 | } | ||
605 | } | ||
606 | fprintf(stderr, "\n"); | ||
607 | } | ||
608 | } | ||
609 | |||
461 | static void print_stat(int argc, const char **argv) | 610 | static void print_stat(int argc, const char **argv) |
462 | { | 611 | { |
463 | int i, counter; | 612 | int i, counter; |
464 | 613 | ||
465 | fflush(stdout); | 614 | fflush(stdout); |
466 | 615 | ||
467 | fprintf(stderr, "\n"); | 616 | if (!csv_output) { |
468 | fprintf(stderr, " Performance counter stats for "); | 617 | fprintf(stderr, "\n"); |
469 | if(target_pid == -1 && target_tid == -1) { | 618 | fprintf(stderr, " Performance counter stats for "); |
470 | fprintf(stderr, "\'%s", argv[0]); | 619 | if(target_pid == -1 && target_tid == -1) { |
471 | for (i = 1; i < argc; i++) | 620 | fprintf(stderr, "\'%s", argv[0]); |
472 | fprintf(stderr, " %s", argv[i]); | 621 | for (i = 1; i < argc; i++) |
473 | } else if (target_pid != -1) | 622 | fprintf(stderr, " %s", argv[i]); |
474 | fprintf(stderr, "process id \'%d", target_pid); | 623 | } else if (target_pid != -1) |
475 | else | 624 | fprintf(stderr, "process id \'%d", target_pid); |
476 | fprintf(stderr, "thread id \'%d", target_tid); | 625 | else |
477 | 626 | fprintf(stderr, "thread id \'%d", target_tid); | |
478 | fprintf(stderr, "\'"); | 627 | |
479 | if (run_count > 1) | 628 | fprintf(stderr, "\'"); |
480 | fprintf(stderr, " (%d runs)", run_count); | 629 | if (run_count > 1) |
481 | fprintf(stderr, ":\n\n"); | 630 | fprintf(stderr, " (%d runs)", run_count); |
631 | fprintf(stderr, ":\n\n"); | ||
632 | } | ||
482 | 633 | ||
483 | for (counter = 0; counter < nr_counters; counter++) | 634 | if (no_aggr) { |
484 | print_counter(counter); | 635 | for (counter = 0; counter < nr_counters; counter++) |
636 | print_counter(counter); | ||
637 | } else { | ||
638 | for (counter = 0; counter < nr_counters; counter++) | ||
639 | print_counter_aggr(counter); | ||
640 | } | ||
485 | 641 | ||
486 | fprintf(stderr, "\n"); | 642 | if (!csv_output) { |
487 | fprintf(stderr, " %18.9f seconds time elapsed", | 643 | fprintf(stderr, "\n"); |
488 | avg_stats(&walltime_nsecs_stats)/1e9); | 644 | fprintf(stderr, " %18.9f seconds time elapsed", |
489 | if (run_count > 1) { | 645 | avg_stats(&walltime_nsecs_stats)/1e9); |
490 | fprintf(stderr, " ( +- %7.3f%% )", | 646 | if (run_count > 1) { |
647 | fprintf(stderr, " ( +- %7.3f%% )", | ||
491 | 100*stddev_stats(&walltime_nsecs_stats) / | 648 | 100*stddev_stats(&walltime_nsecs_stats) / |
492 | avg_stats(&walltime_nsecs_stats)); | 649 | avg_stats(&walltime_nsecs_stats)); |
650 | } | ||
651 | fprintf(stderr, "\n\n"); | ||
493 | } | 652 | } |
494 | fprintf(stderr, "\n\n"); | ||
495 | } | 653 | } |
496 | 654 | ||
497 | static volatile int signr = -1; | 655 | static volatile int signr = -1; |
@@ -521,6 +679,13 @@ static const char * const stat_usage[] = { | |||
521 | NULL | 679 | NULL |
522 | }; | 680 | }; |
523 | 681 | ||
682 | static int stat__set_big_num(const struct option *opt __used, | ||
683 | const char *s __used, int unset) | ||
684 | { | ||
685 | big_num_opt = unset ? 0 : 1; | ||
686 | return 0; | ||
687 | } | ||
688 | |||
524 | static const struct option options[] = { | 689 | static const struct option options[] = { |
525 | OPT_CALLBACK('e', "event", NULL, "event", | 690 | OPT_CALLBACK('e', "event", NULL, "event", |
526 | "event selector. use 'perf list' to list available events", | 691 | "event selector. use 'perf list' to list available events", |
@@ -541,10 +706,15 @@ static const struct option options[] = { | |||
541 | "repeat command and print average + stddev (max: 100)"), | 706 | "repeat command and print average + stddev (max: 100)"), |
542 | OPT_BOOLEAN('n', "null", &null_run, | 707 | OPT_BOOLEAN('n', "null", &null_run, |
543 | "null run - dont start any counters"), | 708 | "null run - dont start any counters"), |
544 | OPT_BOOLEAN('B', "big-num", &big_num, | 709 | OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, |
545 | "print large numbers with thousands\' separators"), | 710 | "print large numbers with thousands\' separators", |
711 | stat__set_big_num), | ||
546 | OPT_STRING('C', "cpu", &cpu_list, "cpu", | 712 | OPT_STRING('C', "cpu", &cpu_list, "cpu", |
547 | "list of cpus to monitor in system-wide"), | 713 | "list of cpus to monitor in system-wide"), |
714 | OPT_BOOLEAN('A', "no-aggr", &no_aggr, | ||
715 | "disable CPU count aggregation"), | ||
716 | OPT_STRING('x', "field-separator", &csv_sep, "separator", | ||
717 | "print counts with custom separator"), | ||
548 | OPT_END() | 718 | OPT_END() |
549 | }; | 719 | }; |
550 | 720 | ||
@@ -557,11 +727,34 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
557 | 727 | ||
558 | argc = parse_options(argc, argv, options, stat_usage, | 728 | argc = parse_options(argc, argv, options, stat_usage, |
559 | PARSE_OPT_STOP_AT_NON_OPTION); | 729 | PARSE_OPT_STOP_AT_NON_OPTION); |
730 | |||
731 | if (csv_sep) | ||
732 | csv_output = true; | ||
733 | else | ||
734 | csv_sep = DEFAULT_SEPARATOR; | ||
735 | |||
736 | /* | ||
737 | * let the spreadsheet do the pretty-printing | ||
738 | */ | ||
739 | if (csv_output) { | ||
740 | /* User explicitly passed -B? */ | ||
741 | if (big_num_opt == 1) { | ||
742 | fprintf(stderr, "-B option not supported with -x\n"); | ||
743 | usage_with_options(stat_usage, options); | ||
744 | } else /* Nope, so disable big number formatting */ | ||
745 | big_num = false; | ||
746 | } else if (big_num_opt == 0) /* User passed --no-big-num */ | ||
747 | big_num = false; | ||
748 | |||
560 | if (!argc && target_pid == -1 && target_tid == -1) | 749 | if (!argc && target_pid == -1 && target_tid == -1) |
561 | usage_with_options(stat_usage, options); | 750 | usage_with_options(stat_usage, options); |
562 | if (run_count <= 0) | 751 | if (run_count <= 0) |
563 | usage_with_options(stat_usage, options); | 752 | usage_with_options(stat_usage, options); |
564 | 753 | ||
754 | /* no_aggr is for system-wide only */ | ||
755 | if (no_aggr && !system_wide) | ||
756 | usage_with_options(stat_usage, options); | ||
757 | |||
565 | /* Set attrs and nr_counters if no event is selected and !null_run */ | 758 | /* Set attrs and nr_counters if no event is selected and !null_run */ |
566 | if (!null_run && !nr_counters) { | 759 | if (!null_run && !nr_counters) { |
567 | memcpy(attrs, default_attrs, sizeof(default_attrs)); | 760 | memcpy(attrs, default_attrs, sizeof(default_attrs)); |
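Taken together: -x SEP switches perf stat to machine-readable output and rejects an explicit -B (thousands separators would break the downstream parser), --no-big-num turns the now-default separators off in human-readable mode, and -A is accepted only together with system-wide counting, printing one line per CPU instead of the aggregate. Illustrative invocations:

	perf stat -x, -a sleep 1		# separator-delimited fields, aggregated
	perf stat -a -A -e cycles sleep 1	# one row per CPU, no aggregation
	perf stat --no-big-num sleep 1		# plain digits, no separators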
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 9bcc38f0b706..d2fc46103f83 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c | |||
@@ -272,19 +272,22 @@ static int cpus_cstate_state[MAX_CPUS]; | |||
272 | static u64 cpus_pstate_start_times[MAX_CPUS]; | 272 | static u64 cpus_pstate_start_times[MAX_CPUS]; |
273 | static u64 cpus_pstate_state[MAX_CPUS]; | 273 | static u64 cpus_pstate_state[MAX_CPUS]; |
274 | 274 | ||
275 | static int process_comm_event(event_t *event, struct perf_session *session __used) | 275 | static int process_comm_event(event_t *event, struct sample_data *sample __used, |
276 | struct perf_session *session __used) | ||
276 | { | 277 | { |
277 | pid_set_comm(event->comm.tid, event->comm.comm); | 278 | pid_set_comm(event->comm.tid, event->comm.comm); |
278 | return 0; | 279 | return 0; |
279 | } | 280 | } |
280 | 281 | ||
281 | static int process_fork_event(event_t *event, struct perf_session *session __used) | 282 | static int process_fork_event(event_t *event, struct sample_data *sample __used, |
283 | struct perf_session *session __used) | ||
282 | { | 284 | { |
283 | pid_fork(event->fork.pid, event->fork.ppid, event->fork.time); | 285 | pid_fork(event->fork.pid, event->fork.ppid, event->fork.time); |
284 | return 0; | 286 | return 0; |
285 | } | 287 | } |
286 | 288 | ||
287 | static int process_exit_event(event_t *event, struct perf_session *session __used) | 289 | static int process_exit_event(event_t *event, struct sample_data *sample __used, |
290 | struct perf_session *session __used) | ||
288 | { | 291 | { |
289 | pid_exit(event->fork.pid, event->fork.time); | 292 | pid_exit(event->fork.pid, event->fork.time); |
290 | return 0; | 293 | return 0; |
@@ -470,24 +473,21 @@ static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te) | |||
470 | } | 473 | } |
471 | 474 | ||
472 | 475 | ||
473 | static int process_sample_event(event_t *event, struct perf_session *session) | 476 | static int process_sample_event(event_t *event __used, |
477 | struct sample_data *sample, | ||
478 | struct perf_session *session) | ||
474 | { | 479 | { |
475 | struct sample_data data; | ||
476 | struct trace_entry *te; | 480 | struct trace_entry *te; |
477 | 481 | ||
478 | memset(&data, 0, sizeof(data)); | ||
479 | |||
480 | event__parse_sample(event, session->sample_type, &data); | ||
481 | |||
482 | if (session->sample_type & PERF_SAMPLE_TIME) { | 482 | if (session->sample_type & PERF_SAMPLE_TIME) { |
483 | if (!first_time || first_time > data.time) | 483 | if (!first_time || first_time > sample->time) |
484 | first_time = data.time; | 484 | first_time = sample->time; |
485 | if (last_time < data.time) | 485 | if (last_time < sample->time) |
486 | last_time = data.time; | 486 | last_time = sample->time; |
487 | } | 487 | } |
488 | 488 | ||
489 | te = (void *)data.raw_data; | 489 | te = (void *)sample->raw_data; |
490 | if (session->sample_type & PERF_SAMPLE_RAW && data.raw_size > 0) { | 490 | if (session->sample_type & PERF_SAMPLE_RAW && sample->raw_size > 0) { |
491 | char *event_str; | 491 | char *event_str; |
492 | struct power_entry *pe; | 492 | struct power_entry *pe; |
493 | 493 | ||
@@ -499,19 +499,19 @@ static int process_sample_event(event_t *event, struct perf_session *session) | |||
499 | return 0; | 499 | return 0; |
500 | 500 | ||
501 | if (strcmp(event_str, "power:power_start") == 0) | 501 | if (strcmp(event_str, "power:power_start") == 0) |
502 | c_state_start(pe->cpu_id, data.time, pe->value); | 502 | c_state_start(pe->cpu_id, sample->time, pe->value); |
503 | 503 | ||
504 | if (strcmp(event_str, "power:power_end") == 0) | 504 | if (strcmp(event_str, "power:power_end") == 0) |
505 | c_state_end(pe->cpu_id, data.time); | 505 | c_state_end(pe->cpu_id, sample->time); |
506 | 506 | ||
507 | if (strcmp(event_str, "power:power_frequency") == 0) | 507 | if (strcmp(event_str, "power:power_frequency") == 0) |
508 | p_state_change(pe->cpu_id, data.time, pe->value); | 508 | p_state_change(pe->cpu_id, sample->time, pe->value); |
509 | 509 | ||
510 | if (strcmp(event_str, "sched:sched_wakeup") == 0) | 510 | if (strcmp(event_str, "sched:sched_wakeup") == 0) |
511 | sched_wakeup(data.cpu, data.time, data.pid, te); | 511 | sched_wakeup(sample->cpu, sample->time, sample->pid, te); |
512 | 512 | ||
513 | if (strcmp(event_str, "sched:sched_switch") == 0) | 513 | if (strcmp(event_str, "sched:sched_switch") == 0) |
514 | sched_switch(data.cpu, data.time, te); | 514 | sched_switch(sample->cpu, sample->time, te); |
515 | } | 515 | } |
516 | return 0; | 516 | return 0; |
517 | } | 517 | } |
@@ -989,6 +989,9 @@ static int __cmd_record(int argc, const char **argv) | |||
989 | rec_argc = ARRAY_SIZE(record_args) + argc - 1; | 989 | rec_argc = ARRAY_SIZE(record_args) + argc - 1; |
990 | rec_argv = calloc(rec_argc + 1, sizeof(char *)); | 990 | rec_argv = calloc(rec_argc + 1, sizeof(char *)); |
991 | 991 | ||
992 | if (rec_argv == NULL) | ||
993 | return -ENOMEM; | ||
994 | |||
992 | for (i = 0; i < ARRAY_SIZE(record_args); i++) | 995 | for (i = 0; i < ARRAY_SIZE(record_args); i++) |
993 | rec_argv[i] = strdup(record_args[i]); | 996 | rec_argv[i] = strdup(record_args[i]); |
994 | 997 | ||
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index dd625808c2a5..0515ce9d3d3e 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c | |||
@@ -977,12 +977,12 @@ static int symbol_filter(struct map *map, struct symbol *sym) | |||
977 | } | 977 | } |
978 | 978 | ||
979 | static void event__process_sample(const event_t *self, | 979 | static void event__process_sample(const event_t *self, |
980 | struct perf_session *session, int counter) | 980 | struct sample_data *sample, |
981 | struct perf_session *session, int counter) | ||
981 | { | 982 | { |
982 | u64 ip = self->ip.ip; | 983 | u64 ip = self->ip.ip; |
983 | struct sym_entry *syme; | 984 | struct sym_entry *syme; |
984 | struct addr_location al; | 985 | struct addr_location al; |
985 | struct sample_data data; | ||
986 | struct machine *machine; | 986 | struct machine *machine; |
987 | u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; | 987 | u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; |
988 | 988 | ||
@@ -1025,7 +1025,7 @@ static void event__process_sample(const event_t *self, | |||
1025 | if (self->header.misc & PERF_RECORD_MISC_EXACT_IP) | 1025 | if (self->header.misc & PERF_RECORD_MISC_EXACT_IP) |
1026 | exact_samples++; | 1026 | exact_samples++; |
1027 | 1027 | ||
1028 | if (event__preprocess_sample(self, session, &al, &data, | 1028 | if (event__preprocess_sample(self, session, &al, sample, |
1029 | symbol_filter) < 0 || | 1029 | symbol_filter) < 0 || |
1030 | al.filtered) | 1030 | al.filtered) |
1031 | return; | 1031 | return; |
@@ -1105,6 +1105,7 @@ static void perf_session__mmap_read_counter(struct perf_session *self, | |||
1105 | unsigned int head = mmap_read_head(md); | 1105 | unsigned int head = mmap_read_head(md); |
1106 | unsigned int old = md->prev; | 1106 | unsigned int old = md->prev; |
1107 | unsigned char *data = md->base + page_size; | 1107 | unsigned char *data = md->base + page_size; |
1108 | struct sample_data sample; | ||
1108 | int diff; | 1109 | int diff; |
1109 | 1110 | ||
1110 | /* | 1111 | /* |
@@ -1152,10 +1153,11 @@ static void perf_session__mmap_read_counter(struct perf_session *self, | |||
1152 | event = &event_copy; | 1153 | event = &event_copy; |
1153 | } | 1154 | } |
1154 | 1155 | ||
1156 | event__parse_sample(event, self, &sample); | ||
1155 | if (event->header.type == PERF_RECORD_SAMPLE) | 1157 | if (event->header.type == PERF_RECORD_SAMPLE) |
1156 | event__process_sample(event, self, md->counter); | 1158 | event__process_sample(event, &sample, self, md->counter); |
1157 | else | 1159 | else |
1158 | event__process(event, self); | 1160 | event__process(event, &sample, self); |
1159 | old += size; | 1161 | old += size; |
1160 | } | 1162 | } |
1161 | 1163 | ||
@@ -1214,7 +1216,9 @@ try_again: | |||
1214 | int err = errno; | 1216 | int err = errno; |
1215 | 1217 | ||
1216 | if (err == EPERM || err == EACCES) | 1218 | if (err == EPERM || err == EACCES) |
1217 | die("No permission - are you root?\n"); | 1219 | die("Permission error - are you root?\n" |
1220 | "\t Consider tweaking" | ||
1221 | " /proc/sys/kernel/perf_event_paranoid.\n"); | ||
1218 | /* | 1222 | /* |
1219 | * If it's cycles then fall back to hrtimer | 1223 | * If it's cycles then fall back to hrtimer |
1220 | * based cpu-clock-tick sw counter, which | 1224 | * based cpu-clock-tick sw counter, which |
@@ -1231,7 +1235,7 @@ try_again: | |||
1231 | goto try_again; | 1235 | goto try_again; |
1232 | } | 1236 | } |
1233 | printf("\n"); | 1237 | printf("\n"); |
1234 | error("perfcounter syscall returned with %d (%s)\n", | 1238 | error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n", |
1235 | fd[i][counter][thread_index], strerror(err)); | 1239 | fd[i][counter][thread_index], strerror(err)); |
1236 | die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); | 1240 | die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); |
1237 | exit(-1); | 1241 | exit(-1); |
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 921245b28583..c7798c7f24ed 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h | |||
@@ -27,7 +27,7 @@ extern int cmd_report(int argc, const char **argv, const char *prefix); | |||
27 | extern int cmd_stat(int argc, const char **argv, const char *prefix); | 27 | extern int cmd_stat(int argc, const char **argv, const char *prefix); |
28 | extern int cmd_timechart(int argc, const char **argv, const char *prefix); | 28 | extern int cmd_timechart(int argc, const char **argv, const char *prefix); |
29 | extern int cmd_top(int argc, const char **argv, const char *prefix); | 29 | extern int cmd_top(int argc, const char **argv, const char *prefix); |
30 | extern int cmd_trace(int argc, const char **argv, const char *prefix); | 30 | extern int cmd_script(int argc, const char **argv, const char *prefix); |
31 | extern int cmd_version(int argc, const char **argv, const char *prefix); | 31 | extern int cmd_version(int argc, const char **argv, const char *prefix); |
32 | extern int cmd_probe(int argc, const char **argv, const char *prefix); | 32 | extern int cmd_probe(int argc, const char **argv, const char *prefix); |
33 | extern int cmd_kmem(int argc, const char **argv, const char *prefix); | 33 | extern int cmd_kmem(int argc, const char **argv, const char *prefix); |
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index 949d77fc0b97..16b5088cf8f4 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt | |||
@@ -16,7 +16,7 @@ perf-report mainporcelain common | |||
16 | perf-stat mainporcelain common | 16 | perf-stat mainporcelain common |
17 | perf-timechart mainporcelain common | 17 | perf-timechart mainporcelain common |
18 | perf-top mainporcelain common | 18 | perf-top mainporcelain common |
19 | perf-trace mainporcelain common | 19 | perf-script mainporcelain common |
20 | perf-probe mainporcelain common | 20 | perf-probe mainporcelain common |
21 | perf-kmem mainporcelain common | 21 | perf-kmem mainporcelain common |
22 | perf-lock mainporcelain common | 22 | perf-lock mainporcelain common |
diff --git a/tools/perf/feature-tests.mak b/tools/perf/feature-tests.mak index b253db634f04..b041ca67a2cb 100644 --- a/tools/perf/feature-tests.mak +++ b/tools/perf/feature-tests.mak | |||
@@ -9,8 +9,8 @@ endef | |||
9 | ifndef NO_DWARF | 9 | ifndef NO_DWARF |
10 | define SOURCE_DWARF | 10 | define SOURCE_DWARF |
11 | #include <dwarf.h> | 11 | #include <dwarf.h> |
12 | #include <libdw.h> | 12 | #include <elfutils/libdw.h> |
13 | #include <version.h> | 13 | #include <elfutils/version.h> |
14 | #ifndef _ELFUTILS_PREREQ | 14 | #ifndef _ELFUTILS_PREREQ |
15 | #error | 15 | #error |
16 | #endif | 16 | #endif |
diff --git a/tools/perf/perf.c b/tools/perf/perf.c index cdd6c03f1e14..595d0f4a7103 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c | |||
@@ -323,7 +323,7 @@ static void handle_internal_command(int argc, const char **argv) | |||
323 | { "top", cmd_top, 0 }, | 323 | { "top", cmd_top, 0 }, |
324 | { "annotate", cmd_annotate, 0 }, | 324 | { "annotate", cmd_annotate, 0 }, |
325 | { "version", cmd_version, 0 }, | 325 | { "version", cmd_version, 0 }, |
326 | { "trace", cmd_trace, 0 }, | 326 | { "script", cmd_script, 0 }, |
327 | { "sched", cmd_sched, 0 }, | 327 | { "sched", cmd_sched, 0 }, |
328 | { "probe", cmd_probe, 0 }, | 328 | { "probe", cmd_probe, 0 }, |
329 | { "kmem", cmd_kmem, 0 }, | 329 | { "kmem", cmd_kmem, 0 }, |
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Context.c b/tools/perf/scripts/python/Perf-Trace-Util/Context.c index 957085dd5d8d..315067b8f552 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/Context.c +++ b/tools/perf/scripts/python/Perf-Trace-Util/Context.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Context.c. Python interfaces for perf trace. | 2 | * Context.c. Python interfaces for perf script. |
3 | * | 3 | * |
4 | * Copyright (C) 2010 Tom Zanussi <tzanussi@gmail.com> | 4 | * Copyright (C) 2010 Tom Zanussi <tzanussi@gmail.com> |
5 | * | 5 | * |
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index e437edb72417..deffb8c96071 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c | |||
@@ -14,7 +14,9 @@ | |||
14 | #include <linux/kernel.h> | 14 | #include <linux/kernel.h> |
15 | #include "debug.h" | 15 | #include "debug.h" |
16 | 16 | ||
17 | static int build_id__mark_dso_hit(event_t *event, struct perf_session *session) | 17 | static int build_id__mark_dso_hit(event_t *event, |
18 | struct sample_data *sample __used, | ||
19 | struct perf_session *session) | ||
18 | { | 20 | { |
19 | struct addr_location al; | 21 | struct addr_location al; |
20 | u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; | 22 | u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; |
@@ -35,7 +37,8 @@ static int build_id__mark_dso_hit(event_t *event, struct perf_session *session) | |||
35 | return 0; | 37 | return 0; |
36 | } | 38 | } |
37 | 39 | ||
38 | static int event__exit_del_thread(event_t *self, struct perf_session *session) | 40 | static int event__exit_del_thread(event_t *self, struct sample_data *sample __used, |
41 | struct perf_session *session) | ||
39 | { | 42 | { |
40 | struct thread *thread = perf_session__findnew(session, self->fork.tid); | 43 | struct thread *thread = perf_session__findnew(session, self->fork.tid); |
41 | 44 | ||
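
The two hunks above show the mechanical change repeated throughout this series: every event handler gains a struct sample_data * argument so the parsed sample travels with the raw event, even when the handler ignores it. A minimal sketch of a handler under the new signature (__used is perf's annotation for deliberately unused parameters):

    static int my_event_handler(event_t *event,
                                struct sample_data *sample __used,
                                struct perf_session *session)
    {
        /* type-specific payloads live in the event_t union,
         * e.g. event->fork.tid for FORK/EXIT records */
        return 0;
    }
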
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index c8d81b00089d..01bbe8ecec3f 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c | |||
@@ -46,20 +46,16 @@ int dump_printf(const char *fmt, ...) | |||
46 | return ret; | 46 | return ret; |
47 | } | 47 | } |
48 | 48 | ||
49 | static int dump_printf_color(const char *fmt, const char *color, ...) | 49 | #ifdef NO_NEWT_SUPPORT |
50 | void ui__warning(const char *format, ...) | ||
50 | { | 51 | { |
51 | va_list args; | 52 | va_list args; |
52 | int ret = 0; | ||
53 | 53 | ||
54 | if (dump_trace) { | 54 | va_start(args, format); |
55 | va_start(args, color); | 55 | vfprintf(stderr, format, args); |
56 | ret = color_vfprintf(stdout, color, fmt, args); | 56 | va_end(args); |
57 | va_end(args); | ||
58 | } | ||
59 | |||
60 | return ret; | ||
61 | } | 57 | } |
62 | 58 | #endif | |
63 | 59 | ||
64 | void trace_event(event_t *event) | 60 | void trace_event(event_t *event) |
65 | { | 61 | { |
@@ -70,29 +66,29 @@ void trace_event(event_t *event) | |||
70 | if (!dump_trace) | 66 | if (!dump_trace) |
71 | return; | 67 | return; |
72 | 68 | ||
73 | dump_printf("."); | 69 | printf("."); |
74 | dump_printf_color("\n. ... raw event: size %d bytes\n", color, | 70 | color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n", |
75 | event->header.size); | 71 | event->header.size); |
76 | 72 | ||
77 | for (i = 0; i < event->header.size; i++) { | 73 | for (i = 0; i < event->header.size; i++) { |
78 | if ((i & 15) == 0) { | 74 | if ((i & 15) == 0) { |
79 | dump_printf("."); | 75 | printf("."); |
80 | dump_printf_color(" %04x: ", color, i); | 76 | color_fprintf(stdout, color, " %04x: ", i); |
81 | } | 77 | } |
82 | 78 | ||
83 | dump_printf_color(" %02x", color, raw_event[i]); | 79 | color_fprintf(stdout, color, " %02x", raw_event[i]); |
84 | 80 | ||
85 | if (((i & 15) == 15) || i == event->header.size-1) { | 81 | if (((i & 15) == 15) || i == event->header.size-1) { |
86 | dump_printf_color(" ", color); | 82 | color_fprintf(stdout, color, " "); |
87 | for (j = 0; j < 15-(i & 15); j++) | 83 | for (j = 0; j < 15-(i & 15); j++) |
88 | dump_printf_color(" ", color); | 84 | color_fprintf(stdout, color, " "); |
89 | for (j = i & ~15; j <= i; j++) { | 85 | for (j = i & ~15; j <= i; j++) { |
90 | dump_printf_color("%c", color, | 86 | color_fprintf(stdout, color, "%c", |
91 | isprint(raw_event[j]) ? | 87 | isprint(raw_event[j]) ? |
92 | raw_event[j] : '.'); | 88 | raw_event[j] : '.'); |
93 | } | 89 | } |
94 | dump_printf_color("\n", color); | 90 | color_fprintf(stdout, color, "\n"); |
95 | } | 91 | } |
96 | } | 92 | } |
97 | dump_printf(".\n"); | 93 | printf(".\n"); |
98 | } | 94 | } |
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 7b514082bbaf..ca35fd66b5df 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h | |||
@@ -35,4 +35,6 @@ int ui_helpline__show_help(const char *format, va_list ap); | |||
35 | #include "ui/progress.h" | 35 | #include "ui/progress.h" |
36 | #endif | 36 | #endif |
37 | 37 | ||
38 | void ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2))); | ||
39 | |||
38 | #endif /* __PERF_DEBUG_H */ | 40 | #endif /* __PERF_DEBUG_H */ |
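
The new ui__warning() declaration carries GCC's printf-format attribute (format string in position 1, checked arguments from position 2), so callers get the same compile-time type checking as printf itself. A short usage sketch with an illustrative message:

    ui__warning("failed to process sample: error %d (%s)\n",
                err, strerror(err));            /* arguments type-checked */
    /* ui__warning("lost %d events\n", "many") now warns at compile time */
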
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index dab9e754a281..183aedd4db83 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c | |||
@@ -7,7 +7,7 @@ | |||
7 | #include "strlist.h" | 7 | #include "strlist.h" |
8 | #include "thread.h" | 8 | #include "thread.h" |
9 | 9 | ||
10 | const char *event__name[] = { | 10 | static const char *event__name[] = { |
11 | [0] = "TOTAL", | 11 | [0] = "TOTAL", |
12 | [PERF_RECORD_MMAP] = "MMAP", | 12 | [PERF_RECORD_MMAP] = "MMAP", |
13 | [PERF_RECORD_LOST] = "LOST", | 13 | [PERF_RECORD_LOST] = "LOST", |
@@ -22,13 +22,31 @@ const char *event__name[] = { | |||
22 | [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", | 22 | [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", |
23 | [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", | 23 | [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", |
24 | [PERF_RECORD_HEADER_BUILD_ID] = "BUILD_ID", | 24 | [PERF_RECORD_HEADER_BUILD_ID] = "BUILD_ID", |
25 | [PERF_RECORD_FINISHED_ROUND] = "FINISHED_ROUND", | ||
25 | }; | 26 | }; |
26 | 27 | ||
27 | static pid_t event__synthesize_comm(pid_t pid, int full, | 28 | const char *event__get_event_name(unsigned int id) |
29 | { | ||
30 | if (id >= ARRAY_SIZE(event__name)) | ||
31 | return "INVALID"; | ||
32 | if (!event__name[id]) | ||
33 | return "UNKNOWN"; | ||
34 | return event__name[id]; | ||
35 | } | ||
36 | |||
37 | static struct sample_data synth_sample = { | ||
38 | .pid = -1, | ||
39 | .tid = -1, | ||
40 | .time = -1, | ||
41 | .stream_id = -1, | ||
42 | .cpu = -1, | ||
43 | .period = 1, | ||
44 | }; | ||
45 | |||
46 | static pid_t event__synthesize_comm(event_t *event, pid_t pid, int full, | ||
28 | event__handler_t process, | 47 | event__handler_t process, |
29 | struct perf_session *session) | 48 | struct perf_session *session) |
30 | { | 49 | { |
31 | event_t ev; | ||
32 | char filename[PATH_MAX]; | 50 | char filename[PATH_MAX]; |
33 | char bf[BUFSIZ]; | 51 | char bf[BUFSIZ]; |
34 | FILE *fp; | 52 | FILE *fp; |
@@ -49,34 +67,39 @@ out_race: | |||
49 | return 0; | 67 | return 0; |
50 | } | 68 | } |
51 | 69 | ||
52 | memset(&ev.comm, 0, sizeof(ev.comm)); | 70 | memset(&event->comm, 0, sizeof(event->comm)); |
53 | while (!ev.comm.comm[0] || !ev.comm.pid) { | 71 | |
54 | if (fgets(bf, sizeof(bf), fp) == NULL) | 72 | while (!event->comm.comm[0] || !event->comm.pid) { |
55 | goto out_failure; | 73 | if (fgets(bf, sizeof(bf), fp) == NULL) { |
74 | pr_warning("couldn't get COMM and pgid, malformed %s\n", filename); | ||
75 | goto out; | ||
76 | } | ||
56 | 77 | ||
57 | if (memcmp(bf, "Name:", 5) == 0) { | 78 | if (memcmp(bf, "Name:", 5) == 0) { |
58 | char *name = bf + 5; | 79 | char *name = bf + 5; |
59 | while (*name && isspace(*name)) | 80 | while (*name && isspace(*name)) |
60 | ++name; | 81 | ++name; |
61 | size = strlen(name) - 1; | 82 | size = strlen(name) - 1; |
62 | memcpy(ev.comm.comm, name, size++); | 83 | memcpy(event->comm.comm, name, size++); |
63 | } else if (memcmp(bf, "Tgid:", 5) == 0) { | 84 | } else if (memcmp(bf, "Tgid:", 5) == 0) { |
64 | char *tgids = bf + 5; | 85 | char *tgids = bf + 5; |
65 | while (*tgids && isspace(*tgids)) | 86 | while (*tgids && isspace(*tgids)) |
66 | ++tgids; | 87 | ++tgids; |
67 | tgid = ev.comm.pid = atoi(tgids); | 88 | tgid = event->comm.pid = atoi(tgids); |
68 | } | 89 | } |
69 | } | 90 | } |
70 | 91 | ||
71 | ev.comm.header.type = PERF_RECORD_COMM; | 92 | event->comm.header.type = PERF_RECORD_COMM; |
72 | size = ALIGN(size, sizeof(u64)); | 93 | size = ALIGN(size, sizeof(u64)); |
73 | ev.comm.header.size = sizeof(ev.comm) - (sizeof(ev.comm.comm) - size); | 94 | memset(event->comm.comm + size, 0, session->id_hdr_size); |
74 | 95 | event->comm.header.size = (sizeof(event->comm) - | |
96 | (sizeof(event->comm.comm) - size) + | ||
97 | session->id_hdr_size); | ||
75 | if (!full) { | 98 | if (!full) { |
76 | ev.comm.tid = pid; | 99 | event->comm.tid = pid; |
77 | 100 | ||
78 | process(&ev, session); | 101 | process(event, &synth_sample, session); |
79 | goto out_fclose; | 102 | goto out; |
80 | } | 103 | } |
81 | 104 | ||
82 | snprintf(filename, sizeof(filename), "/proc/%d/task", pid); | 105 | snprintf(filename, sizeof(filename), "/proc/%d/task", pid); |
@@ -91,22 +114,19 @@ out_race: | |||
91 | if (*end) | 114 | if (*end) |
92 | continue; | 115 | continue; |
93 | 116 | ||
94 | ev.comm.tid = pid; | 117 | event->comm.tid = pid; |
95 | 118 | ||
96 | process(&ev, session); | 119 | process(event, &synth_sample, session); |
97 | } | 120 | } |
98 | closedir(tasks); | ||
99 | 121 | ||
100 | out_fclose: | 122 | closedir(tasks); |
123 | out: | ||
101 | fclose(fp); | 124 | fclose(fp); |
102 | return tgid; | ||
103 | 125 | ||
104 | out_failure: | 126 | return tgid; |
105 | pr_warning("couldn't get COMM and pgid, malformed %s\n", filename); | ||
106 | return -1; | ||
107 | } | 127 | } |
108 | 128 | ||
109 | static int event__synthesize_mmap_events(pid_t pid, pid_t tgid, | 129 | static int event__synthesize_mmap_events(event_t *event, pid_t pid, pid_t tgid, |
110 | event__handler_t process, | 130 | event__handler_t process, |
111 | struct perf_session *session) | 131 | struct perf_session *session) |
112 | { | 132 | { |
@@ -124,29 +144,25 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid, | |||
124 | return -1; | 144 | return -1; |
125 | } | 145 | } |
126 | 146 | ||
147 | event->header.type = PERF_RECORD_MMAP; | ||
148 | /* | ||
149 | * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c | ||
150 | */ | ||
151 | event->header.misc = PERF_RECORD_MISC_USER; | ||
152 | |||
127 | while (1) { | 153 | while (1) { |
128 | char bf[BUFSIZ], *pbf = bf; | 154 | char bf[BUFSIZ], *pbf = bf; |
129 | event_t ev = { | ||
130 | .header = { | ||
131 | .type = PERF_RECORD_MMAP, | ||
132 | /* | ||
133 | * Just like the kernel, see __perf_event_mmap | ||
134 | * in kernel/perf_event.c | ||
135 | */ | ||
136 | .misc = PERF_RECORD_MISC_USER, | ||
137 | }, | ||
138 | }; | ||
139 | int n; | 155 | int n; |
140 | size_t size; | 156 | size_t size; |
141 | if (fgets(bf, sizeof(bf), fp) == NULL) | 157 | if (fgets(bf, sizeof(bf), fp) == NULL) |
142 | break; | 158 | break; |
143 | 159 | ||
144 | /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ | 160 | /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ |
145 | n = hex2u64(pbf, &ev.mmap.start); | 161 | n = hex2u64(pbf, &event->mmap.start); |
146 | if (n < 0) | 162 | if (n < 0) |
147 | continue; | 163 | continue; |
148 | pbf += n + 1; | 164 | pbf += n + 1; |
149 | n = hex2u64(pbf, &ev.mmap.len); | 165 | n = hex2u64(pbf, &event->mmap.len); |
150 | if (n < 0) | 166 | if (n < 0) |
151 | continue; | 167 | continue; |
152 | pbf += n + 3; | 168 | pbf += n + 3; |
@@ -161,19 +177,21 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid, | |||
161 | continue; | 177 | continue; |
162 | 178 | ||
163 | pbf += 3; | 179 | pbf += 3; |
164 | n = hex2u64(pbf, &ev.mmap.pgoff); | 180 | n = hex2u64(pbf, &event->mmap.pgoff); |
165 | 181 | ||
166 | size = strlen(execname); | 182 | size = strlen(execname); |
167 | execname[size - 1] = '\0'; /* Remove \n */ | 183 | execname[size - 1] = '\0'; /* Remove \n */ |
168 | memcpy(ev.mmap.filename, execname, size); | 184 | memcpy(event->mmap.filename, execname, size); |
169 | size = ALIGN(size, sizeof(u64)); | 185 | size = ALIGN(size, sizeof(u64)); |
170 | ev.mmap.len -= ev.mmap.start; | 186 | event->mmap.len -= event->mmap.start; |
171 | ev.mmap.header.size = (sizeof(ev.mmap) - | 187 | event->mmap.header.size = (sizeof(event->mmap) - |
172 | (sizeof(ev.mmap.filename) - size)); | 188 | (sizeof(event->mmap.filename) - size)); |
173 | ev.mmap.pid = tgid; | 189 | memset(event->mmap.filename + size, 0, session->id_hdr_size); |
174 | ev.mmap.tid = pid; | 190 | event->mmap.header.size += session->id_hdr_size; |
175 | 191 | event->mmap.pid = tgid; | |
176 | process(&ev, session); | 192 | event->mmap.tid = pid; |
193 | |||
194 | process(event, &synth_sample, session); | ||
177 | } | 195 | } |
178 | } | 196 | } |
179 | 197 | ||
@@ -187,20 +205,27 @@ int event__synthesize_modules(event__handler_t process, | |||
187 | { | 205 | { |
188 | struct rb_node *nd; | 206 | struct rb_node *nd; |
189 | struct map_groups *kmaps = &machine->kmaps; | 207 | struct map_groups *kmaps = &machine->kmaps; |
190 | u16 misc; | 208 | event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size); |
209 | |||
210 | if (event == NULL) { | ||
211 | pr_debug("Not enough memory synthesizing mmap event " | ||
212 | "for kernel modules\n"); | ||
213 | return -1; | ||
214 | } | ||
215 | |||
216 | event->header.type = PERF_RECORD_MMAP; | ||
191 | 217 | ||
192 | /* | 218 | /* |
193 | * kernel uses 0 for user space maps, see kernel/perf_event.c | 219 | * kernel uses 0 for user space maps, see kernel/perf_event.c |
194 | * __perf_event_mmap | 220 | * __perf_event_mmap |
195 | */ | 221 | */ |
196 | if (machine__is_host(machine)) | 222 | if (machine__is_host(machine)) |
197 | misc = PERF_RECORD_MISC_KERNEL; | 223 | event->header.misc = PERF_RECORD_MISC_KERNEL; |
198 | else | 224 | else |
199 | misc = PERF_RECORD_MISC_GUEST_KERNEL; | 225 | event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; |
200 | 226 | ||
201 | for (nd = rb_first(&kmaps->maps[MAP__FUNCTION]); | 227 | for (nd = rb_first(&kmaps->maps[MAP__FUNCTION]); |
202 | nd; nd = rb_next(nd)) { | 228 | nd; nd = rb_next(nd)) { |
203 | event_t ev; | ||
204 | size_t size; | 229 | size_t size; |
205 | struct map *pos = rb_entry(nd, struct map, rb_node); | 230 | struct map *pos = rb_entry(nd, struct map, rb_node); |
206 | 231 | ||
@@ -208,39 +233,78 @@ int event__synthesize_modules(event__handler_t process, | |||
208 | continue; | 233 | continue; |
209 | 234 | ||
210 | size = ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); | 235 | size = ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); |
211 | memset(&ev, 0, sizeof(ev)); | 236 | event->mmap.header.type = PERF_RECORD_MMAP; |
212 | ev.mmap.header.misc = misc; | 237 | event->mmap.header.size = (sizeof(event->mmap) - |
213 | ev.mmap.header.type = PERF_RECORD_MMAP; | 238 | (sizeof(event->mmap.filename) - size)); |
214 | ev.mmap.header.size = (sizeof(ev.mmap) - | 239 | memset(event->mmap.filename + size, 0, session->id_hdr_size); |
215 | (sizeof(ev.mmap.filename) - size)); | 240 | event->mmap.header.size += session->id_hdr_size; |
216 | ev.mmap.start = pos->start; | 241 | event->mmap.start = pos->start; |
217 | ev.mmap.len = pos->end - pos->start; | 242 | event->mmap.len = pos->end - pos->start; |
218 | ev.mmap.pid = machine->pid; | 243 | event->mmap.pid = machine->pid; |
219 | 244 | ||
220 | memcpy(ev.mmap.filename, pos->dso->long_name, | 245 | memcpy(event->mmap.filename, pos->dso->long_name, |
221 | pos->dso->long_name_len + 1); | 246 | pos->dso->long_name_len + 1); |
222 | process(&ev, session); | 247 | process(event, &synth_sample, session); |
223 | } | 248 | } |
224 | 249 | ||
250 | free(event); | ||
225 | return 0; | 251 | return 0; |
226 | } | 252 | } |
227 | 253 | ||
228 | int event__synthesize_thread(pid_t pid, event__handler_t process, | 254 | static int __event__synthesize_thread(event_t *comm_event, event_t *mmap_event, |
229 | struct perf_session *session) | 255 | pid_t pid, event__handler_t process, |
256 | struct perf_session *session) | ||
230 | { | 257 | { |
231 | pid_t tgid = event__synthesize_comm(pid, 1, process, session); | 258 | pid_t tgid = event__synthesize_comm(comm_event, pid, 1, process, |
259 | session); | ||
232 | if (tgid == -1) | 260 | if (tgid == -1) |
233 | return -1; | 261 | return -1; |
234 | return event__synthesize_mmap_events(pid, tgid, process, session); | 262 | return event__synthesize_mmap_events(mmap_event, pid, tgid, |
263 | process, session); | ||
264 | } | ||
265 | |||
266 | int event__synthesize_thread(pid_t pid, event__handler_t process, | ||
267 | struct perf_session *session) | ||
268 | { | ||
269 | event_t *comm_event, *mmap_event; | ||
270 | int err = -1; | ||
271 | |||
272 | comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size); | ||
273 | if (comm_event == NULL) | ||
274 | goto out; | ||
275 | |||
276 | mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size); | ||
277 | if (mmap_event == NULL) | ||
278 | goto out_free_comm; | ||
279 | |||
280 | err = __event__synthesize_thread(comm_event, mmap_event, pid, | ||
281 | process, session); | ||
282 | free(mmap_event); | ||
283 | out_free_comm: | ||
284 | free(comm_event); | ||
285 | out: | ||
286 | return err; | ||
235 | } | 287 | } |
236 | 288 | ||
237 | void event__synthesize_threads(event__handler_t process, | 289 | int event__synthesize_threads(event__handler_t process, |
238 | struct perf_session *session) | 290 | struct perf_session *session) |
239 | { | 291 | { |
240 | DIR *proc; | 292 | DIR *proc; |
241 | struct dirent dirent, *next; | 293 | struct dirent dirent, *next; |
294 | event_t *comm_event, *mmap_event; | ||
295 | int err = -1; | ||
296 | |||
297 | comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size); | ||
298 | if (comm_event == NULL) | ||
299 | goto out; | ||
300 | |||
301 | mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size); | ||
302 | if (mmap_event == NULL) | ||
303 | goto out_free_comm; | ||
242 | 304 | ||
243 | proc = opendir("/proc"); | 305 | proc = opendir("/proc"); |
306 | if (proc == NULL) | ||
307 | goto out_free_mmap; | ||
244 | 308 | ||
245 | while (!readdir_r(proc, &dirent, &next) && next) { | 309 | while (!readdir_r(proc, &dirent, &next) && next) { |
246 | char *end; | 310 | char *end; |
@@ -249,10 +313,18 @@ void event__synthesize_threads(event__handler_t process, | |||
249 | if (*end) /* only interested in proper numerical dirents */ | 313 | if (*end) /* only interested in proper numerical dirents */ |
250 | continue; | 314 | continue; |
251 | 315 | ||
252 | event__synthesize_thread(pid, process, session); | 316 | __event__synthesize_thread(comm_event, mmap_event, pid, |
317 | process, session); | ||
253 | } | 318 | } |
254 | 319 | ||
255 | closedir(proc); | 320 | closedir(proc); |
321 | err = 0; | ||
322 | out_free_mmap: | ||
323 | free(mmap_event); | ||
324 | out_free_comm: | ||
325 | free(comm_event); | ||
326 | out: | ||
327 | return err; | ||
256 | } | 328 | } |
257 | 329 | ||
258 | struct process_symbol_args { | 330 | struct process_symbol_args { |
@@ -286,18 +358,20 @@ int event__synthesize_kernel_mmap(event__handler_t process, | |||
286 | char path[PATH_MAX]; | 358 | char path[PATH_MAX]; |
287 | char name_buff[PATH_MAX]; | 359 | char name_buff[PATH_MAX]; |
288 | struct map *map; | 360 | struct map *map; |
289 | 361 | int err; | |
290 | event_t ev = { | ||
291 | .header = { | ||
292 | .type = PERF_RECORD_MMAP, | ||
293 | }, | ||
294 | }; | ||
295 | /* | 362 | /* |
296 | * We should get this from /sys/kernel/sections/.text, but till that is | 363 | * We should get this from /sys/kernel/sections/.text, but till that is |
297 | * available use this, and after it is use this as a fallback for older | 364 | * available use this, and after it is use this as a fallback for older |
298 | * kernels. | 365 | * kernels. |
299 | */ | 366 | */ |
300 | struct process_symbol_args args = { .name = symbol_name, }; | 367 | struct process_symbol_args args = { .name = symbol_name, }; |
368 | event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size); | ||
369 | |||
370 | if (event == NULL) { | ||
371 | pr_debug("Not enough memory synthesizing mmap event " | ||
372 | "for kernel modules\n"); | ||
373 | return -1; | ||
374 | } | ||
301 | 375 | ||
302 | mmap_name = machine__mmap_name(machine, name_buff, sizeof(name_buff)); | 376 | mmap_name = machine__mmap_name(machine, name_buff, sizeof(name_buff)); |
303 | if (machine__is_host(machine)) { | 377 | if (machine__is_host(machine)) { |
@@ -305,10 +379,10 @@ int event__synthesize_kernel_mmap(event__handler_t process, | |||
305 | * kernel uses PERF_RECORD_MISC_USER for user space maps, | 379 | * kernel uses PERF_RECORD_MISC_USER for user space maps, |
306 | * see kernel/perf_event.c __perf_event_mmap | 380 | * see kernel/perf_event.c __perf_event_mmap |
307 | */ | 381 | */ |
308 | ev.header.misc = PERF_RECORD_MISC_KERNEL; | 382 | event->header.misc = PERF_RECORD_MISC_KERNEL; |
309 | filename = "/proc/kallsyms"; | 383 | filename = "/proc/kallsyms"; |
310 | } else { | 384 | } else { |
311 | ev.header.misc = PERF_RECORD_MISC_GUEST_KERNEL; | 385 | event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; |
312 | if (machine__is_default_guest(machine)) | 386 | if (machine__is_default_guest(machine)) |
313 | filename = (char *) symbol_conf.default_guest_kallsyms; | 387 | filename = (char *) symbol_conf.default_guest_kallsyms; |
314 | else { | 388 | else { |
@@ -321,17 +395,21 @@ int event__synthesize_kernel_mmap(event__handler_t process, | |||
321 | return -ENOENT; | 395 | return -ENOENT; |
322 | 396 | ||
323 | map = machine->vmlinux_maps[MAP__FUNCTION]; | 397 | map = machine->vmlinux_maps[MAP__FUNCTION]; |
324 | size = snprintf(ev.mmap.filename, sizeof(ev.mmap.filename), | 398 | size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), |
325 | "%s%s", mmap_name, symbol_name) + 1; | 399 | "%s%s", mmap_name, symbol_name) + 1; |
326 | size = ALIGN(size, sizeof(u64)); | 400 | size = ALIGN(size, sizeof(u64)); |
327 | ev.mmap.header.size = (sizeof(ev.mmap) - | 401 | event->mmap.header.type = PERF_RECORD_MMAP; |
328 | (sizeof(ev.mmap.filename) - size)); | 402 | event->mmap.header.size = (sizeof(event->mmap) - |
329 | ev.mmap.pgoff = args.start; | 403 | (sizeof(event->mmap.filename) - size) + session->id_hdr_size); |
330 | ev.mmap.start = map->start; | 404 | event->mmap.pgoff = args.start; |
331 | ev.mmap.len = map->end - ev.mmap.start; | 405 | event->mmap.start = map->start; |
332 | ev.mmap.pid = machine->pid; | 406 | event->mmap.len = map->end - event->mmap.start; |
333 | 407 | event->mmap.pid = machine->pid; | |
334 | return process(&ev, session); | 408 | |
409 | err = process(event, &synth_sample, session); | ||
410 | free(event); | ||
411 | |||
412 | return err; | ||
335 | } | 413 | } |
336 | 414 | ||
337 | static void thread__comm_adjust(struct thread *self, struct hists *hists) | 415 | static void thread__comm_adjust(struct thread *self, struct hists *hists) |
@@ -361,7 +439,8 @@ static int thread__set_comm_adjust(struct thread *self, const char *comm, | |||
361 | return 0; | 439 | return 0; |
362 | } | 440 | } |
363 | 441 | ||
364 | int event__process_comm(event_t *self, struct perf_session *session) | 442 | int event__process_comm(event_t *self, struct sample_data *sample __used, |
443 | struct perf_session *session) | ||
365 | { | 444 | { |
366 | struct thread *thread = perf_session__findnew(session, self->comm.tid); | 445 | struct thread *thread = perf_session__findnew(session, self->comm.tid); |
367 | 446 | ||
@@ -376,7 +455,8 @@ int event__process_comm(event_t *self, struct perf_session *session) | |||
376 | return 0; | 455 | return 0; |
377 | } | 456 | } |
378 | 457 | ||
379 | int event__process_lost(event_t *self, struct perf_session *session) | 458 | int event__process_lost(event_t *self, struct sample_data *sample __used, |
459 | struct perf_session *session) | ||
380 | { | 460 | { |
381 | dump_printf(": id:%Ld: lost:%Ld\n", self->lost.id, self->lost.lost); | 461 | dump_printf(": id:%Ld: lost:%Ld\n", self->lost.id, self->lost.lost); |
382 | session->hists.stats.total_lost += self->lost.lost; | 462 | session->hists.stats.total_lost += self->lost.lost; |
@@ -392,7 +472,7 @@ static void event_set_kernel_mmap_len(struct map **maps, event_t *self) | |||
392 | * a zero sized synthesized MMAP event for the kernel. | 472 | * a zero sized synthesized MMAP event for the kernel. |
393 | */ | 473 | */ |
394 | if (maps[MAP__FUNCTION]->end == 0) | 474 | if (maps[MAP__FUNCTION]->end == 0) |
395 | maps[MAP__FUNCTION]->end = ~0UL; | 475 | maps[MAP__FUNCTION]->end = ~0ULL; |
396 | } | 476 | } |
397 | 477 | ||
398 | static int event__process_kernel_mmap(event_t *self, | 478 | static int event__process_kernel_mmap(event_t *self, |
@@ -485,7 +565,8 @@ out_problem: | |||
485 | return -1; | 565 | return -1; |
486 | } | 566 | } |
487 | 567 | ||
488 | int event__process_mmap(event_t *self, struct perf_session *session) | 568 | int event__process_mmap(event_t *self, struct sample_data *sample __used, |
569 | struct perf_session *session) | ||
489 | { | 570 | { |
490 | struct machine *machine; | 571 | struct machine *machine; |
491 | struct thread *thread; | 572 | struct thread *thread; |
@@ -526,7 +607,8 @@ out_problem: | |||
526 | return 0; | 607 | return 0; |
527 | } | 608 | } |
528 | 609 | ||
529 | int event__process_task(event_t *self, struct perf_session *session) | 610 | int event__process_task(event_t *self, struct sample_data *sample __used, |
611 | struct perf_session *session) | ||
530 | { | 612 | { |
531 | struct thread *thread = perf_session__findnew(session, self->fork.tid); | 613 | struct thread *thread = perf_session__findnew(session, self->fork.tid); |
532 | struct thread *parent = perf_session__findnew(session, self->fork.ptid); | 614 | struct thread *parent = perf_session__findnew(session, self->fork.ptid); |
@@ -548,18 +630,19 @@ int event__process_task(event_t *self, struct perf_session *session) | |||
548 | return 0; | 630 | return 0; |
549 | } | 631 | } |
550 | 632 | ||
551 | int event__process(event_t *event, struct perf_session *session) | 633 | int event__process(event_t *event, struct sample_data *sample, |
634 | struct perf_session *session) | ||
552 | { | 635 | { |
553 | switch (event->header.type) { | 636 | switch (event->header.type) { |
554 | case PERF_RECORD_COMM: | 637 | case PERF_RECORD_COMM: |
555 | event__process_comm(event, session); | 638 | event__process_comm(event, sample, session); |
556 | break; | 639 | break; |
557 | case PERF_RECORD_MMAP: | 640 | case PERF_RECORD_MMAP: |
558 | event__process_mmap(event, session); | 641 | event__process_mmap(event, sample, session); |
559 | break; | 642 | break; |
560 | case PERF_RECORD_FORK: | 643 | case PERF_RECORD_FORK: |
561 | case PERF_RECORD_EXIT: | 644 | case PERF_RECORD_EXIT: |
562 | event__process_task(event, session); | 645 | event__process_task(event, sample, session); |
563 | break; | 646 | break; |
564 | default: | 647 | default: |
565 | break; | 648 | break; |
@@ -674,32 +757,8 @@ int event__preprocess_sample(const event_t *self, struct perf_session *session, | |||
674 | symbol_filter_t filter) | 757 | symbol_filter_t filter) |
675 | { | 758 | { |
676 | u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; | 759 | u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; |
677 | struct thread *thread; | 760 | struct thread *thread = perf_session__findnew(session, self->ip.pid); |
678 | |||
679 | event__parse_sample(self, session->sample_type, data); | ||
680 | |||
681 | dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld cpu:%d\n", | ||
682 | self->header.misc, data->pid, data->tid, data->ip, | ||
683 | data->period, data->cpu); | ||
684 | |||
685 | if (session->sample_type & PERF_SAMPLE_CALLCHAIN) { | ||
686 | unsigned int i; | ||
687 | |||
688 | dump_printf("... chain: nr:%Lu\n", data->callchain->nr); | ||
689 | 761 | ||
690 | if (!ip_callchain__valid(data->callchain, self)) { | ||
691 | pr_debug("call-chain problem with event, " | ||
692 | "skipping it.\n"); | ||
693 | goto out_filtered; | ||
694 | } | ||
695 | |||
696 | if (dump_trace) { | ||
697 | for (i = 0; i < data->callchain->nr; i++) | ||
698 | dump_printf("..... %2d: %016Lx\n", | ||
699 | i, data->callchain->ips[i]); | ||
700 | } | ||
701 | } | ||
702 | thread = perf_session__findnew(session, self->ip.pid); | ||
703 | if (thread == NULL) | 762 | if (thread == NULL) |
704 | return -1; | 763 | return -1; |
705 | 764 | ||
@@ -766,9 +825,65 @@ out_filtered: | |||
766 | return 0; | 825 | return 0; |
767 | } | 826 | } |
768 | 827 | ||
769 | int event__parse_sample(const event_t *event, u64 type, struct sample_data *data) | 828 | static int event__parse_id_sample(const event_t *event, |
829 | struct perf_session *session, | ||
830 | struct sample_data *sample) | ||
770 | { | 831 | { |
771 | const u64 *array = event->sample.array; | 832 | const u64 *array; |
833 | u64 type; | ||
834 | |||
835 | sample->cpu = sample->pid = sample->tid = -1; | ||
836 | sample->stream_id = sample->id = sample->time = -1ULL; | ||
837 | |||
838 | if (!session->sample_id_all) | ||
839 | return 0; | ||
840 | |||
841 | array = event->sample.array; | ||
842 | array += ((event->header.size - | ||
843 | sizeof(event->header)) / sizeof(u64)) - 1; | ||
844 | type = session->sample_type; | ||
845 | |||
846 | if (type & PERF_SAMPLE_CPU) { | ||
847 | u32 *p = (u32 *)array; | ||
848 | sample->cpu = *p; | ||
849 | array--; | ||
850 | } | ||
851 | |||
852 | if (type & PERF_SAMPLE_STREAM_ID) { | ||
853 | sample->stream_id = *array; | ||
854 | array--; | ||
855 | } | ||
856 | |||
857 | if (type & PERF_SAMPLE_ID) { | ||
858 | sample->id = *array; | ||
859 | array--; | ||
860 | } | ||
861 | |||
862 | if (type & PERF_SAMPLE_TIME) { | ||
863 | sample->time = *array; | ||
864 | array--; | ||
865 | } | ||
866 | |||
867 | if (type & PERF_SAMPLE_TID) { | ||
868 | u32 *p = (u32 *)array; | ||
869 | sample->pid = p[0]; | ||
870 | sample->tid = p[1]; | ||
871 | } | ||
872 | |||
873 | return 0; | ||
874 | } | ||
875 | |||
876 | int event__parse_sample(const event_t *event, struct perf_session *session, | ||
877 | struct sample_data *data) | ||
878 | { | ||
879 | const u64 *array; | ||
880 | u64 type; | ||
881 | |||
882 | if (event->header.type != PERF_RECORD_SAMPLE) | ||
883 | return event__parse_id_sample(event, session, data); | ||
884 | |||
885 | array = event->sample.array; | ||
886 | type = session->sample_type; | ||
772 | 887 | ||
773 | if (type & PERF_SAMPLE_IP) { | 888 | if (type & PERF_SAMPLE_IP) { |
774 | data->ip = event->ip.ip; | 889 | data->ip = event->ip.ip; |
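
event__parse_id_sample() above is the consumer side of the kernel's sample_id_all feature: non-SAMPLE events grow a trailer laid out like a sample's id fields, so the parser starts at the event's last u64 and walks backwards. A sketch of the layout for sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_CPU, with the field order the parse code above implies:

    /* on-disk event with sample_id_all trailer:
     *   [ header | payload | pid,tid (u32+u32) | time (u64) | cpu,res (u32+u32) ]
     * parse order, decrementing from the final u64:
     *   cpu  <- low u32 of the last u64
     *   time <- the u64 before it
     *   pid  <- p[0], tid <- p[1] of the u64 before that
     */
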
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 8e790dae7026..2b7e91902f10 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h | |||
@@ -85,6 +85,7 @@ struct build_id_event { | |||
85 | }; | 85 | }; |
86 | 86 | ||
87 | enum perf_user_event_type { /* above any possible kernel type */ | 87 | enum perf_user_event_type { /* above any possible kernel type */ |
88 | PERF_RECORD_USER_TYPE_START = 64, | ||
88 | PERF_RECORD_HEADER_ATTR = 64, | 89 | PERF_RECORD_HEADER_ATTR = 64, |
89 | PERF_RECORD_HEADER_EVENT_TYPE = 65, | 90 | PERF_RECORD_HEADER_EVENT_TYPE = 65, |
90 | PERF_RECORD_HEADER_TRACING_DATA = 66, | 91 | PERF_RECORD_HEADER_TRACING_DATA = 66, |
@@ -135,12 +136,15 @@ void event__print_totals(void); | |||
135 | 136 | ||
136 | struct perf_session; | 137 | struct perf_session; |
137 | 138 | ||
138 | typedef int (*event__handler_t)(event_t *event, struct perf_session *session); | 139 | typedef int (*event__handler_synth_t)(event_t *event, |
140 | struct perf_session *session); | ||
141 | typedef int (*event__handler_t)(event_t *event, struct sample_data *sample, | ||
142 | struct perf_session *session); | ||
139 | 143 | ||
140 | int event__synthesize_thread(pid_t pid, event__handler_t process, | 144 | int event__synthesize_thread(pid_t pid, event__handler_t process, |
141 | struct perf_session *session); | 145 | struct perf_session *session); |
142 | void event__synthesize_threads(event__handler_t process, | 146 | int event__synthesize_threads(event__handler_t process, |
143 | struct perf_session *session); | 147 | struct perf_session *session); |
144 | int event__synthesize_kernel_mmap(event__handler_t process, | 148 | int event__synthesize_kernel_mmap(event__handler_t process, |
145 | struct perf_session *session, | 149 | struct perf_session *session, |
146 | struct machine *machine, | 150 | struct machine *machine, |
@@ -150,18 +154,24 @@ int event__synthesize_modules(event__handler_t process, | |||
150 | struct perf_session *session, | 154 | struct perf_session *session, |
151 | struct machine *machine); | 155 | struct machine *machine); |
152 | 156 | ||
153 | int event__process_comm(event_t *self, struct perf_session *session); | 157 | int event__process_comm(event_t *self, struct sample_data *sample, |
154 | int event__process_lost(event_t *self, struct perf_session *session); | 158 | struct perf_session *session); |
155 | int event__process_mmap(event_t *self, struct perf_session *session); | 159 | int event__process_lost(event_t *self, struct sample_data *sample, |
156 | int event__process_task(event_t *self, struct perf_session *session); | 160 | struct perf_session *session); |
157 | int event__process(event_t *event, struct perf_session *session); | 161 | int event__process_mmap(event_t *self, struct sample_data *sample, |
162 | struct perf_session *session); | ||
163 | int event__process_task(event_t *self, struct sample_data *sample, | ||
164 | struct perf_session *session); | ||
165 | int event__process(event_t *event, struct sample_data *sample, | ||
166 | struct perf_session *session); | ||
158 | 167 | ||
159 | struct addr_location; | 168 | struct addr_location; |
160 | int event__preprocess_sample(const event_t *self, struct perf_session *session, | 169 | int event__preprocess_sample(const event_t *self, struct perf_session *session, |
161 | struct addr_location *al, struct sample_data *data, | 170 | struct addr_location *al, struct sample_data *data, |
162 | symbol_filter_t filter); | 171 | symbol_filter_t filter); |
163 | int event__parse_sample(const event_t *event, u64 type, struct sample_data *data); | 172 | int event__parse_sample(const event_t *event, struct perf_session *session, |
173 | struct sample_data *sample); | ||
164 | 174 | ||
165 | extern const char *event__name[]; | 175 | const char *event__get_event_name(unsigned int id); |
166 | 176 | ||
167 | #endif /* __PERF_RECORD_H */ | 177 | #endif /* __PERF_RECORD_H */ |
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 64a85bafde63..16a16021eaa6 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c | |||
@@ -152,6 +152,11 @@ void perf_header__set_feat(struct perf_header *self, int feat) | |||
152 | set_bit(feat, self->adds_features); | 152 | set_bit(feat, self->adds_features); |
153 | } | 153 | } |
154 | 154 | ||
155 | void perf_header__clear_feat(struct perf_header *self, int feat) | ||
156 | { | ||
157 | clear_bit(feat, self->adds_features); | ||
158 | } | ||
159 | |||
155 | bool perf_header__has_feat(const struct perf_header *self, int feat) | 160 | bool perf_header__has_feat(const struct perf_header *self, int feat) |
156 | { | 161 | { |
157 | return test_bit(feat, self->adds_features); | 162 | return test_bit(feat, self->adds_features); |
@@ -431,8 +436,10 @@ static int perf_header__adds_write(struct perf_header *self, int fd) | |||
431 | int idx = 0, err; | 436 | int idx = 0, err; |
432 | 437 | ||
433 | session = container_of(self, struct perf_session, header); | 438 | session = container_of(self, struct perf_session, header); |
434 | if (perf_session__read_build_ids(session, true)) | 439 | |
435 | perf_header__set_feat(self, HEADER_BUILD_ID); | 440 | if (perf_header__has_feat(self, HEADER_BUILD_ID) && |
| 441 | !perf_session__read_build_ids(session, true)) |
442 | perf_header__clear_feat(self, HEADER_BUILD_ID); | ||
436 | 443 | ||
437 | nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); | 444 | nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); |
438 | if (!nr_sections) | 445 | if (!nr_sections) |
@@ -939,6 +946,24 @@ u64 perf_header__sample_type(struct perf_header *header) | |||
939 | return type; | 946 | return type; |
940 | } | 947 | } |
941 | 948 | ||
949 | bool perf_header__sample_id_all(const struct perf_header *header) | ||
950 | { | ||
951 | bool value = false, first = true; | ||
952 | int i; | ||
953 | |||
954 | for (i = 0; i < header->attrs; i++) { | ||
955 | struct perf_header_attr *attr = header->attr[i]; | ||
956 | |||
957 | if (first) { | ||
958 | value = attr->attr.sample_id_all; | ||
959 | first = false; | ||
960 | } else if (value != attr->attr.sample_id_all) | ||
961 | die("non matching sample_id_all"); | ||
962 | } | ||
963 | |||
964 | return value; | ||
965 | } | ||
966 | |||
942 | struct perf_event_attr * | 967 | struct perf_event_attr * |
943 | perf_header__find_attr(u64 id, struct perf_header *header) | 968 | perf_header__find_attr(u64 id, struct perf_header *header) |
944 | { | 969 | { |
@@ -985,21 +1010,23 @@ int event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id, | |||
985 | 1010 | ||
986 | ev = malloc(size); | 1011 | ev = malloc(size); |
987 | 1012 | ||
1013 | if (ev == NULL) | ||
1014 | return -ENOMEM; | ||
1015 | |||
988 | ev->attr.attr = *attr; | 1016 | ev->attr.attr = *attr; |
989 | memcpy(ev->attr.id, id, ids * sizeof(u64)); | 1017 | memcpy(ev->attr.id, id, ids * sizeof(u64)); |
990 | 1018 | ||
991 | ev->attr.header.type = PERF_RECORD_HEADER_ATTR; | 1019 | ev->attr.header.type = PERF_RECORD_HEADER_ATTR; |
992 | ev->attr.header.size = size; | 1020 | ev->attr.header.size = size; |
993 | 1021 | ||
994 | err = process(ev, session); | 1022 | err = process(ev, NULL, session); |
995 | 1023 | ||
996 | free(ev); | 1024 | free(ev); |
997 | 1025 | ||
998 | return err; | 1026 | return err; |
999 | } | 1027 | } |
1000 | 1028 | ||
1001 | int event__synthesize_attrs(struct perf_header *self, | 1029 | int event__synthesize_attrs(struct perf_header *self, event__handler_t process, |
1002 | event__handler_t process, | ||
1003 | struct perf_session *session) | 1030 | struct perf_session *session) |
1004 | { | 1031 | { |
1005 | struct perf_header_attr *attr; | 1032 | struct perf_header_attr *attr; |
@@ -1069,7 +1096,7 @@ int event__synthesize_event_type(u64 event_id, char *name, | |||
1069 | ev.event_type.header.size = sizeof(ev.event_type) - | 1096 | ev.event_type.header.size = sizeof(ev.event_type) - |
1070 | (sizeof(ev.event_type.event_type.name) - size); | 1097 | (sizeof(ev.event_type.event_type.name) - size); |
1071 | 1098 | ||
1072 | err = process(&ev, session); | 1099 | err = process(&ev, NULL, session); |
1073 | 1100 | ||
1074 | return err; | 1101 | return err; |
1075 | } | 1102 | } |
@@ -1124,7 +1151,7 @@ int event__synthesize_tracing_data(int fd, struct perf_event_attr *pattrs, | |||
1124 | ev.tracing_data.header.size = sizeof(ev.tracing_data); | 1151 | ev.tracing_data.header.size = sizeof(ev.tracing_data); |
1125 | ev.tracing_data.size = aligned_size; | 1152 | ev.tracing_data.size = aligned_size; |
1126 | 1153 | ||
1127 | process(&ev, session); | 1154 | process(&ev, NULL, session); |
1128 | 1155 | ||
1129 | err = read_tracing_data(fd, pattrs, nb_events); | 1156 | err = read_tracing_data(fd, pattrs, nb_events); |
1130 | write_padded(fd, NULL, 0, padding); | 1157 | write_padded(fd, NULL, 0, padding); |
@@ -1184,7 +1211,7 @@ int event__synthesize_build_id(struct dso *pos, u16 misc, | |||
1184 | ev.build_id.header.size = sizeof(ev.build_id) + len; | 1211 | ev.build_id.header.size = sizeof(ev.build_id) + len; |
1185 | memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len); | 1212 | memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len); |
1186 | 1213 | ||
1187 | err = process(&ev, session); | 1214 | err = process(&ev, NULL, session); |
1188 | 1215 | ||
1189 | return err; | 1216 | return err; |
1190 | } | 1217 | } |
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 402ac2454cf8..6335965e1f93 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h | |||
@@ -81,9 +81,11 @@ void perf_header_attr__delete(struct perf_header_attr *self); | |||
81 | int perf_header_attr__add_id(struct perf_header_attr *self, u64 id); | 81 | int perf_header_attr__add_id(struct perf_header_attr *self, u64 id); |
82 | 82 | ||
83 | u64 perf_header__sample_type(struct perf_header *header); | 83 | u64 perf_header__sample_type(struct perf_header *header); |
84 | bool perf_header__sample_id_all(const struct perf_header *header); | ||
84 | struct perf_event_attr * | 85 | struct perf_event_attr * |
85 | perf_header__find_attr(u64 id, struct perf_header *header); | 86 | perf_header__find_attr(u64 id, struct perf_header *header); |
86 | void perf_header__set_feat(struct perf_header *self, int feat); | 87 | void perf_header__set_feat(struct perf_header *self, int feat); |
88 | void perf_header__clear_feat(struct perf_header *self, int feat); | ||
87 | bool perf_header__has_feat(const struct perf_header *self, int feat); | 89 | bool perf_header__has_feat(const struct perf_header *self, int feat); |
88 | 90 | ||
89 | int perf_header__process_sections(struct perf_header *self, int fd, | 91 | int perf_header__process_sections(struct perf_header *self, int fd, |
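
With perf_header__clear_feat() added, the feature bitmap has symmetric set/test/clear operations. A minimal usage sketch, mirroring the header.c logic above (session->header is the embedded struct perf_header):

    perf_header__set_feat(&session->header, HEADER_BUILD_ID);
    ...
    if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) &&
        !perf_session__read_build_ids(session, true))
        perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
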
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 2022e8740994..a3b84160c42e 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c | |||
@@ -1168,10 +1168,13 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp) | |||
1168 | size_t ret = 0; | 1168 | size_t ret = 0; |
1169 | 1169 | ||
1170 | for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) { | 1170 | for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) { |
1171 | if (!event__name[i]) | 1171 | const char *name = event__get_event_name(i); |
1172 | |||
1173 | if (!strcmp(name, "UNKNOWN")) | ||
1172 | continue; | 1174 | continue; |
1173 | ret += fprintf(fp, "%10s events: %10d\n", | 1175 | |
1174 | event__name[i], self->stats.nr_events[i]); | 1176 | ret += fprintf(fp, "%16s events: %10d\n", name, |
1177 | self->stats.nr_events[i]); | ||
1175 | } | 1178 | } |
1176 | 1179 | ||
1177 | return ret; | 1180 | return ret; |
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 587d375d3430..ee789856a8c9 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h | |||
@@ -52,8 +52,10 @@ struct sym_priv { | |||
52 | struct events_stats { | 52 | struct events_stats { |
53 | u64 total_period; | 53 | u64 total_period; |
54 | u64 total_lost; | 54 | u64 total_lost; |
55 | u64 total_invalid_chains; | ||
55 | u32 nr_events[PERF_RECORD_HEADER_MAX]; | 56 | u32 nr_events[PERF_RECORD_HEADER_MAX]; |
56 | u32 nr_unknown_events; | 57 | u32 nr_unknown_events; |
58 | u32 nr_invalid_chains; | ||
57 | }; | 59 | }; |
58 | 60 | ||
59 | enum hist_column { | 61 | enum hist_column { |
diff --git a/tools/perf/util/include/asm/cpufeature.h b/tools/perf/util/include/asm/cpufeature.h new file mode 100644 index 000000000000..acffd5e4d1d4 --- /dev/null +++ b/tools/perf/util/include/asm/cpufeature.h | |||
@@ -0,0 +1,9 @@ | |||
1 | |||
2 | #ifndef PERF_CPUFEATURE_H | ||
3 | #define PERF_CPUFEATURE_H | ||
4 | |||
5 | /* cpufeature.h ... dummy header file for including arch/x86/lib/memcpy_64.S */ | ||
6 | |||
7 | #define X86_FEATURE_REP_GOOD 0 | ||
8 | |||
9 | #endif /* PERF_CPUFEATURE_H */ | ||
diff --git a/tools/perf/util/include/asm/dwarf2.h b/tools/perf/util/include/asm/dwarf2.h new file mode 100644 index 000000000000..bb4198e7837a --- /dev/null +++ b/tools/perf/util/include/asm/dwarf2.h | |||
@@ -0,0 +1,11 @@ | |||
1 | |||
2 | #ifndef PERF_DWARF2_H | ||
3 | #define PERF_DWARF2_H | ||
4 | |||
5 | /* dwarf2.h ... dummy header file for including arch/x86/lib/memcpy_64.S */ | ||
6 | |||
7 | #define CFI_STARTPROC | ||
8 | #define CFI_ENDPROC | ||
9 | |||
10 | #endif /* PERF_DWARF2_H */ | ||
11 | |||
diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h index bb4ac2e05385..8be0b968ca0b 100644 --- a/tools/perf/util/include/linux/bitops.h +++ b/tools/perf/util/include/linux/bitops.h | |||
@@ -13,6 +13,11 @@ static inline void set_bit(int nr, unsigned long *addr) | |||
13 | addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG); | 13 | addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG); |
14 | } | 14 | } |
15 | 15 | ||
16 | static inline void clear_bit(int nr, unsigned long *addr) | ||
17 | { | ||
18 | addr[nr / BITS_PER_LONG] &= ~(1UL << (nr % BITS_PER_LONG)); | ||
19 | } | ||
20 | |||
16 | static __always_inline int test_bit(unsigned int nr, const unsigned long *addr) | 21 | static __always_inline int test_bit(unsigned int nr, const unsigned long *addr) |
17 | { | 22 | { |
18 | return ((1UL << (nr % BITS_PER_LONG)) & | 23 | return ((1UL << (nr % BITS_PER_LONG)) & |
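
clear_bit() is the exact inverse of the set_bit() above it, using the same word/bit split. A worked example on a 64-bit build (BITS_PER_LONG == 64; the values in the comments follow directly from the arithmetic):

    unsigned long feats[4] = { 0 };  /* room for 256 feature bits */
    set_bit(70, feats);    /* word 70/64 = 1, mask 1UL << (70 % 64 = 6) */
    clear_bit(70, feats);  /* feats[1] &= ~(1UL << 6), back to all-zero */
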
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h new file mode 100644 index 000000000000..06387cffe125 --- /dev/null +++ b/tools/perf/util/include/linux/linkage.h | |||
@@ -0,0 +1,13 @@ | |||
1 | |||
2 | #ifndef PERF_LINUX_LINKAGE_H_ | ||
3 | #define PERF_LINUX_LINKAGE_H_ | ||
4 | |||
5 | /* linkage.h ... for including arch/x86/lib/memcpy_64.S */ | ||
6 | |||
7 | #define ENTRY(name) \ | ||
8 | .globl name; \ | ||
9 | name: | ||
10 | |||
11 | #define ENDPROC(name) | ||
12 | |||
13 | #endif /* PERF_LINUX_LINKAGE_H_ */ | ||
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 4af5bd59cfd1..c305305a3884 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c | |||
@@ -434,7 +434,7 @@ parse_single_tracepoint_event(char *sys_name, | |||
434 | id = atoll(id_buf); | 434 | id = atoll(id_buf); |
435 | attr->config = id; | 435 | attr->config = id; |
436 | attr->type = PERF_TYPE_TRACEPOINT; | 436 | attr->type = PERF_TYPE_TRACEPOINT; |
437 | *strp = evt_name + evt_length; | 437 | *strp += strlen(sys_name) + evt_length + 1; /* + 1 for the ':' */ |
438 | 438 | ||
439 | attr->sample_type |= PERF_SAMPLE_RAW; | 439 | attr->sample_type |= PERF_SAMPLE_RAW; |
440 | attr->sample_type |= PERF_SAMPLE_TIME; | 440 | attr->sample_type |= PERF_SAMPLE_TIME; |
@@ -495,7 +495,7 @@ static enum event_result parse_tracepoint_event(const char **strp, | |||
495 | struct perf_event_attr *attr) | 495 | struct perf_event_attr *attr) |
496 | { | 496 | { |
497 | const char *evt_name; | 497 | const char *evt_name; |
498 | char *flags; | 498 | char *flags = NULL, *comma_loc; |
499 | char sys_name[MAX_EVENT_LENGTH]; | 499 | char sys_name[MAX_EVENT_LENGTH]; |
500 | unsigned int sys_length, evt_length; | 500 | unsigned int sys_length, evt_length; |
501 | 501 | ||
@@ -514,6 +514,11 @@ static enum event_result parse_tracepoint_event(const char **strp, | |||
514 | sys_name[sys_length] = '\0'; | 514 | sys_name[sys_length] = '\0'; |
515 | evt_name = evt_name + 1; | 515 | evt_name = evt_name + 1; |
516 | 516 | ||
517 | comma_loc = strchr(evt_name, ','); | ||
518 | if (comma_loc) { | ||
519 | /* take the event name up to the comma */ | ||
520 | evt_name = strndup(evt_name, comma_loc - evt_name); | ||
521 | } | ||
517 | flags = strchr(evt_name, ':'); | 522 | flags = strchr(evt_name, ':'); |
518 | if (flags) { | 523 | if (flags) { |
519 | /* split it out: */ | 524 | /* split it out: */ |
@@ -524,9 +529,8 @@ static enum event_result parse_tracepoint_event(const char **strp, | |||
524 | evt_length = strlen(evt_name); | 529 | evt_length = strlen(evt_name); |
525 | if (evt_length >= MAX_EVENT_LENGTH) | 530 | if (evt_length >= MAX_EVENT_LENGTH) |
526 | return EVT_FAILED; | 531 | return EVT_FAILED; |
527 | |||
528 | if (strpbrk(evt_name, "*?")) { | 532 | if (strpbrk(evt_name, "*?")) { |
529 | *strp = evt_name + evt_length; | 533 | *strp += strlen(sys_name) + evt_length; |
530 | return parse_multiple_tracepoint_event(sys_name, evt_name, | 534 | return parse_multiple_tracepoint_event(sys_name, evt_name, |
531 | flags); | 535 | flags); |
532 | } else | 536 | } else |
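
The parsing changes above let a single -e string carry several tracepoints separated by commas. A sketch of how one spec is split under the new code, using a hypothetical event list:

    /* "-e irq:irq_handler_entry,irq:irq_handler_exit"
     *   sys_name  = "irq"                              (text before the ':')
     *   comma_loc = strchr(evt_name, ',')
     *   evt_name  = strndup(..., comma_loc - evt_name) -> "irq_handler_entry"
     *   *strp    += strlen(sys_name) + evt_length + 1  (the '+ 1' is the ':')
     * leaving *strp at the ',' so the caller can parse the next event.
     */
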
diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h index c7d72dce54b2..abc31a1dac1a 100644 --- a/tools/perf/util/parse-options.h +++ b/tools/perf/util/parse-options.h | |||
@@ -119,6 +119,10 @@ struct option { | |||
119 | { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG } | 119 | { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG } |
120 | #define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \ | 120 | #define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \ |
121 | { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d, .flags = PARSE_OPT_LASTARG_DEFAULT } | 121 | { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d, .flags = PARSE_OPT_LASTARG_DEFAULT } |
122 | #define OPT_CALLBACK_DEFAULT_NOOPT(s, l, v, a, h, f, d) \ | ||
123 | { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l),\ | ||
124 | .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d,\ | ||
125 | .flags = PARSE_OPT_LASTARG_DEFAULT | PARSE_OPT_NOARG} | ||
122 | 126 | ||
123 | /* parse_options() will filter out the processed options and leave the | 127 | /* parse_options() will filter out the processed options and leave the |
124 | * non-option arguments in argv[]. | 128 |
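
The new macro combines PARSE_OPT_LASTARG_DEFAULT with PARSE_OPT_NOARG: a callback option that accepts no argument on the command line but still carries a .defval the callback can fall back on. A hypothetical usage sketch (option name, variable, and callback are made up for illustration):

    OPT_CALLBACK_DEFAULT_NOOPT('d', "demo", &demo_setting, NULL,
                               "enable demo mode", parse_demo_cb, "on"),
    /* `perf foo -d` takes no argument on the command line;
     * parse_demo_cb() can read the "on" default via .defval. */
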
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index bba69d455699..beaefc3c1223 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h | |||
@@ -34,9 +34,9 @@ extern int find_available_vars_at(int fd, struct perf_probe_event *pev, | |||
34 | bool externs); | 34 | bool externs); |
35 | 35 | ||
36 | #include <dwarf.h> | 36 | #include <dwarf.h> |
37 | #include <libdw.h> | 37 | #include <elfutils/libdw.h> |
38 | #include <libdwfl.h> | 38 | #include <elfutils/libdwfl.h> |
39 | #include <version.h> | 39 | #include <elfutils/version.h> |
40 | 40 | ||
41 | struct probe_finder { | 41 | struct probe_finder { |
42 | struct perf_probe_event *pev; /* Target probe event */ | 42 | struct perf_probe_event *pev; /* Target probe event */ |
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index b059dc50cc2d..93680818e244 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * trace-event-perl. Feed perf trace events to an embedded Perl interpreter. | 2 | * trace-event-perl. Feed perf script events to an embedded Perl interpreter. |
3 | * | 3 | * |
4 | * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> | 4 | * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> |
5 | * | 5 | * |
@@ -411,8 +411,8 @@ static int perl_generate_script(const char *outfile) | |||
411 | return -1; | 411 | return -1; |
412 | } | 412 | } |
413 | 413 | ||
414 | fprintf(ofp, "# perf trace event handlers, " | 414 | fprintf(ofp, "# perf script event handlers, " |
415 | "generated by perf trace -g perl\n"); | 415 | "generated by perf script -g perl\n"); |
416 | 416 | ||
417 | fprintf(ofp, "# Licensed under the terms of the GNU GPL" | 417 | fprintf(ofp, "# Licensed under the terms of the GNU GPL" |
418 | " License version 2\n\n"); | 418 | " License version 2\n\n"); |
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 33a632523743..c6d99334bdfa 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c | |||
@@ -442,8 +442,8 @@ static int python_generate_script(const char *outfile) | |||
442 | fprintf(stderr, "couldn't open %s\n", fname); | 442 | fprintf(stderr, "couldn't open %s\n", fname); |
443 | return -1; | 443 | return -1; |
444 | } | 444 | } |
445 | fprintf(ofp, "# perf trace event handlers, " | 445 | fprintf(ofp, "# perf script event handlers, " |
446 | "generated by perf trace -g python\n"); | 446 | "generated by perf script -g python\n"); |
447 | 447 | ||
448 | fprintf(ofp, "# Licensed under the terms of the GNU GPL" | 448 | fprintf(ofp, "# Licensed under the terms of the GNU GPL" |
449 | " License version 2\n\n"); | 449 | " License version 2\n\n"); |
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index fa9d652c2dc3..b59abf5aba36 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c | |||
@@ -65,9 +65,49 @@ out_close: | |||
65 | return -1; | 65 | return -1; |
66 | } | 66 | } |
67 | 67 | ||
68 | static void perf_session__id_header_size(struct perf_session *session) | ||
69 | { | ||
70 | struct sample_data *data; | ||
71 | u64 sample_type = session->sample_type; | ||
72 | u16 size = 0; | ||
73 | |||
74 | if (!session->sample_id_all) | ||
75 | goto out; | ||
76 | |||
77 | if (sample_type & PERF_SAMPLE_TID) | ||
78 | size += sizeof(data->tid) * 2; | ||
79 | |||
80 | if (sample_type & PERF_SAMPLE_TIME) | ||
81 | size += sizeof(data->time); | ||
82 | |||
83 | if (sample_type & PERF_SAMPLE_ID) | ||
84 | size += sizeof(data->id); | ||
85 | |||
86 | if (sample_type & PERF_SAMPLE_STREAM_ID) | ||
87 | size += sizeof(data->stream_id); | ||
88 | |||
89 | if (sample_type & PERF_SAMPLE_CPU) | ||
90 | size += sizeof(data->cpu) * 2; | ||
91 | out: | ||
92 | session->id_hdr_size = size; | ||
93 | } | ||
94 | |||
95 | void perf_session__set_sample_id_all(struct perf_session *session, bool value) | ||
96 | { | ||
97 | session->sample_id_all = value; | ||
98 | perf_session__id_header_size(session); | ||
99 | } | ||
100 | |||
101 | void perf_session__set_sample_type(struct perf_session *session, u64 type) | ||
102 | { | ||
103 | session->sample_type = type; | ||
104 | } | ||
105 | |||
68 | void perf_session__update_sample_type(struct perf_session *self) | 106 | void perf_session__update_sample_type(struct perf_session *self) |
69 | { | 107 | { |
70 | self->sample_type = perf_header__sample_type(&self->header); | 108 | self->sample_type = perf_header__sample_type(&self->header); |
109 | self->sample_id_all = perf_header__sample_id_all(&self->header); | ||
110 | perf_session__id_header_size(self); | ||
71 | } | 111 | } |
72 | 112 | ||
73 | int perf_session__create_kernel_maps(struct perf_session *self) | 113 | int perf_session__create_kernel_maps(struct perf_session *self) |
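
perf_session__id_header_size() above computes how many trailer bytes each non-sample event gains when sample_id_all is active; it is the same id_hdr_size the synthesizing code in event.c reserves and zero-fills. A worked example for one plausible sample_type:

    /* sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME |
     *               PERF_SAMPLE_ID  | PERF_SAMPLE_CPU
     * size = 2 * sizeof(u32)   pid, tid
     *      +     sizeof(u64)   time
     *      +     sizeof(u64)   id
     *      + 2 * sizeof(u32)   cpu + reserved
     *      = 32 bytes of trailer per event
     */
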
@@ -101,10 +141,20 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc | |||
101 | INIT_LIST_HEAD(&self->dead_threads); | 141 | INIT_LIST_HEAD(&self->dead_threads); |
102 | self->hists_tree = RB_ROOT; | 142 | self->hists_tree = RB_ROOT; |
103 | self->last_match = NULL; | 143 | self->last_match = NULL; |
104 | self->mmap_window = 32; | 144 | /* |
145 | * On 64bit we can mmap the data file in one go. No need for tiny mmap | ||
146 | * slices. On 32bit we use 32MB. | ||
147 | */ | ||
148 | #if BITS_PER_LONG == 64 | ||
149 | self->mmap_window = ULLONG_MAX; | ||
150 | #else | ||
151 | self->mmap_window = 32 * 1024 * 1024ULL; | ||
152 | #endif | ||
105 | self->machines = RB_ROOT; | 153 | self->machines = RB_ROOT; |
106 | self->repipe = repipe; | 154 | self->repipe = repipe; |
107 | INIT_LIST_HEAD(&self->ordered_samples.samples_head); | 155 | INIT_LIST_HEAD(&self->ordered_samples.samples); |
156 | INIT_LIST_HEAD(&self->ordered_samples.sample_cache); | ||
157 | INIT_LIST_HEAD(&self->ordered_samples.to_free); | ||
108 | machine__init(&self->host_machine, "", HOST_KERNEL_ID); | 158 | machine__init(&self->host_machine, "", HOST_KERNEL_ID); |
109 | 159 | ||
110 | if (mode == O_RDONLY) { | 160 | if (mode == O_RDONLY) { |
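
The mmap_window change above sizes the slices used when mapping perf.data for reading: a 64-bit process can afford a single mapping of the whole file, while 32-bit stays with bounded 32MB windows. A sketch of how the reader is expected to clamp it (assumed shape, following the comment in the hunk):

    /* u64 mmap_size = session->mmap_window;
     * if (mmap_size > file_size)
     *         mmap_size = file_size;   // 64-bit: whole file in one go
     */
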
@@ -230,7 +280,15 @@ struct map_symbol *perf_session__resolve_callchain(struct perf_session *self, | |||
230 | return syms; | 280 | return syms; |
231 | } | 281 | } |
232 | 282 | ||
283 | static int process_event_synth_stub(event_t *event __used, | ||
284 | struct perf_session *session __used) | ||
285 | { | ||
286 | dump_printf(": unhandled!\n"); | ||
287 | return 0; | ||
288 | } | ||
289 | |||
233 | static int process_event_stub(event_t *event __used, | 290 | static int process_event_stub(event_t *event __used, |
291 | struct sample_data *sample __used, | ||
234 | struct perf_session *session __used) | 292 | struct perf_session *session __used) |
235 | { | 293 | { |
236 | dump_printf(": unhandled!\n"); | 294 | dump_printf(": unhandled!\n"); |
@@ -262,7 +320,7 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler) | |||
262 | if (handler->exit == NULL) | 320 | if (handler->exit == NULL) |
263 | handler->exit = process_event_stub; | 321 | handler->exit = process_event_stub; |
264 | if (handler->lost == NULL) | 322 | if (handler->lost == NULL) |
265 | handler->lost = process_event_stub; | 323 | handler->lost = event__process_lost; |
266 | if (handler->read == NULL) | 324 | if (handler->read == NULL) |
267 | handler->read = process_event_stub; | 325 | handler->read = process_event_stub; |
268 | if (handler->throttle == NULL) | 326 | if (handler->throttle == NULL) |
@@ -270,13 +328,13 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler) | |||
270 | if (handler->unthrottle == NULL) | 328 | if (handler->unthrottle == NULL) |
271 | handler->unthrottle = process_event_stub; | 329 | handler->unthrottle = process_event_stub; |
272 | if (handler->attr == NULL) | 330 | if (handler->attr == NULL) |
273 | handler->attr = process_event_stub; | 331 | handler->attr = process_event_synth_stub; |
274 | if (handler->event_type == NULL) | 332 | if (handler->event_type == NULL) |
275 | handler->event_type = process_event_stub; | 333 | handler->event_type = process_event_synth_stub; |
276 | if (handler->tracing_data == NULL) | 334 | if (handler->tracing_data == NULL) |
277 | handler->tracing_data = process_event_stub; | 335 | handler->tracing_data = process_event_synth_stub; |
278 | if (handler->build_id == NULL) | 336 | if (handler->build_id == NULL) |
279 | handler->build_id = process_event_stub; | 337 | handler->build_id = process_event_synth_stub; |
280 | if (handler->finished_round == NULL) { | 338 | if (handler->finished_round == NULL) { |
281 | if (handler->ordered_samples) | 339 | if (handler->ordered_samples) |
282 | handler->finished_round = process_finished_round; | 340 | handler->finished_round = process_finished_round; |
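
perf_event_ops__fill_defaults() now installs two stub flavors: kernel-event callbacks, which will receive parsed sample data, get process_event_stub, while synthesized-record callbacks (attr, event_type, tracing_data, build_id) keep the two-argument shape and get process_event_synth_stub. A toy restatement of the pattern, with the event and sample types boiled down to opaque pointers:

    #include <stddef.h>
    #include <stdio.h>

    struct session;                         /* opaque in this sketch */
    typedef int (*op_fn)(void *ev, void *sample, struct session *s);
    typedef int (*synth_fn)(void *ev, struct session *s);

    struct ops {
        op_fn sample, mmap;                 /* kernel events: get a sample */
        synth_fn attr, build_id;            /* synthesized records: do not */
    };

    static int op_stub(void *ev, void *sample, struct session *s)
    {
        (void)ev; (void)sample; (void)s;
        puts(": unhandled!");
        return 0;
    }

    static int synth_stub(void *ev, struct session *s)
    {
        (void)ev; (void)s;
        puts(": unhandled!");
        return 0;
    }

    /* Fill every callback the tool left NULL with the matching stub. */
    void fill_defaults(struct ops *h)
    {
        if (h->sample == NULL)
            h->sample = op_stub;
        if (h->mmap == NULL)
            h->mmap = op_stub;
        if (h->attr == NULL)
            h->attr = synth_stub;
        if (h->build_id == NULL)
            h->build_id = synth_stub;
    }
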
@@ -386,33 +444,61 @@ static event__swap_op event__swap_ops[] = { | |||
386 | 444 | ||
387 | struct sample_queue { | 445 | struct sample_queue { |
388 | u64 timestamp; | 446 | u64 timestamp; |
389 | struct sample_event *event; | 447 | u64 file_offset; |
448 | event_t *event; | ||
390 | struct list_head list; | 449 | struct list_head list; |
391 | }; | 450 | }; |
392 | 451 | ||
452 | static void perf_session_free_sample_buffers(struct perf_session *session) | ||
453 | { | ||
454 | struct ordered_samples *os = &session->ordered_samples; | ||
455 | |||
456 | while (!list_empty(&os->to_free)) { | ||
457 | struct sample_queue *sq; | ||
458 | |||
459 | sq = list_entry(os->to_free.next, struct sample_queue, list); | ||
460 | list_del(&sq->list); | ||
461 | free(sq); | ||
462 | } | ||
463 | } | ||
464 | |||
465 | static int perf_session_deliver_event(struct perf_session *session, | ||
466 | event_t *event, | ||
467 | struct sample_data *sample, | ||
468 | struct perf_event_ops *ops, | ||
469 | u64 file_offset); | ||
470 | |||
393 | static void flush_sample_queue(struct perf_session *s, | 471 | static void flush_sample_queue(struct perf_session *s, |
394 | struct perf_event_ops *ops) | 472 | struct perf_event_ops *ops) |
395 | { | 473 | { |
396 | struct list_head *head = &s->ordered_samples.samples_head; | 474 | struct ordered_samples *os = &s->ordered_samples; |
397 | u64 limit = s->ordered_samples.next_flush; | 475 | struct list_head *head = &os->samples; |
398 | struct sample_queue *tmp, *iter; | 476 | struct sample_queue *tmp, *iter; |
477 | struct sample_data sample; | ||
478 | u64 limit = os->next_flush; | ||
479 | u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL; | ||
399 | 480 | ||
400 | if (!ops->ordered_samples || !limit) | 481 | if (!ops->ordered_samples || !limit) |
401 | return; | 482 | return; |
402 | 483 | ||
403 | list_for_each_entry_safe(iter, tmp, head, list) { | 484 | list_for_each_entry_safe(iter, tmp, head, list) { |
404 | if (iter->timestamp > limit) | 485 | if (iter->timestamp > limit) |
405 | return; | 486 | break; |
406 | |||
407 | if (iter == s->ordered_samples.last_inserted) | ||
408 | s->ordered_samples.last_inserted = NULL; | ||
409 | 487 | ||
410 | ops->sample((event_t *)iter->event, s); | 488 | event__parse_sample(iter->event, s, &sample); |
489 | perf_session_deliver_event(s, iter->event, &sample, ops, | ||
490 | iter->file_offset); | ||
411 | 491 | ||
412 | s->ordered_samples.last_flush = iter->timestamp; | 492 | os->last_flush = iter->timestamp; |
413 | list_del(&iter->list); | 493 | list_del(&iter->list); |
414 | free(iter->event); | 494 | list_add(&iter->list, &os->sample_cache); |
415 | free(iter); | 495 | } |
496 | |||
497 | if (list_empty(head)) { | ||
498 | os->last_sample = NULL; | ||
499 | } else if (last_ts <= limit) { | ||
500 | os->last_sample = | ||
501 | list_entry(head->prev, struct sample_queue, list); | ||
416 | } | 502 | } |
417 | } | 503 | } |
418 | 504 | ||
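
The reworked flush_sample_queue() walks the time-sorted list only up to next_flush, parses and delivers each entry in place, and recycles the node onto sample_cache instead of freeing it; hitting a newer timestamp now breaks out of the loop rather than returning, so last_sample can be fixed up afterwards. The control flow, reduced to a plain singly linked list:

    #include <stdint.h>
    #include <stdio.h>

    struct node {
        uint64_t ts;
        struct node *next;
    };

    /* Deliver every queued node with ts <= limit and recycle it onto the
     * free cache; the list is sorted, so stop at the first newer entry. */
    void flush_up_to(struct node **head, struct node **cache, uint64_t limit)
    {
        while (*head && (*head)->ts <= limit) {
            struct node *n = *head;

            printf("deliver ts=%llu\n", (unsigned long long)n->ts);
            *head = n->next;
            n->next = *cache;           /* recycled, not free()d */
            *cache = n;
        }
    }
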
@@ -465,178 +551,265 @@ static int process_finished_round(event_t *event __used, | |||
465 | return 0; | 551 | return 0; |
466 | } | 552 | } |
467 | 553 | ||
468 | static void __queue_sample_end(struct sample_queue *new, struct list_head *head) | ||
469 | { | ||
470 | struct sample_queue *iter; | ||
471 | |||
472 | list_for_each_entry_reverse(iter, head, list) { | ||
473 | if (iter->timestamp < new->timestamp) { | ||
474 | list_add(&new->list, &iter->list); | ||
475 | return; | ||
476 | } | ||
477 | } | ||
478 | |||
479 | list_add(&new->list, head); | ||
480 | } | ||
481 | |||
482 | static void __queue_sample_before(struct sample_queue *new, | ||
483 | struct sample_queue *iter, | ||
484 | struct list_head *head) | ||
485 | { | ||
486 | list_for_each_entry_continue_reverse(iter, head, list) { | ||
487 | if (iter->timestamp < new->timestamp) { | ||
488 | list_add(&new->list, &iter->list); | ||
489 | return; | ||
490 | } | ||
491 | } | ||
492 | |||
493 | list_add(&new->list, head); | ||
494 | } | ||
495 | |||
496 | static void __queue_sample_after(struct sample_queue *new, | ||
497 | struct sample_queue *iter, | ||
498 | struct list_head *head) | ||
499 | { | ||
500 | list_for_each_entry_continue(iter, head, list) { | ||
501 | if (iter->timestamp > new->timestamp) { | ||
502 | list_add_tail(&new->list, &iter->list); | ||
503 | return; | ||
504 | } | ||
505 | } | ||
506 | list_add_tail(&new->list, head); | ||
507 | } | ||
508 | |||
509 | /* The queue is ordered by time */ | 554 | /* The queue is ordered by time */ |
510 | static void __queue_sample_event(struct sample_queue *new, | 555 | static void __queue_event(struct sample_queue *new, struct perf_session *s) |
511 | struct perf_session *s) | ||
512 | { | 556 | { |
513 | struct sample_queue *last_inserted = s->ordered_samples.last_inserted; | 557 | struct ordered_samples *os = &s->ordered_samples; |
514 | struct list_head *head = &s->ordered_samples.samples_head; | 558 | struct sample_queue *sample = os->last_sample; |
559 | u64 timestamp = new->timestamp; | ||
560 | struct list_head *p; | ||
515 | 561 | ||
562 | os->last_sample = new; | ||
516 | 563 | ||
517 | if (!last_inserted) { | 564 | if (!sample) { |
518 | __queue_sample_end(new, head); | 565 | list_add(&new->list, &os->samples); |
566 | os->max_timestamp = timestamp; | ||
519 | return; | 567 | return; |
520 | } | 568 | } |
521 | 569 | ||
522 | /* | 570 | /* |
523 | * Most of the time the current event has a timestamp | 571 | * last_sample might point to some random place in the list as it's |
524 | * very close to the last event inserted, unless we just switched | 572 | * the last queued event. We expect that the new event is close to |
525 | * to another event buffer. Having a sorting based on a list and | 573 | * this. |
526 | * on the last inserted event that is close to the current one is | ||
527 | * probably more efficient than an rbtree based sorting. | ||
528 | */ | 574 | */ |
529 | if (last_inserted->timestamp >= new->timestamp) | 575 | if (sample->timestamp <= timestamp) { |
530 | __queue_sample_before(new, last_inserted, head); | 576 | while (sample->timestamp <= timestamp) { |
531 | else | 577 | p = sample->list.next; |
532 | __queue_sample_after(new, last_inserted, head); | 578 | if (p == &os->samples) { |
579 | list_add_tail(&new->list, &os->samples); | ||
580 | os->max_timestamp = timestamp; | ||
581 | return; | ||
582 | } | ||
583 | sample = list_entry(p, struct sample_queue, list); | ||
584 | } | ||
585 | list_add_tail(&new->list, &sample->list); | ||
586 | } else { | ||
587 | while (sample->timestamp > timestamp) { | ||
588 | p = sample->list.prev; | ||
589 | if (p == &os->samples) { | ||
590 | list_add(&new->list, &os->samples); | ||
591 | return; | ||
592 | } | ||
593 | sample = list_entry(p, struct sample_queue, list); | ||
594 | } | ||
595 | list_add(&new->list, &sample->list); | ||
596 | } | ||
533 | } | 597 | } |
534 | 598 | ||
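
__queue_event() exploits the fact that successive records are already nearly sorted: it starts at the last queued node and walks forward or backward from there instead of rescanning from the head, replacing the three __queue_sample_* helpers deleted above. An equivalent hint-based insert over an explicit doubly linked list (the kernel's list.h macros replaced by plain pointers):

    #include <stddef.h>
    #include <stdint.h>

    struct qnode {
        uint64_t ts;
        struct qnode *prev, *next;
    };

    struct queue {
        struct qnode *head, *tail;
        struct qnode *last;             /* hint: most recently inserted */
    };

    /* Keep the queue sorted by ts; start scanning at the hint, since the
     * incoming stream is almost sorted and the right spot is usually near. */
    void queue_insert(struct queue *q, struct qnode *new)
    {
        struct qnode *it = q->last;

        q->last = new;
        if (!it) {                      /* empty queue */
            new->prev = new->next = NULL;
            q->head = q->tail = new;
            return;
        }
        if (it->ts <= new->ts) {        /* walk toward the tail */
            while (it->next && it->next->ts <= new->ts)
                it = it->next;
            new->prev = it;
            new->next = it->next;
            if (it->next)
                it->next->prev = new;
            else
                q->tail = new;
            it->next = new;
        } else {                        /* walk toward the head */
            while (it->prev && it->prev->ts > new->ts)
                it = it->prev;
            new->next = it;
            new->prev = it->prev;
            if (it->prev)
                it->prev->next = new;
            else
                q->head = new;
            it->prev = new;
        }
    }
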
535 | static int queue_sample_event(event_t *event, struct sample_data *data, | 599 | #define MAX_SAMPLE_BUFFER (64 * 1024 / sizeof(struct sample_queue)) |
536 | struct perf_session *s) | 600 | |
601 | static int perf_session_queue_event(struct perf_session *s, event_t *event, | ||
602 | struct sample_data *data, u64 file_offset) | ||
537 | { | 603 | { |
604 | struct ordered_samples *os = &s->ordered_samples; | ||
605 | struct list_head *sc = &os->sample_cache; | ||
538 | u64 timestamp = data->time; | 606 | u64 timestamp = data->time; |
539 | struct sample_queue *new; | 607 | struct sample_queue *new; |
540 | 608 | ||
609 | if (!timestamp || timestamp == ~0ULL) | ||
610 | return -ETIME; | ||
541 | 611 | ||
542 | if (timestamp < s->ordered_samples.last_flush) { | 612 | if (timestamp < s->ordered_samples.last_flush) { |
543 | printf("Warning: Timestamp below last timeslice flush\n"); | 613 | printf("Warning: Timestamp below last timeslice flush\n"); |
544 | return -EINVAL; | 614 | return -EINVAL; |
545 | } | 615 | } |
546 | 616 | ||
547 | new = malloc(sizeof(*new)); | 617 | if (!list_empty(sc)) { |
548 | if (!new) | 618 | new = list_entry(sc->next, struct sample_queue, list); |
549 | return -ENOMEM; | 619 | list_del(&new->list); |
620 | } else if (os->sample_buffer) { | ||
621 | new = os->sample_buffer + os->sample_buffer_idx; | ||
622 | if (++os->sample_buffer_idx == MAX_SAMPLE_BUFFER) | ||
623 | os->sample_buffer = NULL; | ||
624 | } else { | ||
625 | os->sample_buffer = malloc(MAX_SAMPLE_BUFFER * sizeof(*new)); | ||
626 | if (!os->sample_buffer) | ||
627 | return -ENOMEM; | ||
628 | list_add(&os->sample_buffer->list, &os->to_free); | ||
629 | os->sample_buffer_idx = 2; | ||
630 | new = os->sample_buffer + 1; | ||
631 | } | ||
550 | 632 | ||
551 | new->timestamp = timestamp; | 633 | new->timestamp = timestamp; |
634 | new->file_offset = file_offset; | ||
635 | new->event = event; | ||
552 | 636 | ||
553 | new->event = malloc(event->header.size); | 637 | __queue_event(new, s); |
554 | if (!new->event) { | ||
555 | free(new); | ||
556 | return -ENOMEM; | ||
557 | } | ||
558 | 638 | ||
559 | memcpy(new->event, event, event->header.size); | 639 | return 0; |
640 | } | ||
560 | 641 | ||
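
perf_session_queue_event() no longer malloc()s a node and copies the event for every record: a node is taken from the sample_cache free list when one is available, otherwise carved out of the current slab of MAX_SAMPLE_BUFFER entries, and a fresh slab is allocated only when that runs out, with whole slabs released later through to_free. The allocation order in miniature (list bookkeeping simplified to a next pointer; the idx = 2 / slab + 1 quirk mirrors the patch, where element 0 anchors the slab on to_free):

    #include <stdlib.h>

    #define PER_SLAB 128                /* stands in for MAX_SAMPLE_BUFFER */

    struct sq {
        struct sq *next;                /* sketch of sample_queue linkage */
    };

    struct alloc {
        struct sq *cache;               /* nodes recycled by the flush */
        struct sq *slab;                /* current bulk allocation */
        int idx;
    };

    struct sq *sq_get(struct alloc *a)
    {
        struct sq *n;

        if (a->cache) {                 /* 1. reuse a flushed node */
            n = a->cache;
            a->cache = n->next;
        } else if (a->slab) {           /* 2. carve from the current slab */
            n = &a->slab[a->idx];
            if (++a->idx == PER_SLAB)
                a->slab = NULL;
        } else {                        /* 3. allocate a fresh slab */
            a->slab = calloc(PER_SLAB, sizeof(*n));
            if (!a->slab)
                return NULL;
            /* element 0 is reserved: in the patch its list node
             * anchors the slab on to_free for bulk release */
            a->idx = 2;
            n = &a->slab[1];
        }
        return n;
    }
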
561 | __queue_sample_event(new, s); | 642 | static void callchain__printf(struct sample_data *sample) |
562 | s->ordered_samples.last_inserted = new; | 643 | { |
644 | unsigned int i; | ||
563 | 645 | ||
564 | if (new->timestamp > s->ordered_samples.max_timestamp) | 646 | printf("... chain: nr:%Lu\n", sample->callchain->nr); |
565 | s->ordered_samples.max_timestamp = new->timestamp; | ||
566 | 647 | ||
567 | return 0; | 648 | for (i = 0; i < sample->callchain->nr; i++) |
649 | printf("..... %2d: %016Lx\n", i, sample->callchain->ips[i]); | ||
568 | } | 650 | } |
569 | 651 | ||
570 | static int perf_session__process_sample(event_t *event, struct perf_session *s, | 652 | static void perf_session__print_tstamp(struct perf_session *session, |
571 | struct perf_event_ops *ops) | 653 | event_t *event, |
654 | struct sample_data *sample) | ||
572 | { | 655 | { |
573 | struct sample_data data; | 656 | if (event->header.type != PERF_RECORD_SAMPLE && |
657 | !session->sample_id_all) { | ||
658 | fputs("-1 -1 ", stdout); | ||
659 | return; | ||
660 | } | ||
574 | 661 | ||
575 | if (!ops->ordered_samples) | 662 | if ((session->sample_type & PERF_SAMPLE_CPU)) |
576 | return ops->sample(event, s); | 663 | printf("%u ", sample->cpu); |
577 | 664 | ||
578 | bzero(&data, sizeof(struct sample_data)); | 665 | if (session->sample_type & PERF_SAMPLE_TIME) |
579 | event__parse_sample(event, s->sample_type, &data); | 666 | printf("%Lu ", sample->time); |
667 | } | ||
580 | 668 | ||
581 | queue_sample_event(event, &data, s); | 669 | static void dump_event(struct perf_session *session, event_t *event, |
670 | u64 file_offset, struct sample_data *sample) | ||
671 | { | ||
672 | if (!dump_trace) | ||
673 | return; | ||
582 | 674 | ||
583 | return 0; | 675 | printf("\n%#Lx [%#x]: event: %d\n", file_offset, event->header.size, |
676 | event->header.type); | ||
677 | |||
678 | trace_event(event); | ||
679 | |||
680 | if (sample) | ||
681 | perf_session__print_tstamp(session, event, sample); | ||
682 | |||
683 | printf("%#Lx [%#x]: PERF_RECORD_%s", file_offset, event->header.size, | ||
684 | event__get_event_name(event->header.type)); | ||
584 | } | 685 | } |
585 | 686 | ||
586 | static int perf_session__process_event(struct perf_session *self, | 687 | static void dump_sample(struct perf_session *session, event_t *event, |
587 | event_t *event, | 688 | struct sample_data *sample) |
588 | struct perf_event_ops *ops, | ||
589 | u64 offset, u64 head) | ||
590 | { | 689 | { |
591 | trace_event(event); | 690 | if (!dump_trace) |
691 | return; | ||
592 | 692 | ||
593 | if (event->header.type < PERF_RECORD_HEADER_MAX) { | 693 | printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc, |
594 | dump_printf("%#Lx [%#x]: PERF_RECORD_%s", | 694 | sample->pid, sample->tid, sample->ip, sample->period); |
595 | offset + head, event->header.size, | ||
596 | event__name[event->header.type]); | ||
597 | hists__inc_nr_events(&self->hists, event->header.type); | ||
598 | } | ||
599 | 695 | ||
600 | if (self->header.needs_swap && event__swap_ops[event->header.type]) | 696 | if (session->sample_type & PERF_SAMPLE_CALLCHAIN) |
601 | event__swap_ops[event->header.type](event); | 697 | callchain__printf(sample); |
698 | } | ||
699 | |||
700 | static int perf_session_deliver_event(struct perf_session *session, | ||
701 | event_t *event, | ||
702 | struct sample_data *sample, | ||
703 | struct perf_event_ops *ops, | ||
704 | u64 file_offset) | ||
705 | { | ||
706 | dump_event(session, event, file_offset, sample); | ||
602 | 707 | ||
603 | switch (event->header.type) { | 708 | switch (event->header.type) { |
604 | case PERF_RECORD_SAMPLE: | 709 | case PERF_RECORD_SAMPLE: |
605 | return perf_session__process_sample(event, self, ops); | 710 | dump_sample(session, event, sample); |
711 | return ops->sample(event, sample, session); | ||
606 | case PERF_RECORD_MMAP: | 712 | case PERF_RECORD_MMAP: |
607 | return ops->mmap(event, self); | 713 | return ops->mmap(event, sample, session); |
608 | case PERF_RECORD_COMM: | 714 | case PERF_RECORD_COMM: |
609 | return ops->comm(event, self); | 715 | return ops->comm(event, sample, session); |
610 | case PERF_RECORD_FORK: | 716 | case PERF_RECORD_FORK: |
611 | return ops->fork(event, self); | 717 | return ops->fork(event, sample, session); |
612 | case PERF_RECORD_EXIT: | 718 | case PERF_RECORD_EXIT: |
613 | return ops->exit(event, self); | 719 | return ops->exit(event, sample, session); |
614 | case PERF_RECORD_LOST: | 720 | case PERF_RECORD_LOST: |
615 | return ops->lost(event, self); | 721 | return ops->lost(event, sample, session); |
616 | case PERF_RECORD_READ: | 722 | case PERF_RECORD_READ: |
617 | return ops->read(event, self); | 723 | return ops->read(event, sample, session); |
618 | case PERF_RECORD_THROTTLE: | 724 | case PERF_RECORD_THROTTLE: |
619 | return ops->throttle(event, self); | 725 | return ops->throttle(event, sample, session); |
620 | case PERF_RECORD_UNTHROTTLE: | 726 | case PERF_RECORD_UNTHROTTLE: |
621 | return ops->unthrottle(event, self); | 727 | return ops->unthrottle(event, sample, session); |
728 | default: | ||
729 | ++session->hists.stats.nr_unknown_events; | ||
730 | return -1; | ||
731 | } | ||
732 | } | ||
733 | |||
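
perf_session_deliver_event() above fans each kernel record out to the matching ops callback, now passing the parsed sample along, and counts anything out of range in nr_unknown_events instead of treating it as fatal. For comparison only, the same shape as a table-driven dispatch; this is an alternative formulation, not what the patch uses:

    enum rec { R_SAMPLE, R_MMAP, R_COMM, R_MAX };

    typedef int (*handler)(void *ev);

    static int on_sample(void *ev) { (void)ev; return 0; }
    static int on_mmap(void *ev)   { (void)ev; return 0; }
    static int on_comm(void *ev)   { (void)ev; return 0; }

    /* Unknown types fall out of range: count them, return an error,
     * but never abort the run. */
    int deliver(enum rec type, void *ev, unsigned *nr_unknown)
    {
        static const handler tbl[R_MAX] = {
            [R_SAMPLE] = on_sample,
            [R_MMAP]   = on_mmap,
            [R_COMM]   = on_comm,
        };

        if ((unsigned)type >= R_MAX || !tbl[type]) {
            ++*nr_unknown;
            return -1;
        }
        return tbl[type](ev);
    }
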
734 | static int perf_session__preprocess_sample(struct perf_session *session, | ||
735 | event_t *event, struct sample_data *sample) | ||
736 | { | ||
737 | if (event->header.type != PERF_RECORD_SAMPLE || | ||
738 | !(session->sample_type & PERF_SAMPLE_CALLCHAIN)) | ||
739 | return 0; | ||
740 | |||
741 | if (!ip_callchain__valid(sample->callchain, event)) { | ||
742 | pr_debug("call-chain problem with event, skipping it.\n"); | ||
743 | ++session->hists.stats.nr_invalid_chains; | ||
744 | session->hists.stats.total_invalid_chains += sample->period; | ||
745 | return -EINVAL; | ||
746 | } | ||
747 | return 0; | ||
748 | } | ||
749 | |||
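
perf_session__preprocess_sample() discards a sample whose callchain cannot be trusted, bumping nr_invalid_chains and the discarded period instead of delivering it. The bounds check behind ip_callchain__valid() amounts to something like the following, reconstructed from its usage here rather than from its definition:

    #include <stdbool.h>
    #include <stdint.h>

    struct ip_callchain {
        uint64_t nr;
        uint64_t ips[];                 /* nr entries, inside the record */
    };

    /* The advertised entry count must keep the chain inside the record
     * that carries it; otherwise nr itself cannot be trusted. */
    bool chain_fits(const struct ip_callchain *chain, const void *event_end)
    {
        return (const void *)&chain->ips[chain->nr] <= event_end;
    }
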
750 | static int perf_session__process_user_event(struct perf_session *session, event_t *event, | ||
751 | struct perf_event_ops *ops, u64 file_offset) | ||
752 | { | ||
753 | dump_event(session, event, file_offset, NULL); | ||
754 | |||
755 | /* These events are processed right away */ | ||
756 | switch (event->header.type) { | ||
622 | case PERF_RECORD_HEADER_ATTR: | 757 | case PERF_RECORD_HEADER_ATTR: |
623 | return ops->attr(event, self); | 758 | return ops->attr(event, session); |
624 | case PERF_RECORD_HEADER_EVENT_TYPE: | 759 | case PERF_RECORD_HEADER_EVENT_TYPE: |
625 | return ops->event_type(event, self); | 760 | return ops->event_type(event, session); |
626 | case PERF_RECORD_HEADER_TRACING_DATA: | 761 | case PERF_RECORD_HEADER_TRACING_DATA: |
627 | /* setup for reading amidst mmap */ | 762 | /* setup for reading amidst mmap */ |
628 | lseek(self->fd, offset + head, SEEK_SET); | 763 | lseek(session->fd, file_offset, SEEK_SET); |
629 | return ops->tracing_data(event, self); | 764 | return ops->tracing_data(event, session); |
630 | case PERF_RECORD_HEADER_BUILD_ID: | 765 | case PERF_RECORD_HEADER_BUILD_ID: |
631 | return ops->build_id(event, self); | 766 | return ops->build_id(event, session); |
632 | case PERF_RECORD_FINISHED_ROUND: | 767 | case PERF_RECORD_FINISHED_ROUND: |
633 | return ops->finished_round(event, self, ops); | 768 | return ops->finished_round(event, session, ops); |
634 | default: | 769 | default: |
635 | ++self->hists.stats.nr_unknown_events; | 770 | return -EINVAL; |
636 | return -1; | ||
637 | } | 771 | } |
638 | } | 772 | } |
639 | 773 | ||
774 | static int perf_session__process_event(struct perf_session *session, | ||
775 | event_t *event, | ||
776 | struct perf_event_ops *ops, | ||
777 | u64 file_offset) | ||
778 | { | ||
779 | struct sample_data sample; | ||
780 | int ret; | ||
781 | |||
782 | if (session->header.needs_swap && event__swap_ops[event->header.type]) | ||
783 | event__swap_ops[event->header.type](event); | ||
784 | |||
785 | if (event->header.type >= PERF_RECORD_HEADER_MAX) | ||
786 | return -EINVAL; | ||
787 | |||
788 | hists__inc_nr_events(&session->hists, event->header.type); | ||
789 | |||
790 | if (event->header.type >= PERF_RECORD_USER_TYPE_START) | ||
791 | return perf_session__process_user_event(session, event, ops, file_offset); | ||
792 | |||
793 | /* | ||
794 | * For all kernel events we get the sample data | ||
795 | */ | ||
796 | event__parse_sample(event, session, &sample); | ||
797 | |||
798 | /* Preprocess sample records - precheck callchains */ | ||
799 | if (perf_session__preprocess_sample(session, event, &sample)) | ||
800 | return 0; | ||
801 | |||
802 | if (ops->ordered_samples) { | ||
803 | ret = perf_session_queue_event(session, event, &sample, | ||
804 | file_offset); | ||
805 | if (ret != -ETIME) | ||
806 | return ret; | ||
807 | } | ||
808 | |||
809 | return perf_session_deliver_event(session, event, &sample, ops, | ||
810 | file_offset); | ||
811 | } | ||
812 | |||
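
The new perf_session__process_event() swaps and validates the header first, routes user-space record types to their immediate handlers, and for kernel events parses the sample once and queues it for ordered delivery, falling back to direct delivery only when queueing returns -ETIME (no usable timestamp) or when ordering was never requested. The decision tree, stripped to its skeleton:

    #include <errno.h>

    /* Queue when ordering is requested; deliver immediately only when the
     * event carries no usable timestamp (-ETIME) or ordering is off. */
    int process(int ordered, int (*queue)(void), int (*deliver)(void))
    {
        if (ordered) {
            int ret = queue();

            if (ret != -ETIME)
                return ret;
        }
        return deliver();
    }
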
640 | void perf_event_header__bswap(struct perf_event_header *self) | 813 | void perf_event_header__bswap(struct perf_event_header *self) |
641 | { | 814 | { |
642 | self->type = bswap_32(self->type); | 815 | self->type = bswap_32(self->type); |
@@ -724,8 +897,7 @@ more: | |||
724 | } | 897 | } |
725 | 898 | ||
726 | if (size == 0 || | 899 | if (size == 0 || |
727 | (skip = perf_session__process_event(self, &event, ops, | 900 | (skip = perf_session__process_event(self, &event, ops, head)) < 0) { |
728 | 0, head)) < 0) { | ||
729 | dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n", | 901 | dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n", |
730 | head, event.header.size, event.header.type); | 902 | head, event.header.size, event.header.type); |
731 | /* | 903 | /* |
@@ -740,9 +912,6 @@ more: | |||
740 | 912 | ||
741 | head += size; | 913 | head += size; |
742 | 914 | ||
743 | dump_printf("\n%#Lx [%#x]: event: %d\n", | ||
744 | head, event.header.size, event.header.type); | ||
745 | |||
746 | if (skip > 0) | 915 | if (skip > 0) |
747 | head += skip; | 916 | head += skip; |
748 | 917 | ||
@@ -751,82 +920,90 @@ more: | |||
751 | done: | 920 | done: |
752 | err = 0; | 921 | err = 0; |
753 | out_err: | 922 | out_err: |
923 | perf_session_free_sample_buffers(self); | ||
754 | return err; | 924 | return err; |
755 | } | 925 | } |
756 | 926 | ||
757 | int __perf_session__process_events(struct perf_session *self, | 927 | int __perf_session__process_events(struct perf_session *session, |
758 | u64 data_offset, u64 data_size, | 928 | u64 data_offset, u64 data_size, |
759 | u64 file_size, struct perf_event_ops *ops) | 929 | u64 file_size, struct perf_event_ops *ops) |
760 | { | 930 | { |
761 | int err, mmap_prot, mmap_flags; | 931 | u64 head, page_offset, file_offset, file_pos, progress_next; |
762 | u64 head, shift; | 932 | int err, mmap_prot, mmap_flags, map_idx = 0; |
763 | u64 offset = 0; | 933 | struct ui_progress *progress; |
764 | size_t page_size; | 934 | size_t page_size, mmap_size; |
935 | char *buf, *mmaps[8]; | ||
765 | event_t *event; | 936 | event_t *event; |
766 | uint32_t size; | 937 | uint32_t size; |
767 | char *buf; | ||
768 | struct ui_progress *progress = ui_progress__new("Processing events...", | ||
769 | self->size); | ||
770 | if (progress == NULL) | ||
771 | return -1; | ||
772 | 938 | ||
773 | perf_event_ops__fill_defaults(ops); | 939 | perf_event_ops__fill_defaults(ops); |
774 | 940 | ||
775 | page_size = sysconf(_SC_PAGESIZE); | 941 | page_size = sysconf(_SC_PAGESIZE); |
776 | 942 | ||
777 | head = data_offset; | 943 | page_offset = page_size * (data_offset / page_size); |
778 | shift = page_size * (head / page_size); | 944 | file_offset = page_offset; |
779 | offset += shift; | 945 | head = data_offset - page_offset; |
780 | head -= shift; | 946 | |
947 | if (data_offset + data_size < file_size) | ||
948 | file_size = data_offset + data_size; | ||
949 | |||
950 | progress_next = file_size / 16; | ||
951 | progress = ui_progress__new("Processing events...", file_size); | ||
952 | if (progress == NULL) | ||
953 | return -1; | ||
954 | |||
955 | mmap_size = session->mmap_window; | ||
956 | if (mmap_size > file_size) | ||
957 | mmap_size = file_size; | ||
958 | |||
959 | memset(mmaps, 0, sizeof(mmaps)); | ||
781 | 960 | ||
782 | mmap_prot = PROT_READ; | 961 | mmap_prot = PROT_READ; |
783 | mmap_flags = MAP_SHARED; | 962 | mmap_flags = MAP_SHARED; |
784 | 963 | ||
785 | if (self->header.needs_swap) { | 964 | if (session->header.needs_swap) { |
786 | mmap_prot |= PROT_WRITE; | 965 | mmap_prot |= PROT_WRITE; |
787 | mmap_flags = MAP_PRIVATE; | 966 | mmap_flags = MAP_PRIVATE; |
788 | } | 967 | } |
789 | remap: | 968 | remap: |
790 | buf = mmap(NULL, page_size * self->mmap_window, mmap_prot, | 969 | buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, session->fd, |
791 | mmap_flags, self->fd, offset); | 970 | file_offset); |
792 | if (buf == MAP_FAILED) { | 971 | if (buf == MAP_FAILED) { |
793 | pr_err("failed to mmap file\n"); | 972 | pr_err("failed to mmap file\n"); |
794 | err = -errno; | 973 | err = -errno; |
795 | goto out_err; | 974 | goto out_err; |
796 | } | 975 | } |
976 | mmaps[map_idx] = buf; | ||
977 | map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1); | ||
978 | file_pos = file_offset + head; | ||
797 | 979 | ||
798 | more: | 980 | more: |
799 | event = (event_t *)(buf + head); | 981 | event = (event_t *)(buf + head); |
800 | ui_progress__update(progress, offset); | ||
801 | 982 | ||
802 | if (self->header.needs_swap) | 983 | if (session->header.needs_swap) |
803 | perf_event_header__bswap(&event->header); | 984 | perf_event_header__bswap(&event->header); |
804 | size = event->header.size; | 985 | size = event->header.size; |
805 | if (size == 0) | 986 | if (size == 0) |
806 | size = 8; | 987 | size = 8; |
807 | 988 | ||
808 | if (head + event->header.size >= page_size * self->mmap_window) { | 989 | if (head + event->header.size >= mmap_size) { |
809 | int munmap_ret; | 990 | if (mmaps[map_idx]) { |
810 | 991 | munmap(mmaps[map_idx], mmap_size); | |
811 | shift = page_size * (head / page_size); | 992 | mmaps[map_idx] = NULL; |
812 | 993 | } | |
813 | munmap_ret = munmap(buf, page_size * self->mmap_window); | ||
814 | assert(munmap_ret == 0); | ||
815 | 994 | ||
816 | offset += shift; | 995 | page_offset = page_size * (head / page_size); |
817 | head -= shift; | 996 | file_offset += page_offset; |
997 | head -= page_offset; | ||
818 | goto remap; | 998 | goto remap; |
819 | } | 999 | } |
820 | 1000 | ||
821 | size = event->header.size; | 1001 | size = event->header.size; |
822 | 1002 | ||
823 | dump_printf("\n%#Lx [%#x]: event: %d\n", | ||
824 | offset + head, event->header.size, event->header.type); | ||
825 | |||
826 | if (size == 0 || | 1003 | if (size == 0 || |
827 | perf_session__process_event(self, event, ops, offset, head) < 0) { | 1004 | perf_session__process_event(session, event, ops, file_pos) < 0) { |
828 | dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n", | 1005 | dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n", |
829 | offset + head, event->header.size, | 1006 | file_offset + head, event->header.size, |
830 | event->header.type); | 1007 | event->header.type); |
831 | /* | 1008 | /* |
832 | * assume we lost track of the stream, check alignment, and | 1009 | * assume we lost track of the stream, check alignment, and |
@@ -839,19 +1016,49 @@ more: | |||
839 | } | 1016 | } |
840 | 1017 | ||
841 | head += size; | 1018 | head += size; |
1019 | file_pos += size; | ||
842 | 1020 | ||
843 | if (offset + head >= data_offset + data_size) | 1021 | if (file_pos >= progress_next) { |
844 | goto done; | 1022 | progress_next += file_size / 16; |
1023 | ui_progress__update(progress, file_pos); | ||
1024 | } | ||
845 | 1025 | ||
846 | if (offset + head < file_size) | 1026 | if (file_pos < file_size) |
847 | goto more; | 1027 | goto more; |
848 | done: | 1028 | |
849 | err = 0; | 1029 | err = 0; |
850 | /* do the final flush for ordered samples */ | 1030 | /* do the final flush for ordered samples */ |
851 | self->ordered_samples.next_flush = ULLONG_MAX; | 1031 | session->ordered_samples.next_flush = ULLONG_MAX; |
852 | flush_sample_queue(self, ops); | 1032 | flush_sample_queue(session, ops); |
853 | out_err: | 1033 | out_err: |
854 | ui_progress__delete(progress); | 1034 | ui_progress__delete(progress); |
1035 | |||
1036 | if (ops->lost == event__process_lost && | ||
1037 | session->hists.stats.total_lost != 0) { | ||
1038 | ui__warning("Processed %Lu events and LOST %Lu!\n\n" | ||
1039 | "Check IO/CPU overload!\n\n", | ||
1040 | session->hists.stats.total_period, | ||
1041 | session->hists.stats.total_lost); | ||
1042 | } | ||
1043 | |||
1044 | if (session->hists.stats.nr_unknown_events != 0) { | ||
1045 | ui__warning("Found %u unknown events!\n\n" | ||
1046 | "Is this an older tool processing a perf.data " | ||
1047 | "file generated by a more recent tool?\n\n" | ||
1048 | "If that is not the case, consider " | ||
1049 | "reporting to linux-kernel@vger.kernel.org.\n\n", | ||
1050 | session->hists.stats.nr_unknown_events); | ||
1051 | } | ||
1052 | |||
1053 | if (session->hists.stats.nr_invalid_chains != 0) { | ||
1054 | ui__warning("Found invalid callchains!\n\n" | ||
1055 | "%u out of %u events were discarded for this reason.\n\n" | ||
1056 | "Consider reporting to linux-kernel@vger.kernel.org.\n\n", | ||
1057 | session->hists.stats.nr_invalid_chains, | ||
1058 | session->hists.stats.nr_events[PERF_RECORD_SAMPLE]); | ||
1059 | } | ||
1060 | |||
1061 | perf_session_free_sample_buffers(session); | ||
855 | return err; | 1062 | return err; |
856 | } | 1063 | } |
857 | 1064 | ||
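
__perf_session__process_events() replaces the single fixed-size map with a ring of up to eight live mappings (mmaps[8]): when an event would cross the window end, the oldest map is dropped and the file is remapped starting at the last page boundary at or below the current head, which is the subtle arithmetic in the remap path. That realignment in isolation (page_size assumed to be the power of two sysconf(_SC_PAGESIZE) reports):

    #include <stdint.h>

    struct window {
        uint64_t file_offset;           /* where the current map starts */
        uint64_t head;                  /* read position within the map */
    };

    /* Slide the map forward: the next mmap() must start page-aligned, so
     * move file_offset up to the last page boundary at or below head and
     * keep the remainder as the new in-map offset. */
    void realign(struct window *w, uint64_t page_size)
    {
        uint64_t page_offset = page_size * (w->head / page_size);

        w->file_offset += page_offset;
        w->head -= page_offset;
    }
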
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 9fa0fc2a863f..ac36f99f14af 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h | |||
@@ -17,8 +17,12 @@ struct ordered_samples { | |||
17 | u64 last_flush; | 17 | u64 last_flush; |
18 | u64 next_flush; | 18 | u64 next_flush; |
19 | u64 max_timestamp; | 19 | u64 max_timestamp; |
20 | struct list_head samples_head; | 20 | struct list_head samples; |
21 | struct sample_queue *last_inserted; | 21 | struct list_head sample_cache; |
22 | struct list_head to_free; | ||
23 | struct sample_queue *sample_buffer; | ||
24 | struct sample_queue *last_sample; | ||
25 | int sample_buffer_idx; | ||
22 | }; | 26 | }; |
23 | 27 | ||
24 | struct perf_session { | 28 | struct perf_session { |
@@ -42,6 +46,8 @@ struct perf_session { | |||
42 | int fd; | 46 | int fd; |
43 | bool fd_pipe; | 47 | bool fd_pipe; |
44 | bool repipe; | 48 | bool repipe; |
49 | bool sample_id_all; | ||
50 | u16 id_hdr_size; | ||
45 | int cwdlen; | 51 | int cwdlen; |
46 | char *cwd; | 52 | char *cwd; |
47 | struct ordered_samples ordered_samples; | 53 | struct ordered_samples ordered_samples; |
@@ -50,7 +56,9 @@ struct perf_session { | |||
50 | 56 | ||
51 | struct perf_event_ops; | 57 | struct perf_event_ops; |
52 | 58 | ||
53 | typedef int (*event_op)(event_t *self, struct perf_session *session); | 59 | typedef int (*event_op)(event_t *self, struct sample_data *sample, |
60 | struct perf_session *session); | ||
61 | typedef int (*event_synth_op)(event_t *self, struct perf_session *session); | ||
54 | typedef int (*event_op2)(event_t *self, struct perf_session *session, | 62 | typedef int (*event_op2)(event_t *self, struct perf_session *session, |
55 | struct perf_event_ops *ops); | 63 | struct perf_event_ops *ops); |
56 | 64 | ||
@@ -63,8 +71,8 @@ struct perf_event_ops { | |||
63 | lost, | 71 | lost, |
64 | read, | 72 | read, |
65 | throttle, | 73 | throttle, |
66 | unthrottle, | 74 | unthrottle; |
67 | attr, | 75 | event_synth_op attr, |
68 | event_type, | 76 | event_type, |
69 | tracing_data, | 77 | tracing_data, |
70 | build_id; | 78 | build_id; |
@@ -100,6 +108,8 @@ int perf_session__create_kernel_maps(struct perf_session *self); | |||
100 | 108 | ||
101 | int do_read(int fd, void *buf, size_t size); | 109 | int do_read(int fd, void *buf, size_t size); |
102 | void perf_session__update_sample_type(struct perf_session *self); | 110 | void perf_session__update_sample_type(struct perf_session *self); |
111 | void perf_session__set_sample_id_all(struct perf_session *session, bool value); | ||
112 | void perf_session__set_sample_type(struct perf_session *session, u64 type); | ||
103 | void perf_session__remove_thread(struct perf_session *self, struct thread *th); | 113 | void perf_session__remove_thread(struct perf_session *self, struct thread *th); |
104 | 114 | ||
105 | static inline | 115 | static inline |
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index b62a553cc67d..f44fa541d56e 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c | |||
@@ -170,7 +170,7 @@ static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf, | |||
170 | return repsep_snprintf(bf, size, "%-*s", width, dso_name); | 170 | return repsep_snprintf(bf, size, "%-*s", width, dso_name); |
171 | } | 171 | } |
172 | 172 | ||
173 | return repsep_snprintf(bf, size, "%*Lx", width, self->ip); | 173 | return repsep_snprintf(bf, size, "%-*s", width, "[unknown]"); |
174 | } | 174 | } |
175 | 175 | ||
176 | /* --sort symbol */ | 176 | /* --sort symbol */ |
@@ -196,7 +196,7 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, | |||
196 | 196 | ||
197 | if (verbose) { | 197 | if (verbose) { |
198 | char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!'; | 198 | char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!'; |
199 | ret += repsep_snprintf(bf, size, "%*Lx %c ", | 199 | ret += repsep_snprintf(bf, size, "%-#*llx %c ", |
200 | BITS_PER_LONG / 4, self->ip, o); | 200 | BITS_PER_LONG / 4, self->ip, o); |
201 | } | 201 | } |
202 | 202 | ||
@@ -205,7 +205,7 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, | |||
205 | ret += repsep_snprintf(bf + ret, size - ret, "%s", | 205 | ret += repsep_snprintf(bf + ret, size - ret, "%s", |
206 | self->ms.sym->name); | 206 | self->ms.sym->name); |
207 | else | 207 | else |
208 | ret += repsep_snprintf(bf + ret, size - ret, "%*Lx", | 208 | ret += repsep_snprintf(bf + ret, size - ret, "%-#*llx", |
209 | BITS_PER_LONG / 4, self->ip); | 209 | BITS_PER_LONG / 4, self->ip); |
210 | 210 | ||
211 | return ret; | 211 | return ret; |
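
The sort.c hunks above replace the glibc-only %Lx with the portable %llx, left-justified and 0x-prefixed via %-#*llx, with the field width (BITS_PER_LONG / 4 hex digits) passed as an argument. A quick demonstration of that conversion on a 64-bit build:

    #include <stdio.h>

    int main(void)
    {
        unsigned long long ip = 0xffffffff8103c4b2ULL;

        /* '-' left-justifies, '#' prepends 0x, '*' reads the minimum
         * width (BITS_PER_LONG / 4 = 16 here) from the argument list. */
        printf("[%-#*llx]\n", 16, ip);  /* [0xffffffff8103c4b2] */
        return 0;
    }
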
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index d628c8d1cf5e..ceefa6568def 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c | |||
@@ -121,7 +121,7 @@ static void __map_groups__fixup_end(struct map_groups *self, enum map_type type) | |||
121 | * We still haven't the actual symbols, so guess the | 121 | * We still haven't the actual symbols, so guess the |
122 | * last map final address. | 122 | * last map final address. |
123 | */ | 123 | */ |
124 | curr->end = ~0UL; | 124 | curr->end = ~0ULL; |
125 | } | 125 | } |
126 | 126 | ||
127 | static void map_groups__fixup_end(struct map_groups *self) | 127 | static void map_groups__fixup_end(struct map_groups *self) |
@@ -1836,8 +1836,8 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map, | |||
1836 | const char *kallsyms_filename = NULL; | 1836 | const char *kallsyms_filename = NULL; |
1837 | char *kallsyms_allocated_filename = NULL; | 1837 | char *kallsyms_allocated_filename = NULL; |
1838 | /* | 1838 | /* |
1839 | * Step 1: if the user specified a vmlinux filename, use it and only | 1839 | * Step 1: if the user specified a kallsyms or vmlinux filename, use |
1840 | * it, reporting errors to the user if it cannot be used. | 1840 | * it and only it, reporting errors to the user if it cannot be used. |
1841 | * | 1841 | * |
1842 | * For instance, try to analyse an ARM perf.data file _without_ a | 1842 | * For instance, try to analyse an ARM perf.data file _without_ a |
1843 | * build-id, or if the user specifies the wrong path to the right | 1843 | * build-id, or if the user specifies the wrong path to the right |
@@ -1850,6 +1850,11 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map, | |||
1850 | * validation in dso__load_vmlinux and will bail out if they don't | 1850 | * validation in dso__load_vmlinux and will bail out if they don't |
1851 | * match. | 1851 | * match. |
1852 | */ | 1852 | */ |
1853 | if (symbol_conf.kallsyms_name != NULL) { | ||
1854 | kallsyms_filename = symbol_conf.kallsyms_name; | ||
1855 | goto do_kallsyms; | ||
1856 | } | ||
1857 | |||
1853 | if (symbol_conf.vmlinux_name != NULL) { | 1858 | if (symbol_conf.vmlinux_name != NULL) { |
1854 | err = dso__load_vmlinux(self, map, | 1859 | err = dso__load_vmlinux(self, map, |
1855 | symbol_conf.vmlinux_name, filter); | 1860 | symbol_conf.vmlinux_name, filter); |
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 038f2201ee09..12defbe18c13 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h | |||
@@ -72,6 +72,7 @@ struct symbol_conf { | |||
72 | show_cpu_utilization, | 72 | show_cpu_utilization, |
73 | initialized; | 73 | initialized; |
74 | const char *vmlinux_name, | 74 | const char *vmlinux_name, |
75 | *kallsyms_name, | ||
75 | *source_prefix, | 76 | *source_prefix, |
76 | *field_sep; | 77 | *field_sep; |
77 | const char *default_guest_vmlinux_name, | 78 | const char *default_guest_vmlinux_name, |
diff --git a/tools/perf/util/ui/util.c b/tools/perf/util/ui/util.c index 056c69521a38..7b5a8926624e 100644 --- a/tools/perf/util/ui/util.c +++ b/tools/perf/util/ui/util.c | |||
@@ -104,10 +104,24 @@ out_destroy_form: | |||
104 | return rc; | 104 | return rc; |
105 | } | 105 | } |
106 | 106 | ||
107 | static const char yes[] = "Yes", no[] = "No"; | 107 | static const char yes[] = "Yes", no[] = "No", |
108 | warning_str[] = "Warning!", ok[] = "Ok"; | ||
108 | 109 | ||
109 | bool ui__dialog_yesno(const char *msg) | 110 | bool ui__dialog_yesno(const char *msg) |
110 | { | 111 | { |
111 | /* newtWinChoice should really be accepting const char pointers... */ | 112 | /* newtWinChoice should really be accepting const char pointers... */ |
112 | return newtWinChoice(NULL, (char *)yes, (char *)no, (char *)msg) == 1; | 113 | return newtWinChoice(NULL, (char *)yes, (char *)no, (char *)msg) == 1; |
113 | } | 114 | } |
115 | |||
116 | void ui__warning(const char *format, ...) | ||
117 | { | ||
118 | va_list args; | ||
119 | |||
120 | va_start(args, format); | ||
121 | if (use_browser > 0) | ||
122 | newtWinMessagev((char *)warning_str, (char *)ok, | ||
123 | (char *)format, args); | ||
124 | else | ||
125 | vfprintf(stderr, format, args); | ||
126 | va_end(args); | ||
127 | } | ||
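
ui__warning() formats once and hands the va_list to exactly one consumer: newtWinMessagev() when the TUI browser is up, vfprintf() to stderr otherwise. A minimal sketch of the stderr path showing the va_start/va_end discipline the helper relies on (the newt branch is elided):

    #include <stdarg.h>
    #include <stdio.h>

    /* One va_start/va_end pair, the va_list consumed exactly once; the
     * real helper picks newtWinMessagev() instead when use_browser > 0. */
    static void warn(const char *fmt, ...)
    {
        va_list args;

        va_start(args, fmt);
        vfprintf(stderr, fmt, args);
        va_end(args);
    }

    int main(void)
    {
        warn("Found %u unknown events!\n", 3u);
        return 0;
    }
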