author    Linus Torvalds <torvalds@linux-foundation.org>	2011-01-11 14:02:13 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>	2011-01-11 14:02:13 -0500
commit    42776163e13a56ea3096edff7a5df95408e80eb4
tree      92f17bb5dadc7261b2d9238244cd8d4cb6c1bfd7
parent    edb2877f4a62647e36e20839a786f94d688a06ed
parent    3d03e2ea74103a50c23d6ab1906cf73399c0dafb
Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (28 commits)
  perf session: Fix infinite loop in __perf_session__process_events
  perf evsel: Support perf_evsel__open(cpus > 1 && threads > 1)
  perf sched: Use PTHREAD_STACK_MIN to avoid pthread_attr_setstacksize() fail
  perf tools: Emit clearer message for sys_perf_event_open ENOENT return
  perf stat: better error message for unsupported events
  perf sched: Fix allocation result check
  perf, x86: P4 PMU - Fix unflagged overflows handling
  dynamic debug: Fix build issue with older gcc
  tracing: Fix TRACE_EVENT power tracepoint creation
  tracing: Fix preempt count leak
  tracepoint: Add __rcu annotation
  tracing: remove duplicate null-pointer check in skb tracepoint
  tracing/trivial: Add missing comma in TRACE_EVENT comment
  tracing: Include module.h in define_trace.h
  x86: Save rbp in pt_regs on irq entry
  x86, dumpstack: Fix unused variable warning
  x86, NMI: Clean-up default_do_nmi()
  x86, NMI: Allow NMI reason io port (0x61) to be processed on any CPU
  x86, NMI: Remove DIE_NMI_IPI
  x86, NMI: Add priorities to handlers
  ...
-rw-r--r--  arch/x86/include/asm/kdebug.h            |   1
-rw-r--r--  arch/x86/include/asm/mach_traps.h        |  12
-rw-r--r--  arch/x86/include/asm/nmi.h               |  20
-rw-r--r--  arch/x86/include/asm/perf_event_p4.h     |   3
-rw-r--r--  arch/x86/kernel/apic/hw_nmi.c            |   3
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c       |   2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-inject.c  |   5
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c         |   3
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c      |  28
-rw-r--r--  arch/x86/kernel/dumpstack.c              |   6
-rw-r--r--  arch/x86/kernel/entry_64.S               |  36
-rw-r--r--  arch/x86/kernel/kgdb.c                   |   7
-rw-r--r--  arch/x86/kernel/reboot.c                 |   5
-rw-r--r--  arch/x86/kernel/traps.c                  | 102
-rw-r--r--  arch/x86/oprofile/nmi_int.c              |   3
-rw-r--r--  arch/x86/oprofile/nmi_timer_int.c        |   2
-rw-r--r--  drivers/char/ipmi/ipmi_watchdog.c        |   2
-rw-r--r--  drivers/watchdog/hpwdt.c                 |   2
-rw-r--r--  include/linux/dynamic_debug.h            |  18
-rw-r--r--  include/linux/tracepoint.h               |   4
-rw-r--r--  include/trace/define_trace.h             |  10
-rw-r--r--  include/trace/events/skb.h               |   4
-rw-r--r--  kernel/Makefile                          |   1
-rw-r--r--  kernel/exit.c                            |  14
-rw-r--r--  kernel/perf_event.c                      |  82
-rw-r--r--  kernel/trace/Makefile                    |   2
-rw-r--r--  kernel/trace/trace.c                     |   6
-rw-r--r--  lib/dynamic_debug.c                      |   9
-rw-r--r--  tools/perf/Makefile                      |   2
-rw-r--r--  tools/perf/builtin-record.c              |   3
-rw-r--r--  tools/perf/builtin-sched.c               |   5
-rw-r--r--  tools/perf/builtin-stat.c                |   5
-rw-r--r--  tools/perf/builtin-test.c                | 116
-rw-r--r--  tools/perf/builtin-top.c                 |   2
-rw-r--r--  tools/perf/util/evsel.c                  |  87
-rw-r--r--  tools/perf/util/evsel.h                  |   2
-rw-r--r--  tools/perf/util/parse-events.c           |  74
-rw-r--r--  tools/perf/util/session.c                |   2
38 files changed, 442 insertions(+), 248 deletions(-)
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index f23eb2528464..ca242d35e873 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -18,7 +18,6 @@ enum die_val {
 	DIE_TRAP,
 	DIE_GPF,
 	DIE_CALL,
-	DIE_NMI_IPI,
 	DIE_PAGE_FAULT,
 	DIE_NMIUNKNOWN,
 };
diff --git a/arch/x86/include/asm/mach_traps.h b/arch/x86/include/asm/mach_traps.h
index f7920601e472..72a8b52e7dfd 100644
--- a/arch/x86/include/asm/mach_traps.h
+++ b/arch/x86/include/asm/mach_traps.h
@@ -7,9 +7,19 @@
 
 #include <asm/mc146818rtc.h>
 
+#define NMI_REASON_PORT		0x61
+
+#define NMI_REASON_SERR		0x80
+#define NMI_REASON_IOCHK	0x40
+#define NMI_REASON_MASK		(NMI_REASON_SERR | NMI_REASON_IOCHK)
+
+#define NMI_REASON_CLEAR_SERR	0x04
+#define NMI_REASON_CLEAR_IOCHK	0x08
+#define NMI_REASON_CLEAR_MASK	0x0f
+
 static inline unsigned char get_nmi_reason(void)
 {
-	return inb(0x61);
+	return inb(NMI_REASON_PORT);
 }
 
 static inline void reassert_nmi(void)
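
As an aside, the new macros make the reason-byte layout at port 0x61 explicit. A minimal user-space sketch (illustrative only, not part of this merge) of decoding a hypothetical reason value with these masks:

#include <stdio.h>

#define NMI_REASON_SERR		0x80
#define NMI_REASON_IOCHK	0x40
#define NMI_REASON_MASK		(NMI_REASON_SERR | NMI_REASON_IOCHK)

int main(void)
{
	unsigned char reason = 0xc5;	/* hypothetical inb(0x61) result */

	if (reason & NMI_REASON_SERR)
		printf("PCI SERR asserted\n");
	if (reason & NMI_REASON_IOCHK)
		printf("IOCHK asserted\n");
	if (!(reason & NMI_REASON_MASK))
		printf("no external NMI source flagged\n");
	return 0;
}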
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index c4021b953510..c76f5b92b840 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -23,6 +23,26 @@ void arch_trigger_all_cpu_backtrace(void);
 #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
 #endif
 
+/*
+ * Define some priorities for the nmi notifier call chain.
+ *
+ * Create a local nmi bit that has a higher priority than
+ * external nmis, because the local ones are more frequent.
+ *
+ * Also set up some default high/normal/low settings for
+ * subsystems to register with. Using 4 bits to separate
+ * the priorities. This can go a lot higher if need be.
+ */
+
+#define NMI_LOCAL_SHIFT		16	/* randomly picked */
+#define NMI_LOCAL_BIT		(1ULL << NMI_LOCAL_SHIFT)
+#define NMI_HIGH_PRIOR		(1ULL << 8)
+#define NMI_NORMAL_PRIOR	(1ULL << 4)
+#define NMI_LOW_PRIOR		(1ULL << 0)
+#define NMI_LOCAL_HIGH_PRIOR	(NMI_LOCAL_BIT | NMI_HIGH_PRIOR)
+#define NMI_LOCAL_NORMAL_PRIOR	(NMI_LOCAL_BIT | NMI_NORMAL_PRIOR)
+#define NMI_LOCAL_LOW_PRIOR	(NMI_LOCAL_BIT | NMI_LOW_PRIOR)
+
 void stop_nmi(void);
 void restart_nmi(void);
 
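
For context, a minimal sketch of how a subsystem would hook the die chain with these priorities; the handler and its name are hypothetical, only register_die_notifier() and the constants above come from the kernel:

#include <linux/kdebug.h>
#include <linux/notifier.h>
#include <asm/nmi.h>

static int my_nmi_handler(struct notifier_block *nb,
			  unsigned long cmd, void *data)
{
	if (cmd != DIE_NMI)
		return NOTIFY_DONE;	/* not an NMI event */
	/* ... check whether our device raised the NMI ... */
	return NOTIFY_STOP;		/* consumed, stop the chain */
}

static struct notifier_block my_nmi_nb = {
	.notifier_call	= my_nmi_handler,
	/* a local (per-CPU) source, polled after higher-priority users */
	.priority	= NMI_LOCAL_LOW_PRIOR,
};

/* somewhere in init: register_die_notifier(&my_nmi_nb); */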
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index 295e2ff18a6a..e2f6a99f14ab 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -20,6 +20,9 @@
 #define ARCH_P4_MAX_ESCR	(ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR)
 #define ARCH_P4_MAX_CCCR	(18)
 
+#define ARCH_P4_CNTRVAL_BITS	(40)
+#define ARCH_P4_CNTRVAL_MASK	((1ULL << ARCH_P4_CNTRVAL_BITS) - 1)
+
 #define P4_ESCR_EVENT_MASK	0x7e000000U
 #define P4_ESCR_EVENT_SHIFT	25
 #define P4_ESCR_EVENTMASK_MASK	0x01fffe00U
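
A quick arithmetic check of the new constants (a stand-alone sketch, not part of the patch): 40 counter bits give the mask 0xffffffffff, and the max_period used later stays below the counter's sign bit:

#include <assert.h>

#define ARCH_P4_CNTRVAL_BITS	(40)
#define ARCH_P4_CNTRVAL_MASK	((1ULL << ARCH_P4_CNTRVAL_BITS) - 1)

int main(void)
{
	assert(ARCH_P4_CNTRVAL_MASK == 0xffffffffffULL);
	/* .max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1 */
	assert(((1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1) == 0x7fffffffffULL);
	return 0;
}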
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 72ec29e1ae06..79fd43ca6f96 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -68,7 +68,6 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
 
 	switch (cmd) {
 	case DIE_NMI:
-	case DIE_NMI_IPI:
 		break;
 
 	default:
@@ -96,7 +95,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
 static __read_mostly struct notifier_block backtrace_notifier = {
 	.notifier_call          = arch_trigger_all_cpu_backtrace_handler,
 	.next                   = NULL,
-	.priority               = 1
+	.priority               = NMI_LOCAL_LOW_PRIOR,
 };
 
 static int __init register_trigger_all_cpu_backtrace(void)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index ecca5f41ad2c..89b9f56aaae3 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -641,7 +641,7 @@ void __cpuinit uv_cpu_init(void)
  */
 int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
 {
-	if (reason != DIE_NMI_IPI)
+	if (reason != DIE_NMIUNKNOWN)
 		return NOTIFY_OK;
 
 	if (in_crash_kexec)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index e7dbde7bfedb..a77971979564 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -25,6 +25,7 @@
 #include <linux/gfp.h>
 #include <asm/mce.h>
 #include <asm/apic.h>
+#include <asm/nmi.h>
 
 /* Update fake mce registers on current CPU. */
 static void inject_mce(struct mce *m)
@@ -83,7 +84,7 @@ static int mce_raise_notify(struct notifier_block *self,
 	struct die_args *args = (struct die_args *)data;
 	int cpu = smp_processor_id();
 	struct mce *m = &__get_cpu_var(injectm);
-	if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
+	if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
 		return NOTIFY_DONE;
 	cpumask_clear_cpu(cpu, mce_inject_cpumask);
 	if (m->inject_flags & MCJ_EXCEPTION)
@@ -95,7 +96,7 @@ static int mce_raise_notify(struct notifier_block *self,
 
 static struct notifier_block mce_raise_nb = {
 	.notifier_call = mce_raise_notify,
-	.priority = 1000,
+	.priority = NMI_LOCAL_NORMAL_PRIOR,
 };
 
 /* Inject mce on current CPU */
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 04921017abe0..9d977a2ea693 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1267,7 +1267,6 @@ perf_event_nmi_handler(struct notifier_block *self,
 
 	switch (cmd) {
 	case DIE_NMI:
-	case DIE_NMI_IPI:
 		break;
 	case DIE_NMIUNKNOWN:
 		this_nmi = percpu_read(irq_stat.__nmi_count);
@@ -1317,7 +1316,7 @@ perf_event_nmi_handler(struct notifier_block *self,
 static __read_mostly struct notifier_block perf_event_nmi_notifier = {
 	.notifier_call		= perf_event_nmi_handler,
 	.next			= NULL,
-	.priority		= 1
+	.priority		= NMI_LOCAL_LOW_PRIOR,
 };
 
 static struct event_constraint unconstrained;
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 81400b93e694..e56b9bfbabd1 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -753,19 +753,21 @@ out:
 
 static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
 {
-	int overflow = 0;
-	u32 low, high;
+	u64 v;
 
-	rdmsr(hwc->config_base + hwc->idx, low, high);
-
-	/* we need to check high bit for unflagged overflows */
-	if ((low & P4_CCCR_OVF) || !(high & (1 << 31))) {
-		overflow = 1;
-		(void)checking_wrmsrl(hwc->config_base + hwc->idx,
-			((u64)low) & ~P4_CCCR_OVF);
+	/* an official way for overflow indication */
+	rdmsrl(hwc->config_base + hwc->idx, v);
+	if (v & P4_CCCR_OVF) {
+		wrmsrl(hwc->config_base + hwc->idx, v & ~P4_CCCR_OVF);
+		return 1;
 	}
 
-	return overflow;
+	/* it might be unflagged overflow */
+	rdmsrl(hwc->event_base + hwc->idx, v);
+	if (!(v & ARCH_P4_CNTRVAL_MASK))
+		return 1;
+
+	return 0;
 }
 
 static void p4_pmu_disable_pebs(void)
@@ -1152,9 +1154,9 @@ static __initconst const struct x86_pmu p4_pmu = {
 	 */
 	.num_counters		= ARCH_P4_MAX_CCCR,
 	.apic			= 1,
-	.cntval_bits		= 40,
-	.cntval_mask		= (1ULL << 40) - 1,
-	.max_period		= (1ULL << 39) - 1,
+	.cntval_bits		= ARCH_P4_CNTRVAL_BITS,
+	.cntval_mask		= ARCH_P4_CNTRVAL_MASK,
+	.max_period		= (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1,
 	.hw_config		= p4_hw_config,
 	.schedule_events	= p4_pmu_schedule_events,
 	/*
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 8474c998cbd4..d6fb146c0d8b 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -197,14 +197,8 @@ void show_stack(struct task_struct *task, unsigned long *sp)
  */
 void dump_stack(void)
 {
-	unsigned long bp = 0;
 	unsigned long stack;
 
-#ifdef CONFIG_FRAME_POINTER
-	if (!bp)
-		get_bp(bp);
-#endif
-
 	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
 		current->pid, current->comm, print_tainted(),
 		init_utsname()->release,
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e3ba417e8697..d3b895f375d3 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -299,17 +299,21 @@ ENDPROC(native_usergs_sysret64)
 ENTRY(save_args)
 	XCPT_FRAME
 	cld
-	movq_cfi rdi, RDI+16-ARGOFFSET
-	movq_cfi rsi, RSI+16-ARGOFFSET
-	movq_cfi rdx, RDX+16-ARGOFFSET
-	movq_cfi rcx, RCX+16-ARGOFFSET
-	movq_cfi rax, RAX+16-ARGOFFSET
-	movq_cfi r8,  R8+16-ARGOFFSET
-	movq_cfi r9,  R9+16-ARGOFFSET
-	movq_cfi r10, R10+16-ARGOFFSET
-	movq_cfi r11, R11+16-ARGOFFSET
-
-	leaq -ARGOFFSET+16(%rsp),%rdi	/* arg1 for handler */
+	/*
+	 * start from rbp in pt_regs and jump over
+	 * return address.
+	 */
+	movq_cfi rdi, RDI+8-RBP
+	movq_cfi rsi, RSI+8-RBP
+	movq_cfi rdx, RDX+8-RBP
+	movq_cfi rcx, RCX+8-RBP
+	movq_cfi rax, RAX+8-RBP
+	movq_cfi r8,  R8+8-RBP
+	movq_cfi r9,  R9+8-RBP
+	movq_cfi r10, R10+8-RBP
+	movq_cfi r11, R11+8-RBP
+
+	leaq -RBP+8(%rsp),%rdi	/* arg1 for handler */
 	movq_cfi rbp, 8		/* push %rbp */
 	leaq 8(%rsp), %rbp		/* mov %rsp, %ebp */
 	testl $3, CS(%rdi)
@@ -782,8 +786,9 @@ END(interrupt)
 
 /* 0(%rsp): ~(interrupt number) */
 	.macro interrupt func
-	subq $ORIG_RAX-ARGOFFSET+8, %rsp
-	CFI_ADJUST_CFA_OFFSET ORIG_RAX-ARGOFFSET+8
+	/* reserve pt_regs for scratch regs and rbp */
+	subq $ORIG_RAX-RBP, %rsp
+	CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
 	call save_args
 	PARTIAL_FRAME 0
 	call \func
@@ -808,9 +813,14 @@ ret_from_intr:
 	TRACE_IRQS_OFF
 	decl PER_CPU_VAR(irq_count)
 	leaveq
+
 	CFI_RESTORE		rbp
 	CFI_DEF_CFA_REGISTER	rsp
 	CFI_ADJUST_CFA_OFFSET	-8
+
+	/* we did not save rbx, restore only from ARGOFFSET */
+	addq $8, %rsp
+	CFI_ADJUST_CFA_OFFSET	-8
 exit_intr:
 	GET_THREAD_INFO(%rcx)
 	testl $3,CS-ARGOFFSET(%rsp)
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index cd21b654dec6..a4130005028a 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -48,6 +48,7 @@
 #include <asm/apicdef.h>
 #include <asm/system.h>
 #include <asm/apic.h>
+#include <asm/nmi.h>
 
 struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
 {
@@ -525,10 +526,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
 		}
 		return NOTIFY_DONE;
 
-	case DIE_NMI_IPI:
-		/* Just ignore, we will handle the roundup on DIE_NMI. */
-		return NOTIFY_DONE;
-
 	case DIE_NMIUNKNOWN:
 		if (was_in_debug_nmi[raw_smp_processor_id()]) {
 			was_in_debug_nmi[raw_smp_processor_id()] = 0;
@@ -606,7 +603,7 @@ static struct notifier_block kgdb_notifier = {
 	/*
 	 * Lowest-prio notifier priority, we want to be notified last:
 	 */
-	.priority	= -INT_MAX,
+	.priority	= NMI_LOCAL_LOW_PRIOR,
 };
 
 /**
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index c495aa8d4815..fc7aae1e2bc7 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -18,6 +18,7 @@
18#include <asm/pci_x86.h> 18#include <asm/pci_x86.h>
19#include <asm/virtext.h> 19#include <asm/virtext.h>
20#include <asm/cpu.h> 20#include <asm/cpu.h>
21#include <asm/nmi.h>
21 22
22#ifdef CONFIG_X86_32 23#ifdef CONFIG_X86_32
23# include <linux/ctype.h> 24# include <linux/ctype.h>
@@ -747,7 +748,7 @@ static int crash_nmi_callback(struct notifier_block *self,
747{ 748{
748 int cpu; 749 int cpu;
749 750
750 if (val != DIE_NMI_IPI) 751 if (val != DIE_NMI)
751 return NOTIFY_OK; 752 return NOTIFY_OK;
752 753
753 cpu = raw_smp_processor_id(); 754 cpu = raw_smp_processor_id();
@@ -778,6 +779,8 @@ static void smp_send_nmi_allbutself(void)
 
 static struct notifier_block crash_nmi_nb = {
 	.notifier_call = crash_nmi_callback,
+	/* we want to be the first one called */
+	.priority = NMI_LOCAL_HIGH_PRIOR+1,
 };
 
 /* Halt all other CPUs, calling the specified function on each of them
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c76aaca5694d..b9b67166f9de 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -84,6 +84,11 @@ EXPORT_SYMBOL_GPL(used_vectors);
 static int ignore_nmis;
 
 int unknown_nmi_panic;
+/*
+ * Prevent NMI reason port (0x61) being accessed simultaneously, can
+ * only be used in NMI handler.
+ */
+static DEFINE_RAW_SPINLOCK(nmi_reason_lock);
 
 static inline void conditional_sti(struct pt_regs *regs)
 {
@@ -310,15 +315,15 @@ static int __init setup_unknown_nmi_panic(char *str)
 __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
 
 static notrace __kprobes void
-mem_parity_error(unsigned char reason, struct pt_regs *regs)
+pci_serr_error(unsigned char reason, struct pt_regs *regs)
 {
-	printk(KERN_EMERG
-		"Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
-			reason, smp_processor_id());
-
-	printk(KERN_EMERG
-		"You have some hardware problem, likely on the PCI bus.\n");
+	pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
+		 reason, smp_processor_id());
 
+	/*
+	 * On some machines, PCI SERR line is used to report memory
+	 * errors. EDAC makes use of it.
+	 */
 #if defined(CONFIG_EDAC)
 	if (edac_handler_set()) {
 		edac_atomic_assert_error();
@@ -329,11 +334,11 @@ mem_parity_error(unsigned char reason, struct pt_regs *regs)
 	if (panic_on_unrecovered_nmi)
 		panic("NMI: Not continuing");
 
-	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+	pr_emerg("Dazed and confused, but trying to continue\n");
 
-	/* Clear and disable the memory parity error line. */
-	reason = (reason & 0xf) | 4;
-	outb(reason, 0x61);
+	/* Clear and disable the PCI SERR error line. */
+	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
+	outb(reason, NMI_REASON_PORT);
 }
 
 static notrace __kprobes void
@@ -341,15 +346,17 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
 {
 	unsigned long i;
 
-	printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
+	pr_emerg(
+	"NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
+		 reason, smp_processor_id());
 	show_registers(regs);
 
 	if (panic_on_io_nmi)
 		panic("NMI IOCK error: Not continuing");
 
 	/* Re-enable the IOCK line, wait for a few seconds */
-	reason = (reason & 0xf) | 8;
-	outb(reason, 0x61);
+	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
+	outb(reason, NMI_REASON_PORT);
 
 	i = 20000;
 	while (--i) {
@@ -357,8 +364,8 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
 		udelay(100);
 	}
 
-	reason &= ~8;
-	outb(reason, 0x61);
+	reason &= ~NMI_REASON_CLEAR_IOCHK;
+	outb(reason, NMI_REASON_PORT);
 }
 
 static notrace __kprobes void
@@ -377,57 +384,50 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 		return;
 	}
 #endif
-	printk(KERN_EMERG
-		"Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
-			reason, smp_processor_id());
+	pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
+		 reason, smp_processor_id());
 
-	printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
+	pr_emerg("Do you have a strange power saving mode enabled?\n");
 	if (unknown_nmi_panic || panic_on_unrecovered_nmi)
 		panic("NMI: Not continuing");
 
-	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+	pr_emerg("Dazed and confused, but trying to continue\n");
 }
 
 static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 {
 	unsigned char reason = 0;
-	int cpu;
 
-	cpu = smp_processor_id();
-
-	/* Only the BSP gets external NMIs from the system. */
-	if (!cpu)
-		reason = get_nmi_reason();
+	/*
+	 * CPU-specific NMI must be processed before non-CPU-specific
+	 * NMI, otherwise we may lose it, because the CPU-specific
+	 * NMI can not be detected/processed on other CPUs.
+	 */
+	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
+		return;
 
-	if (!(reason & 0xc0)) {
-		if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
-							== NOTIFY_STOP)
-			return;
+	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
+	raw_spin_lock(&nmi_reason_lock);
+	reason = get_nmi_reason();
 
-#ifdef CONFIG_X86_LOCAL_APIC
-		if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
-							== NOTIFY_STOP)
-			return;
+	if (reason & NMI_REASON_MASK) {
+		if (reason & NMI_REASON_SERR)
+			pci_serr_error(reason, regs);
+		else if (reason & NMI_REASON_IOCHK)
+			io_check_error(reason, regs);
+#ifdef CONFIG_X86_32
+		/*
+		 * Reassert NMI in case it became active
+		 * meanwhile as it's edge-triggered:
+		 */
+		reassert_nmi();
 #endif
-		unknown_nmi_error(reason, regs);
-
+		raw_spin_unlock(&nmi_reason_lock);
 		return;
 	}
-	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
-		return;
+	raw_spin_unlock(&nmi_reason_lock);
 
-	/* AK: following checks seem to be broken on modern chipsets. FIXME */
-	if (reason & 0x80)
-		mem_parity_error(reason, regs);
-	if (reason & 0x40)
-		io_check_error(reason, regs);
-#ifdef CONFIG_X86_32
-	/*
-	 * Reassert NMI in case it became active meanwhile
-	 * as it's edge-triggered:
-	 */
-	reassert_nmi();
-#endif
+	unknown_nmi_error(reason, regs);
 }
 
 dotraplinkage notrace __kprobes void
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index f24a8533bcdf..e2b7b0c06cdf 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -65,7 +65,6 @@ static int profile_exceptions_notify(struct notifier_block *self,
 
 	switch (val) {
 	case DIE_NMI:
-	case DIE_NMI_IPI:
 		if (ctr_running)
 			model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs));
 		else if (!nmi_enabled)
@@ -361,7 +360,7 @@ static void nmi_cpu_setup(void *dummy)
 static struct notifier_block profile_exceptions_nb = {
 	.notifier_call = profile_exceptions_notify,
 	.next = NULL,
-	.priority = 2
+	.priority = NMI_LOCAL_LOW_PRIOR,
 };
 
 static void nmi_cpu_restore_registers(struct op_msrs *msrs)
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c
index 0636dd93cef8..720bf5a53c51 100644
--- a/arch/x86/oprofile/nmi_timer_int.c
+++ b/arch/x86/oprofile/nmi_timer_int.c
@@ -38,7 +38,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self,
 static struct notifier_block profile_timer_exceptions_nb = {
 	.notifier_call = profile_timer_exceptions_notify,
 	.next = NULL,
-	.priority = 0
+	.priority = NMI_LOW_PRIOR,
 };
 
 static int timer_start(void)
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index f4d334f2536e..320668f4c3aa 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -1081,7 +1081,7 @@ ipmi_nmi(struct notifier_block *self, unsigned long val, void *data)
 {
 	struct die_args *args = data;
 
-	if (val != DIE_NMI)
+	if (val != DIE_NMIUNKNOWN)
 		return NOTIFY_OK;
 
 	/* Hack, if it's a memory or I/O error, ignore it. */
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index dea7b5bf6e2c..24b966d5061a 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -469,7 +469,7 @@ static int hpwdt_pretimeout(struct notifier_block *nb, unsigned long ulReason,
 	unsigned long rom_pl;
 	static int die_nmi_called;
 
-	if (ulReason != DIE_NMI && ulReason != DIE_NMI_IPI)
+	if (ulReason != DIE_NMIUNKNOWN)
 		goto out;
 
 	if (!hpwdt_nmi_decoding)
diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index a90b3892074a..1c70028f81f9 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -44,34 +44,24 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n,
 extern int ddebug_remove_module(const char *mod_name);
 
 #define dynamic_pr_debug(fmt, ...) do {				\
-	__label__ do_printk;					\
-	__label__ out;						\
 	static struct _ddebug descriptor			\
 	__used							\
 	__attribute__((section("__verbose"), aligned(8))) =	\
 	{ KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__,	\
 		_DPRINTK_FLAGS_DEFAULT };			\
-	JUMP_LABEL(&descriptor.enabled, do_printk);		\
-	goto out;						\
-do_printk:							\
-	printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__);		\
-out:	;							\
+	if (unlikely(descriptor.enabled))			\
+		printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__);	\
 	} while (0)
 
 
 #define dynamic_dev_dbg(dev, fmt, ...) do {			\
-	__label__ do_printk;					\
-	__label__ out;						\
 	static struct _ddebug descriptor			\
 	__used							\
 	__attribute__((section("__verbose"), aligned(8))) =	\
 	{ KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__,	\
 		_DPRINTK_FLAGS_DEFAULT };			\
-	JUMP_LABEL(&descriptor.enabled, do_printk);		\
-	goto out;						\
-do_printk:							\
-	dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__);	\
-out:	;							\
+	if (unlikely(descriptor.enabled))			\
+		dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__);\
 	} while (0)
 
 #else
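
The revert replaces the jump-label plumbing with a plain conditional on descriptor.enabled. A minimal user-space model of the simplified control flow (illustrative only; the type and macro names here are hypothetical):

#include <stdio.h>

struct _ddebug_model { int enabled; };

#define model_pr_debug(desc, fmt, ...) do {			\
	if ((desc)->enabled)					\
		printf(fmt, ##__VA_ARGS__);			\
} while (0)

int main(void)
{
	static struct _ddebug_model descriptor = { .enabled = 0 };

	model_pr_debug(&descriptor, "hidden %d\n", 1);	/* suppressed */
	descriptor.enabled = 1;		/* what ddebug_change() now does */
	model_pr_debug(&descriptor, "shown %d\n", 2);	/* printed */
	return 0;
}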
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index d3e4f87e95c0..c6814616653b 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -32,7 +32,7 @@ struct tracepoint {
 	int state;			/* State. */
 	void (*regfunc)(void);
 	void (*unregfunc)(void);
-	struct tracepoint_func *funcs;
+	struct tracepoint_func __rcu *funcs;
 } __attribute__((aligned(32)));		/*
 					 * Aligned on 32 bytes because it is
 					 * globally visible and gcc happily
@@ -326,7 +326,7 @@ do_trace: \
  *	memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
  *	__entry->next_pid	= next->pid;
  *	__entry->next_prio	= next->prio;
- *	)
+ *	),
  *
  * *
  * * Formatted output of a trace record via TP_printk().
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index b0b4eb24d592..da39b22636f7 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -21,6 +21,16 @@
 #undef CREATE_TRACE_POINTS
 
 #include <linux/stringify.h>
+/*
+ * module.h includes tracepoints, and because ftrace.h
+ * pulls in module.h:
+ *  trace/ftrace.h -> linux/ftrace_event.h -> linux/perf_event.h ->
+ *  linux/ftrace.h -> linux/module.h
+ * we must include module.h here before we play with any of
+ * the TRACE_EVENT() macros, otherwise the tracepoints included
+ * by module.h may break the build.
+ */
+#include <linux/module.h>
 
 #undef TRACE_EVENT
 #define TRACE_EVENT(name, proto, args, tstruct, assign, print)	\
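
The include-order constraint matters because of the usual instantiation pattern, sketched below (illustrative; trace/events/skb.h stands in for any trace header): exactly one compilation unit defines CREATE_TRACE_POINTS before including its trace header, which then pulls in define_trace.h and, with this fix, linux/module.h first.

/* in one .c file of the subsystem */
#define CREATE_TRACE_POINTS
#include <trace/events/skb.h>	/* expands TRACE_EVENT() into definitions */

/* every other user just includes the header without the define */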
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index 75ce9d500d8e..f10293c41b1e 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -25,9 +25,7 @@ TRACE_EVENT(kfree_skb,
 
 	TP_fast_assign(
 		__entry->skbaddr = skb;
-		if (skb) {
-			__entry->protocol = ntohs(skb->protocol);
-		}
+		__entry->protocol = ntohs(skb->protocol);
 		__entry->location = location;
 	),
 
diff --git a/kernel/Makefile b/kernel/Makefile
index 33e0a39cf359..5669f71dfdd5 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace/
 obj-$(CONFIG_TRACING) += trace/
 obj-$(CONFIG_X86_DS) += trace/
 obj-$(CONFIG_RING_BUFFER) += trace/
+obj-$(CONFIG_TRACEPOINTS) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
 obj-$(CONFIG_IRQ_WORK) += irq_work.o
 obj-$(CONFIG_PERF_EVENTS) += perf_event.o
diff --git a/kernel/exit.c b/kernel/exit.c
index 89c74861a3da..f9a45ebcc7b1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -994,6 +994,15 @@ NORET_TYPE void do_exit(long code)
 	exit_fs(tsk);
 	check_stack_usage();
 	exit_thread();
+
+	/*
+	 * Flush inherited counters to the parent - before the parent
+	 * gets woken up by child-exit notifications.
+	 *
+	 * because of cgroup mode, must be called before cgroup_exit()
+	 */
+	perf_event_exit_task(tsk);
+
 	cgroup_exit(tsk, 1);
 
 	if (group_dead)
@@ -1007,11 +1016,6 @@ NORET_TYPE void do_exit(long code)
 	 * FIXME: do that only when needed, using sched_exit tracepoint
 	 */
 	flush_ptrace_hw_breakpoint(tsk);
-	/*
-	 * Flush inherited counters to the parent - before the parent
-	 * gets woken up by child-exit notifications.
-	 */
-	perf_event_exit_task(tsk);
 
 	exit_notify(tsk, group_dead);
 #ifdef CONFIG_NUMA
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 11847bf1e8cc..b782b7a79f00 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -38,6 +38,12 @@
 
 #include <asm/irq_regs.h>
 
+enum event_type_t {
+	EVENT_FLEXIBLE = 0x1,
+	EVENT_PINNED = 0x2,
+	EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
+};
+
 atomic_t perf_task_events __read_mostly;
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
@@ -65,6 +71,12 @@ int sysctl_perf_event_sample_rate __read_mostly = 100000;
 
 static atomic64_t perf_event_id;
 
+static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
+			      enum event_type_t event_type);
+
+static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
+			     enum event_type_t event_type);
+
 void __weak perf_event_print_debug(void)	{ }
 
 extern __weak const char *perf_pmu_name(void)
@@ -72,6 +84,11 @@ extern __weak const char *perf_pmu_name(void)
 	return "pmu";
 }
 
+static inline u64 perf_clock(void)
+{
+	return local_clock();
+}
+
 void perf_pmu_disable(struct pmu *pmu)
 {
 	int *count = this_cpu_ptr(pmu->pmu_disable_count);
@@ -240,11 +257,6 @@ static void perf_unpin_context(struct perf_event_context *ctx)
 	put_ctx(ctx);
 }
 
-static inline u64 perf_clock(void)
-{
-	return local_clock();
-}
-
 /*
  * Update the record of the current time in a context.
  */
@@ -256,6 +268,12 @@ static void update_context_time(struct perf_event_context *ctx)
 	ctx->timestamp = now;
 }
 
+static u64 perf_event_time(struct perf_event *event)
+{
+	struct perf_event_context *ctx = event->ctx;
+	return ctx ? ctx->time : 0;
+}
+
 /*
  * Update the total_time_enabled and total_time_running fields for a event.
  */
@@ -269,7 +287,7 @@ static void update_event_times(struct perf_event *event)
 		return;
 
 	if (ctx->is_active)
-		run_end = ctx->time;
+		run_end = perf_event_time(event);
 	else
 		run_end = event->tstamp_stopped;
 
@@ -278,7 +296,7 @@ static void update_event_times(struct perf_event *event)
 	if (event->state == PERF_EVENT_STATE_INACTIVE)
 		run_end = event->tstamp_stopped;
 	else
-		run_end = ctx->time;
+		run_end = perf_event_time(event);
 
 	event->total_time_running = run_end - event->tstamp_running;
 }
@@ -534,6 +552,7 @@ event_sched_out(struct perf_event *event,
 		struct perf_cpu_context *cpuctx,
 		struct perf_event_context *ctx)
 {
+	u64 tstamp = perf_event_time(event);
 	u64 delta;
 	/*
 	 * An event which could not be activated because of
@@ -545,7 +564,7 @@ event_sched_out(struct perf_event *event,
 	    && !event_filter_match(event)) {
 		delta = ctx->time - event->tstamp_stopped;
 		event->tstamp_running += delta;
-		event->tstamp_stopped = ctx->time;
+		event->tstamp_stopped = tstamp;
 	}
 
 	if (event->state != PERF_EVENT_STATE_ACTIVE)
@@ -556,7 +575,7 @@ event_sched_out(struct perf_event *event,
 		event->pending_disable = 0;
 		event->state = PERF_EVENT_STATE_OFF;
 	}
-	event->tstamp_stopped = ctx->time;
+	event->tstamp_stopped = tstamp;
 	event->pmu->del(event, 0);
 	event->oncpu = -1;
 
@@ -768,6 +787,8 @@ event_sched_in(struct perf_event *event,
 		 struct perf_cpu_context *cpuctx,
 		 struct perf_event_context *ctx)
 {
+	u64 tstamp = perf_event_time(event);
+
 	if (event->state <= PERF_EVENT_STATE_OFF)
 		return 0;
 
@@ -784,9 +805,9 @@ event_sched_in(struct perf_event *event,
 		return -EAGAIN;
 	}
 
-	event->tstamp_running += ctx->time - event->tstamp_stopped;
+	event->tstamp_running += tstamp - event->tstamp_stopped;
 
-	event->shadow_ctx_time = ctx->time - ctx->timestamp;
+	event->shadow_ctx_time = tstamp - ctx->timestamp;
 
 	if (!is_software_event(event))
 		cpuctx->active_oncpu++;
@@ -898,11 +919,13 @@ static int group_can_go_on(struct perf_event *event,
 static void add_event_to_ctx(struct perf_event *event,
 			       struct perf_event_context *ctx)
 {
+	u64 tstamp = perf_event_time(event);
+
 	list_add_event(event, ctx);
 	perf_group_attach(event);
-	event->tstamp_enabled = ctx->time;
-	event->tstamp_running = ctx->time;
-	event->tstamp_stopped = ctx->time;
+	event->tstamp_enabled = tstamp;
+	event->tstamp_running = tstamp;
+	event->tstamp_stopped = tstamp;
 }
 
 /*
@@ -937,7 +960,7 @@ static void __perf_install_in_context(void *info)
 
 	add_event_to_ctx(event, ctx);
 
-	if (event->cpu != -1 && event->cpu != smp_processor_id())
+	if (!event_filter_match(event))
 		goto unlock;
 
 	/*
@@ -1042,14 +1065,13 @@ static void __perf_event_mark_enabled(struct perf_event *event,
 				       struct perf_event_context *ctx)
 {
 	struct perf_event *sub;
+	u64 tstamp = perf_event_time(event);
 
 	event->state = PERF_EVENT_STATE_INACTIVE;
-	event->tstamp_enabled = ctx->time - event->total_time_enabled;
+	event->tstamp_enabled = tstamp - event->total_time_enabled;
 	list_for_each_entry(sub, &event->sibling_list, group_entry) {
-		if (sub->state >= PERF_EVENT_STATE_INACTIVE) {
-			sub->tstamp_enabled =
-				ctx->time - sub->total_time_enabled;
-		}
+		if (sub->state >= PERF_EVENT_STATE_INACTIVE)
+			sub->tstamp_enabled = tstamp - sub->total_time_enabled;
 	}
 }
 
@@ -1082,7 +1104,7 @@ static void __perf_event_enable(void *info)
 		goto unlock;
 	__perf_event_mark_enabled(event, ctx);
 
-	if (event->cpu != -1 && event->cpu != smp_processor_id())
+	if (!event_filter_match(event))
 		goto unlock;
 
 	/*
@@ -1193,12 +1215,6 @@ static int perf_event_refresh(struct perf_event *event, int refresh)
 	return 0;
 }
 
-enum event_type_t {
-	EVENT_FLEXIBLE = 0x1,
-	EVENT_PINNED = 0x2,
-	EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
-};
-
 static void ctx_sched_out(struct perf_event_context *ctx,
 			  struct perf_cpu_context *cpuctx,
 			  enum event_type_t event_type)
@@ -1435,7 +1451,7 @@ ctx_pinned_sched_in(struct perf_event_context *ctx,
 	list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
 		if (event->state <= PERF_EVENT_STATE_OFF)
 			continue;
-		if (event->cpu != -1 && event->cpu != smp_processor_id())
+		if (!event_filter_match(event))
 			continue;
 
 		if (group_can_go_on(event, cpuctx, 1))
@@ -1467,7 +1483,7 @@ ctx_flexible_sched_in(struct perf_event_context *ctx,
 		 * Listen to the 'cpu' scheduling filter constraint
 		 * of events:
 		 */
-		if (event->cpu != -1 && event->cpu != smp_processor_id())
+		if (!event_filter_match(event))
 			continue;
 
 		if (group_can_go_on(event, cpuctx, can_add_hw)) {
@@ -1694,7 +1710,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
 		if (event->state != PERF_EVENT_STATE_ACTIVE)
 			continue;
 
-		if (event->cpu != -1 && event->cpu != smp_processor_id())
+		if (!event_filter_match(event))
 			continue;
 
 		hwc = &event->hw;
@@ -3893,7 +3909,7 @@ static int perf_event_task_match(struct perf_event *event)
 	if (event->state < PERF_EVENT_STATE_INACTIVE)
 		return 0;
 
-	if (event->cpu != -1 && event->cpu != smp_processor_id())
+	if (!event_filter_match(event))
 		return 0;
 
 	if (event->attr.comm || event->attr.mmap ||
@@ -4030,7 +4046,7 @@ static int perf_event_comm_match(struct perf_event *event)
 	if (event->state < PERF_EVENT_STATE_INACTIVE)
 		return 0;
 
-	if (event->cpu != -1 && event->cpu != smp_processor_id())
+	if (!event_filter_match(event))
 		return 0;
 
 	if (event->attr.comm)
@@ -4178,7 +4194,7 @@ static int perf_event_mmap_match(struct perf_event *event,
 	if (event->state < PERF_EVENT_STATE_INACTIVE)
 		return 0;
 
-	if (event->cpu != -1 && event->cpu != smp_processor_id())
+	if (!event_filter_match(event))
 		return 0;
 
 	if ((!executable && event->attr.mmap_data) ||
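
The repeated open-coded CPU test is folded into event_filter_match(). A stand-alone model of its semantics (illustrative only; the real helper takes a struct perf_event * and reads event->cpu):

#include <assert.h>

/* -1 means "not bound to any particular CPU" */
static int event_filter_match_model(int event_cpu, int this_cpu)
{
	return event_cpu == -1 || event_cpu == this_cpu;
}

int main(void)
{
	assert(event_filter_match_model(-1, 3));	/* unbound: always matches */
	assert(event_filter_match_model(2, 2));		/* bound, on its CPU */
	assert(!event_filter_match_model(2, 3));	/* bound, wrong CPU */
	return 0;
}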
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 53f338190b26..761c510a06c5 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -52,7 +52,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
 endif
 obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
-obj-$(CONFIG_EVENT_TRACING) += power-traces.o
+obj-$(CONFIG_TRACEPOINTS) += power-traces.o
 ifeq ($(CONFIG_TRACING),y)
 obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
 endif
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f8cf959bad45..dc53ecb80589 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1313,12 +1313,10 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
 
 	__this_cpu_inc(user_stack_count);
 
-
-
 	event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
 					  sizeof(*entry), flags, pc);
 	if (!event)
-		return;
+		goto out_drop_count;
 	entry	= ring_buffer_event_data(event);
 
 	entry->tgid		= current->tgid;
@@ -1333,8 +1331,8 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
 	if (!filter_check_discard(call, entry, buffer, event))
 		ring_buffer_unlock_commit(buffer, event);
 
+ out_drop_count:
 	__this_cpu_dec(user_stack_count);
-
  out:
 	preempt_enable();
 }
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 3094318bfea7..b335acb43be2 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -141,11 +141,10 @@ static void ddebug_change(const struct ddebug_query *query,
 	else if (!dp->flags)
 		dt->num_enabled++;
 	dp->flags = newflags;
-	if (newflags) {
-		jump_label_enable(&dp->enabled);
-	} else {
-		jump_label_disable(&dp->enabled);
-	}
+	if (newflags)
+		dp->enabled = 1;
+	else
+		dp->enabled = 0;
 	if (verbose)
 		printk(KERN_INFO
 			"ddebug: changed %s:%d [%s]%s %s\n",
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 1b9b13ee2a72..2b5387d53ba5 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -227,7 +227,7 @@ ifndef PERF_DEBUG
 	CFLAGS_OPTIMIZE = -O6
 endif
 
-CFLAGS = -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
+CFLAGS = -fno-omit-frame-pointer -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
 EXTLIBS = -lpthread -lrt -lelf -lm
 ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
 ALL_LDFLAGS = $(LDFLAGS)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 7bc049035484..7069bd3e90b3 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -331,6 +331,9 @@ try_again:
 		else if (err == ENODEV && cpu_list) {
 			die("No such device - did you specify"
 				" an out-of-range profile CPU?\n");
+		} else if (err == ENOENT) {
+			die("%s event is not supported. ",
+			    event_name(evsel));
 		} else if (err == EINVAL && sample_id_all_avail) {
 			/*
 			 * Old kernel, no attr->sample_id_type_all field
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 7a4ebeb8b016..abd4b8497bc4 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -489,7 +489,8 @@ static void create_tasks(void)
 
 	err = pthread_attr_init(&attr);
 	BUG_ON(err);
-	err = pthread_attr_setstacksize(&attr, (size_t)(16*1024));
+	err = pthread_attr_setstacksize(&attr,
+			(size_t) max(16 * 1024, PTHREAD_STACK_MIN));
 	BUG_ON(err);
 	err = pthread_mutex_lock(&start_work_mutex);
 	BUG_ON(err);
@@ -1861,7 +1862,7 @@ static int __cmd_record(int argc, const char **argv)
 	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
-	if (rec_argv)
+	if (rec_argv == NULL)
 		return -ENOMEM;
 
 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
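
On systems where PTHREAD_STACK_MIN exceeds 16 KiB, pthread_attr_setstacksize() rejects the old hard-coded value with EINVAL. A stand-alone sketch of the clamping the fix above performs:

#include <limits.h>
#include <pthread.h>
#include <stdio.h>

int main(void)
{
	pthread_attr_t attr;
	size_t sz = 16 * 1024;

	if (sz < PTHREAD_STACK_MIN)	/* what max(16*1024, PTHREAD_STACK_MIN) achieves */
		sz = PTHREAD_STACK_MIN;

	pthread_attr_init(&attr);
	if (pthread_attr_setstacksize(&attr, sz) == 0)
		printf("stack size set to %zu bytes\n", sz);
	return 0;
}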
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 02b2d8013a61..c385a63ebfd1 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -316,6 +316,8 @@ static int run_perf_stat(int argc __used, const char **argv)
316 "\t Consider tweaking" 316 "\t Consider tweaking"
317 " /proc/sys/kernel/perf_event_paranoid or running as root.", 317 " /proc/sys/kernel/perf_event_paranoid or running as root.",
318 system_wide ? "system-wide " : ""); 318 system_wide ? "system-wide " : "");
319 } else if (errno == ENOENT) {
320 error("%s event is not supported. ", event_name(counter));
319 } else { 321 } else {
320 error("open_counter returned with %d (%s). " 322 error("open_counter returned with %d (%s). "
321 "/bin/dmesg may provide additional information.\n", 323 "/bin/dmesg may provide additional information.\n",
@@ -683,8 +685,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
 		nr_counters = ARRAY_SIZE(default_attrs);
 
 		for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) {
-			pos = perf_evsel__new(default_attrs[c].type,
-					      default_attrs[c].config,
+			pos = perf_evsel__new(&default_attrs[c],
 					      nr_counters);
 			if (pos == NULL)
 				goto out;
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 1c984342a579..ed5696198d3d 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -234,6 +234,7 @@ out:
 	return err;
 }
 
+#include "util/cpumap.h"
 #include "util/evsel.h"
 #include <sys/types.h>
 
@@ -264,6 +265,7 @@ static int test__open_syscall_event(void)
 	int err = -1, fd;
 	struct thread_map *threads;
 	struct perf_evsel *evsel;
+	struct perf_event_attr attr;
 	unsigned int nr_open_calls = 111, i;
 	int id = trace_event__id("sys_enter_open");
 
@@ -278,7 +280,10 @@ static int test__open_syscall_event(void)
 		return -1;
 	}
 
-	evsel = perf_evsel__new(PERF_TYPE_TRACEPOINT, id, 0);
+	memset(&attr, 0, sizeof(attr));
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.config = id;
+	evsel = perf_evsel__new(&attr, 0);
 	if (evsel == NULL) {
 		pr_debug("perf_evsel__new\n");
 		goto out_thread_map_delete;
@@ -317,6 +322,111 @@ out_thread_map_delete:
 	return err;
 }
 
+#include <sched.h>
+
+static int test__open_syscall_event_on_all_cpus(void)
+{
+	int err = -1, fd, cpu;
+	struct thread_map *threads;
+	struct cpu_map *cpus;
+	struct perf_evsel *evsel;
+	struct perf_event_attr attr;
+	unsigned int nr_open_calls = 111, i;
+	cpu_set_t *cpu_set;
+	size_t cpu_set_size;
+	int id = trace_event__id("sys_enter_open");
+
+	if (id < 0) {
+		pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
+		return -1;
+	}
+
+	threads = thread_map__new(-1, getpid());
+	if (threads == NULL) {
+		pr_debug("thread_map__new\n");
+		return -1;
+	}
+
+	cpus = cpu_map__new(NULL);
+	if (cpus == NULL) {
+		pr_debug("cpu_map__new\n");
+		return -1;
+	}
+
+	cpu_set = CPU_ALLOC(cpus->nr);
+
+	if (cpu_set == NULL)
+		goto out_thread_map_delete;
+
+	cpu_set_size = CPU_ALLOC_SIZE(cpus->nr);
+	CPU_ZERO_S(cpu_set_size, cpu_set);
+
+	memset(&attr, 0, sizeof(attr));
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.config = id;
+	evsel = perf_evsel__new(&attr, 0);
+	if (evsel == NULL) {
+		pr_debug("perf_evsel__new\n");
+		goto out_cpu_free;
+	}
+
+	if (perf_evsel__open(evsel, cpus, threads) < 0) {
+		pr_debug("failed to open counter: %s, "
+			 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
+			 strerror(errno));
+		goto out_evsel_delete;
+	}
+
+	for (cpu = 0; cpu < cpus->nr; ++cpu) {
+		unsigned int ncalls = nr_open_calls + cpu;
+
+		CPU_SET(cpu, cpu_set);
+		sched_setaffinity(0, cpu_set_size, cpu_set);
+		for (i = 0; i < ncalls; ++i) {
+			fd = open("/etc/passwd", O_RDONLY);
+			close(fd);
+		}
+		CPU_CLR(cpu, cpu_set);
+	}
+
+	/*
+	 * Here we need to explicitly preallocate the counts, as if
+	 * we use the auto allocation it will allocate just for 1 cpu,
+	 * as we start by cpu 0.
+	 */
+	if (perf_evsel__alloc_counts(evsel, cpus->nr) < 0) {
+		pr_debug("perf_evsel__alloc_counts(ncpus=%d)\n", cpus->nr);
+		goto out_close_fd;
+	}
+
+	for (cpu = 0; cpu < cpus->nr; ++cpu) {
+		unsigned int expected;
+
+		if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) {
+			pr_debug("perf_evsel__read_on_cpu\n");
+			goto out_close_fd;
+		}
+
+		expected = nr_open_calls + cpu;
+		if (evsel->counts->cpu[cpu].val != expected) {
+			pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %Ld\n",
+				 expected, cpu, evsel->counts->cpu[cpu].val);
+			goto out_close_fd;
+		}
+	}
+
+	err = 0;
+out_close_fd:
+	perf_evsel__close_fd(evsel, 1, threads->nr);
+out_evsel_delete:
+	perf_evsel__delete(evsel);
+out_cpu_free:
+	CPU_FREE(cpu_set);
+out_thread_map_delete:
+	thread_map__delete(threads);
+	return err;
+}
+
 static struct test {
 	const char *desc;
 	int (*func)(void);
@@ -330,6 +440,10 @@ static struct test {
330 .func = test__open_syscall_event, 440 .func = test__open_syscall_event,
331 }, 441 },
332 { 442 {
443 .desc = "detect open syscall event on all cpus",
444 .func = test__open_syscall_event_on_all_cpus,
445 },
446 {
333 .func = NULL, 447 .func = NULL,
334 }, 448 },
335}; 449};
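
The test above pins itself to one cpu at a time with sched_setaffinity() and issues a cpu-dependent number of open() calls, so each cpu's counter can be checked against a distinct expected value. A minimal standalone sketch of that affinity-walking pattern, assuming glibc's dynamic cpu_set_t API; walk_cpus, nr_cpus and base_calls are illustrative names, and the _S macro variants are used because a CPU_ALLOC()ed set strictly requires the run-time size:

#define _GNU_SOURCE
#include <sched.h>
#include <fcntl.h>
#include <unistd.h>

static int walk_cpus(int nr_cpus, unsigned int base_calls)
{
	size_t size = CPU_ALLOC_SIZE(nr_cpus);
	cpu_set_t *set = CPU_ALLOC(nr_cpus);
	unsigned int i;
	int cpu;

	if (set == NULL)
		return -1;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		CPU_ZERO_S(size, set);
		CPU_SET_S(cpu, size, set);	/* pin to this cpu only */
		if (sched_setaffinity(0, size, set) < 0)
			break;
		/* cpu-dependent workload: base_calls + cpu opens */
		for (i = 0; i < base_calls + cpu; i++)
			close(open("/etc/passwd", O_RDONLY));
	}

	CPU_FREE(set);
	return cpu == nr_cpus ? 0 : -1;
}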
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 1e67ab9c7ebc..6ce4042421bd 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1247,6 +1247,8 @@ try_again:
1247 die("Permission error - are you root?\n" 1247 die("Permission error - are you root?\n"
1248 "\t Consider tweaking" 1248 "\t Consider tweaking"
1249 " /proc/sys/kernel/perf_event_paranoid.\n"); 1249 " /proc/sys/kernel/perf_event_paranoid.\n");
1250 if (err == ENOENT)
1251 die("%s event is not supported. ", event_name(evsel));
1250 /* 1252 /*
1251 * If it's cycles then fall back to hrtimer 1253 * If it's cycles then fall back to hrtimer
1252 * based cpu-clock-tick sw counter, which 1254 * based cpu-clock-tick sw counter, which
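
The added ENOENT branch above slots into an errno triage after sys_perf_event_open() fails. A condensed sketch of that ordering, assuming the usual errno values; handle_open_error() and the is_cycles flag are illustrative, and the cycles fallback is only indicated by a comment:

#include <errno.h>
#include <stdio.h>
#include <string.h>

static void handle_open_error(int err, const char *name, int is_cycles)
{
	if (err == EPERM || err == EACCES)
		fprintf(stderr, "Permission error - are you root?\n"
			"\t Consider tweaking /proc/sys/kernel/perf_event_paranoid.\n");
	else if (err == ENOENT)
		fprintf(stderr, "%s event is not supported.\n", name);
	else if (is_cycles)
		;	/* fall back to the hrtimer-based cpu-clock sw counter */
	else
		fprintf(stderr, "sys_perf_event_open: %s\n", strerror(err));
}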
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index c95267e63c5b..f5cfed60af98 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -6,14 +6,13 @@
6 6
7#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 7#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
8 8
9struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx) 9struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
10{ 10{
11 struct perf_evsel *evsel = zalloc(sizeof(*evsel)); 11 struct perf_evsel *evsel = zalloc(sizeof(*evsel));
12 12
13 if (evsel != NULL) { 13 if (evsel != NULL) {
14 evsel->idx = idx; 14 evsel->idx = idx;
15 evsel->attr.type = type; 15 evsel->attr = *attr;
16 evsel->attr.config = config;
17 INIT_LIST_HEAD(&evsel->node); 16 INIT_LIST_HEAD(&evsel->node);
18 } 17 }
19 18
@@ -128,59 +127,75 @@ int __perf_evsel__read(struct perf_evsel *evsel,
128 return 0; 127 return 0;
129} 128}
130 129
131int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus) 130static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
131 struct thread_map *threads)
132{ 132{
133 int cpu; 133 int cpu, thread;
134 134
135 if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, cpus->nr, 1) < 0) 135 if (evsel->fd == NULL &&
136 perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
136 return -1; 137 return -1;
137 138
138 for (cpu = 0; cpu < cpus->nr; cpu++) { 139 for (cpu = 0; cpu < cpus->nr; cpu++) {
139 FD(evsel, cpu, 0) = sys_perf_event_open(&evsel->attr, -1, 140 for (thread = 0; thread < threads->nr; thread++) {
140 cpus->map[cpu], -1, 0); 141 FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
141 if (FD(evsel, cpu, 0) < 0) 142 threads->map[thread],
142 goto out_close; 143 cpus->map[cpu], -1, 0);
144 if (FD(evsel, cpu, thread) < 0)
145 goto out_close;
146 }
143 } 147 }
144 148
145 return 0; 149 return 0;
146 150
147out_close: 151out_close:
148 while (--cpu >= 0) { 152 do {
149 close(FD(evsel, cpu, 0)); 153 while (--thread >= 0) {
150 FD(evsel, cpu, 0) = -1; 154 close(FD(evsel, cpu, thread));
151 } 155 FD(evsel, cpu, thread) = -1;
156 }
157 thread = threads->nr;
158 } while (--cpu >= 0);
152 return -1; 159 return -1;
153} 160}
154 161
155int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads) 162static struct {
163 struct cpu_map map;
164 int cpus[1];
165} empty_cpu_map = {
166 .map.nr = 1,
167 .cpus = { -1, },
168};
169
170static struct {
171 struct thread_map map;
172 int threads[1];
173} empty_thread_map = {
174 .map.nr = 1,
175 .threads = { -1, },
176};
177
178int perf_evsel__open(struct perf_evsel *evsel,
179 struct cpu_map *cpus, struct thread_map *threads)
156{ 180{
157 int thread;
158
159 if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, 1, threads->nr))
160 return -1;
161 181
162 for (thread = 0; thread < threads->nr; thread++) { 182 if (cpus == NULL) {
163 FD(evsel, 0, thread) = sys_perf_event_open(&evsel->attr, 183 /* Work around old compiler warnings about strict aliasing */
164 threads->map[thread], -1, -1, 0); 184 cpus = &empty_cpu_map.map;
165 if (FD(evsel, 0, thread) < 0)
166 goto out_close;
167 } 185 }
168 186
169 return 0; 187 if (threads == NULL)
188 threads = &empty_thread_map.map;
170 189
171out_close: 190 return __perf_evsel__open(evsel, cpus, threads);
172 while (--thread >= 0) {
173 close(FD(evsel, 0, thread));
174 FD(evsel, 0, thread) = -1;
175 }
176 return -1;
177} 191}
178 192
179int perf_evsel__open(struct perf_evsel *evsel, 193int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus)
180 struct cpu_map *cpus, struct thread_map *threads)
181{ 194{
182 if (threads == NULL) 195 return __perf_evsel__open(evsel, cpus, &empty_thread_map.map);
183 return perf_evsel__open_per_cpu(evsel, cpus); 196}
184 197
185 return perf_evsel__open_per_thread(evsel, threads); 198int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads)
199{
200 return __perf_evsel__open(evsel, &empty_cpu_map.map, threads);
186} 201}
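
The empty_cpu_map/empty_thread_map objects above give a struct whose last member is a flexible array one statically allocated slot by wrapping it in an anonymous struct, so NULL cpu/thread arguments can be normalized to a one-entry wildcard map. A generic form of the trick; struct map and empty_map are illustrative, and nesting a flexible-array struct like this relies on a GCC extension, just as the real maps do:

struct map {
	int nr;
	int entries[];			/* flexible array member */
};

static struct {
	struct map map;
	int entries[1];			/* backs map.entries[0] */
} empty_map = {
	.map.nr  = 1,
	.entries = { -1, },		/* -1 is the "any cpu/thread" wildcard */
};

/* callers then normalize: if (arg == NULL) arg = &empty_map.map; */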
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index a0ccd69c3fc2..b2d755fe88a5 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -37,7 +37,7 @@ struct perf_evsel {
37struct cpu_map; 37struct cpu_map;
38struct thread_map; 38struct thread_map;
39 39
40struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx); 40struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx);
41void perf_evsel__delete(struct perf_evsel *evsel); 41void perf_evsel__delete(struct perf_evsel *evsel);
42 42
43int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads); 43int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
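
With the attr-based prototype, callers now build the whole perf_event_attr before construction, so fields beyond type and config can be set up front. A minimal usage sketch, assuming the tools/perf internal headers; cycles_evsel() and the sample_period value are illustrative:

#include <string.h>
#include "util/evsel.h"		/* assumed perf-internal header */

static struct perf_evsel *cycles_evsel(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.type	   = PERF_TYPE_HARDWARE;
	attr.config	   = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 1000;	/* now settable at creation time */

	return perf_evsel__new(&attr, 0);	/* idx 0 */
}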
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 649083f27e08..5cb6f4bde905 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -490,6 +490,31 @@ parse_multiple_tracepoint_event(char *sys_name, const char *evt_exp,
490 return EVT_HANDLED_ALL; 490 return EVT_HANDLED_ALL;
491} 491}
492 492
493static int store_event_type(const char *orgname)
494{
495 char filename[PATH_MAX], *c;
496 FILE *file;
497 int id, n;
498
499 sprintf(filename, "%s/", debugfs_path);
500 strncat(filename, orgname, strlen(orgname));
501 strcat(filename, "/id");
502
503 c = strchr(filename, ':');
504 if (c)
505 *c = '/';
506
507 file = fopen(filename, "r");
508 if (!file)
509 return 0;
510 n = fscanf(file, "%i", &id);
511 fclose(file);
512 if (n < 1) {
513 pr_err("cannot store event ID\n");
514 return -EINVAL;
515 }
516 return perf_header__push_event(id, orgname);
517}
493 518
494static enum event_result parse_tracepoint_event(const char **strp, 519static enum event_result parse_tracepoint_event(const char **strp,
495 struct perf_event_attr *attr) 520 struct perf_event_attr *attr)
@@ -533,9 +558,13 @@ static enum event_result parse_tracepoint_event(const char **strp,
533 *strp += strlen(sys_name) + evt_length; 558 *strp += strlen(sys_name) + evt_length;
534 return parse_multiple_tracepoint_event(sys_name, evt_name, 559 return parse_multiple_tracepoint_event(sys_name, evt_name,
535 flags); 560 flags);
536 } else 561 } else {
562 if (store_event_type(evt_name) < 0)
563 return EVT_FAILED;
564
537 return parse_single_tracepoint_event(sys_name, evt_name, 565 return parse_single_tracepoint_event(sys_name, evt_name,
538 evt_length, attr, strp); 566 evt_length, attr, strp);
567 }
539} 568}
540 569
541static enum event_result 570static enum event_result
@@ -778,41 +807,11 @@ modifier:
778 return ret; 807 return ret;
779} 808}
780 809
781static int store_event_type(const char *orgname)
782{
783 char filename[PATH_MAX], *c;
784 FILE *file;
785 int id, n;
786
787 sprintf(filename, "%s/", debugfs_path);
788 strncat(filename, orgname, strlen(orgname));
789 strcat(filename, "/id");
790
791 c = strchr(filename, ':');
792 if (c)
793 *c = '/';
794
795 file = fopen(filename, "r");
796 if (!file)
797 return 0;
798 n = fscanf(file, "%i", &id);
799 fclose(file);
800 if (n < 1) {
801 pr_err("cannot store event ID\n");
802 return -EINVAL;
803 }
804 return perf_header__push_event(id, orgname);
805}
806
807int parse_events(const struct option *opt __used, const char *str, int unset __used) 810int parse_events(const struct option *opt __used, const char *str, int unset __used)
808{ 811{
809 struct perf_event_attr attr; 812 struct perf_event_attr attr;
810 enum event_result ret; 813 enum event_result ret;
811 814
812 if (strchr(str, ':'))
813 if (store_event_type(str) < 0)
814 return -1;
815
816 for (;;) { 815 for (;;) {
817 memset(&attr, 0, sizeof(attr)); 816 memset(&attr, 0, sizeof(attr));
818 ret = parse_event_symbols(&str, &attr); 817 ret = parse_event_symbols(&str, &attr);
@@ -824,7 +823,7 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
824 823
825 if (ret != EVT_HANDLED_ALL) { 824 if (ret != EVT_HANDLED_ALL) {
826 struct perf_evsel *evsel; 825 struct perf_evsel *evsel;
827 evsel = perf_evsel__new(attr.type, attr.config, 826 evsel = perf_evsel__new(&attr,
828 nr_counters); 827 nr_counters);
829 if (evsel == NULL) 828 if (evsel == NULL)
830 return -1; 829 return -1;
@@ -1014,8 +1013,15 @@ void print_events(void)
1014 1013
1015int perf_evsel_list__create_default(void) 1014int perf_evsel_list__create_default(void)
1016{ 1015{
1017 struct perf_evsel *evsel = perf_evsel__new(PERF_TYPE_HARDWARE, 1016 struct perf_evsel *evsel;
1018 PERF_COUNT_HW_CPU_CYCLES, 0); 1017 struct perf_event_attr attr;
1018
1019 memset(&attr, 0, sizeof(attr));
1020 attr.type = PERF_TYPE_HARDWARE;
1021 attr.config = PERF_COUNT_HW_CPU_CYCLES;
1022
1023 evsel = perf_evsel__new(&attr, 0);
1024
1019 if (evsel == NULL) 1025 if (evsel == NULL)
1020 return -ENOMEM; 1026 return -ENOMEM;
1021 1027
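
store_event_type(), now called during parsing, resolves a tracepoint's numeric id from its debugfs id file. A standalone sketch of that lookup, assuming debugfs is mounted at /sys/kernel/debug (perf resolves the real mount point at run time via debugfs_path); tracepoint_id() is an illustrative name:

#include <limits.h>
#include <stdio.h>
#include <string.h>

static int tracepoint_id(const char *orgname)	/* e.g. "syscalls:sys_enter_open" */
{
	char path[PATH_MAX], *c;
	FILE *file;
	int id;

	snprintf(path, sizeof(path),
		 "/sys/kernel/debug/tracing/events/%s/id", orgname);
	c = strchr(path, ':');
	if (c)
		*c = '/';	/* "sys:event" -> "sys/event" */

	file = fopen(path, "r");
	if (file == NULL)
		return -1;
	if (fscanf(file, "%i", &id) != 1)
		id = -1;
	fclose(file);
	return id;
}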
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 6fb4694d05fa..313dac2d94ce 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1007,7 +1007,7 @@ more:
1007 if (size == 0) 1007 if (size == 0)
1008 size = 8; 1008 size = 8;
1009 1009
1010 if (head + event->header.size >= mmap_size) { 1010 if (head + event->header.size > mmap_size) {
1011 if (mmaps[map_idx]) { 1011 if (mmaps[map_idx]) {
1012 munmap(mmaps[map_idx], mmap_size); 1012 munmap(mmaps[map_idx], mmap_size);
1013 mmaps[map_idx] = NULL; 1013 mmaps[map_idx] = NULL;
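
The one-character change above is the __perf_session__process_events infinite-loop fix from the merge summary: an event ending exactly at mmap_size is still fully inside the mapped window, and remapping in that case makes no forward progress. A sketch of the corrected containment test; event_fits() and the stdint types are illustrative stand-ins for perf's u64:

#include <stdint.h>

static int event_fits(uint64_t head, uint16_t size, uint64_t mmap_size)
{
	/*
	 * An event ending exactly at mmap_size is fully mapped; only a
	 * strict overrun should trigger a remap, otherwise a perfectly
	 * aligned trailing event is remapped forever without progress.
	 */
	return head + size <= mmap_size;
}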