author     Ingo Molnar <mingo@elte.hu>   2008-12-03 04:39:53 -0500
committer  Ingo Molnar <mingo@elte.hu>   2008-12-08 09:47:15 -0500
commit     241771ef016b5c0c83cd7a4372a74321c973c1e6 (patch)
tree       5893d72f1721af34daee82f27449bd35c9f65363
parent     e7bc62b6b3aeaa8849f8383e0cfb7ca6c003adc6 (diff)
performance counters: x86 support
Implement performance counters for x86 Intel CPUs.
It is simplified for now: the architectural PERFMON CPU feature is assumed,
which is available in Core2 and later Intel CPUs.
The design is flexible enough to be extended to other CPU types as well.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
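For context, once this patch (and the generic perf counter core it builds on) is applied, the new interface is reachable from user space through the syscall numbers wired up below: 333 on 32-bit x86 and 295 on x86-64. The following is a minimal, hypothetical user-space sketch only; the argument list (hw_event_type, hw_event_period, record_type, pid, cpu) and the cycles event being type 0 are taken from the generic perf-counter patch this one depends on and are assumptions here, not something defined by this diff.

/*
 * Hypothetical sketch: open a cycle counter on the current task via the
 * raw syscall number added by this patch (295 on x86-64, 333 on 32-bit x86).
 * The argument list is assumed from the generic perf-counter patch.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_perf_counter_open
#define __NR_perf_counter_open 295      /* x86-64 number added by this patch */
#endif

int main(void)
{
        /* hw_event_type 0 (cycles), default period, simple record type, */
        /* pid 0 (current task), cpu -1 (any CPU) - all assumed here     */
        long fd = syscall(__NR_perf_counter_open, 0, 0, 0, 0, -1);

        if (fd < 0) {
                perror("perf_counter_open");
                return 1;
        }
        printf("counter fd: %ld\n", fd);
        return 0;
}

On a kernel without this patch the call simply fails with ENOSYS, which makes the sketch a cheap way to probe for the feature.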
 arch/x86/Kconfig                               |   1
 arch/x86/ia32/ia32entry.S                      |   3
 arch/x86/include/asm/hardirq_32.h              |   1
 arch/x86/include/asm/hw_irq.h                  |   2
 arch/x86/include/asm/intel_arch_perfmon.h      |  34
 arch/x86/include/asm/irq_vectors.h             |   5
 arch/x86/include/asm/mach-default/entry_arch.h |   5
 arch/x86/include/asm/pda.h                     |   1
 arch/x86/include/asm/thread_info.h             |   4
 arch/x86/include/asm/unistd_32.h               |   1
 arch/x86/include/asm/unistd_64.h               |   3
 arch/x86/kernel/apic.c                         |   2
 arch/x86/kernel/cpu/Makefile                   |  12
 arch/x86/kernel/cpu/common.c                   |   2
 arch/x86/kernel/cpu/perf_counter.c             | 571
 arch/x86/kernel/entry_64.S                     |   5
 arch/x86/kernel/irq.c                          |   5
 arch/x86/kernel/irqinit_32.c                   |   3
 arch/x86/kernel/irqinit_64.c                   |   5
 arch/x86/kernel/signal.c                       |   7
 arch/x86/kernel/syscall_table_32.S             |   1
 21 files changed, 652 insertions(+), 21 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d4d4cb7629ea..f2fdc1867241 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -643,6 +643,7 @@ config X86_UP_IOAPIC
 config X86_LOCAL_APIC
         def_bool y
         depends on X86_64 || (X86_32 && (X86_UP_APIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH))
+        select HAVE_PERF_COUNTERS
 
 config X86_IO_APIC
         def_bool y
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 256b00b61892..3c14ed07dc4e 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -823,7 +823,8 @@ ia32_sys_call_table:
         .quad compat_sys_signalfd4
         .quad sys_eventfd2
         .quad sys_epoll_create1
         .quad sys_dup3                  /* 330 */
         .quad sys_pipe2
         .quad sys_inotify_init1
+        .quad sys_perf_counter_open
 ia32_syscall_end:
diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h
index 5ca135e72f2b..b3e475dc9338 100644
--- a/arch/x86/include/asm/hardirq_32.h
+++ b/arch/x86/include/asm/hardirq_32.h
@@ -9,6 +9,7 @@ typedef struct {
         unsigned long idle_timestamp;
         unsigned int __nmi_count;       /* arch dependent */
         unsigned int apic_timer_irqs;   /* arch dependent */
+        unsigned int apic_perf_irqs;    /* arch dependent */
         unsigned int irq0_irqs;
         unsigned int irq_resched_count;
         unsigned int irq_call_count;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 8de644b6b959..aa93e53b85ee 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -30,6 +30,8 @@
 /* Interrupt handlers registered during init_IRQ */
 extern void apic_timer_interrupt(void);
 extern void error_interrupt(void);
+extern void perf_counter_interrupt(void);
+
 extern void spurious_interrupt(void);
 extern void thermal_interrupt(void);
 extern void reschedule_interrupt(void);
diff --git a/arch/x86/include/asm/intel_arch_perfmon.h b/arch/x86/include/asm/intel_arch_perfmon.h
index fa0fd068bc2e..71598a9eab61 100644
--- a/arch/x86/include/asm/intel_arch_perfmon.h
+++ b/arch/x86/include/asm/intel_arch_perfmon.h
@@ -1,22 +1,24 @@
 #ifndef _ASM_X86_INTEL_ARCH_PERFMON_H
 #define _ASM_X86_INTEL_ARCH_PERFMON_H
 
 #define MSR_ARCH_PERFMON_PERFCTR0               0xc1
 #define MSR_ARCH_PERFMON_PERFCTR1               0xc2
 
 #define MSR_ARCH_PERFMON_EVENTSEL0              0x186
 #define MSR_ARCH_PERFMON_EVENTSEL1              0x187
 
 #define ARCH_PERFMON_EVENTSEL0_ENABLE           (1 << 22)
 #define ARCH_PERFMON_EVENTSEL_INT               (1 << 20)
 #define ARCH_PERFMON_EVENTSEL_OS                (1 << 17)
 #define ARCH_PERFMON_EVENTSEL_USR               (1 << 16)
 
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL   (0x3c)
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL   0x3c
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX (0)
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
         (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
+
+#define ARCH_PERFMON_BRANCH_MISSES_RETIRED      6
 
 union cpuid10_eax {
         struct {
@@ -28,4 +30,12 @@ union cpuid10_eax {
         unsigned int full;
 };
 
+#ifdef CONFIG_PERF_COUNTERS
+extern void init_hw_perf_counters(void);
+extern void perf_counters_lapic_init(int nmi);
+#else
+static inline void init_hw_perf_counters(void)          { }
+static inline void perf_counters_lapic_init(int nmi)    { }
+#endif
+
 #endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 0005adb0f941..b8d277f1252f 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -87,6 +87,11 @@
 #define LOCAL_TIMER_VECTOR      0xef
 
 /*
+ * Performance monitoring interrupt vector:
+ */
+#define LOCAL_PERF_VECTOR       0xee
+
+/*
  * First APIC vector available to drivers: (vectors 0x30-0xee) we
  * start at 0x31(0x41) to spread out vectors evenly between priority
  * levels. (0x80 is the syscall vector)
diff --git a/arch/x86/include/asm/mach-default/entry_arch.h b/arch/x86/include/asm/mach-default/entry_arch.h
index 6b1add8e31dd..ad31e5d90e90 100644
--- a/arch/x86/include/asm/mach-default/entry_arch.h
+++ b/arch/x86/include/asm/mach-default/entry_arch.h
@@ -25,10 +25,15 @@ BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
  * a much simpler SMP time architecture:
  */
 #ifdef CONFIG_X86_LOCAL_APIC
+
 BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
 BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
 BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
 
+#ifdef CONFIG_PERF_COUNTERS
+BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR)
+#endif
+
 #ifdef CONFIG_X86_MCE_P4THERMAL
 BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
 #endif
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index 2fbfff88df37..90a8d9d4206b 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -30,6 +30,7 @@ struct x8664_pda {
         short isidle;
         struct mm_struct *active_mm;
         unsigned apic_timer_irqs;
+        unsigned apic_perf_irqs;
         unsigned irq0_irqs;
         unsigned irq_resched_count;
         unsigned irq_call_count;
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e44d379faad2..810bf266d134 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -80,6 +80,7 @@ struct thread_info {
 #define TIF_SYSCALL_AUDIT       7       /* syscall auditing active */
 #define TIF_SECCOMP             8       /* secure computing */
 #define TIF_MCE_NOTIFY          10      /* notify userspace of an MCE */
+#define TIF_PERF_COUNTERS       11      /* notify perf counter work */
 #define TIF_NOTSC               16      /* TSC is not accessible in userland */
 #define TIF_IA32                17      /* 32bit process */
 #define TIF_FORK                18      /* ret_from_fork */
@@ -103,6 +104,7 @@ struct thread_info {
 #define _TIF_SYSCALL_AUDIT      (1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP            (1 << TIF_SECCOMP)
 #define _TIF_MCE_NOTIFY         (1 << TIF_MCE_NOTIFY)
+#define _TIF_PERF_COUNTERS      (1 << TIF_PERF_COUNTERS)
 #define _TIF_NOTSC              (1 << TIF_NOTSC)
 #define _TIF_IA32               (1 << TIF_IA32)
 #define _TIF_FORK               (1 << TIF_FORK)
@@ -135,7 +137,7 @@ struct thread_info {
 
 /* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK \
-        (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME)
+        (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_PERF_COUNTERS|_TIF_NOTIFY_RESUME)
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW \
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index f2bba78430a4..7e47658b0a6f 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -338,6 +338,7 @@
 #define __NR_dup3               330
 #define __NR_pipe2              331
 #define __NR_inotify_init1      332
+#define __NR_perf_counter_open  333
 
 #ifdef __KERNEL__
 
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index d2e415e6666f..53025feaf88d 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -653,7 +653,8 @@ __SYSCALL(__NR_dup3, sys_dup3)
 __SYSCALL(__NR_pipe2, sys_pipe2)
 #define __NR_inotify_init1                      294
 __SYSCALL(__NR_inotify_init1, sys_inotify_init1)
-
+#define __NR_perf_counter_open                  295
+__SYSCALL(__NR_perf_counter_open, sys_perf_counter_open)
 
 #ifndef __NO_STUBS
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 16f94879b525..8ab8c1858672 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -31,6 +31,7 @@
 #include <linux/dmi.h>
 #include <linux/dmar.h>
 
+#include <asm/intel_arch_perfmon.h>
 #include <asm/atomic.h>
 #include <asm/smp.h>
 #include <asm/mtrr.h>
@@ -1147,6 +1148,7 @@ void __cpuinit setup_local_APIC(void)
                 apic_write(APIC_ESR, 0);
         }
 #endif
+        perf_counters_lapic_init(0);
 
         preempt_disable();
 
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 82ec6075c057..89e53361fe24 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -1,5 +1,5 @@
 #
-# Makefile for x86-compatible CPU details and quirks
+# Makefile for x86-compatible CPU details, features and quirks
 #
 
 obj-y                   := intel_cacheinfo.o addon_cpuid_features.o
@@ -16,11 +16,13 @@ obj-$(CONFIG_CPU_SUP_CENTAUR_64)        += centaur_64.o
 obj-$(CONFIG_CPU_SUP_TRANSMETA_32)      += transmeta.o
 obj-$(CONFIG_CPU_SUP_UMC_32)            += umc.o
 
-obj-$(CONFIG_X86_MCE)   += mcheck/
-obj-$(CONFIG_MTRR)      += mtrr/
-obj-$(CONFIG_CPU_FREQ)  += cpufreq/
+obj-$(CONFIG_PERF_COUNTERS)             += perf_counter.o
 
-obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
+obj-$(CONFIG_X86_MCE)                   += mcheck/
+obj-$(CONFIG_MTRR)                      += mtrr/
+obj-$(CONFIG_CPU_FREQ)                  += cpufreq/
+
+obj-$(CONFIG_X86_LOCAL_APIC)            += perfctr-watchdog.o
 
 quiet_cmd_mkcapflags = MKCAP   $@
         cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b9c9ea0217a9..4461011db47c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -17,6 +17,7 @@
 #include <asm/mmu_context.h>
 #include <asm/mtrr.h>
 #include <asm/mce.h>
+#include <asm/intel_arch_perfmon.h>
 #include <asm/pat.h>
 #include <asm/asm.h>
 #include <asm/numa.h>
@@ -750,6 +751,7 @@ void __init identify_boot_cpu(void)
 #else
         vgetcpu_set_mode();
 #endif
+        init_hw_perf_counters();
 }
 
 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
new file mode 100644
index 000000000000..82440cbed0e6
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -0,0 +1,571 @@
+/*
+ * Performance counter x86 architecture code
+ *
+ * Copyright(C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ * Copyright(C) 2008 Red Hat, Inc., Ingo Molnar
+ *
+ * For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/perf_counter.h>
+#include <linux/capability.h>
+#include <linux/notifier.h>
+#include <linux/hardirq.h>
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+#include <linux/sched.h>
+
+#include <asm/intel_arch_perfmon.h>
+#include <asm/apic.h>
+
+static bool perf_counters_initialized __read_mostly;
+
+/*
+ * Number of (generic) HW counters:
+ */
+static int nr_hw_counters __read_mostly;
+static u32 perf_counter_mask __read_mostly;
+
+/* No support for fixed function counters yet */
+
+#define MAX_HW_COUNTERS 8
+
+struct cpu_hw_counters {
+        struct perf_counter     *counters[MAX_HW_COUNTERS];
+        unsigned long           used[BITS_TO_LONGS(MAX_HW_COUNTERS)];
+        int                     enable_all;
+};
+
+/*
+ * Intel PerfMon v3. Used on Core2 and later.
+ */
+static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
+
+const int intel_perfmon_event_map[] =
+{
+        [PERF_COUNT_CYCLES]             = 0x003c,
+        [PERF_COUNT_INSTRUCTIONS]       = 0x00c0,
+        [PERF_COUNT_CACHE_REFERENCES]   = 0x4f2e,
+        [PERF_COUNT_CACHE_MISSES]       = 0x412e,
+        [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x00c4,
+        [PERF_COUNT_BRANCH_MISSES]      = 0x00c5,
+};
+
+const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map);
+
+/*
+ * Setup the hardware configuration for a given hw_event_type
+ */
+int hw_perf_counter_init(struct perf_counter *counter, s32 hw_event_type)
+{
+        struct hw_perf_counter *hwc = &counter->hw;
+
+        if (unlikely(!perf_counters_initialized))
+                return -EINVAL;
+
+        /*
+         * Count user events, and generate PMC IRQs:
+         * (keep 'enabled' bit clear for now)
+         */
+        hwc->config = ARCH_PERFMON_EVENTSEL_USR | ARCH_PERFMON_EVENTSEL_INT;
+
+        /*
+         * If privileged enough, count OS events too, and allow
+         * NMI events as well:
+         */
+        hwc->nmi = 0;
+        if (capable(CAP_SYS_ADMIN)) {
+                hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
+                if (hw_event_type & PERF_COUNT_NMI)
+                        hwc->nmi = 1;
+        }
+
+        hwc->config_base        = MSR_ARCH_PERFMON_EVENTSEL0;
+        hwc->counter_base       = MSR_ARCH_PERFMON_PERFCTR0;
+
+        hwc->irq_period         = counter->__irq_period;
+        /*
+         * Intel PMCs cannot be accessed sanely above 32 bit width,
+         * so we install an artificial 1<<31 period regardless of
+         * the generic counter period:
+         */
+        if (!hwc->irq_period)
+                hwc->irq_period = 0x7FFFFFFF;
+
+        hwc->next_count = -((s32) hwc->irq_period);
+
+        /*
+         * Negative event types mean raw encoded event+umask values:
+         */
+        if (hw_event_type < 0) {
+                counter->hw_event_type = -hw_event_type;
+                counter->hw_event_type &= ~PERF_COUNT_NMI;
+        } else {
+                hw_event_type &= ~PERF_COUNT_NMI;
+                if (hw_event_type >= max_intel_perfmon_events)
+                        return -EINVAL;
+                /*
+                 * The generic map:
+                 */
+                counter->hw_event_type = intel_perfmon_event_map[hw_event_type];
+        }
+        hwc->config |= counter->hw_event_type;
+        counter->wakeup_pending = 0;
+
+        return 0;
+}
+
+static void __hw_perf_enable_all(void)
+{
+        wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask, 0);
+}
+
+void hw_perf_enable_all(void)
+{
+        struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+
+        cpuc->enable_all = 1;
+        __hw_perf_enable_all();
+}
+
+void hw_perf_disable_all(void)
+{
+        struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+
+        cpuc->enable_all = 0;
+        wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0);
+}
+
+static DEFINE_PER_CPU(u64, prev_next_count[MAX_HW_COUNTERS]);
+
+static void __hw_perf_counter_enable(struct hw_perf_counter *hwc, int idx)
+{
+        per_cpu(prev_next_count[idx], smp_processor_id()) = hwc->next_count;
+
+        wrmsr(hwc->counter_base + idx, hwc->next_count, 0);
+        wrmsr(hwc->config_base + idx, hwc->config, 0);
+}
+
+void hw_perf_counter_enable(struct perf_counter *counter)
+{
+        struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+        struct hw_perf_counter *hwc = &counter->hw;
+        int idx = hwc->idx;
+
+        /* Try to get the previous counter again */
+        if (test_and_set_bit(idx, cpuc->used)) {
+                idx = find_first_zero_bit(cpuc->used, nr_hw_counters);
+                set_bit(idx, cpuc->used);
+                hwc->idx = idx;
+        }
+
+        perf_counters_lapic_init(hwc->nmi);
+
+        wrmsr(hwc->config_base + idx,
+              hwc->config & ~ARCH_PERFMON_EVENTSEL0_ENABLE, 0);
+
+        cpuc->counters[idx] = counter;
+        counter->hw.config |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+        __hw_perf_counter_enable(hwc, idx);
+}
+
+#ifdef CONFIG_X86_64
+static inline void atomic64_counter_set(struct perf_counter *counter, u64 val)
+{
+        atomic64_set(&counter->count, val);
+}
+
+static inline u64 atomic64_counter_read(struct perf_counter *counter)
+{
+        return atomic64_read(&counter->count);
+}
+#else
+/*
+ * Todo: add proper atomic64_t support to 32-bit x86:
+ */
+static inline void atomic64_counter_set(struct perf_counter *counter, u64 val64)
+{
+        u32 *val32 = (void *)&val64;
+
+        atomic_set(counter->count32 + 0, *(val32 + 0));
+        atomic_set(counter->count32 + 1, *(val32 + 1));
+}
+
+static inline u64 atomic64_counter_read(struct perf_counter *counter)
+{
+        return atomic_read(counter->count32 + 0) |
+                (u64) atomic_read(counter->count32 + 1) << 32;
+}
+#endif
+
+static void __hw_perf_save_counter(struct perf_counter *counter,
+                                   struct hw_perf_counter *hwc, int idx)
+{
+        s64 raw = -1;
+        s64 delta;
+        int err;
+
+        /*
+         * Get the raw hw counter value:
+         */
+        err = rdmsrl_safe(hwc->counter_base + idx, &raw);
+        WARN_ON_ONCE(err);
+
+        /*
+         * Rebase it to zero (it started counting at -irq_period),
+         * to see the delta since ->prev_count:
+         */
+        delta = (s64)hwc->irq_period + (s64)(s32)raw;
+
+        atomic64_counter_set(counter, hwc->prev_count + delta);
+
+        /*
+         * Adjust the ->prev_count offset - if we went beyond
+         * irq_period of units, then we got an IRQ and the counter
+         * was set back to -irq_period:
+         */
+        while (delta >= (s64)hwc->irq_period) {
+                hwc->prev_count += hwc->irq_period;
+                delta -= (s64)hwc->irq_period;
+        }
+
+        /*
+         * Calculate the next raw counter value we'll write into
+         * the counter at the next sched-in time:
+         */
+        delta -= (s64)hwc->irq_period;
+
+        hwc->next_count = (s32)delta;
+}
+
+void perf_counter_print_debug(void)
+{
+        u64 ctrl, status, overflow, pmc_ctrl, pmc_count, next_count;
+        int cpu, err, idx;
+
+        local_irq_disable();
+
+        cpu = smp_processor_id();
+
+        err = rdmsrl_safe(MSR_CORE_PERF_GLOBAL_CTRL, &ctrl);
+        WARN_ON_ONCE(err);
+
+        err = rdmsrl_safe(MSR_CORE_PERF_GLOBAL_STATUS, &status);
+        WARN_ON_ONCE(err);
+
+        err = rdmsrl_safe(MSR_CORE_PERF_GLOBAL_OVF_CTRL, &overflow);
+        WARN_ON_ONCE(err);
+
+        printk(KERN_INFO "\n");
+        printk(KERN_INFO "CPU#%d: ctrl: %016llx\n", cpu, ctrl);
+        printk(KERN_INFO "CPU#%d: status: %016llx\n", cpu, status);
+        printk(KERN_INFO "CPU#%d: overflow: %016llx\n", cpu, overflow);
+
+        for (idx = 0; idx < nr_hw_counters; idx++) {
+                err = rdmsrl_safe(MSR_ARCH_PERFMON_EVENTSEL0 + idx, &pmc_ctrl);
+                WARN_ON_ONCE(err);
+
+                err = rdmsrl_safe(MSR_ARCH_PERFMON_PERFCTR0 + idx, &pmc_count);
+                WARN_ON_ONCE(err);
+
+                next_count = per_cpu(prev_next_count[idx], cpu);
+
+                printk(KERN_INFO "CPU#%d: PMC%d ctrl: %016llx\n",
+                        cpu, idx, pmc_ctrl);
+                printk(KERN_INFO "CPU#%d: PMC%d count: %016llx\n",
+                        cpu, idx, pmc_count);
+                printk(KERN_INFO "CPU#%d: PMC%d next: %016llx\n",
+                        cpu, idx, next_count);
+        }
+        local_irq_enable();
+}
+
+void hw_perf_counter_disable(struct perf_counter *counter)
+{
+        struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+        struct hw_perf_counter *hwc = &counter->hw;
+        unsigned int idx = hwc->idx;
+
+        counter->hw.config &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+        wrmsr(hwc->config_base + idx, hwc->config, 0);
+
+        clear_bit(idx, cpuc->used);
+        cpuc->counters[idx] = NULL;
+        __hw_perf_save_counter(counter, hwc, idx);
+}
+
+void hw_perf_counter_read(struct perf_counter *counter)
+{
+        struct hw_perf_counter *hwc = &counter->hw;
+        unsigned long addr = hwc->counter_base + hwc->idx;
+        s64 offs, val = -1LL;
+        s32 val32;
+        int err;
+
+        /* Careful: NMI might modify the counter offset */
+        do {
+                offs = hwc->prev_count;
+                err = rdmsrl_safe(addr, &val);
+                WARN_ON_ONCE(err);
+        } while (offs != hwc->prev_count);
+
+        val32 = (s32) val;
+        val = (s64)hwc->irq_period + (s64)val32;
+        atomic64_counter_set(counter, hwc->prev_count + val);
+}
+
+static void perf_store_irq_data(struct perf_counter *counter, u64 data)
+{
+        struct perf_data *irqdata = counter->irqdata;
+
+        if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
+                irqdata->overrun++;
+        } else {
+                u64 *p = (u64 *) &irqdata->data[irqdata->len];
+
+                *p = data;
+                irqdata->len += sizeof(u64);
+        }
+}
+
+static void perf_save_and_restart(struct perf_counter *counter)
+{
+        struct hw_perf_counter *hwc = &counter->hw;
+        int idx = hwc->idx;
+
+        wrmsr(hwc->config_base + idx,
+              hwc->config & ~ARCH_PERFMON_EVENTSEL0_ENABLE, 0);
+
+        if (hwc->config & ARCH_PERFMON_EVENTSEL0_ENABLE) {
+                __hw_perf_save_counter(counter, hwc, idx);
+                __hw_perf_counter_enable(hwc, idx);
+        }
+}
+
+static void
+perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown)
+{
+        struct perf_counter_context *ctx = leader->ctx;
+        struct perf_counter *counter;
+        int bit;
+
+        list_for_each_entry(counter, &ctx->counters, list) {
+                if (counter->record_type != PERF_RECORD_SIMPLE ||
+                    counter == leader)
+                        continue;
+
+                if (counter->active) {
+                        /*
+                         * When counter was not in the overflow mask, we have to
+                         * read it from hardware. We read it as well, when it
+                         * has not been read yet and clear the bit in the
+                         * status mask.
+                         */
+                        bit = counter->hw.idx;
+                        if (!test_bit(bit, (unsigned long *) overflown) ||
+                            test_bit(bit, (unsigned long *) status)) {
+                                clear_bit(bit, (unsigned long *) status);
+                                perf_save_and_restart(counter);
+                        }
+                }
+                perf_store_irq_data(leader, counter->hw_event_type);
+                perf_store_irq_data(leader, atomic64_counter_read(counter));
+        }
+}
+
+/*
+ * This handler is triggered by the local APIC, so the APIC IRQ handling
+ * rules apply:
+ */
+static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
+{
+        int bit, cpu = smp_processor_id();
+        struct cpu_hw_counters *cpuc;
+        u64 ack, status;
+
+        rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+        if (!status) {
+                ack_APIC_irq();
+                return;
+        }
+
+        /* Disable counters globally */
+        wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0);
+        ack_APIC_irq();
+
+        cpuc = &per_cpu(cpu_hw_counters, cpu);
+
+again:
+        ack = status;
+        for_each_bit(bit, (unsigned long *) &status, nr_hw_counters) {
+                struct perf_counter *counter = cpuc->counters[bit];
+
+                clear_bit(bit, (unsigned long *) &status);
+                if (!counter)
+                        continue;
+
+                perf_save_and_restart(counter);
+
+                switch (counter->record_type) {
+                case PERF_RECORD_SIMPLE:
+                        continue;
+                case PERF_RECORD_IRQ:
+                        perf_store_irq_data(counter, instruction_pointer(regs));
+                        break;
+                case PERF_RECORD_GROUP:
+                        perf_store_irq_data(counter, counter->hw_event_type);
+                        perf_store_irq_data(counter,
+                                            atomic64_counter_read(counter));
+                        perf_handle_group(counter, &status, &ack);
+                        break;
+                }
+                /*
+                 * From NMI context we cannot call into the scheduler to
+                 * do a task wakeup - but we mark these counters as
+                 * wakeup_pending and initate a wakeup callback:
+                 */
+                if (nmi) {
+                        counter->wakeup_pending = 1;
+                        set_tsk_thread_flag(current, TIF_PERF_COUNTERS);
+                } else {
+                        wake_up(&counter->waitq);
+                }
+        }
+
+        wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack, 0);
+
+        /*
+         * Repeat if there is more work to be done:
+         */
+        rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+        if (status)
+                goto again;
+
+        /*
+         * Do not reenable when global enable is off:
+         */
+        if (cpuc->enable_all)
+                __hw_perf_enable_all();
+}
+
+void smp_perf_counter_interrupt(struct pt_regs *regs)
+{
+        irq_enter();
+#ifdef CONFIG_X86_64
+        add_pda(apic_perf_irqs, 1);
+#else
+        per_cpu(irq_stat, smp_processor_id()).apic_perf_irqs++;
+#endif
+        apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
+        __smp_perf_counter_interrupt(regs, 0);
+
+        irq_exit();
+}
+
+/*
+ * This handler is triggered by NMI contexts:
+ */
+void perf_counter_notify(struct pt_regs *regs)
+{
+        struct cpu_hw_counters *cpuc;
+        unsigned long flags;
+        int bit, cpu;
+
+        local_irq_save(flags);
+        cpu = smp_processor_id();
+        cpuc = &per_cpu(cpu_hw_counters, cpu);
+
+        for_each_bit(bit, cpuc->used, nr_hw_counters) {
+                struct perf_counter *counter = cpuc->counters[bit];
+
+                if (!counter)
+                        continue;
+
+                if (counter->wakeup_pending) {
+                        counter->wakeup_pending = 0;
+                        wake_up(&counter->waitq);
+                }
+        }
+
+        local_irq_restore(flags);
+}
+
+void __cpuinit perf_counters_lapic_init(int nmi)
+{
+        u32 apic_val;
+
+        if (!perf_counters_initialized)
+                return;
+        /*
+         * Enable the performance counter vector in the APIC LVT:
+         */
+        apic_val = apic_read(APIC_LVTERR);
+
+        apic_write(APIC_LVTERR, apic_val | APIC_LVT_MASKED);
+        if (nmi)
+                apic_write(APIC_LVTPC, APIC_DM_NMI);
+        else
+                apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
+        apic_write(APIC_LVTERR, apic_val);
+}
+
+static int __kprobes
+perf_counter_nmi_handler(struct notifier_block *self,
+                         unsigned long cmd, void *__args)
+{
+        struct die_args *args = __args;
+        struct pt_regs *regs;
+
+        if (likely(cmd != DIE_NMI_IPI))
+                return NOTIFY_DONE;
+
+        regs = args->regs;
+
+        apic_write(APIC_LVTPC, APIC_DM_NMI);
+        __smp_perf_counter_interrupt(regs, 1);
+
+        return NOTIFY_STOP;
+}
+
+static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
+        .notifier_call          = perf_counter_nmi_handler
+};
+
+void __init init_hw_perf_counters(void)
+{
+        union cpuid10_eax eax;
+        unsigned int unused;
+        unsigned int ebx;
+
+        if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+                return;
+
+        /*
+         * Check whether the Architectural PerfMon supports
+         * Branch Misses Retired Event or not.
+         */
+        cpuid(10, &(eax.full), &ebx, &unused, &unused);
+        if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
+                return;
+
+        printk(KERN_INFO "Intel Performance Monitoring support detected.\n");
+
+        printk(KERN_INFO "... version: %d\n", eax.split.version_id);
+        printk(KERN_INFO "... num_counters: %d\n", eax.split.num_counters);
+        nr_hw_counters = eax.split.num_counters;
+        if (nr_hw_counters > MAX_HW_COUNTERS) {
+                nr_hw_counters = MAX_HW_COUNTERS;
+                WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
+                     nr_hw_counters, MAX_HW_COUNTERS);
+        }
+        perf_counter_mask = (1 << nr_hw_counters) - 1;
+        perf_max_counters = nr_hw_counters;
+
+        printk(KERN_INFO "... bit_width: %d\n", eax.split.bit_width);
+        printk(KERN_INFO "... mask_length: %d\n", eax.split.mask_length);
+
+        perf_counters_lapic_init(0);
+        register_die_notifier(&perf_counter_nmi_notifier);
+
+        perf_counters_initialized = true;
+}
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 3194636a4293..fc013cfde307 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -984,6 +984,11 @@ apicinterrupt ERROR_APIC_VECTOR \
 apicinterrupt SPURIOUS_APIC_VECTOR \
         spurious_interrupt smp_spurious_interrupt
 
+#ifdef CONFIG_PERF_COUNTERS
+apicinterrupt LOCAL_PERF_VECTOR \
+        perf_counter_interrupt smp_perf_counter_interrupt
+#endif
+
 /*
  * Exception entry points.
  */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index d1d4dc52f649..d92bc71e41a7 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -56,6 +56,10 @@ static int show_other_interrupts(struct seq_file *p)
         for_each_online_cpu(j)
                 seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
         seq_printf(p, "  Local timer interrupts\n");
+        seq_printf(p, "CNT: ");
+        for_each_online_cpu(j)
+                seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
+        seq_printf(p, "  Performance counter interrupts\n");
 #endif
 #ifdef CONFIG_SMP
         seq_printf(p, "RES: ");
@@ -160,6 +164,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 
 #ifdef CONFIG_X86_LOCAL_APIC
         sum += irq_stats(cpu)->apic_timer_irqs;
+        sum += irq_stats(cpu)->apic_perf_irqs;
 #endif
 #ifdef CONFIG_SMP
         sum += irq_stats(cpu)->irq_resched_count;
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 607db63044a5..6a33b5e30161 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -160,6 +160,9 @@ void __init native_init_IRQ(void)
         /* IPI vectors for APIC spurious and error interrupts */
         alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
         alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+# ifdef CONFIG_PERF_COUNTERS
+        alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt);
+# endif
 #endif
 
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 8670b3ce626e..91d785c25ad9 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -138,6 +138,11 @@ static void __init apic_intr_init(void)
         /* IPI vectors for APIC spurious and error interrupts */
         alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
         alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+
+        /* Performance monitoring interrupt: */
+#ifdef CONFIG_PERF_COUNTERS
+        alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt);
+#endif
 }
 
 void __init native_init_IRQ(void)
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b1cc6da64208..dee553c503d3 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -6,7 +6,7 @@
  *  2000-06-20  Pentium III FXSR, SSE support by Gareth Hughes
  *  2000-2002   x86-64 support by Andi Kleen
  */
-
+#include <linux/perf_counter.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
@@ -891,6 +891,11 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
                 tracehook_notify_resume(regs);
         }
 
+        if (thread_info_flags & _TIF_PERF_COUNTERS) {
+                clear_thread_flag(TIF_PERF_COUNTERS);
+                perf_counter_notify(regs);
+        }
+
 #ifdef CONFIG_X86_32
         clear_thread_flag(TIF_IRET);
 #endif /* CONFIG_X86_32 */
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index d44395ff34c3..496726ddcea1 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -332,3 +332,4 @@ ENTRY(sys_call_table)
         .long sys_dup3                  /* 330 */
         .long sys_pipe2
         .long sys_inotify_init1
+        .long sys_perf_counter_open