author    Linus Torvalds <torvalds@linux-foundation.org>    2009-06-11 17:01:07 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2009-06-11 17:01:07 -0400
commit    8a1ca8cedd108c8e76a6ab34079d0bbb4f244799 (patch)
tree      636c715524f1718599209cc289908ea44b6cb859 /arch
parent    b640f042faa2a2fad6464f259a8afec06e2f6386 (diff)
parent    940010c5a314a7bd9b498593bc6ba1718ac5aec5 (diff)
Merge branch 'perfcounters-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perfcounters-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (574 commits)
  perf_counter: Turn off by default
  perf_counter: Add counter->id to the throttle event
  perf_counter: Better align code
  perf_counter: Rename L2 to LL cache
  perf_counter: Standardize event names
  perf_counter: Rename enums
  perf_counter tools: Clean up u64 usage
  perf_counter: Rename perf_counter_limit sysctl
  perf_counter: More paranoia settings
  perf_counter: powerpc: Implement generalized cache events for POWER processors
  perf_counters: powerpc: Add support for POWER7 processors
  perf_counter: Accurate period data
  perf_counter: Introduce struct for sample data
  perf_counter tools: Normalize data using per sample period data
  perf_counter: Annotate exit ctx recursion
  perf_counter tools: Propagate signals properly
  perf_counter tools: Small frequency related fixes
  perf_counter: More aggressive frequency adjustment
  perf_counter/x86: Fix the model number of Intel Core2 processors
  perf_counter, x86: Correct some event and umask values for Intel processors
  ...
Diffstat (limited to 'arch')
-rw-r--r--  arch/powerpc/include/asm/hw_irq.h        |   39
-rw-r--r--  arch/powerpc/include/asm/paca.h          |    1
-rw-r--r--  arch/powerpc/include/asm/perf_counter.h  |   98
-rw-r--r--  arch/powerpc/include/asm/reg.h           |    2
-rw-r--r--  arch/powerpc/include/asm/systbl.h        |    2
-rw-r--r--  arch/powerpc/include/asm/unistd.h        |    1
-rw-r--r--  arch/powerpc/kernel/Makefile             |    3
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c        |    1
-rw-r--r--  arch/powerpc/kernel/entry_64.S           |    9
-rw-r--r--  arch/powerpc/kernel/irq.c                |    5
-rw-r--r--  arch/powerpc/kernel/perf_counter.c       | 1263
-rw-r--r--  arch/powerpc/kernel/power4-pmu.c         |  598
-rw-r--r--  arch/powerpc/kernel/power5+-pmu.c        |  671
-rw-r--r--  arch/powerpc/kernel/power5-pmu.c         |  611
-rw-r--r--  arch/powerpc/kernel/power6-pmu.c         |  532
-rw-r--r--  arch/powerpc/kernel/power7-pmu.c         |  357
-rw-r--r--  arch/powerpc/kernel/ppc970-pmu.c         |  482
-rw-r--r--  arch/powerpc/mm/fault.c                  |   10
-rw-r--r--  arch/powerpc/platforms/Kconfig.cputype   |    1
-rw-r--r--  arch/x86/Kconfig                         |    1
-rw-r--r--  arch/x86/ia32/ia32entry.S                |    3
-rw-r--r--  arch/x86/include/asm/atomic_32.h         |  236
-rw-r--r--  arch/x86/include/asm/entry_arch.h        |    2
-rw-r--r--  arch/x86/include/asm/hardirq.h           |    2
-rw-r--r--  arch/x86/include/asm/hw_irq.h            |    2
-rw-r--r--  arch/x86/include/asm/intel_arch_perfmon.h |  31
-rw-r--r--  arch/x86/include/asm/irq_vectors.h       |    8
-rw-r--r--  arch/x86/include/asm/perf_counter.h      |  100
-rw-r--r--  arch/x86/include/asm/unistd_32.h         |    1
-rw-r--r--  arch/x86/include/asm/unistd_64.h         |    3
-rw-r--r--  arch/x86/kernel/apic/apic.c              |    3
-rw-r--r--  arch/x86/kernel/cpu/Makefile             |   12
-rw-r--r--  arch/x86/kernel/cpu/common.c             |    2
-rw-r--r--  arch/x86/kernel/cpu/perf_counter.c       | 1704
-rw-r--r--  arch/x86/kernel/cpu/perfctr-watchdog.c   |    4
-rw-r--r--  arch/x86/kernel/entry_64.S               |    5
-rw-r--r--  arch/x86/kernel/irq.c                    |   10
-rw-r--r--  arch/x86/kernel/irqinit.c                |   15
-rw-r--r--  arch/x86/kernel/signal.c                 |    1
-rw-r--r--  arch/x86/kernel/syscall_table_32.S       |    1
-rw-r--r--  arch/x86/kernel/traps.c                  |   12
-rw-r--r--  arch/x86/mm/fault.c                      |   12
-rw-r--r--  arch/x86/oprofile/nmi_int.c              |    7
-rw-r--r--  arch/x86/oprofile/op_model_ppro.c        |   10
-rw-r--r--  arch/x86/vdso/vdso32-setup.c             |    6
-rw-r--r--  arch/x86/vdso/vma.c                      |    7
46 files changed, 6814 insertions, 72 deletions
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index b7e034b0a6dd..20a44d0c9fdd 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -131,5 +131,44 @@ static inline int irqs_disabled_flags(unsigned long flags)
  */
 struct irq_chip;
 
+#ifdef CONFIG_PERF_COUNTERS
+static inline unsigned long test_perf_counter_pending(void)
+{
+	unsigned long x;
+
+	asm volatile("lbz %0,%1(13)"
+		: "=r" (x)
+		: "i" (offsetof(struct paca_struct, perf_counter_pending)));
+	return x;
+}
+
+static inline void set_perf_counter_pending(void)
+{
+	asm volatile("stb %0,%1(13)" : :
+		"r" (1),
+		"i" (offsetof(struct paca_struct, perf_counter_pending)));
+}
+
+static inline void clear_perf_counter_pending(void)
+{
+	asm volatile("stb %0,%1(13)" : :
+		"r" (0),
+		"i" (offsetof(struct paca_struct, perf_counter_pending)));
+}
+
+extern void perf_counter_do_pending(void);
+
+#else
+
+static inline unsigned long test_perf_counter_pending(void)
+{
+	return 0;
+}
+
+static inline void set_perf_counter_pending(void) {}
+static inline void clear_perf_counter_pending(void) {}
+static inline void perf_counter_do_pending(void) {}
+#endif /* CONFIG_PERF_COUNTERS */
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_HW_IRQ_H */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 082b3aedf145..6ef055723019 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -99,6 +99,7 @@ struct paca_struct {
 	u8 soft_enabled;		/* irq soft-enable flag */
 	u8 hard_enabled;		/* set if irqs are enabled in MSR */
 	u8 io_sync;			/* writel() needs spin_unlock sync */
+	u8 perf_counter_pending;	/* PM interrupt while soft-disabled */
 
 	/* Stuff for accurate time accounting */
 	u64 user_time;			/* accumulated usermode TB ticks */
diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h
new file mode 100644
index 000000000000..cc7c887705b8
--- /dev/null
+++ b/arch/powerpc/include/asm/perf_counter.h
@@ -0,0 +1,98 @@
1/*
2 * Performance counter support - PowerPC-specific definitions.
3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/types.h>
12
13#define MAX_HWCOUNTERS 8
14#define MAX_EVENT_ALTERNATIVES 8
15#define MAX_LIMITED_HWCOUNTERS 2
16
17/*
18 * This struct provides the constants and functions needed to
19 * describe the PMU on a particular POWER-family CPU.
20 */
21struct power_pmu {
22 int n_counter;
23 int max_alternatives;
24 u64 add_fields;
25 u64 test_adder;
26 int (*compute_mmcr)(u64 events[], int n_ev,
27 unsigned int hwc[], u64 mmcr[]);
28 int (*get_constraint)(u64 event, u64 *mskp, u64 *valp);
29 int (*get_alternatives)(u64 event, unsigned int flags,
30 u64 alt[]);
31 void (*disable_pmc)(unsigned int pmc, u64 mmcr[]);
32 int (*limited_pmc_event)(u64 event);
33 u32 flags;
34 int n_generic;
35 int *generic_events;
36 int (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
37 [PERF_COUNT_HW_CACHE_OP_MAX]
38 [PERF_COUNT_HW_CACHE_RESULT_MAX];
39};
40
41extern struct power_pmu *ppmu;
42
43/*
44 * Values for power_pmu.flags
45 */
46#define PPMU_LIMITED_PMC5_6 1 /* PMC5/6 have limited function */
47#define PPMU_ALT_SIPR 2 /* uses alternate posn for SIPR/HV */
48
49/*
50 * Values for flags to get_alternatives()
51 */
52#define PPMU_LIMITED_PMC_OK 1 /* can put this on a limited PMC */
53#define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */
54#define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */
55
56struct pt_regs;
57extern unsigned long perf_misc_flags(struct pt_regs *regs);
58#define perf_misc_flags(regs) perf_misc_flags(regs)
59
60extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
61
62/*
63 * The power_pmu.get_constraint function returns a 64-bit value and
64 * a 64-bit mask that express the constraints between this event and
65 * other events.
66 *
67 * The value and mask are divided up into (non-overlapping) bitfields
68 * of three different types:
69 *
70 * Select field: this expresses the constraint that some set of bits
71 * in MMCR* needs to be set to a specific value for this event. For a
72 * select field, the mask contains 1s in every bit of the field, and
73 * the value contains a unique value for each possible setting of the
74 * MMCR* bits. The constraint checking code will ensure that two events
75 * that set the same field in their masks have the same value in their
76 * value dwords.
77 *
78 * Add field: this expresses the constraint that there can be at most
79 * N events in a particular class. A field of k bits can be used for
80 * N <= 2^(k-1) - 1. The mask has the most significant bit of the field
81 * set (and the other bits 0), and the value has only the least significant
82 * bit of the field set. In addition, the 'add_fields' and 'test_adder'
83 * in the struct power_pmu for this processor come into play. The
84 * add_fields value contains 1 in the LSB of the field, and the
85 * test_adder contains 2^(k-1) - 1 - N in the field.
86 *
87 * NAND field: this expresses the constraint that you may not have events
88 * in all of a set of classes. (For example, on PPC970, you can't select
89 * events from the FPU, ISU and IDU simultaneously, although any two are
90 * possible.) For N classes, the field is N+1 bits wide, and each class
91 * is assigned one bit from the least-significant N bits. The mask has
92 * only the most-significant bit set, and the value has only the bit
93 * for the event's class set. The test_adder has the least significant
94 * bit set in the field.
95 *
96 * If an event is not subject to the constraint expressed by a particular
97 * field, then it will have 0 in both the mask and value for that field.
98 */
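
To make the "add field" encoding above concrete, here is a small standalone sketch. It is not part of the patch: the field position (bits 0-3), width (k = 4), and the limit N = 3 are invented values, but the feasibility test is the same arithmetic that power_check_constraints() in arch/powerpc/kernel/perf_counter.c (added later in this merge) applies per event.

/*
 * Illustrative sketch only: a 4-bit add field at bits 0-3 allowing at
 * most N = 3 events of one class.  Per the comment above: the mask has
 * the MSB of the field set, the value has the LSB set, add_fields has
 * 1 in the LSB, and test_adder holds 2^(k-1) - 1 - N.
 */
#include <stdio.h>

typedef unsigned long long u64;

#define FIELD_LSB	0x1ULL	/* per-event constraint value */
#define FIELD_MSB	0x8ULL	/* per-event constraint mask */
#define ADD_FIELDS	0x1ULL	/* 1 in the LSB of the field */
#define TEST_ADDER	0x4ULL	/* 2^(4-1) - 1 - 3 = 4 */

int main(void)
{
	u64 value = 0, mask = 0;
	int i;

	for (i = 1; i <= 4; ++i) {
		u64 v = FIELD_LSB, vmask = FIELD_MSB;
		/* accumulate: effectively increments the per-class count */
		u64 nv = (value | v) + (value & v & ADD_FIELDS);

		/* adding TEST_ADDER carries into the MSB once count > N */
		if ((((nv + TEST_ADDER) ^ value) & mask) ||
		    (((nv + TEST_ADDER) ^ v) & vmask)) {
			printf("event %d rejected (class already has N=3)\n", i);
			break;
		}
		value = nv;
		mask |= vmask;
		printf("event %d accepted, field count now %llu\n", i, value & 0xf);
	}
	return 0;
}

Running this accepts three events and rejects the fourth, which is exactly how the field enforces "at most N events in a particular class".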
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index e8018d540e87..fb359b0a6937 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -492,11 +492,13 @@
 #define MMCR0_FCHV	0x00000001UL /* freeze conditions in hypervisor mode */
 #define SPRN_MMCR1	798
 #define SPRN_MMCRA	0x312
+#define   MMCRA_SDSYNC	0x80000000UL /* SDAR synced with SIAR */
 #define   MMCRA_SIHV	0x10000000UL /* state of MSR HV when SIAR set */
 #define   MMCRA_SIPR	0x08000000UL /* state of MSR PR when SIAR set */
 #define   MMCRA_SLOT	0x07000000UL /* SLOT bits (37-39) */
 #define   MMCRA_SLOT_SHIFT	24
 #define   MMCRA_SAMPLE_ENABLE 0x00000001UL /* enable sampling */
+#define   POWER6_MMCRA_SDSYNC 0x0000080000000000ULL	/* SDAR/SIAR synced */
 #define   POWER6_MMCRA_SIHV	0x0000040000000000ULL
 #define   POWER6_MMCRA_SIPR	0x0000020000000000ULL
 #define   POWER6_MMCRA_THRM	0x00000020UL
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index d98a30dfd41c..a0b92de51c7e 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -322,6 +322,6 @@ SYSCALL_SPU(epoll_create1)
 SYSCALL_SPU(dup3)
 SYSCALL_SPU(pipe2)
 SYSCALL(inotify_init1)
-SYSCALL(ni_syscall)
+SYSCALL_SPU(perf_counter_open)
 COMPAT_SYS_SPU(preadv)
 COMPAT_SYS_SPU(pwritev)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 3f06f8ec81c5..4badac2d11d1 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -341,6 +341,7 @@
 #define __NR_dup3		316
 #define __NR_pipe2		317
 #define __NR_inotify_init1	318
+#define __NR_perf_counter_open	319
 #define __NR_preadv		320
 #define __NR_pwritev		321
 
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 71901fbda4a5..a2c683403c2b 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -94,6 +94,9 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o
 
 obj-$(CONFIG_DYNAMIC_FTRACE)		+= ftrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
+obj-$(CONFIG_PERF_COUNTERS)		+= perf_counter.o power4-pmu.o ppc970-pmu.o \
+					   power5-pmu.o power5+-pmu.o power6-pmu.o \
+					   power7-pmu.o
 
 obj-$(CONFIG_8XX_MINIMAL_FPEMU)	+= softemu8xx.o
 
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 1e40bc053946..e981d1ce1914 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -131,6 +131,7 @@ int main(void)
 	DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
 	DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
 	DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));
+	DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_counter_pending));
 	DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
 	DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
 	DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index abfc32330479..43e073477c34 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -526,6 +526,15 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
 2:
 	TRACE_AND_RESTORE_IRQ(r5);
 
+#ifdef CONFIG_PERF_COUNTERS
+	/* check paca->perf_counter_pending if we're enabling ints */
+	lbz	r3,PACAPERFPEND(r13)
+	and.	r3,r3,r5
+	beq	27f
+	bl	.perf_counter_do_pending
+27:
+#endif /* CONFIG_PERF_COUNTERS */
+
 	/* extract EE bit and use it to restore paca->hard_enabled */
 	ld	r3,_MSR(r1)
 	rldicl	r4,r3,49,63		/* r0 = (r3 >> 15) & 1 */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 8c1a4966867e..feff792ed0f9 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -135,6 +135,11 @@ notrace void raw_local_irq_restore(unsigned long en)
 		iseries_handle_interrupts();
 	}
 
+	if (test_perf_counter_pending()) {
+		clear_perf_counter_pending();
+		perf_counter_do_pending();
+	}
+
 	/*
 	 * if (get_paca()->hard_enabled) return;
 	 * But again we need to take care that gcc gets hard_enabled directly
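
Together with the hw_irq.h and entry_64.S hunks above, this implements deferred handling of PMU exceptions that arrive while interrupts are soft-disabled: the exception sets paca->perf_counter_pending, and the flag is checked and replayed when interrupts are soft-enabled again. The following is a simplified, generic C sketch of that pattern, not the kernel code itself; all names are stand-ins.

/*
 * Simplified sketch of the deferred-PMU-interrupt pattern: work that
 * arrives while "interrupts" are soft-disabled is recorded in a
 * per-CPU flag and replayed on re-enable.
 */
#include <stdbool.h>

struct fake_paca {
	bool soft_enabled;		/* stands in for paca->soft_enabled */
	bool perf_counter_pending;	/* stands in for the new paca field */
};

static struct fake_paca paca;

static void perf_counter_do_pending_stub(void) { /* process queued work */ }

/* called from the PMU exception when the handler cannot run right now */
static void pmu_irq(void)
{
	if (!paca.soft_enabled) {
		paca.perf_counter_pending = true;	/* defer */
		return;
	}
	perf_counter_do_pending_stub();			/* run immediately */
}

/* mirrors the hunk in raw_local_irq_restore() above */
static void local_irq_restore_sketch(bool en)
{
	paca.soft_enabled = en;
	if (en && paca.perf_counter_pending) {
		paca.perf_counter_pending = false;
		perf_counter_do_pending_stub();
	}
}

int main(void)
{
	local_irq_restore_sketch(false);	/* soft-disable */
	pmu_irq();				/* exception arrives: deferred */
	local_irq_restore_sketch(true);		/* re-enable: pending work runs */
	return 0;
}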
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
new file mode 100644
index 000000000000..bb202388170e
--- /dev/null
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -0,0 +1,1263 @@
1/*
2 * Performance counter support - powerpc architecture code
3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/sched.h>
13#include <linux/perf_counter.h>
14#include <linux/percpu.h>
15#include <linux/hardirq.h>
16#include <asm/reg.h>
17#include <asm/pmc.h>
18#include <asm/machdep.h>
19#include <asm/firmware.h>
20#include <asm/ptrace.h>
21
22struct cpu_hw_counters {
23 int n_counters;
24 int n_percpu;
25 int disabled;
26 int n_added;
27 int n_limited;
28 u8 pmcs_enabled;
29 struct perf_counter *counter[MAX_HWCOUNTERS];
30 u64 events[MAX_HWCOUNTERS];
31 unsigned int flags[MAX_HWCOUNTERS];
32 u64 mmcr[3];
33 struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS];
34 u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS];
35};
36DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
37
38struct power_pmu *ppmu;
39
40/*
41 * Normally, to ignore kernel events we set the FCS (freeze counters
42 * in supervisor mode) bit in MMCR0, but if the kernel runs with the
43 * hypervisor bit set in the MSR, or if we are running on a processor
44 * where the hypervisor bit is forced to 1 (as on Apple G5 processors),
45 * then we need to use the FCHV bit to ignore kernel events.
46 */
47static unsigned int freeze_counters_kernel = MMCR0_FCS;
48
49static void perf_counter_interrupt(struct pt_regs *regs);
50
51void perf_counter_print_debug(void)
52{
53}
54
55/*
56 * Read one performance monitor counter (PMC).
57 */
58static unsigned long read_pmc(int idx)
59{
60 unsigned long val;
61
62 switch (idx) {
63 case 1:
64 val = mfspr(SPRN_PMC1);
65 break;
66 case 2:
67 val = mfspr(SPRN_PMC2);
68 break;
69 case 3:
70 val = mfspr(SPRN_PMC3);
71 break;
72 case 4:
73 val = mfspr(SPRN_PMC4);
74 break;
75 case 5:
76 val = mfspr(SPRN_PMC5);
77 break;
78 case 6:
79 val = mfspr(SPRN_PMC6);
80 break;
81 case 7:
82 val = mfspr(SPRN_PMC7);
83 break;
84 case 8:
85 val = mfspr(SPRN_PMC8);
86 break;
87 default:
88 printk(KERN_ERR "oops trying to read PMC%d\n", idx);
89 val = 0;
90 }
91 return val;
92}
93
94/*
95 * Write one PMC.
96 */
97static void write_pmc(int idx, unsigned long val)
98{
99 switch (idx) {
100 case 1:
101 mtspr(SPRN_PMC1, val);
102 break;
103 case 2:
104 mtspr(SPRN_PMC2, val);
105 break;
106 case 3:
107 mtspr(SPRN_PMC3, val);
108 break;
109 case 4:
110 mtspr(SPRN_PMC4, val);
111 break;
112 case 5:
113 mtspr(SPRN_PMC5, val);
114 break;
115 case 6:
116 mtspr(SPRN_PMC6, val);
117 break;
118 case 7:
119 mtspr(SPRN_PMC7, val);
120 break;
121 case 8:
122 mtspr(SPRN_PMC8, val);
123 break;
124 default:
125 printk(KERN_ERR "oops trying to write PMC%d\n", idx);
126 }
127}
128
129/*
130 * Check if a set of events can all go on the PMU at once.
131 * If they can't, this will look at alternative codes for the events
132 * and see if any combination of alternative codes is feasible.
133 * The feasible set is returned in event[].
134 */
135static int power_check_constraints(u64 event[], unsigned int cflags[],
136 int n_ev)
137{
138 u64 mask, value, nv;
139 u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
140 u64 amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
141 u64 avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
142 u64 smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS];
143 int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS];
144 int i, j;
145 u64 addf = ppmu->add_fields;
146 u64 tadd = ppmu->test_adder;
147
148 if (n_ev > ppmu->n_counter)
149 return -1;
150
151 /* First see if the events will go on as-is */
152 for (i = 0; i < n_ev; ++i) {
153 if ((cflags[i] & PPMU_LIMITED_PMC_REQD)
154 && !ppmu->limited_pmc_event(event[i])) {
155 ppmu->get_alternatives(event[i], cflags[i],
156 alternatives[i]);
157 event[i] = alternatives[i][0];
158 }
159 if (ppmu->get_constraint(event[i], &amasks[i][0],
160 &avalues[i][0]))
161 return -1;
162 }
163 value = mask = 0;
164 for (i = 0; i < n_ev; ++i) {
165 nv = (value | avalues[i][0]) + (value & avalues[i][0] & addf);
166 if ((((nv + tadd) ^ value) & mask) != 0 ||
167 (((nv + tadd) ^ avalues[i][0]) & amasks[i][0]) != 0)
168 break;
169 value = nv;
170 mask |= amasks[i][0];
171 }
172 if (i == n_ev)
173 return 0; /* all OK */
174
175 /* doesn't work, gather alternatives... */
176 if (!ppmu->get_alternatives)
177 return -1;
178 for (i = 0; i < n_ev; ++i) {
179 choice[i] = 0;
180 n_alt[i] = ppmu->get_alternatives(event[i], cflags[i],
181 alternatives[i]);
182 for (j = 1; j < n_alt[i]; ++j)
183 ppmu->get_constraint(alternatives[i][j],
184 &amasks[i][j], &avalues[i][j]);
185 }
186
187 /* enumerate all possibilities and see if any will work */
188 i = 0;
189 j = -1;
190 value = mask = nv = 0;
191 while (i < n_ev) {
192 if (j >= 0) {
193 /* we're backtracking, restore context */
194 value = svalues[i];
195 mask = smasks[i];
196 j = choice[i];
197 }
198 /*
199 * See if any alternative k for event i,
200 * where k > j, will satisfy the constraints.
201 */
202 while (++j < n_alt[i]) {
203 nv = (value | avalues[i][j]) +
204 (value & avalues[i][j] & addf);
205 if ((((nv + tadd) ^ value) & mask) == 0 &&
206 (((nv + tadd) ^ avalues[i][j])
207 & amasks[i][j]) == 0)
208 break;
209 }
210 if (j >= n_alt[i]) {
211 /*
212 * No feasible alternative, backtrack
213 * to event i-1 and continue enumerating its
214 * alternatives from where we got up to.
215 */
216 if (--i < 0)
217 return -1;
218 } else {
219 /*
220 * Found a feasible alternative for event i,
221 * remember where we got up to with this event,
222 * go on to the next event, and start with
223 * the first alternative for it.
224 */
225 choice[i] = j;
226 svalues[i] = value;
227 smasks[i] = mask;
228 value = nv;
229 mask |= amasks[i][j];
230 ++i;
231 j = -1;
232 }
233 }
234
235 /* OK, we have a feasible combination, tell the caller the solution */
236 for (i = 0; i < n_ev; ++i)
237 event[i] = alternatives[i][choice[i]];
238 return 0;
239}
240
241/*
242 * Check if newly-added counters have consistent settings for
243 * exclude_{user,kernel,hv} with each other and any previously
244 * added counters.
245 */
246static int check_excludes(struct perf_counter **ctrs, unsigned int cflags[],
247 int n_prev, int n_new)
248{
249 int eu = 0, ek = 0, eh = 0;
250 int i, n, first;
251 struct perf_counter *counter;
252
253 n = n_prev + n_new;
254 if (n <= 1)
255 return 0;
256
257 first = 1;
258 for (i = 0; i < n; ++i) {
259 if (cflags[i] & PPMU_LIMITED_PMC_OK) {
260 cflags[i] &= ~PPMU_LIMITED_PMC_REQD;
261 continue;
262 }
263 counter = ctrs[i];
264 if (first) {
265 eu = counter->attr.exclude_user;
266 ek = counter->attr.exclude_kernel;
267 eh = counter->attr.exclude_hv;
268 first = 0;
269 } else if (counter->attr.exclude_user != eu ||
270 counter->attr.exclude_kernel != ek ||
271 counter->attr.exclude_hv != eh) {
272 return -EAGAIN;
273 }
274 }
275
276 if (eu || ek || eh)
277 for (i = 0; i < n; ++i)
278 if (cflags[i] & PPMU_LIMITED_PMC_OK)
279 cflags[i] |= PPMU_LIMITED_PMC_REQD;
280
281 return 0;
282}
283
284static void power_pmu_read(struct perf_counter *counter)
285{
286 long val, delta, prev;
287
288 if (!counter->hw.idx)
289 return;
290 /*
291 * Performance monitor interrupts come even when interrupts
292 * are soft-disabled, as long as interrupts are hard-enabled.
293 * Therefore we treat them like NMIs.
294 */
295 do {
296 prev = atomic64_read(&counter->hw.prev_count);
297 barrier();
298 val = read_pmc(counter->hw.idx);
299 } while (atomic64_cmpxchg(&counter->hw.prev_count, prev, val) != prev);
300
301 /* The counters are only 32 bits wide */
302 delta = (val - prev) & 0xfffffffful;
303 atomic64_add(delta, &counter->count);
304 atomic64_sub(delta, &counter->hw.period_left);
305}
306
307/*
308 * On some machines, PMC5 and PMC6 can't be written, don't respect
309 * the freeze conditions, and don't generate interrupts. This tells
310 * us if `counter' is using such a PMC.
311 */
312static int is_limited_pmc(int pmcnum)
313{
314 return (ppmu->flags & PPMU_LIMITED_PMC5_6)
315 && (pmcnum == 5 || pmcnum == 6);
316}
317
318static void freeze_limited_counters(struct cpu_hw_counters *cpuhw,
319 unsigned long pmc5, unsigned long pmc6)
320{
321 struct perf_counter *counter;
322 u64 val, prev, delta;
323 int i;
324
325 for (i = 0; i < cpuhw->n_limited; ++i) {
326 counter = cpuhw->limited_counter[i];
327 if (!counter->hw.idx)
328 continue;
329 val = (counter->hw.idx == 5) ? pmc5 : pmc6;
330 prev = atomic64_read(&counter->hw.prev_count);
331 counter->hw.idx = 0;
332 delta = (val - prev) & 0xfffffffful;
333 atomic64_add(delta, &counter->count);
334 }
335}
336
337static void thaw_limited_counters(struct cpu_hw_counters *cpuhw,
338 unsigned long pmc5, unsigned long pmc6)
339{
340 struct perf_counter *counter;
341 u64 val;
342 int i;
343
344 for (i = 0; i < cpuhw->n_limited; ++i) {
345 counter = cpuhw->limited_counter[i];
346 counter->hw.idx = cpuhw->limited_hwidx[i];
347 val = (counter->hw.idx == 5) ? pmc5 : pmc6;
348 atomic64_set(&counter->hw.prev_count, val);
349 perf_counter_update_userpage(counter);
350 }
351}
352
353/*
354 * Since limited counters don't respect the freeze conditions, we
355 * have to read them immediately after freezing or unfreezing the
356 * other counters. We try to keep the values from the limited
357 * counters as consistent as possible by keeping the delay (in
358 * cycles and instructions) between freezing/unfreezing and reading
359 * the limited counters as small and consistent as possible.
360 * Therefore, if any limited counters are in use, we read them
361 * both, and always in the same order, to minimize variability,
362 * and do it inside the same asm that writes MMCR0.
363 */
364static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0)
365{
366 unsigned long pmc5, pmc6;
367
368 if (!cpuhw->n_limited) {
369 mtspr(SPRN_MMCR0, mmcr0);
370 return;
371 }
372
373 /*
374 * Write MMCR0, then read PMC5 and PMC6 immediately.
375 * To ensure we don't get a performance monitor interrupt
376 * between writing MMCR0 and freezing/thawing the limited
377 * counters, we first write MMCR0 with the counter overflow
378 * interrupt enable bits turned off.
379 */
380 asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5"
381 : "=&r" (pmc5), "=&r" (pmc6)
382 : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)),
383 "i" (SPRN_MMCR0),
384 "i" (SPRN_PMC5), "i" (SPRN_PMC6));
385
386 if (mmcr0 & MMCR0_FC)
387 freeze_limited_counters(cpuhw, pmc5, pmc6);
388 else
389 thaw_limited_counters(cpuhw, pmc5, pmc6);
390
391 /*
392 * Write the full MMCR0 including the counter overflow interrupt
393 * enable bits, if necessary.
394 */
395 if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE))
396 mtspr(SPRN_MMCR0, mmcr0);
397}
398
399/*
400 * Disable all counters to prevent PMU interrupts and to allow
401 * counters to be added or removed.
402 */
403void hw_perf_disable(void)
404{
405 struct cpu_hw_counters *cpuhw;
406 unsigned long ret;
407 unsigned long flags;
408
409 local_irq_save(flags);
410 cpuhw = &__get_cpu_var(cpu_hw_counters);
411
412 ret = cpuhw->disabled;
413 if (!ret) {
414 cpuhw->disabled = 1;
415 cpuhw->n_added = 0;
416
417 /*
418 * Check if we ever enabled the PMU on this cpu.
419 */
420 if (!cpuhw->pmcs_enabled) {
421 if (ppc_md.enable_pmcs)
422 ppc_md.enable_pmcs();
423 cpuhw->pmcs_enabled = 1;
424 }
425
426 /*
427 * Disable instruction sampling if it was enabled
428 */
429 if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
430 mtspr(SPRN_MMCRA,
431 cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
432 mb();
433 }
434
435 /*
436 * Set the 'freeze counters' bit.
437 * The barrier is to make sure the mtspr has been
438 * executed and the PMU has frozen the counters
439 * before we return.
440 */
441 write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC);
442 mb();
443 }
444 local_irq_restore(flags);
445}
446
447/*
448 * Re-enable all counters if disable == 0.
449 * If we were previously disabled and counters were added, then
450 * put the new config on the PMU.
451 */
452void hw_perf_enable(void)
453{
454 struct perf_counter *counter;
455 struct cpu_hw_counters *cpuhw;
456 unsigned long flags;
457 long i;
458 unsigned long val;
459 s64 left;
460 unsigned int hwc_index[MAX_HWCOUNTERS];
461 int n_lim;
462 int idx;
463
464 local_irq_save(flags);
465 cpuhw = &__get_cpu_var(cpu_hw_counters);
466 if (!cpuhw->disabled) {
467 local_irq_restore(flags);
468 return;
469 }
470 cpuhw->disabled = 0;
471
472 /*
473 * If we didn't change anything, or only removed counters,
474 * no need to recalculate MMCR* settings and reset the PMCs.
475 * Just reenable the PMU with the current MMCR* settings
476 * (possibly updated for removal of counters).
477 */
478 if (!cpuhw->n_added) {
479 mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
480 mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
481 if (cpuhw->n_counters == 0)
482 get_lppaca()->pmcregs_in_use = 0;
483 goto out_enable;
484 }
485
486 /*
487 * Compute MMCR* values for the new set of counters
488 */
489 if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_counters, hwc_index,
490 cpuhw->mmcr)) {
491 /* shouldn't ever get here */
492 printk(KERN_ERR "oops compute_mmcr failed\n");
493 goto out;
494 }
495
496 /*
497 * Add in MMCR0 freeze bits corresponding to the
498 * attr.exclude_* bits for the first counter.
499 * We have already checked that all counters have the
500 * same values for these bits as the first counter.
501 */
502 counter = cpuhw->counter[0];
503 if (counter->attr.exclude_user)
504 cpuhw->mmcr[0] |= MMCR0_FCP;
505 if (counter->attr.exclude_kernel)
506 cpuhw->mmcr[0] |= freeze_counters_kernel;
507 if (counter->attr.exclude_hv)
508 cpuhw->mmcr[0] |= MMCR0_FCHV;
509
510 /*
511 * Write the new configuration to MMCR* with the freeze
512 * bit set and set the hardware counters to their initial values.
513 * Then unfreeze the counters.
514 */
515 get_lppaca()->pmcregs_in_use = 1;
516 mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
517 mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
518 mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
519 | MMCR0_FC);
520
521 /*
522 * Read off any pre-existing counters that need to move
523 * to another PMC.
524 */
525 for (i = 0; i < cpuhw->n_counters; ++i) {
526 counter = cpuhw->counter[i];
527 if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) {
528 power_pmu_read(counter);
529 write_pmc(counter->hw.idx, 0);
530 counter->hw.idx = 0;
531 }
532 }
533
534 /*
535 * Initialize the PMCs for all the new and moved counters.
536 */
537 cpuhw->n_limited = n_lim = 0;
538 for (i = 0; i < cpuhw->n_counters; ++i) {
539 counter = cpuhw->counter[i];
540 if (counter->hw.idx)
541 continue;
542 idx = hwc_index[i] + 1;
543 if (is_limited_pmc(idx)) {
544 cpuhw->limited_counter[n_lim] = counter;
545 cpuhw->limited_hwidx[n_lim] = idx;
546 ++n_lim;
547 continue;
548 }
549 val = 0;
550 if (counter->hw.sample_period) {
551 left = atomic64_read(&counter->hw.period_left);
552 if (left < 0x80000000L)
553 val = 0x80000000L - left;
554 }
555 atomic64_set(&counter->hw.prev_count, val);
556 counter->hw.idx = idx;
557 write_pmc(idx, val);
558 perf_counter_update_userpage(counter);
559 }
560 cpuhw->n_limited = n_lim;
561 cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
562
563 out_enable:
564 mb();
565 write_mmcr0(cpuhw, cpuhw->mmcr[0]);
566
567 /*
568 * Enable instruction sampling if necessary
569 */
570 if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
571 mb();
572 mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
573 }
574
575 out:
576 local_irq_restore(flags);
577}
578
579static int collect_events(struct perf_counter *group, int max_count,
580 struct perf_counter *ctrs[], u64 *events,
581 unsigned int *flags)
582{
583 int n = 0;
584 struct perf_counter *counter;
585
586 if (!is_software_counter(group)) {
587 if (n >= max_count)
588 return -1;
589 ctrs[n] = group;
590 flags[n] = group->hw.counter_base;
591 events[n++] = group->hw.config;
592 }
593 list_for_each_entry(counter, &group->sibling_list, list_entry) {
594 if (!is_software_counter(counter) &&
595 counter->state != PERF_COUNTER_STATE_OFF) {
596 if (n >= max_count)
597 return -1;
598 ctrs[n] = counter;
599 flags[n] = counter->hw.counter_base;
600 events[n++] = counter->hw.config;
601 }
602 }
603 return n;
604}
605
606static void counter_sched_in(struct perf_counter *counter, int cpu)
607{
608 counter->state = PERF_COUNTER_STATE_ACTIVE;
609 counter->oncpu = cpu;
610 counter->tstamp_running += counter->ctx->time - counter->tstamp_stopped;
611 if (is_software_counter(counter))
612 counter->pmu->enable(counter);
613}
614
615/*
616 * Called to enable a whole group of counters.
617 * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
618 * Assumes the caller has disabled interrupts and has
619 * frozen the PMU with hw_perf_save_disable.
620 */
621int hw_perf_group_sched_in(struct perf_counter *group_leader,
622 struct perf_cpu_context *cpuctx,
623 struct perf_counter_context *ctx, int cpu)
624{
625 struct cpu_hw_counters *cpuhw;
626 long i, n, n0;
627 struct perf_counter *sub;
628
629 cpuhw = &__get_cpu_var(cpu_hw_counters);
630 n0 = cpuhw->n_counters;
631 n = collect_events(group_leader, ppmu->n_counter - n0,
632 &cpuhw->counter[n0], &cpuhw->events[n0],
633 &cpuhw->flags[n0]);
634 if (n < 0)
635 return -EAGAIN;
636 if (check_excludes(cpuhw->counter, cpuhw->flags, n0, n))
637 return -EAGAIN;
638 i = power_check_constraints(cpuhw->events, cpuhw->flags, n + n0);
639 if (i < 0)
640 return -EAGAIN;
641 cpuhw->n_counters = n0 + n;
642 cpuhw->n_added += n;
643
644 /*
645 * OK, this group can go on; update counter states etc.,
646 * and enable any software counters
647 */
648 for (i = n0; i < n0 + n; ++i)
649 cpuhw->counter[i]->hw.config = cpuhw->events[i];
650 cpuctx->active_oncpu += n;
651 n = 1;
652 counter_sched_in(group_leader, cpu);
653 list_for_each_entry(sub, &group_leader->sibling_list, list_entry) {
654 if (sub->state != PERF_COUNTER_STATE_OFF) {
655 counter_sched_in(sub, cpu);
656 ++n;
657 }
658 }
659 ctx->nr_active += n;
660
661 return 1;
662}
663
664/*
665 * Add a counter to the PMU.
666 * If all counters are not already frozen, then we disable and
667 * re-enable the PMU in order to get hw_perf_enable to do the
668 * actual work of reconfiguring the PMU.
669 */
670static int power_pmu_enable(struct perf_counter *counter)
671{
672 struct cpu_hw_counters *cpuhw;
673 unsigned long flags;
674 int n0;
675 int ret = -EAGAIN;
676
677 local_irq_save(flags);
678 perf_disable();
679
680 /*
681 * Add the counter to the list (if there is room)
682 * and check whether the total set is still feasible.
683 */
684 cpuhw = &__get_cpu_var(cpu_hw_counters);
685 n0 = cpuhw->n_counters;
686 if (n0 >= ppmu->n_counter)
687 goto out;
688 cpuhw->counter[n0] = counter;
689 cpuhw->events[n0] = counter->hw.config;
690 cpuhw->flags[n0] = counter->hw.counter_base;
691 if (check_excludes(cpuhw->counter, cpuhw->flags, n0, 1))
692 goto out;
693 if (power_check_constraints(cpuhw->events, cpuhw->flags, n0 + 1))
694 goto out;
695
696 counter->hw.config = cpuhw->events[n0];
697 ++cpuhw->n_counters;
698 ++cpuhw->n_added;
699
700 ret = 0;
701 out:
702 perf_enable();
703 local_irq_restore(flags);
704 return ret;
705}
706
707/*
708 * Remove a counter from the PMU.
709 */
710static void power_pmu_disable(struct perf_counter *counter)
711{
712 struct cpu_hw_counters *cpuhw;
713 long i;
714 unsigned long flags;
715
716 local_irq_save(flags);
717 perf_disable();
718
719 power_pmu_read(counter);
720
721 cpuhw = &__get_cpu_var(cpu_hw_counters);
722 for (i = 0; i < cpuhw->n_counters; ++i) {
723 if (counter == cpuhw->counter[i]) {
724 while (++i < cpuhw->n_counters)
725 cpuhw->counter[i-1] = cpuhw->counter[i];
726 --cpuhw->n_counters;
727 ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
728 if (counter->hw.idx) {
729 write_pmc(counter->hw.idx, 0);
730 counter->hw.idx = 0;
731 }
732 perf_counter_update_userpage(counter);
733 break;
734 }
735 }
736 for (i = 0; i < cpuhw->n_limited; ++i)
737 if (counter == cpuhw->limited_counter[i])
738 break;
739 if (i < cpuhw->n_limited) {
740 while (++i < cpuhw->n_limited) {
741 cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i];
742 cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i];
743 }
744 --cpuhw->n_limited;
745 }
746 if (cpuhw->n_counters == 0) {
747 /* disable exceptions if no counters are running */
748 cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
749 }
750
751 perf_enable();
752 local_irq_restore(flags);
753}
754
755/*
756 * Re-enable interrupts on a counter after they were throttled
757 * because they were coming too fast.
758 */
759static void power_pmu_unthrottle(struct perf_counter *counter)
760{
761 s64 val, left;
762 unsigned long flags;
763
764 if (!counter->hw.idx || !counter->hw.sample_period)
765 return;
766 local_irq_save(flags);
767 perf_disable();
768 power_pmu_read(counter);
769 left = counter->hw.sample_period;
770 counter->hw.last_period = left;
771 val = 0;
772 if (left < 0x80000000L)
773 val = 0x80000000L - left;
774 write_pmc(counter->hw.idx, val);
775 atomic64_set(&counter->hw.prev_count, val);
776 atomic64_set(&counter->hw.period_left, left);
777 perf_counter_update_userpage(counter);
778 perf_enable();
779 local_irq_restore(flags);
780}
781
782struct pmu power_pmu = {
783 .enable = power_pmu_enable,
784 .disable = power_pmu_disable,
785 .read = power_pmu_read,
786 .unthrottle = power_pmu_unthrottle,
787};
788
789/*
790 * Return 1 if we might be able to put counter on a limited PMC,
791 * or 0 if not.
792 * A counter can only go on a limited PMC if it counts something
793 * that a limited PMC can count, doesn't require interrupts, and
794 * doesn't exclude any processor mode.
795 */
796static int can_go_on_limited_pmc(struct perf_counter *counter, u64 ev,
797 unsigned int flags)
798{
799 int n;
800 u64 alt[MAX_EVENT_ALTERNATIVES];
801
802 if (counter->attr.exclude_user
803 || counter->attr.exclude_kernel
804 || counter->attr.exclude_hv
805 || counter->attr.sample_period)
806 return 0;
807
808 if (ppmu->limited_pmc_event(ev))
809 return 1;
810
811 /*
812 * The requested event isn't on a limited PMC already;
813 * see if any alternative code goes on a limited PMC.
814 */
815 if (!ppmu->get_alternatives)
816 return 0;
817
818 flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD;
819 n = ppmu->get_alternatives(ev, flags, alt);
820
821 return n > 0;
822}
823
824/*
825 * Find an alternative event that goes on a normal PMC, if possible,
826 * and return the event code, or 0 if there is no such alternative.
827 * (Note: event code 0 is "don't count" on all machines.)
828 */
829static u64 normal_pmc_alternative(u64 ev, unsigned long flags)
830{
831 u64 alt[MAX_EVENT_ALTERNATIVES];
832 int n;
833
834 flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD);
835 n = ppmu->get_alternatives(ev, flags, alt);
836 if (!n)
837 return 0;
838 return alt[0];
839}
840
841/* Number of perf_counters counting hardware events */
842static atomic_t num_counters;
843/* Used to avoid races in calling reserve/release_pmc_hardware */
844static DEFINE_MUTEX(pmc_reserve_mutex);
845
846/*
847 * Release the PMU if this is the last perf_counter.
848 */
849static void hw_perf_counter_destroy(struct perf_counter *counter)
850{
851 if (!atomic_add_unless(&num_counters, -1, 1)) {
852 mutex_lock(&pmc_reserve_mutex);
853 if (atomic_dec_return(&num_counters) == 0)
854 release_pmc_hardware();
855 mutex_unlock(&pmc_reserve_mutex);
856 }
857}
858
859/*
860 * Translate a generic cache event config to a raw event code.
861 */
862static int hw_perf_cache_event(u64 config, u64 *eventp)
863{
864 unsigned long type, op, result;
865 int ev;
866
867 if (!ppmu->cache_events)
868 return -EINVAL;
869
870 /* unpack config */
871 type = config & 0xff;
872 op = (config >> 8) & 0xff;
873 result = (config >> 16) & 0xff;
874
875 if (type >= PERF_COUNT_HW_CACHE_MAX ||
876 op >= PERF_COUNT_HW_CACHE_OP_MAX ||
877 result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
878 return -EINVAL;
879
880 ev = (*ppmu->cache_events)[type][op][result];
881 if (ev == 0)
882 return -EOPNOTSUPP;
883 if (ev == -1)
884 return -EINVAL;
885 *eventp = ev;
886 return 0;
887}
888
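
hw_perf_cache_event() above unpacks the generic cache-event config as type in bits 0-7, op in bits 8-15, and result in bits 16-23. The sketch below shows the matching packing on the requesting side; it is illustrative only, and the PERF_COUNT_HW_CACHE_* enum values (0, 0, 1) are assumed from the generic <linux/perf_counter.h> of this merge rather than taken from this patch.

/* Sketch: build and unpack a generic cache-event config word. */
#include <stdio.h>

typedef unsigned long long u64;

enum { CACHE_L1D = 0 };		/* assumed PERF_COUNT_HW_CACHE_L1D */
enum { CACHE_OP_READ = 0 };	/* assumed PERF_COUNT_HW_CACHE_OP_READ */
enum { CACHE_RESULT_MISS = 1 };	/* assumed PERF_COUNT_HW_CACHE_RESULT_MISS */

static u64 make_cache_config(unsigned type, unsigned op, unsigned result)
{
	return (u64)type | ((u64)op << 8) | ((u64)result << 16);
}

int main(void)
{
	u64 config = make_cache_config(CACHE_L1D, CACHE_OP_READ,
				       CACHE_RESULT_MISS);

	/* hw_perf_cache_event() unpacks the same three byte fields: */
	printf("type=%llu op=%llu result=%llu\n",
	       config & 0xff, (config >> 8) & 0xff, (config >> 16) & 0xff);
	return 0;
}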
889const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
890{
891 u64 ev;
892 unsigned long flags;
893 struct perf_counter *ctrs[MAX_HWCOUNTERS];
894 u64 events[MAX_HWCOUNTERS];
895 unsigned int cflags[MAX_HWCOUNTERS];
896 int n;
897 int err;
898
899 if (!ppmu)
900 return ERR_PTR(-ENXIO);
901 switch (counter->attr.type) {
902 case PERF_TYPE_HARDWARE:
903 ev = counter->attr.config;
904 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
905 return ERR_PTR(-EOPNOTSUPP);
906 ev = ppmu->generic_events[ev];
907 break;
908 case PERF_TYPE_HW_CACHE:
909 err = hw_perf_cache_event(counter->attr.config, &ev);
910 if (err)
911 return ERR_PTR(err);
912 break;
913 case PERF_TYPE_RAW:
914 ev = counter->attr.config;
915 break;
916 }
917 counter->hw.config_base = ev;
918 counter->hw.idx = 0;
919
920 /*
921 * If we are not running on a hypervisor, force the
922 * exclude_hv bit to 0 so that we don't care what
923 * the user set it to.
924 */
925 if (!firmware_has_feature(FW_FEATURE_LPAR))
926 counter->attr.exclude_hv = 0;
927
928 /*
929 * If this is a per-task counter, then we can use
930 * PM_RUN_* events interchangeably with their non RUN_*
931 * equivalents, e.g. PM_RUN_CYC instead of PM_CYC.
932 * XXX we should check if the task is an idle task.
933 */
934 flags = 0;
935 if (counter->ctx->task)
936 flags |= PPMU_ONLY_COUNT_RUN;
937
938 /*
939 * If this machine has limited counters, check whether this
940 * event could go on a limited counter.
941 */
942 if (ppmu->flags & PPMU_LIMITED_PMC5_6) {
943 if (can_go_on_limited_pmc(counter, ev, flags)) {
944 flags |= PPMU_LIMITED_PMC_OK;
945 } else if (ppmu->limited_pmc_event(ev)) {
946 /*
947 * The requested event is on a limited PMC,
948 * but we can't use a limited PMC; see if any
949 * alternative goes on a normal PMC.
950 */
951 ev = normal_pmc_alternative(ev, flags);
952 if (!ev)
953 return ERR_PTR(-EINVAL);
954 }
955 }
956
957 /*
958 * If this is in a group, check if it can go on with all the
959 * other hardware counters in the group. We assume the counter
960 * hasn't been linked into its leader's sibling list at this point.
961 */
962 n = 0;
963 if (counter->group_leader != counter) {
964 n = collect_events(counter->group_leader, ppmu->n_counter - 1,
965 ctrs, events, cflags);
966 if (n < 0)
967 return ERR_PTR(-EINVAL);
968 }
969 events[n] = ev;
970 ctrs[n] = counter;
971 cflags[n] = flags;
972 if (check_excludes(ctrs, cflags, n, 1))
973 return ERR_PTR(-EINVAL);
974 if (power_check_constraints(events, cflags, n + 1))
975 return ERR_PTR(-EINVAL);
976
977 counter->hw.config = events[n];
978 counter->hw.counter_base = cflags[n];
979 counter->hw.last_period = counter->hw.sample_period;
980 atomic64_set(&counter->hw.period_left, counter->hw.last_period);
981
982 /*
983 * See if we need to reserve the PMU.
984 * If no counters are currently in use, then we have to take a
985 * mutex to ensure that we don't race with another task doing
986 * reserve_pmc_hardware or release_pmc_hardware.
987 */
988 err = 0;
989 if (!atomic_inc_not_zero(&num_counters)) {
990 mutex_lock(&pmc_reserve_mutex);
991 if (atomic_read(&num_counters) == 0 &&
992 reserve_pmc_hardware(perf_counter_interrupt))
993 err = -EBUSY;
994 else
995 atomic_inc(&num_counters);
996 mutex_unlock(&pmc_reserve_mutex);
997 }
998 counter->destroy = hw_perf_counter_destroy;
999
1000 if (err)
1001 return ERR_PTR(err);
1002 return &power_pmu;
1003}
1004
1005/*
1006 * A counter has overflowed; update its count and record
1007 * things if requested. Note that interrupts are hard-disabled
1008 * here so there is no possibility of being interrupted.
1009 */
1010static void record_and_restart(struct perf_counter *counter, long val,
1011 struct pt_regs *regs, int nmi)
1012{
1013 u64 period = counter->hw.sample_period;
1014 s64 prev, delta, left;
1015 int record = 0;
1016 u64 addr, mmcra, sdsync;
1017
1018 /* we don't have to worry about interrupts here */
1019 prev = atomic64_read(&counter->hw.prev_count);
1020 delta = (val - prev) & 0xfffffffful;
1021 atomic64_add(delta, &counter->count);
1022
1023 /*
1024 * See if the total period for this counter has expired,
1025 * and update for the next period.
1026 */
1027 val = 0;
1028 left = atomic64_read(&counter->hw.period_left) - delta;
1029 if (period) {
1030 if (left <= 0) {
1031 left += period;
1032 if (left <= 0)
1033 left = period;
1034 record = 1;
1035 }
1036 if (left < 0x80000000L)
1037 val = 0x80000000L - left;
1038 }
1039
1040 /*
1041 * Finally record data if requested.
1042 */
1043 if (record) {
1044 struct perf_sample_data data = {
1045 .regs = regs,
1046 .addr = 0,
1047 .period = counter->hw.last_period,
1048 };
1049
1050 if (counter->attr.sample_type & PERF_SAMPLE_ADDR) {
1051 /*
1052 * The user wants a data address recorded.
1053 * If we're not doing instruction sampling,
1054 * give them the SDAR (sampled data address).
1055 * If we are doing instruction sampling, then only
1056 * give them the SDAR if it corresponds to the
1057 * instruction pointed to by SIAR; this is indicated
1058 * by the [POWER6_]MMCRA_SDSYNC bit in MMCRA.
1059 */
1060 mmcra = regs->dsisr;
1061 sdsync = (ppmu->flags & PPMU_ALT_SIPR) ?
1062 POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC;
1063 if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync))
1064 data.addr = mfspr(SPRN_SDAR);
1065 }
1066 if (perf_counter_overflow(counter, nmi, &data)) {
1067 /*
1068 * Interrupts are coming too fast - throttle them
1069 * by setting the counter to 0, so it will be
1070 * at least 2^30 cycles until the next interrupt
1071 * (assuming each counter counts at most 2 counts
1072 * per cycle).
1073 */
1074 val = 0;
1075 left = ~0ULL >> 1;
1076 }
1077 }
1078
1079 write_pmc(counter->hw.idx, val);
1080 atomic64_set(&counter->hw.prev_count, val);
1081 atomic64_set(&counter->hw.period_left, left);
1082 perf_counter_update_userpage(counter);
1083}
1084
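
A quick note on the arithmetic used by record_and_restart() (and by hw_perf_enable() and power_pmu_unthrottle()): the PMCs are 32 bits wide and the PMU flags overflow when a counter's most-significant bit becomes set, so writing 0x80000000 - left means the counter reaches that point after exactly 'left' more events. The snippet below is illustrative arithmetic only, not part of the patch, with a made-up period.

/* Sketch: why a PMC is programmed with 0x80000000 - left. */
#include <stdio.h>

int main(void)
{
	unsigned long left = 100000;		/* remaining sample period (made up) */
	unsigned int pmc = 0x80000000UL - left;	/* value written by write_pmc() */

	pmc += left;				/* 'left' events later... */

	/* same test as perf_counter_interrupt(): negative => overflowed */
	printf("pmc=0x%08x overflowed=%d\n", pmc, (int)pmc < 0);

	/* 32-bit delta since the programmed value, as in power_pmu_read() */
	printf("delta=%lu\n",
	       (unsigned long)((pmc - (0x80000000UL - left)) & 0xfffffffful));
	return 0;
}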
1085/*
1086 * Called from generic code to get the misc flags (i.e. processor mode)
1087 * for an event.
1088 */
1089unsigned long perf_misc_flags(struct pt_regs *regs)
1090{
1091 unsigned long mmcra;
1092
1093 if (TRAP(regs) != 0xf00) {
1094 /* not a PMU interrupt */
1095 return user_mode(regs) ? PERF_EVENT_MISC_USER :
1096 PERF_EVENT_MISC_KERNEL;
1097 }
1098
1099 mmcra = regs->dsisr;
1100 if (ppmu->flags & PPMU_ALT_SIPR) {
1101 if (mmcra & POWER6_MMCRA_SIHV)
1102 return PERF_EVENT_MISC_HYPERVISOR;
1103 return (mmcra & POWER6_MMCRA_SIPR) ? PERF_EVENT_MISC_USER :
1104 PERF_EVENT_MISC_KERNEL;
1105 }
1106 if (mmcra & MMCRA_SIHV)
1107 return PERF_EVENT_MISC_HYPERVISOR;
1108 return (mmcra & MMCRA_SIPR) ? PERF_EVENT_MISC_USER :
1109 PERF_EVENT_MISC_KERNEL;
1110}
1111
1112/*
1113 * Called from generic code to get the instruction pointer
1114 * for an event.
1115 */
1116unsigned long perf_instruction_pointer(struct pt_regs *regs)
1117{
1118 unsigned long mmcra;
1119 unsigned long ip;
1120 unsigned long slot;
1121
1122 if (TRAP(regs) != 0xf00)
1123 return regs->nip; /* not a PMU interrupt */
1124
1125 ip = mfspr(SPRN_SIAR);
1126 mmcra = regs->dsisr;
1127 if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) {
1128 slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT;
1129 if (slot > 1)
1130 ip += 4 * (slot - 1);
1131 }
1132 return ip;
1133}
1134
1135/*
1136 * Performance monitor interrupt stuff
1137 */
1138static void perf_counter_interrupt(struct pt_regs *regs)
1139{
1140 int i;
1141 struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
1142 struct perf_counter *counter;
1143 long val;
1144 int found = 0;
1145 int nmi;
1146
1147 if (cpuhw->n_limited)
1148 freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
1149 mfspr(SPRN_PMC6));
1150
1151 /*
1152 * Overload regs->dsisr to store MMCRA so we only need to read it once.
1153 */
1154 regs->dsisr = mfspr(SPRN_MMCRA);
1155
1156 /*
1157 * If interrupts were soft-disabled when this PMU interrupt
1158 * occurred, treat it as an NMI.
1159 */
1160 nmi = !regs->softe;
1161 if (nmi)
1162 nmi_enter();
1163 else
1164 irq_enter();
1165
1166 for (i = 0; i < cpuhw->n_counters; ++i) {
1167 counter = cpuhw->counter[i];
1168 if (!counter->hw.idx || is_limited_pmc(counter->hw.idx))
1169 continue;
1170 val = read_pmc(counter->hw.idx);
1171 if ((int)val < 0) {
1172 /* counter has overflowed */
1173 found = 1;
1174 record_and_restart(counter, val, regs, nmi);
1175 }
1176 }
1177
1178 /*
1179 * In case we didn't find and reset the counter that caused
1180 * the interrupt, scan all counters and reset any that are
1181 * negative, to avoid getting continual interrupts.
1182 * Any that we processed in the previous loop will not be negative.
1183 */
1184 if (!found) {
1185 for (i = 0; i < ppmu->n_counter; ++i) {
1186 if (is_limited_pmc(i + 1))
1187 continue;
1188 val = read_pmc(i + 1);
1189 if ((int)val < 0)
1190 write_pmc(i + 1, 0);
1191 }
1192 }
1193
1194 /*
1195 * Reset MMCR0 to its normal value. This will set PMXE and
1196 * clear FC (freeze counters) and PMAO (perf mon alert occurred)
1197 * and thus allow interrupts to occur again.
1198 * XXX might want to use MSR.PM to keep the counters frozen until
1199 * we get back out of this interrupt.
1200 */
1201 write_mmcr0(cpuhw, cpuhw->mmcr[0]);
1202
1203 if (nmi)
1204 nmi_exit();
1205 else
1206 irq_exit();
1207}
1208
1209void hw_perf_counter_setup(int cpu)
1210{
1211 struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu);
1212
1213 memset(cpuhw, 0, sizeof(*cpuhw));
1214 cpuhw->mmcr[0] = MMCR0_FC;
1215}
1216
1217extern struct power_pmu power4_pmu;
1218extern struct power_pmu ppc970_pmu;
1219extern struct power_pmu power5_pmu;
1220extern struct power_pmu power5p_pmu;
1221extern struct power_pmu power6_pmu;
1222extern struct power_pmu power7_pmu;
1223
1224static int init_perf_counters(void)
1225{
1226 unsigned long pvr;
1227
1228 /* XXX should get this from cputable */
1229 pvr = mfspr(SPRN_PVR);
1230 switch (PVR_VER(pvr)) {
1231 case PV_POWER4:
1232 case PV_POWER4p:
1233 ppmu = &power4_pmu;
1234 break;
1235 case PV_970:
1236 case PV_970FX:
1237 case PV_970MP:
1238 ppmu = &ppc970_pmu;
1239 break;
1240 case PV_POWER5:
1241 ppmu = &power5_pmu;
1242 break;
1243 case PV_POWER5p:
1244 ppmu = &power5p_pmu;
1245 break;
1246 case 0x3e:
1247 ppmu = &power6_pmu;
1248 break;
1249 case 0x3f:
1250 ppmu = &power7_pmu;
1251 break;
1252 }
1253
1254 /*
1255 * Use FCHV to ignore kernel events if MSR.HV is set.
1256 */
1257 if (mfmsr() & MSR_HV)
1258 freeze_counters_kernel = MMCR0_FCHV;
1259
1260 return 0;
1261}
1262
1263arch_initcall(init_perf_counters);
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
new file mode 100644
index 000000000000..07bd308a5fa7
--- /dev/null
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -0,0 +1,598 @@
1/*
2 * Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors.
3 *
4 * Copyright 2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/perf_counter.h>
13#include <asm/reg.h>
14
15/*
16 * Bits in event code for POWER4
17 */
18#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */
19#define PM_PMC_MSK 0xf
20#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */
21#define PM_UNIT_MSK 0xf
22#define PM_LOWER_SH 6
23#define PM_LOWER_MSK 1
24#define PM_LOWER_MSKS 0x40
25#define PM_BYTE_SH 4 /* Byte number of event bus to use */
26#define PM_BYTE_MSK 3
27#define PM_PMCSEL_MSK 7
28
29/*
30 * Unit code values
31 */
32#define PM_FPU 1
33#define PM_ISU1 2
34#define PM_IFU 3
35#define PM_IDU0 4
36#define PM_ISU1_ALT 6
37#define PM_ISU2 7
38#define PM_IFU_ALT 8
39#define PM_LSU0 9
40#define PM_LSU1 0xc
41#define PM_GPS 0xf
42
43/*
44 * Bits in MMCR0 for POWER4
45 */
46#define MMCR0_PMC1SEL_SH 8
47#define MMCR0_PMC2SEL_SH 1
48#define MMCR_PMCSEL_MSK 0x1f
49
50/*
51 * Bits in MMCR1 for POWER4
52 */
53#define MMCR1_TTM0SEL_SH 62
54#define MMCR1_TTC0SEL_SH 61
55#define MMCR1_TTM1SEL_SH 59
56#define MMCR1_TTC1SEL_SH 58
57#define MMCR1_TTM2SEL_SH 56
58#define MMCR1_TTC2SEL_SH 55
59#define MMCR1_TTM3SEL_SH 53
60#define MMCR1_TTC3SEL_SH 52
61#define MMCR1_TTMSEL_MSK 3
62#define MMCR1_TD_CP_DBG0SEL_SH 50
63#define MMCR1_TD_CP_DBG1SEL_SH 48
64#define MMCR1_TD_CP_DBG2SEL_SH 46
65#define MMCR1_TD_CP_DBG3SEL_SH 44
66#define MMCR1_DEBUG0SEL_SH 43
67#define MMCR1_DEBUG1SEL_SH 42
68#define MMCR1_DEBUG2SEL_SH 41
69#define MMCR1_DEBUG3SEL_SH 40
70#define MMCR1_PMC1_ADDER_SEL_SH 39
71#define MMCR1_PMC2_ADDER_SEL_SH 38
72#define MMCR1_PMC6_ADDER_SEL_SH 37
73#define MMCR1_PMC5_ADDER_SEL_SH 36
74#define MMCR1_PMC8_ADDER_SEL_SH 35
75#define MMCR1_PMC7_ADDER_SEL_SH 34
76#define MMCR1_PMC3_ADDER_SEL_SH 33
77#define MMCR1_PMC4_ADDER_SEL_SH 32
78#define MMCR1_PMC3SEL_SH 27
79#define MMCR1_PMC4SEL_SH 22
80#define MMCR1_PMC5SEL_SH 17
81#define MMCR1_PMC6SEL_SH 12
82#define MMCR1_PMC7SEL_SH 7
83#define MMCR1_PMC8SEL_SH 2 /* note bit 0 is in MMCRA for GP */
84
85static short mmcr1_adder_bits[8] = {
86 MMCR1_PMC1_ADDER_SEL_SH,
87 MMCR1_PMC2_ADDER_SEL_SH,
88 MMCR1_PMC3_ADDER_SEL_SH,
89 MMCR1_PMC4_ADDER_SEL_SH,
90 MMCR1_PMC5_ADDER_SEL_SH,
91 MMCR1_PMC6_ADDER_SEL_SH,
92 MMCR1_PMC7_ADDER_SEL_SH,
93 MMCR1_PMC8_ADDER_SEL_SH
94};
95
96/*
97 * Bits in MMCRA
98 */
99#define MMCRA_PMC8SEL0_SH 17 /* PMC8SEL bit 0 for GP */
100
101/*
102 * Layout of constraint bits:
103 * 6666555555555544444444443333333333222222222211111111110000000000
104 * 3210987654321098765432109876543210987654321098765432109876543210
105 * |[ >[ >[ >|||[ >[ >< >< >< >< ><><><><><><><><>
106 * | UC1 UC2 UC3 ||| PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8
107 * \SMPL ||\TTC3SEL
108 * |\TTC_IFU_SEL
109 * \TTM2SEL0
110 *
111 * SMPL - SAMPLE_ENABLE constraint
112 * 56: SAMPLE_ENABLE value 0x0100_0000_0000_0000
113 *
114 * UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2
115 * 55: UC1 error 0x0080_0000_0000_0000
116 * 54: FPU events needed 0x0040_0000_0000_0000
117 * 53: ISU1 events needed 0x0020_0000_0000_0000
118 * 52: IDU0|ISU2 events needed 0x0010_0000_0000_0000
119 *
120 * UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0
121 * 51: UC2 error 0x0008_0000_0000_0000
122 * 50: FPU events needed 0x0004_0000_0000_0000
123 * 49: IFU events needed 0x0002_0000_0000_0000
124 * 48: LSU0 events needed 0x0001_0000_0000_0000
125 *
126 * UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1
127 * 47: UC3 error 0x8000_0000_0000
128 * 46: LSU0 events needed 0x4000_0000_0000
129 * 45: IFU events needed 0x2000_0000_0000
130 * 44: IDU0|ISU2 events needed 0x1000_0000_0000
131 * 43: ISU1 events needed 0x0800_0000_0000
132 *
133 * TTM2SEL0
134 * 42: 0 = IDU0 events needed
135 * 1 = ISU2 events needed 0x0400_0000_0000
136 *
137 * TTC_IFU_SEL
138 * 41: 0 = IFU.U events needed
139 * 1 = IFU.L events needed 0x0200_0000_0000
140 *
141 * TTC3SEL
142 * 40: 0 = LSU1.U events needed
143 * 1 = LSU1.L events needed 0x0100_0000_0000
144 *
145 * PS1
146 * 39: PS1 error 0x0080_0000_0000
147 * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
148 *
149 * PS2
150 * 35: PS2 error 0x0008_0000_0000
151 * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
152 *
153 * B0
154 * 28-31: Byte 0 event source 0xf000_0000
155 * 1 = FPU
156 * 2 = ISU1
157 * 3 = IFU
158 * 4 = IDU0
159 * 7 = ISU2
160 * 9 = LSU0
161 * c = LSU1
162 * f = GPS
163 *
164 * B1, B2, B3
165 * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
166 *
167 * P8
168 * 15: P8 error 0x8000
169 * 14-15: Count of events needing PMC8
170 *
171 * P1..P7
172 * 0-13: Count of events needing PMC1..PMC7
173 *
174 * Note: this doesn't allow events using IFU.U to be combined with events
175 * using IFU.L, though that is feasible (using TTM0 and TTM2). However
176 * there are no listed events for IFU.L (they are debug events not
177 * verified for performance monitoring) so this shouldn't cause a
178 * problem.
179 */
180
181static struct unitinfo {
182 u64 value, mask;
183 int unit;
184 int lowerbit;
185} p4_unitinfo[16] = {
186 [PM_FPU] = { 0x44000000000000ull, 0x88000000000000ull, PM_FPU, 0 },
187 [PM_ISU1] = { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 },
188 [PM_ISU1_ALT] =
189 { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 },
190 [PM_IFU] = { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 },
191 [PM_IFU_ALT] =
192 { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 },
193 [PM_IDU0] = { 0x10100000000000ull, 0x80840000000000ull, PM_IDU0, 1 },
194 [PM_ISU2] = { 0x10140000000000ull, 0x80840000000000ull, PM_ISU2, 0 },
195 [PM_LSU0] = { 0x01400000000000ull, 0x08800000000000ull, PM_LSU0, 0 },
196 [PM_LSU1] = { 0x00000000000000ull, 0x00010000000000ull, PM_LSU1, 40 },
197 [PM_GPS] = { 0x00000000000000ull, 0x00000000000000ull, PM_GPS, 0 }
198};
199
200static unsigned char direct_marked_event[8] = {
201 (1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
202 (1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
203 (1<<3), /* PMC3: PM_MRK_ST_CMPL_INT */
204 (1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
205 (1<<4) | (1<<5), /* PMC5: PM_MRK_GRP_TIMEO */
206 (1<<3) | (1<<4) | (1<<5),
207 /* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
208 (1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
209 (1<<4), /* PMC8: PM_MRK_LSU_FIN */
210};
211
212/*
213 * Returns 1 if event counts things relating to marked instructions
214 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
215 */
216static int p4_marked_instr_event(u64 event)
217{
218 int pmc, psel, unit, byte, bit;
219 unsigned int mask;
220
221 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
222 psel = event & PM_PMCSEL_MSK;
223 if (pmc) {
224 if (direct_marked_event[pmc - 1] & (1 << psel))
225 return 1;
226 if (psel == 0) /* add events */
227 bit = (pmc <= 4)? pmc - 1: 8 - pmc;
228 else if (psel == 6) /* decode events */
229 bit = 4;
230 else
231 return 0;
232 } else
233 bit = psel;
234
235 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
236 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
237 mask = 0;
238 switch (unit) {
239 case PM_LSU1:
240 if (event & PM_LOWER_MSKS)
241 mask = 1 << 28; /* byte 7 bit 4 */
242 else
243 mask = 6 << 24; /* byte 3 bits 1 and 2 */
244 break;
245 case PM_LSU0:
246 /* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */
247 mask = 0x083dff00;
248 }
249 return (mask >> (byte * 8 + bit)) & 1;
250}
251
252static int p4_get_constraint(u64 event, u64 *maskp, u64 *valp)
253{
254 int pmc, byte, unit, lower, sh;
255 u64 mask = 0, value = 0;
256 int grp = -1;
257
258 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
259 if (pmc) {
260 if (pmc > 8)
261 return -1;
262 sh = (pmc - 1) * 2;
263 mask |= 2 << sh;
264 value |= 1 << sh;
265 grp = ((pmc - 1) >> 1) & 1;
266 }
267 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
268 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
269 if (unit) {
270 lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK;
271
272 /*
273 * Bus events on bytes 0 and 2 can be counted
274 * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
275 */
276 if (!pmc)
277 grp = byte & 1;
278
279 if (!p4_unitinfo[unit].unit)
280 return -1;
281 mask |= p4_unitinfo[unit].mask;
282 value |= p4_unitinfo[unit].value;
283 sh = p4_unitinfo[unit].lowerbit;
284 if (sh > 1)
285 value |= (u64)lower << sh;
286 else if (lower != sh)
287 return -1;
288 unit = p4_unitinfo[unit].unit;
289
290 /* Set byte lane select field */
291 mask |= 0xfULL << (28 - 4 * byte);
292 value |= (u64)unit << (28 - 4 * byte);
293 }
294 if (grp == 0) {
295 /* increment PMC1/2/5/6 field */
296 mask |= 0x8000000000ull;
297 value |= 0x1000000000ull;
298 } else {
299 /* increment PMC3/4/7/8 field */
300 mask |= 0x800000000ull;
301 value |= 0x100000000ull;
302 }
303
304 /* Marked instruction events need sample_enable set */
305 if (p4_marked_instr_event(event)) {
306 mask |= 1ull << 56;
307 value |= 1ull << 56;
308 }
309
310 /* PMCSEL=6 decode events on byte 2 need sample_enable clear */
311 if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2)
312 mask |= 1ull << 56;
313
314 *maskp = mask;
315 *valp = value;
316 return 0;
317}
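/*
 * Worked example (illustrative sketch only): the byte lane select fields set
 * by p4_get_constraint() act as "must agree" constraints. Two events that
 * drive byte 0 of the event bus from different units produce different
 * values under the same 4-bit mask, so they can never be scheduled together.
 * The hypothetical helper below uses unit codes 2 (ISU1) and 9 (LSU0) from
 * the table in the layout comment, purely for illustration.
 */
static inline int p4_byte0_conflict_example(void)
{
        u64 mask  = 0xfULL << 28;       /* byte 0 select field, bits 28-31 */
        u64 val_a = (u64)2 << 28;       /* byte 0 driven by ISU1 */
        u64 val_b = (u64)9 << 28;       /* byte 0 driven by LSU0 */

        return ((val_a ^ val_b) & mask) != 0;   /* 1: the events conflict */
}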
318
319static unsigned int ppc_inst_cmpl[] = {
320 0x1001, 0x4001, 0x6001, 0x7001, 0x8001
321};
322
323static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[])
324{
325 int i, j, na;
326
327 alt[0] = event;
328 na = 1;
329
330 /* 2 possibilities for PM_GRP_DISP_REJECT */
331 if (event == 0x8003 || event == 0x0224) {
332 alt[1] = event ^ (0x8003 ^ 0x0224);
333 return 2;
334 }
335
336 /* 2 possibilities for PM_ST_MISS_L1 */
337 if (event == 0x0c13 || event == 0x0c23) {
338 alt[1] = event ^ (0x0c13 ^ 0x0c23);
339 return 2;
340 }
341
342 /* several possibilities for PM_INST_CMPL */
343 for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) {
344 if (event == ppc_inst_cmpl[i]) {
345 for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j)
346 if (j != i)
347 alt[na++] = ppc_inst_cmpl[j];
348 break;
349 }
350 }
351
352 return na;
353}
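/*
 * Worked example (illustrative sketch only): PM_INST_CMPL can be counted via
 * any of the ppc_inst_cmpl[] encodings, so asking for the alternatives of
 * one of them returns the whole set. The helper is hypothetical and only
 * demonstrates the call.
 */
static inline int p4_alternatives_example(void)
{
        u64 alt[5];             /* power4_pmu.max_alternatives is 5 */

        return p4_get_alternatives(0x4001, 0, alt);     /* returns 5 */
}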
354
355static int p4_compute_mmcr(u64 event[], int n_ev,
356 unsigned int hwc[], u64 mmcr[])
357{
358 u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0;
359 unsigned int pmc, unit, byte, psel, lower;
360 unsigned int ttm, grp;
361 unsigned int pmc_inuse = 0;
362 unsigned int pmc_grp_use[2];
363 unsigned char busbyte[4];
364 unsigned char unituse[16];
365 unsigned int unitlower = 0;
366 int i;
367
368 if (n_ev > 8)
369 return -1;
370
371 /* First pass to count resource use */
372 pmc_grp_use[0] = pmc_grp_use[1] = 0;
373 memset(busbyte, 0, sizeof(busbyte));
374 memset(unituse, 0, sizeof(unituse));
375 for (i = 0; i < n_ev; ++i) {
376 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
377 if (pmc) {
378 if (pmc_inuse & (1 << (pmc - 1)))
379 return -1;
380 pmc_inuse |= 1 << (pmc - 1);
381 /* count 1/2/5/6 vs 3/4/7/8 use */
382 ++pmc_grp_use[((pmc - 1) >> 1) & 1];
383 }
384 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
385 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
386 lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK;
387 if (unit) {
388 if (!pmc)
389 ++pmc_grp_use[byte & 1];
390 if (unit == 6 || unit == 8)
391 /* map alt ISU1/IFU codes: 6->2, 8->3 */
392 unit = (unit >> 1) - 1;
393 if (busbyte[byte] && busbyte[byte] != unit)
394 return -1;
395 busbyte[byte] = unit;
396 lower <<= unit;
397 if (unituse[unit] && lower != (unitlower & lower))
398 return -1;
399 unituse[unit] = 1;
400 unitlower |= lower;
401 }
402 }
403 if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
404 return -1;
405
406 /*
407 * Assign resources and set multiplexer selects.
408 *
409 * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,9 on TTM2.
410 * Each TTMx can only select one unit, but since
411 * units 2 and 6 are both ISU1, and 3 and 8 are both IFU,
412 * we have some choices.
413 */
414 if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) {
415 unituse[6] = 1; /* Move 2 to 6 */
416 unituse[2] = 0;
417 }
418 if (unituse[3] & (unituse[1] | unituse[2])) {
419 unituse[8] = 1; /* Move 3 to 8 */
420 unituse[3] = 0;
421 unitlower = (unitlower & ~8) | ((unitlower & 8) << 5);
422 }
423 /* Check only one unit per TTMx */
424 if (unituse[1] + unituse[2] + unituse[3] > 1 ||
425 unituse[4] + unituse[6] + unituse[7] > 1 ||
426 unituse[8] + unituse[9] > 1 ||
427 (unituse[5] | unituse[10] | unituse[11] |
428 unituse[13] | unituse[14]))
429 return -1;
430
431 /* Set TTMxSEL fields. Note, units 1-3 => TTM0SEL codes 0-2 */
432 mmcr1 |= (u64)(unituse[3] * 2 + unituse[2]) << MMCR1_TTM0SEL_SH;
433 mmcr1 |= (u64)(unituse[7] * 3 + unituse[6] * 2) << MMCR1_TTM1SEL_SH;
434 mmcr1 |= (u64)unituse[9] << MMCR1_TTM2SEL_SH;
435
436 /* Set TTCxSEL fields. */
437 if (unitlower & 0xe)
438 mmcr1 |= 1ull << MMCR1_TTC0SEL_SH;
439 if (unitlower & 0xf0)
440 mmcr1 |= 1ull << MMCR1_TTC1SEL_SH;
441 if (unitlower & 0xf00)
442 mmcr1 |= 1ull << MMCR1_TTC2SEL_SH;
443 if (unitlower & 0x7000)
444 mmcr1 |= 1ull << MMCR1_TTC3SEL_SH;
445
446 /* Set byte lane select fields. */
447 for (byte = 0; byte < 4; ++byte) {
448 unit = busbyte[byte];
449 if (!unit)
450 continue;
451 if (unit == 0xf) {
452 /* special case for GPS */
453 mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte);
454 } else {
455 if (!unituse[unit])
456 ttm = unit - 1; /* 2->1, 3->2 */
457 else
458 ttm = unit >> 2;
459 mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2*byte);
460 }
461 }
462
463 /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
464 for (i = 0; i < n_ev; ++i) {
465 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
466 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
467 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
468 psel = event[i] & PM_PMCSEL_MSK;
469 if (!pmc) {
470 /* Bus event or 00xxx direct event (off or cycles) */
471 if (unit)
472 psel |= 0x10 | ((byte & 2) << 2);
473 for (pmc = 0; pmc < 8; ++pmc) {
474 if (pmc_inuse & (1 << pmc))
475 continue;
476 grp = (pmc >> 1) & 1;
477 if (unit) {
478 if (grp == (byte & 1))
479 break;
480 } else if (pmc_grp_use[grp] < 4) {
481 ++pmc_grp_use[grp];
482 break;
483 }
484 }
485 pmc_inuse |= 1 << pmc;
486 } else {
487 /* Direct event */
488 --pmc;
489 if (psel == 0 && (byte & 2))
490 /* add events on higher-numbered bus */
491 mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
492 else if (psel == 6 && byte == 3)
493 /* seem to need to set sample_enable here */
494 mmcra |= MMCRA_SAMPLE_ENABLE;
495 psel |= 8;
496 }
497 if (pmc <= 1)
498 mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc);
499 else
500 mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
501 if (pmc == 7) /* PMC8 */
502 mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH;
503 hwc[i] = pmc;
504 if (p4_marked_instr_event(event[i]))
505 mmcra |= MMCRA_SAMPLE_ENABLE;
506 }
507
508 if (pmc_inuse & 1)
509 mmcr0 |= MMCR0_PMC1CE;
510 if (pmc_inuse & 0xfe)
511 mmcr0 |= MMCR0_PMCjCE;
512
513 mmcra |= 0x2000; /* mark only one IOP per PPC instruction */
514
515 /* Return MMCRx values */
516 mmcr[0] = mmcr0;
517 mmcr[1] = mmcr1;
518 mmcr[2] = mmcra;
519 return 0;
520}
521
522static void p4_disable_pmc(unsigned int pmc, u64 mmcr[])
523{
524 /*
525 * Setting the PMCxSEL field to 0 disables PMC x.
526 * (Note that pmc is 0-based here, not 1-based.)
527 */
528 if (pmc <= 1) {
529 mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc));
530 } else {
531 mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)));
532 if (pmc == 7)
533 mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH);
534 }
535}
536
537static int p4_generic_events[] = {
538 [PERF_COUNT_HW_CPU_CYCLES] = 7,
539 [PERF_COUNT_HW_INSTRUCTIONS] = 0x1001,
540 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */
541 [PERF_COUNT_HW_CACHE_MISSES] = 0x3c10, /* PM_LD_MISS_L1 */
542 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x330, /* PM_BR_ISSUED */
543 [PERF_COUNT_HW_BRANCH_MISSES] = 0x331, /* PM_BR_MPRED_CR */
544};
545
546#define C(x) PERF_COUNT_HW_CACHE_##x
547
548/*
549 * Table of generalized cache-related events.
550 * 0 means not supported, -1 means nonsensical, other values
551 * are event codes.
552 */
553static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
554 [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
555 [C(OP_READ)] = { 0x8c10, 0x3c10 },
556 [C(OP_WRITE)] = { 0x7c10, 0xc13 },
557 [C(OP_PREFETCH)] = { 0xc35, 0 },
558 },
559 [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
560 [C(OP_READ)] = { 0, 0 },
561 [C(OP_WRITE)] = { -1, -1 },
562 [C(OP_PREFETCH)] = { 0, 0 },
563 },
564 [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
565 [C(OP_READ)] = { 0, 0 },
566 [C(OP_WRITE)] = { 0, 0 },
567 [C(OP_PREFETCH)] = { 0xc34, 0 },
568 },
569 [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
570 [C(OP_READ)] = { 0, 0x904 },
571 [C(OP_WRITE)] = { -1, -1 },
572 [C(OP_PREFETCH)] = { -1, -1 },
573 },
574 [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
575 [C(OP_READ)] = { 0, 0x900 },
576 [C(OP_WRITE)] = { -1, -1 },
577 [C(OP_PREFETCH)] = { -1, -1 },
578 },
579 [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
580 [C(OP_READ)] = { 0x330, 0x331 },
581 [C(OP_WRITE)] = { -1, -1 },
582 [C(OP_PREFETCH)] = { -1, -1 },
583 },
584};
585
586struct power_pmu power4_pmu = {
587 .n_counter = 8,
588 .max_alternatives = 5,
589 .add_fields = 0x0000001100005555ull,
590 .test_adder = 0x0011083300000000ull,
591 .compute_mmcr = p4_compute_mmcr,
592 .get_constraint = p4_get_constraint,
593 .get_alternatives = p4_get_alternatives,
594 .disable_pmc = p4_disable_pmc,
595 .n_generic = ARRAY_SIZE(p4_generic_events),
596 .generic_events = p4_generic_events,
597 .cache_events = &power4_cache_events,
598};
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
new file mode 100644
index 000000000000..41e5d2d958d4
--- /dev/null
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -0,0 +1,671 @@
1/*
2 * Performance counter support for POWER5+/++ (not POWER5) processors.
3 *
4 * Copyright 2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/perf_counter.h>
13#include <asm/reg.h>
14
15/*
16 * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3)
17 */
18#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
19#define PM_PMC_MSK 0xf
20#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
21#define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */
22#define PM_UNIT_MSK 0xf
23#define PM_BYTE_SH 12 /* Byte number of event bus to use */
24#define PM_BYTE_MSK 7
25#define PM_GRS_SH 8 /* Storage subsystem mux select */
26#define PM_GRS_MSK 7
27#define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */
28#define PM_PMCSEL_MSK 0x7f
29
30/* Values in PM_UNIT field */
31#define PM_FPU 0
32#define PM_ISU0 1
33#define PM_IFU 2
34#define PM_ISU1 3
35#define PM_IDU 4
36#define PM_ISU0_ALT 6
37#define PM_GRS 7
38#define PM_LSU0 8
39#define PM_LSU1 0xc
40#define PM_LASTUNIT 0xc
41
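/*
 * Worked example (illustrative sketch only): decoding 0x1c10a8, the
 * LD_REF_L1 code used for PERF_COUNT_HW_CACHE_REFERENCES later in this
 * file, with the field macros above. The helper is hypothetical and only
 * shows how the fields unpack.
 */
static inline void power5p_decode_example(void)
{
        u64 event = 0x1c10a8;
        int pmc   = (event >> PM_PMC_SH) & PM_PMC_MSK;   /* 1: PMC1 */
        int unit  = (event >> PM_UNIT_SH) & PM_UNIT_MSK; /* 0xc: PM_LSU1 */
        int byte  = (event >> PM_BYTE_SH) & PM_BYTE_MSK; /* 1: bus byte 1 */
        int psel  = event & PM_PMCSEL_MSK;               /* 0x28 */
        int isbus = event & PM_BUSEVENT_MSK;             /* nonzero: bus event */

        (void)pmc; (void)unit; (void)byte; (void)psel; (void)isbus;
}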
42/*
43 * Bits in MMCR1 for POWER5+
44 */
45#define MMCR1_TTM0SEL_SH 62
46#define MMCR1_TTM1SEL_SH 60
47#define MMCR1_TTM2SEL_SH 58
48#define MMCR1_TTM3SEL_SH 56
49#define MMCR1_TTMSEL_MSK 3
50#define MMCR1_TD_CP_DBG0SEL_SH 54
51#define MMCR1_TD_CP_DBG1SEL_SH 52
52#define MMCR1_TD_CP_DBG2SEL_SH 50
53#define MMCR1_TD_CP_DBG3SEL_SH 48
54#define MMCR1_GRS_L2SEL_SH 46
55#define MMCR1_GRS_L2SEL_MSK 3
56#define MMCR1_GRS_L3SEL_SH 44
57#define MMCR1_GRS_L3SEL_MSK 3
58#define MMCR1_GRS_MCSEL_SH 41
59#define MMCR1_GRS_MCSEL_MSK 7
60#define MMCR1_GRS_FABSEL_SH 39
61#define MMCR1_GRS_FABSEL_MSK 3
62#define MMCR1_PMC1_ADDER_SEL_SH 35
63#define MMCR1_PMC2_ADDER_SEL_SH 34
64#define MMCR1_PMC3_ADDER_SEL_SH 33
65#define MMCR1_PMC4_ADDER_SEL_SH 32
66#define MMCR1_PMC1SEL_SH 25
67#define MMCR1_PMC2SEL_SH 17
68#define MMCR1_PMC3SEL_SH 9
69#define MMCR1_PMC4SEL_SH 1
70#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
71#define MMCR1_PMCSEL_MSK 0x7f
72
73/*
74 * Bits in MMCRA
75 */
76
77/*
78 * Layout of constraint bits:
79 * 6666555555555544444444443333333333222222222211111111110000000000
80 * 3210987654321098765432109876543210987654321098765432109876543210
81 * [ ><><>< ><> <><>[ > < >< >< >< ><><><><><><>
82 * NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P6P5P4P3P2P1
83 *
84 * NC - number of counters
85 * 51: NC error 0x0008_0000_0000_0000
86 * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000
87 *
88 * G0..G3 - GRS mux constraints
89 * 46-47: GRS_L2SEL value
90 * 44-45: GRS_L3SEL value
91 * 41-43: GRS_MCSEL value
92 * 39-40: GRS_FABSEL value
93 * Note that these match up with their bit positions in MMCR1
94 *
95 * T0 - TTM0 constraint
96 * 36-37: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0x30_0000_0000
97 *
98 * T1 - TTM1 constraint
99 * 34-35: TTM1SEL value (0=IDU, 3=GRS) 0x0c_0000_0000
100 *
101 * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS
102 * 33: UC3 error 0x02_0000_0000
103 * 32: FPU|IFU|ISU1 events needed 0x01_0000_0000
104 * 31: ISU0 events needed 0x00_8000_0000
105 * 30: IDU|GRS events needed 0x00_4000_0000
106 *
107 * B0
108 * 24-27: Byte 0 event source 0x0f00_0000
109 * Encoding as for the event code
110 *
111 * B1, B2, B3
112 * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources
113 *
114 * P6
115 * 11: P6 error 0x800
116 * 10-11: Count of events needing PMC6
117 *
118 * P1..P5
119 * 0-9: Count of events needing PMC1..PMC5
120 */
121
122static const int grsel_shift[8] = {
123 MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
124 MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
125 MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
126};
127
128/* Masks and values for using events from the various units */
129static u64 unit_cons[PM_LASTUNIT+1][2] = {
130 [PM_FPU] = { 0x3200000000ull, 0x0100000000ull },
131 [PM_ISU0] = { 0x0200000000ull, 0x0080000000ull },
132 [PM_ISU1] = { 0x3200000000ull, 0x3100000000ull },
133 [PM_IFU] = { 0x3200000000ull, 0x2100000000ull },
134 [PM_IDU] = { 0x0e00000000ull, 0x0040000000ull },
135 [PM_GRS] = { 0x0e00000000ull, 0x0c40000000ull },
136};
137
138static int power5p_get_constraint(u64 event, u64 *maskp, u64 *valp)
139{
140 int pmc, byte, unit, sh;
141 int bit, fmask;
142 u64 mask = 0, value = 0;
143
144 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
145 if (pmc) {
146 if (pmc > 6)
147 return -1;
148 sh = (pmc - 1) * 2;
149 mask |= 2 << sh;
150 value |= 1 << sh;
151 if (pmc >= 5 && !(event == 0x500009 || event == 0x600005))
152 return -1;
153 }
154 if (event & PM_BUSEVENT_MSK) {
155 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
156 if (unit > PM_LASTUNIT)
157 return -1;
158 if (unit == PM_ISU0_ALT)
159 unit = PM_ISU0;
160 mask |= unit_cons[unit][0];
161 value |= unit_cons[unit][1];
162 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
163 if (byte >= 4) {
164 if (unit != PM_LSU1)
165 return -1;
166 /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
167 ++unit;
168 byte &= 3;
169 }
170 if (unit == PM_GRS) {
171 bit = event & 7;
172 fmask = (bit == 6)? 7: 3;
173 sh = grsel_shift[bit];
174 mask |= (u64)fmask << sh;
175 value |= (u64)((event >> PM_GRS_SH) & fmask) << sh;
176 }
177 /* Set byte lane select field */
178 mask |= 0xfULL << (24 - 4 * byte);
179 value |= (u64)unit << (24 - 4 * byte);
180 }
181 if (pmc < 5) {
182 /* need a counter from PMC1-4 set */
183 mask |= 0x8000000000000ull;
184 value |= 0x1000000000000ull;
185 }
186 *maskp = mask;
187 *valp = value;
188 return 0;
189}
190
191static int power5p_limited_pmc_event(u64 event)
192{
193 int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
194
195 return pmc == 5 || pmc == 6;
196}
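/*
 * Worked example (illustrative only): PMC5 and PMC6 are the limited
 * counters, so the PMC5 encoding 0x500009 of PM_RUN_INST_CMPL is a
 * limited-PMC event while the PMC1 encoding 0x100009 of PM_INST_CMPL is
 * not. The helper is hypothetical and only demonstrates the check.
 */
static inline int power5p_limited_example(void)
{
        return power5p_limited_pmc_event(0x500009) &&   /* 1 */
               !power5p_limited_pmc_event(0x100009);    /* also 1 */
}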
197
198#define MAX_ALT 3 /* at most 3 alternatives for any event */
199
200static const unsigned int event_alternatives[][MAX_ALT] = {
201 { 0x100c0, 0x40001f }, /* PM_GCT_FULL_CYC */
202 { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */
203 { 0x230e2, 0x323087 }, /* PM_BR_PRED_CR */
204 { 0x230e3, 0x223087, 0x3230a0 }, /* PM_BR_PRED_TA */
205 { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */
206 { 0x800c4, 0xc20e0 }, /* PM_DTLB_MISS */
207 { 0xc50c6, 0xc60e0 }, /* PM_MRK_DTLB_MISS */
208 { 0x100005, 0x600005 }, /* PM_RUN_CYC */
209 { 0x100009, 0x200009 }, /* PM_INST_CMPL */
210 { 0x200015, 0x300015 }, /* PM_LSU_LMQ_SRQ_EMPTY_CYC */
211 { 0x300009, 0x400009 }, /* PM_INST_DISP */
212};
213
214/*
215 * Scan the alternatives table for a match and return the
216 * index into the alternatives table if found, else -1.
217 */
218static int find_alternative(unsigned int event)
219{
220 int i, j;
221
222 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
223 if (event < event_alternatives[i][0])
224 break;
225 for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
226 if (event == event_alternatives[i][j])
227 return i;
228 }
229 return -1;
230}
231
232static const unsigned char bytedecode_alternatives[4][4] = {
233 /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 },
234 /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e },
235 /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 },
236 /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e }
237};
238
239/*
240 * Some direct events for decodes of event bus byte 3 have alternative
241 * PMCSEL values on other counters. This returns the alternative
242 * event code for those that do, or -1 otherwise. This also handles
243 * alternative PMCSEL values for add events.
244 */
245static s64 find_alternative_bdecode(u64 event)
246{
247 int pmc, altpmc, pp, j;
248
249 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
250 if (pmc == 0 || pmc > 4)
251 return -1;
252 altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */
253 pp = event & PM_PMCSEL_MSK;
254 for (j = 0; j < 4; ++j) {
255 if (bytedecode_alternatives[pmc - 1][j] == pp) {
256 return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
257 (altpmc << PM_PMC_SH) |
258 bytedecode_alternatives[altpmc - 1][j];
259 }
260 }
261
262 /* new decode alternatives for power5+ */
263 if (pmc == 1 && (pp == 0x0d || pp == 0x0e))
264 return event + (2 << PM_PMC_SH) + (0x2e - 0x0d);
265 if (pmc == 3 && (pp == 0x2e || pp == 0x2f))
266 return event - (2 << PM_PMC_SH) - (0x2e - 0x0d);
267
268 /* alternative add event encodings */
269 if (pp == 0x10 || pp == 0x28)
270 return ((event ^ (0x10 ^ 0x28)) & ~PM_PMC_MSKS) |
271 (altpmc << PM_PMC_SH);
272
273 return -1;
274}
275
276static int power5p_get_alternatives(u64 event, unsigned int flags, u64 alt[])
277{
278 int i, j, nalt = 1;
279 int nlim;
280 s64 ae;
281
282 alt[0] = event;
283 nalt = 1;
284 nlim = power5p_limited_pmc_event(event);
285 i = find_alternative(event);
286 if (i >= 0) {
287 for (j = 0; j < MAX_ALT; ++j) {
288 ae = event_alternatives[i][j];
289 if (ae && ae != event)
290 alt[nalt++] = ae;
291 nlim += power5p_limited_pmc_event(ae);
292 }
293 } else {
294 ae = find_alternative_bdecode(event);
295 if (ae > 0)
296 alt[nalt++] = ae;
297 }
298
299 if (flags & PPMU_ONLY_COUNT_RUN) {
300 /*
301 * We're only counting in RUN state,
302 * so PM_CYC is equivalent to PM_RUN_CYC
303 * and PM_INST_CMPL === PM_RUN_INST_CMPL.
304 * This doesn't include alternatives that don't provide
305 * any extra flexibility in assigning PMCs (e.g.
306 * 0x100005 for PM_RUN_CYC vs. 0xf for PM_CYC).
307 * Note that even with these additional alternatives
308 * we never end up with more than 3 alternatives for any event.
309 */
310 j = nalt;
311 for (i = 0; i < nalt; ++i) {
312 switch (alt[i]) {
313 case 0xf: /* PM_CYC */
314 alt[j++] = 0x600005; /* PM_RUN_CYC */
315 ++nlim;
316 break;
317 case 0x600005: /* PM_RUN_CYC */
318 alt[j++] = 0xf;
319 break;
320 case 0x100009: /* PM_INST_CMPL */
321 alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */
322 ++nlim;
323 break;
324 case 0x500009: /* PM_RUN_INST_CMPL */
325 alt[j++] = 0x100009; /* PM_INST_CMPL */
326 alt[j++] = 0x200009;
327 break;
328 }
329 }
330 nalt = j;
331 }
332
333 if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
334 /* remove the limited PMC events */
335 j = 0;
336 for (i = 0; i < nalt; ++i) {
337 if (!power5p_limited_pmc_event(alt[i])) {
338 alt[j] = alt[i];
339 ++j;
340 }
341 }
342 nalt = j;
343 } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
344 /* remove all but the limited PMC events */
345 j = 0;
346 for (i = 0; i < nalt; ++i) {
347 if (power5p_limited_pmc_event(alt[i])) {
348 alt[j] = alt[i];
349 ++j;
350 }
351 }
352 nalt = j;
353 }
354
355 return nalt;
356}
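/*
 * Worked example (illustrative only): with PPMU_ONLY_COUNT_RUN and
 * PPMU_LIMITED_PMC_OK both set, PM_CYC (0xf) picks up PM_RUN_CYC
 * (0x600005, on limited PMC6) as an extra alternative, so the caller gets
 * { 0xf, 0x600005 } back. The helper is hypothetical and only
 * demonstrates the call.
 */
static inline int power5p_run_alt_example(void)
{
        u64 alt[MAX_ALT];

        return power5p_get_alternatives(0xf,
                        PPMU_ONLY_COUNT_RUN | PPMU_LIMITED_PMC_OK, alt);
        /* returns 2 */
}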
357
358/*
359 * Map of which direct events on which PMCs are marked instruction events.
360 * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
361 * Bit 0 is set if it is marked for all PMCs.
362 * The 0x80 bit indicates a byte decode PMCSEL value.
363 */
364static unsigned char direct_event_is_marked[0x28] = {
365 0, /* 00 */
366 0x1f, /* 01 PM_IOPS_CMPL */
367 0x2, /* 02 PM_MRK_GRP_DISP */
368 0xe, /* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
369 0, /* 04 */
370 0x1c, /* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
371 0x80, /* 06 */
372 0x80, /* 07 */
373 0, 0, 0,/* 08 - 0a */
374 0x18, /* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
375 0, /* 0c */
376 0x80, /* 0d */
377 0x80, /* 0e */
378 0, /* 0f */
379 0, /* 10 */
380 0x14, /* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
381 0, /* 12 */
382 0x10, /* 13 PM_MRK_GRP_CMPL */
383 0x1f, /* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
384 0x2, /* 15 PM_MRK_GRP_ISSUED */
385 0x80, /* 16 */
386 0x80, /* 17 */
387 0, 0, 0, 0, 0,
388 0x80, /* 1d */
389 0x80, /* 1e */
390 0, /* 1f */
391 0x80, /* 20 */
392 0x80, /* 21 */
393 0x80, /* 22 */
394 0x80, /* 23 */
395 0x80, /* 24 */
396 0x80, /* 25 */
397 0x80, /* 26 */
398 0x80, /* 27 */
399};
400
401/*
402 * Returns 1 if event counts things relating to marked instructions
403 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
404 */
405static int power5p_marked_instr_event(u64 event)
406{
407 int pmc, psel;
408 int bit, byte, unit;
409 u32 mask;
410
411 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
412 psel = event & PM_PMCSEL_MSK;
413 if (pmc >= 5)
414 return 0;
415
416 bit = -1;
417 if (psel < sizeof(direct_event_is_marked)) {
418 if (direct_event_is_marked[psel] & (1 << pmc))
419 return 1;
420 if (direct_event_is_marked[psel] & 0x80)
421 bit = 4;
422 else if (psel == 0x08)
423 bit = pmc - 1;
424 else if (psel == 0x10)
425 bit = 4 - pmc;
426 else if (psel == 0x1b && (pmc == 1 || pmc == 3))
427 bit = 4;
428 } else if ((psel & 0x48) == 0x40) {
429 bit = psel & 7;
430 } else if (psel == 0x28) {
431 bit = pmc - 1;
432 } else if (pmc == 3 && (psel == 0x2e || psel == 0x2f)) {
433 bit = 4;
434 }
435
436 if (!(event & PM_BUSEVENT_MSK) || bit == -1)
437 return 0;
438
439 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
440 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
441 if (unit == PM_LSU0) {
442 /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */
443 mask = 0x5dff00;
444 } else if (unit == PM_LSU1 && byte >= 4) {
445 byte -= 4;
446 /* byte 5 bits 6-7, byte 6 bits 0,4, byte 7 bits 0-4,6 */
447 mask = 0x5f11c000;
448 } else
449 return 0;
450
451 return (mask >> (byte * 8 + bit)) & 1;
452}
453
454static int power5p_compute_mmcr(u64 event[], int n_ev,
455 unsigned int hwc[], u64 mmcr[])
456{
457 u64 mmcr1 = 0;
458 u64 mmcra = 0;
459 unsigned int pmc, unit, byte, psel;
460 unsigned int ttm;
461 int i, isbus, bit, grsel;
462 unsigned int pmc_inuse = 0;
463 unsigned char busbyte[4];
464 unsigned char unituse[16];
465 int ttmuse;
466
467 if (n_ev > 6)
468 return -1;
469
470 /* First pass to count resource use */
471 memset(busbyte, 0, sizeof(busbyte));
472 memset(unituse, 0, sizeof(unituse));
473 for (i = 0; i < n_ev; ++i) {
474 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
475 if (pmc) {
476 if (pmc > 6)
477 return -1;
478 if (pmc_inuse & (1 << (pmc - 1)))
479 return -1;
480 pmc_inuse |= 1 << (pmc - 1);
481 }
482 if (event[i] & PM_BUSEVENT_MSK) {
483 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
484 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
485 if (unit > PM_LASTUNIT)
486 return -1;
487 if (unit == PM_ISU0_ALT)
488 unit = PM_ISU0;
489 if (byte >= 4) {
490 if (unit != PM_LSU1)
491 return -1;
492 ++unit;
493 byte &= 3;
494 }
495 if (busbyte[byte] && busbyte[byte] != unit)
496 return -1;
497 busbyte[byte] = unit;
498 unituse[unit] = 1;
499 }
500 }
501
502 /*
503 * Assign resources and set multiplexer selects.
504 *
505 * PM_ISU0 can go either on TTM0 or TTM1, but that's the only
506 * choice we have to deal with.
507 */
508 if (unituse[PM_ISU0] &
509 (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
510 unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */
511 unituse[PM_ISU0] = 0;
512 }
513 /* Set TTM[01]SEL fields. */
514 ttmuse = 0;
515 for (i = PM_FPU; i <= PM_ISU1; ++i) {
516 if (!unituse[i])
517 continue;
518 if (ttmuse++)
519 return -1;
520 mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH;
521 }
522 ttmuse = 0;
523 for (; i <= PM_GRS; ++i) {
524 if (!unituse[i])
525 continue;
526 if (ttmuse++)
527 return -1;
528 mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH;
529 }
530 if (ttmuse > 1)
531 return -1;
532
533 /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
534 for (byte = 0; byte < 4; ++byte) {
535 unit = busbyte[byte];
536 if (!unit)
537 continue;
538 if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
539 /* get ISU0 through TTM1 rather than TTM0 */
540 unit = PM_ISU0_ALT;
541 } else if (unit == PM_LSU1 + 1) {
542 /* select lower word of LSU1 for this byte */
543 mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
544 }
545 ttm = unit >> 2;
546 mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
547 }
548
549 /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
550 for (i = 0; i < n_ev; ++i) {
551 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
552 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
553 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
554 psel = event[i] & PM_PMCSEL_MSK;
555 isbus = event[i] & PM_BUSEVENT_MSK;
556 if (!pmc) {
557 /* Bus event or any-PMC direct event */
558 for (pmc = 0; pmc < 4; ++pmc) {
559 if (!(pmc_inuse & (1 << pmc)))
560 break;
561 }
562 if (pmc >= 4)
563 return -1;
564 pmc_inuse |= 1 << pmc;
565 } else if (pmc <= 4) {
566 /* Direct event */
567 --pmc;
568 if (isbus && (byte & 2) &&
569 (psel == 8 || psel == 0x10 || psel == 0x28))
570 /* add events on higher-numbered bus */
571 mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
572 } else {
573 /* Instructions or run cycles on PMC5/6 */
574 --pmc;
575 }
576 if (isbus && unit == PM_GRS) {
577 bit = psel & 7;
578 grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
579 mmcr1 |= (u64)grsel << grsel_shift[bit];
580 }
581 if (power5p_marked_instr_event(event[i]))
582 mmcra |= MMCRA_SAMPLE_ENABLE;
583 if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1))
584 /* select alternate byte lane */
585 psel |= 0x10;
586 if (pmc <= 3)
587 mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
588 hwc[i] = pmc;
589 }
590
591 /* Return MMCRx values */
592 mmcr[0] = 0;
593 if (pmc_inuse & 1)
594 mmcr[0] = MMCR0_PMC1CE;
595 if (pmc_inuse & 0x3e)
596 mmcr[0] |= MMCR0_PMCjCE;
597 mmcr[1] = mmcr1;
598 mmcr[2] = mmcra;
599 return 0;
600}
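/*
 * Usage sketch (illustrative only, hypothetical caller): the raw event
 * codes go in, the chosen PMC numbers come back in hwc[], and mmcr[]
 * carries the register images: mmcr[0] the MMCR0_PMC1CE/PMCjCE bits set
 * above, mmcr[1] the MMCR1 mux/PMCSEL fields, and mmcr[2] MMCRA (e.g.
 * MMCRA_SAMPLE_ENABLE for marked-instruction events).
 */
static inline int power5p_compute_mmcr_example(void)
{
        u64 events[2] = { 0xf, 0x100009 };      /* PM_CYC, PM_INST_CMPL */
        unsigned int hwc[2];
        u64 mmcr[3];

        return power5p_compute_mmcr(events, 2, hwc, mmcr);      /* 0 on success */
}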
601
602static void power5p_disable_pmc(unsigned int pmc, u64 mmcr[])
603{
604 if (pmc <= 3)
605 mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
606}
607
608static int power5p_generic_events[] = {
609 [PERF_COUNT_HW_CPU_CYCLES] = 0xf,
610 [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009,
611 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x1c10a8, /* LD_REF_L1 */
612 [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */
613 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */
614 [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */
615};
616
617#define C(x) PERF_COUNT_HW_CACHE_##x
618
619/*
620 * Table of generalized cache-related events.
621 * 0 means not supported, -1 means nonsensical, other values
622 * are event codes.
623 */
624static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
625 [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
626 [C(OP_READ)] = { 0x1c10a8, 0x3c1088 },
627 [C(OP_WRITE)] = { 0x2c10a8, 0xc10c3 },
628 [C(OP_PREFETCH)] = { 0xc70e7, -1 },
629 },
630 [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
631 [C(OP_READ)] = { 0, 0 },
632 [C(OP_WRITE)] = { -1, -1 },
633 [C(OP_PREFETCH)] = { 0, 0 },
634 },
635 [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
636 [C(OP_READ)] = { 0, 0 },
637 [C(OP_WRITE)] = { 0, 0 },
638 [C(OP_PREFETCH)] = { 0xc50c3, 0 },
639 },
640 [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
641 [C(OP_READ)] = { 0xc20e4, 0x800c4 },
642 [C(OP_WRITE)] = { -1, -1 },
643 [C(OP_PREFETCH)] = { -1, -1 },
644 },
645 [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
646 [C(OP_READ)] = { 0, 0x800c0 },
647 [C(OP_WRITE)] = { -1, -1 },
648 [C(OP_PREFETCH)] = { -1, -1 },
649 },
650 [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
651 [C(OP_READ)] = { 0x230e4, 0x230e5 },
652 [C(OP_WRITE)] = { -1, -1 },
653 [C(OP_PREFETCH)] = { -1, -1 },
654 },
655};
656
657struct power_pmu power5p_pmu = {
658 .n_counter = 6,
659 .max_alternatives = MAX_ALT,
660 .add_fields = 0x7000000000055ull,
661 .test_adder = 0x3000040000000ull,
662 .compute_mmcr = power5p_compute_mmcr,
663 .get_constraint = power5p_get_constraint,
664 .get_alternatives = power5p_get_alternatives,
665 .disable_pmc = power5p_disable_pmc,
666 .limited_pmc_event = power5p_limited_pmc_event,
667 .flags = PPMU_LIMITED_PMC5_6,
668 .n_generic = ARRAY_SIZE(power5p_generic_events),
669 .generic_events = power5p_generic_events,
670 .cache_events = &power5p_cache_events,
671};
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
new file mode 100644
index 000000000000..05600b66221a
--- /dev/null
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -0,0 +1,611 @@
1/*
2 * Performance counter support for POWER5 (not POWER5++) processors.
3 *
4 * Copyright 2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/perf_counter.h>
13#include <asm/reg.h>
14
15/*
16 * Bits in event code for POWER5 (not POWER5++)
17 */
18#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
19#define PM_PMC_MSK 0xf
20#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
21#define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */
22#define PM_UNIT_MSK 0xf
23#define PM_BYTE_SH 12 /* Byte number of event bus to use */
24#define PM_BYTE_MSK 7
25#define PM_GRS_SH 8 /* Storage subsystem mux select */
26#define PM_GRS_MSK 7
27#define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */
28#define PM_PMCSEL_MSK 0x7f
29
30/* Values in PM_UNIT field */
31#define PM_FPU 0
32#define PM_ISU0 1
33#define PM_IFU 2
34#define PM_ISU1 3
35#define PM_IDU 4
36#define PM_ISU0_ALT 6
37#define PM_GRS 7
38#define PM_LSU0 8
39#define PM_LSU1 0xc
40#define PM_LASTUNIT 0xc
41
42/*
43 * Bits in MMCR1 for POWER5
44 */
45#define MMCR1_TTM0SEL_SH 62
46#define MMCR1_TTM1SEL_SH 60
47#define MMCR1_TTM2SEL_SH 58
48#define MMCR1_TTM3SEL_SH 56
49#define MMCR1_TTMSEL_MSK 3
50#define MMCR1_TD_CP_DBG0SEL_SH 54
51#define MMCR1_TD_CP_DBG1SEL_SH 52
52#define MMCR1_TD_CP_DBG2SEL_SH 50
53#define MMCR1_TD_CP_DBG3SEL_SH 48
54#define MMCR1_GRS_L2SEL_SH 46
55#define MMCR1_GRS_L2SEL_MSK 3
56#define MMCR1_GRS_L3SEL_SH 44
57#define MMCR1_GRS_L3SEL_MSK 3
58#define MMCR1_GRS_MCSEL_SH 41
59#define MMCR1_GRS_MCSEL_MSK 7
60#define MMCR1_GRS_FABSEL_SH 39
61#define MMCR1_GRS_FABSEL_MSK 3
62#define MMCR1_PMC1_ADDER_SEL_SH 35
63#define MMCR1_PMC2_ADDER_SEL_SH 34
64#define MMCR1_PMC3_ADDER_SEL_SH 33
65#define MMCR1_PMC4_ADDER_SEL_SH 32
66#define MMCR1_PMC1SEL_SH 25
67#define MMCR1_PMC2SEL_SH 17
68#define MMCR1_PMC3SEL_SH 9
69#define MMCR1_PMC4SEL_SH 1
70#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
71#define MMCR1_PMCSEL_MSK 0x7f
72
73/*
74 * Bits in MMCRA
75 */
76
77/*
78 * Layout of constraint bits:
79 * 6666555555555544444444443333333333222222222211111111110000000000
80 * 3210987654321098765432109876543210987654321098765432109876543210
81 * <><>[ ><><>< ><> [ >[ >[ >< >< >< >< ><><><><><><>
82 * T0T1 NC G0G1G2 G3 UC PS1PS2 B0 B1 B2 B3 P6P5P4P3P2P1
83 *
84 * T0 - TTM0 constraint
85 * 54-55: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0xc0_0000_0000_0000
86 *
87 * T1 - TTM1 constraint
88 * 52-53: TTM1SEL value (0=IDU, 3=GRS) 0x30_0000_0000_0000
89 *
90 * NC - number of counters
91 * 51: NC error 0x0008_0000_0000_0000
92 * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000
93 *
94 * G0..G3 - GRS mux constraints
95 * 46-47: GRS_L2SEL value
96 * 44-45: GRS_L3SEL value
97 * 41-43: GRS_MCSEL value
98 * 39-40: GRS_FABSEL value
99 * Note that these match up with their bit positions in MMCR1
100 *
101 * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS
102 * 37: UC3 error 0x20_0000_0000
103 * 36: FPU|IFU|ISU1 events needed 0x10_0000_0000
104 * 35: ISU0 events needed 0x08_0000_0000
105 * 34: IDU|GRS events needed 0x04_0000_0000
106 *
107 * PS1
108 * 33: PS1 error 0x2_0000_0000
109 * 31-32: count of events needing PMC1/2 0x1_8000_0000
110 *
111 * PS2
112 * 30: PS2 error 0x4000_0000
113 * 28-29: count of events needing PMC3/4 0x3000_0000
114 *
115 * B0
116 * 24-27: Byte 0 event source 0x0f00_0000
117 * Encoding as for the event code
118 *
119 * B1, B2, B3
120 * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources
121 *
122 * P1..P6
123 * 0-11: Count of events needing PMC1..PMC6
124 */
125
126static const int grsel_shift[8] = {
127 MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
128 MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
129 MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
130};
131
132/* Masks and values for using events from the various units */
133static u64 unit_cons[PM_LASTUNIT+1][2] = {
134 [PM_FPU] = { 0xc0002000000000ull, 0x00001000000000ull },
135 [PM_ISU0] = { 0x00002000000000ull, 0x00000800000000ull },
136 [PM_ISU1] = { 0xc0002000000000ull, 0xc0001000000000ull },
137 [PM_IFU] = { 0xc0002000000000ull, 0x80001000000000ull },
138 [PM_IDU] = { 0x30002000000000ull, 0x00000400000000ull },
139 [PM_GRS] = { 0x30002000000000ull, 0x30000400000000ull },
140};
141
142static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp)
143{
144 int pmc, byte, unit, sh;
145 int bit, fmask;
146 u64 mask = 0, value = 0;
147 int grp = -1;
148
149 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
150 if (pmc) {
151 if (pmc > 6)
152 return -1;
153 sh = (pmc - 1) * 2;
154 mask |= 2 << sh;
155 value |= 1 << sh;
156 if (pmc <= 4)
157 grp = (pmc - 1) >> 1;
158 else if (event != 0x500009 && event != 0x600005)
159 return -1;
160 }
161 if (event & PM_BUSEVENT_MSK) {
162 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
163 if (unit > PM_LASTUNIT)
164 return -1;
165 if (unit == PM_ISU0_ALT)
166 unit = PM_ISU0;
167 mask |= unit_cons[unit][0];
168 value |= unit_cons[unit][1];
169 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
170 if (byte >= 4) {
171 if (unit != PM_LSU1)
172 return -1;
173 /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
174 ++unit;
175 byte &= 3;
176 }
177 if (unit == PM_GRS) {
178 bit = event & 7;
179 fmask = (bit == 6)? 7: 3;
180 sh = grsel_shift[bit];
181 mask |= (u64)fmask << sh;
182 value |= (u64)((event >> PM_GRS_SH) & fmask) << sh;
183 }
184 /*
185 * Bus events on bytes 0 and 2 can be counted
186 * on PMC1/2; bytes 1 and 3 on PMC3/4.
187 */
188 if (!pmc)
189 grp = byte & 1;
190 /* Set byte lane select field */
191 mask |= 0xfULL << (24 - 4 * byte);
192 value |= (u64)unit << (24 - 4 * byte);
193 }
194 if (grp == 0) {
195 /* increment PMC1/2 field */
196 mask |= 0x200000000ull;
197 value |= 0x080000000ull;
198 } else if (grp == 1) {
199 /* increment PMC3/4 field */
200 mask |= 0x40000000ull;
201 value |= 0x10000000ull;
202 }
203 if (pmc < 5) {
204 /* need a counter from PMC1-4 set */
205 mask |= 0x8000000000000ull;
206 value |= 0x1000000000000ull;
207 }
208 *maskp = mask;
209 *valp = value;
210 return 0;
211}
212
213#define MAX_ALT 3 /* at most 3 alternatives for any event */
214
215static const unsigned int event_alternatives[][MAX_ALT] = {
216 { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */
217 { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */
218 { 0x100005, 0x600005 }, /* PM_RUN_CYC */
219 { 0x100009, 0x200009, 0x500009 }, /* PM_INST_CMPL */
220 { 0x300009, 0x400009 }, /* PM_INST_DISP */
221};
222
223/*
224 * Scan the alternatives table for a match and return the
225 * index into the alternatives table if found, else -1.
226 */
227static int find_alternative(u64 event)
228{
229 int i, j;
230
231 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
232 if (event < event_alternatives[i][0])
233 break;
234 for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
235 if (event == event_alternatives[i][j])
236 return i;
237 }
238 return -1;
239}
240
241static const unsigned char bytedecode_alternatives[4][4] = {
242 /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 },
243 /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e },
244 /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 },
245 /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e }
246};
247
248/*
249 * Some direct events for decodes of event bus byte 3 have alternative
250 * PMCSEL values on other counters. This returns the alternative
251 * event code for those that do, or -1 otherwise.
252 */
253static s64 find_alternative_bdecode(u64 event)
254{
255 int pmc, altpmc, pp, j;
256
257 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
258 if (pmc == 0 || pmc > 4)
259 return -1;
260 altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */
261 pp = event & PM_PMCSEL_MSK;
262 for (j = 0; j < 4; ++j) {
263 if (bytedecode_alternatives[pmc - 1][j] == pp) {
264 return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
265 (altpmc << PM_PMC_SH) |
266 bytedecode_alternatives[altpmc - 1][j];
267 }
268 }
269 return -1;
270}
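/*
 * Worked example (illustrative only): a byte-decode event using PMCSEL
 * 0x23 on PMC1 has the equivalent PMCSEL 0x17 on PMC4, per the table
 * above. The encoding 0x100023 is hypothetical (unit/byte fields left at
 * zero) and only exercises the lookup.
 */
static inline s64 power5_bdecode_example(void)
{
        return find_alternative_bdecode(0x100023);      /* = 0x400017 */
}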
271
272static int power5_get_alternatives(u64 event, unsigned int flags, u64 alt[])
273{
274 int i, j, nalt = 1;
275 s64 ae;
276
277 alt[0] = event;
278 nalt = 1;
279 i = find_alternative(event);
280 if (i >= 0) {
281 for (j = 0; j < MAX_ALT; ++j) {
282 ae = event_alternatives[i][j];
283 if (ae && ae != event)
284 alt[nalt++] = ae;
285 }
286 } else {
287 ae = find_alternative_bdecode(event);
288 if (ae > 0)
289 alt[nalt++] = ae;
290 }
291 return nalt;
292}
293
294/*
295 * Map of which direct events on which PMCs are marked instruction events.
296 * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
297 * Bit 0 is set if it is marked for all PMCs.
298 * The 0x80 bit indicates a byte decode PMCSEL value.
299 */
300static unsigned char direct_event_is_marked[0x28] = {
301 0, /* 00 */
302 0x1f, /* 01 PM_IOPS_CMPL */
303 0x2, /* 02 PM_MRK_GRP_DISP */
304 0xe, /* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
305 0, /* 04 */
306 0x1c, /* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
307 0x80, /* 06 */
308 0x80, /* 07 */
309 0, 0, 0,/* 08 - 0a */
310 0x18, /* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
311 0, /* 0c */
312 0x80, /* 0d */
313 0x80, /* 0e */
314 0, /* 0f */
315 0, /* 10 */
316 0x14, /* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
317 0, /* 12 */
318 0x10, /* 13 PM_MRK_GRP_CMPL */
319 0x1f, /* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
320 0x2, /* 15 PM_MRK_GRP_ISSUED */
321 0x80, /* 16 */
322 0x80, /* 17 */
323 0, 0, 0, 0, 0,
324 0x80, /* 1d */
325 0x80, /* 1e */
326 0, /* 1f */
327 0x80, /* 20 */
328 0x80, /* 21 */
329 0x80, /* 22 */
330 0x80, /* 23 */
331 0x80, /* 24 */
332 0x80, /* 25 */
333 0x80, /* 26 */
334 0x80, /* 27 */
335};
336
337/*
338 * Returns 1 if event counts things relating to marked instructions
339 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
340 */
341static int power5_marked_instr_event(u64 event)
342{
343 int pmc, psel;
344 int bit, byte, unit;
345 u32 mask;
346
347 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
348 psel = event & PM_PMCSEL_MSK;
349 if (pmc >= 5)
350 return 0;
351
352 bit = -1;
353 if (psel < sizeof(direct_event_is_marked)) {
354 if (direct_event_is_marked[psel] & (1 << pmc))
355 return 1;
356 if (direct_event_is_marked[psel] & 0x80)
357 bit = 4;
358 else if (psel == 0x08)
359 bit = pmc - 1;
360 else if (psel == 0x10)
361 bit = 4 - pmc;
362 else if (psel == 0x1b && (pmc == 1 || pmc == 3))
363 bit = 4;
364 } else if ((psel & 0x58) == 0x40)
365 bit = psel & 7;
366
367 if (!(event & PM_BUSEVENT_MSK))
368 return 0;
369
370 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
371 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
372 if (unit == PM_LSU0) {
373 /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */
374 mask = 0x5dff00;
375 } else if (unit == PM_LSU1 && byte >= 4) {
376 byte -= 4;
377 /* byte 4 bits 1,3,5,7, byte 5 bits 6-7, byte 7 bits 0-4,6 */
378 mask = 0x5f00c0aa;
379 } else
380 return 0;
381
382 return (mask >> (byte * 8 + bit)) & 1;
383}
384
385static int power5_compute_mmcr(u64 event[], int n_ev,
386 unsigned int hwc[], u64 mmcr[])
387{
388 u64 mmcr1 = 0;
389 u64 mmcra = 0;
390 unsigned int pmc, unit, byte, psel;
391 unsigned int ttm, grp;
392 int i, isbus, bit, grsel;
393 unsigned int pmc_inuse = 0;
394 unsigned int pmc_grp_use[2];
395 unsigned char busbyte[4];
396 unsigned char unituse[16];
397 int ttmuse;
398
399 if (n_ev > 6)
400 return -1;
401
402 /* First pass to count resource use */
403 pmc_grp_use[0] = pmc_grp_use[1] = 0;
404 memset(busbyte, 0, sizeof(busbyte));
405 memset(unituse, 0, sizeof(unituse));
406 for (i = 0; i < n_ev; ++i) {
407 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
408 if (pmc) {
409 if (pmc > 6)
410 return -1;
411 if (pmc_inuse & (1 << (pmc - 1)))
412 return -1;
413 pmc_inuse |= 1 << (pmc - 1);
414 /* count 1/2 vs 3/4 use */
415 if (pmc <= 4)
416 ++pmc_grp_use[(pmc - 1) >> 1];
417 }
418 if (event[i] & PM_BUSEVENT_MSK) {
419 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
420 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
421 if (unit > PM_LASTUNIT)
422 return -1;
423 if (unit == PM_ISU0_ALT)
424 unit = PM_ISU0;
425 if (byte >= 4) {
426 if (unit != PM_LSU1)
427 return -1;
428 ++unit;
429 byte &= 3;
430 }
431 if (!pmc)
432 ++pmc_grp_use[byte & 1];
433 if (busbyte[byte] && busbyte[byte] != unit)
434 return -1;
435 busbyte[byte] = unit;
436 unituse[unit] = 1;
437 }
438 }
439 if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2)
440 return -1;
441
442 /*
443 * Assign resources and set multiplexer selects.
444 *
445 * PM_ISU0 can go either on TTM0 or TTM1, but that's the only
446 * choice we have to deal with.
447 */
448 if (unituse[PM_ISU0] &
449 (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
450 unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */
451 unituse[PM_ISU0] = 0;
452 }
453 /* Set TTM[01]SEL fields. */
454 ttmuse = 0;
455 for (i = PM_FPU; i <= PM_ISU1; ++i) {
456 if (!unituse[i])
457 continue;
458 if (ttmuse++)
459 return -1;
460 mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH;
461 }
462 ttmuse = 0;
463 for (; i <= PM_GRS; ++i) {
464 if (!unituse[i])
465 continue;
466 if (ttmuse++)
467 return -1;
468 mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH;
469 }
470 if (ttmuse > 1)
471 return -1;
472
473 /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
474 for (byte = 0; byte < 4; ++byte) {
475 unit = busbyte[byte];
476 if (!unit)
477 continue;
478 if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
479 /* get ISU0 through TTM1 rather than TTM0 */
480 unit = PM_ISU0_ALT;
481 } else if (unit == PM_LSU1 + 1) {
482 /* select lower word of LSU1 for this byte */
483 mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
484 }
485 ttm = unit >> 2;
486 mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
487 }
488
489 /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
490 for (i = 0; i < n_ev; ++i) {
491 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
492 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
493 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
494 psel = event[i] & PM_PMCSEL_MSK;
495 isbus = event[i] & PM_BUSEVENT_MSK;
496 if (!pmc) {
497 /* Bus event or any-PMC direct event */
498 for (pmc = 0; pmc < 4; ++pmc) {
499 if (pmc_inuse & (1 << pmc))
500 continue;
501 grp = (pmc >> 1) & 1;
502 if (isbus) {
503 if (grp == (byte & 1))
504 break;
505 } else if (pmc_grp_use[grp] < 2) {
506 ++pmc_grp_use[grp];
507 break;
508 }
509 }
510 pmc_inuse |= 1 << pmc;
511 } else if (pmc <= 4) {
512 /* Direct event */
513 --pmc;
514 if ((psel == 8 || psel == 0x10) && isbus && (byte & 2))
515 /* add events on higher-numbered bus */
516 mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
517 } else {
518 /* Instructions or run cycles on PMC5/6 */
519 --pmc;
520 }
521 if (isbus && unit == PM_GRS) {
522 bit = psel & 7;
523 grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
524 mmcr1 |= (u64)grsel << grsel_shift[bit];
525 }
526 if (power5_marked_instr_event(event[i]))
527 mmcra |= MMCRA_SAMPLE_ENABLE;
528 if (pmc <= 3)
529 mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
530 hwc[i] = pmc;
531 }
532
533 /* Return MMCRx values */
534 mmcr[0] = 0;
535 if (pmc_inuse & 1)
536 mmcr[0] = MMCR0_PMC1CE;
537 if (pmc_inuse & 0x3e)
538 mmcr[0] |= MMCR0_PMCjCE;
539 mmcr[1] = mmcr1;
540 mmcr[2] = mmcra;
541 return 0;
542}
543
544static void power5_disable_pmc(unsigned int pmc, u64 mmcr[])
545{
546 if (pmc <= 3)
547 mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
548}
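/*
 * Usage sketch (illustrative only, hypothetical caller): disabling PMC1
 * (pmc argument 0) clears the 7-bit PMC1SEL field, i.e. bits 25-31 of the
 * MMCR1 image, and leaves the other select fields untouched.
 */
static inline void power5_disable_pmc_example(u64 mmcr[])
{
        power5_disable_pmc(0, mmcr);    /* mmcr[1] &= ~(0x7fUL << 25) */
}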
549
550static int power5_generic_events[] = {
551 [PERF_COUNT_HW_CPU_CYCLES] = 0xf,
552 [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009,
553 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4c1090, /* LD_REF_L1 */
554 [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */
555 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */
556 [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */
557};
558
559#define C(x) PERF_COUNT_HW_CACHE_##x
560
561/*
562 * Table of generalized cache-related events.
563 * 0 means not supported, -1 means nonsensical, other values
564 * are event codes.
565 */
566static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
567 [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
568 [C(OP_READ)] = { 0x4c1090, 0x3c1088 },
569 [C(OP_WRITE)] = { 0x3c1090, 0xc10c3 },
570 [C(OP_PREFETCH)] = { 0xc70e7, 0 },
571 },
572 [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
573 [C(OP_READ)] = { 0, 0 },
574 [C(OP_WRITE)] = { -1, -1 },
575 [C(OP_PREFETCH)] = { 0, 0 },
576 },
577 [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
578 [C(OP_READ)] = { 0, 0x3c309b },
579 [C(OP_WRITE)] = { 0, 0 },
580 [C(OP_PREFETCH)] = { 0xc50c3, 0 },
581 },
582 [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
583 [C(OP_READ)] = { 0x2c4090, 0x800c4 },
584 [C(OP_WRITE)] = { -1, -1 },
585 [C(OP_PREFETCH)] = { -1, -1 },
586 },
587 [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
588 [C(OP_READ)] = { 0, 0x800c0 },
589 [C(OP_WRITE)] = { -1, -1 },
590 [C(OP_PREFETCH)] = { -1, -1 },
591 },
592 [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
593 [C(OP_READ)] = { 0x230e4, 0x230e5 },
594 [C(OP_WRITE)] = { -1, -1 },
595 [C(OP_PREFETCH)] = { -1, -1 },
596 },
597};
598
599struct power_pmu power5_pmu = {
600 .n_counter = 6,
601 .max_alternatives = MAX_ALT,
602 .add_fields = 0x7000090000555ull,
603 .test_adder = 0x3000490000000ull,
604 .compute_mmcr = power5_compute_mmcr,
605 .get_constraint = power5_get_constraint,
606 .get_alternatives = power5_get_alternatives,
607 .disable_pmc = power5_disable_pmc,
608 .n_generic = ARRAY_SIZE(power5_generic_events),
609 .generic_events = power5_generic_events,
610 .cache_events = &power5_cache_events,
611};
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
new file mode 100644
index 000000000000..46f74bebcfd9
--- /dev/null
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -0,0 +1,532 @@
1/*
2 * Performance counter support for POWER6 processors.
3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/perf_counter.h>
13#include <asm/reg.h>
14
15/*
16 * Bits in event code for POWER6
17 */
18#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
19#define PM_PMC_MSK 0x7
20#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
21#define PM_UNIT_SH 16 /* Unit event comes from (TTMxSEL encoding) */
22#define PM_UNIT_MSK 0xf
23#define PM_UNIT_MSKS (PM_UNIT_MSK << PM_UNIT_SH)
24#define PM_LLAV 0x8000 /* Load lookahead match value */
25#define PM_LLA 0x4000 /* Load lookahead match enable */
26#define PM_BYTE_SH 12 /* Byte of event bus to use */
27#define PM_BYTE_MSK 3
28#define PM_SUBUNIT_SH 8 /* Subunit event comes from (NEST_SEL enc.) */
29#define PM_SUBUNIT_MSK 7
30#define PM_SUBUNIT_MSKS (PM_SUBUNIT_MSK << PM_SUBUNIT_SH)
31#define PM_PMCSEL_MSK 0xff /* PMCxSEL value */
32#define PM_BUSEVENT_MSK 0xf3700
33
34/*
35 * Bits in MMCR1 for POWER6
36 */
37#define MMCR1_TTM0SEL_SH 60
38#define MMCR1_TTMSEL_SH(n) (MMCR1_TTM0SEL_SH - (n) * 4)
39#define MMCR1_TTMSEL_MSK 0xf
40#define MMCR1_TTMSEL(m, n) (((m) >> MMCR1_TTMSEL_SH(n)) & MMCR1_TTMSEL_MSK)
41#define MMCR1_NESTSEL_SH 45
42#define MMCR1_NESTSEL_MSK 0x7
43#define MMCR1_NESTSEL(m) (((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK)
44#define MMCR1_PMC1_LLA ((u64)1 << 44)
45#define MMCR1_PMC1_LLA_VALUE ((u64)1 << 39)
46#define MMCR1_PMC1_ADDR_SEL ((u64)1 << 35)
47#define MMCR1_PMC1SEL_SH 24
48#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
49#define MMCR1_PMCSEL_MSK 0xff
50
51/*
52 * Map of which direct events on which PMCs are marked instruction events.
53 * Indexed by PMCSEL value >> 1.
54 * Bottom 4 bits are a map of which PMCs are interesting,
55 * top 4 bits say what sort of event:
56 * 0 = direct marked event,
57 * 1 = byte decode event,
58 * 4 = add/and event (PMC1 -> bits 0 & 4),
59 * 5 = add/and event (PMC1 -> bits 1 & 5),
60 * 6 = add/and event (PMC1 -> bits 2 & 6),
61 * 7 = add/and event (PMC1 -> bits 3 & 7).
62 */
63static unsigned char direct_event_is_marked[0x60 >> 1] = {
64 0, /* 00 */
65 0, /* 02 */
66 0, /* 04 */
67 0x07, /* 06 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
68 0x04, /* 08 PM_MRK_DFU_FIN */
69 0x06, /* 0a PM_MRK_IFU_FIN, PM_MRK_INST_FIN */
70 0, /* 0c */
71 0, /* 0e */
72 0x02, /* 10 PM_MRK_INST_DISP */
73 0x08, /* 12 PM_MRK_LSU_DERAT_MISS */
74 0, /* 14 */
75 0, /* 16 */
76 0x0c, /* 18 PM_THRESH_TIMEO, PM_MRK_INST_FIN */
77 0x0f, /* 1a PM_MRK_INST_DISP, PM_MRK_{FXU,FPU,LSU}_FIN */
78 0x01, /* 1c PM_MRK_INST_ISSUED */
79 0, /* 1e */
80 0, /* 20 */
81 0, /* 22 */
82 0, /* 24 */
83 0, /* 26 */
84 0x15, /* 28 PM_MRK_DATA_FROM_L2MISS, PM_MRK_DATA_FROM_L3MISS */
85 0, /* 2a */
86 0, /* 2c */
87 0, /* 2e */
88 0x4f, /* 30 */
89 0x7f, /* 32 */
90 0x4f, /* 34 */
91 0x5f, /* 36 */
92 0x6f, /* 38 */
93 0x4f, /* 3a */
94 0, /* 3c */
95 0x08, /* 3e PM_MRK_INST_TIMEO */
96 0x1f, /* 40 */
97 0x1f, /* 42 */
98 0x1f, /* 44 */
99 0x1f, /* 46 */
100 0x1f, /* 48 */
101 0x1f, /* 4a */
102 0x1f, /* 4c */
103 0x1f, /* 4e */
104 0, /* 50 */
105 0x05, /* 52 PM_MRK_BR_TAKEN, PM_MRK_BR_MPRED */
106 0x1c, /* 54 PM_MRK_PTEG_FROM_L3MISS, PM_MRK_PTEG_FROM_L2MISS */
107 0x02, /* 56 PM_MRK_LD_MISS_L1 */
108 0, /* 58 */
109 0, /* 5a */
110 0, /* 5c */
111 0, /* 5e */
112};
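/*
 * Worked example (illustrative only): the entry for PMCSEL 0x06 above is
 * 0x07: the low nibble says PMC1-3 are the interesting counters and the
 * high nibble 0 says "direct marked event", so power6_marked_instr_event()
 * below returns 1 for such an event without consulting the bus-event
 * masks. The helper is hypothetical.
 */
static inline int p6_marked_table_example(void)
{
        unsigned char ent = direct_event_is_marked[0x06 >> 1];  /* 0x07 */

        return (ent & 0xf) == 0x7 && (ent >> 4) == 0;   /* 1 */
}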
113
114/*
115 * Masks showing for each unit which bits are marked events.
116 * These masks are in LE order, i.e. 0x00000001 is byte 0, bit 0.
117 */
118static u32 marked_bus_events[16] = {
119 0x01000000, /* direct events set 1: byte 3 bit 0 */
120 0x00010000, /* direct events set 2: byte 2 bit 0 */
121 0, 0, 0, 0, /* IDU, IFU, nest: nothing */
122 0x00000088, /* VMX set 1: byte 0 bits 3, 7 */
123 0x000000c0, /* VMX set 2: byte 0 bits 4-7 */
124 0x04010000, /* LSU set 1: byte 2 bit 0, byte 3 bit 2 */
125 0xff010000u, /* LSU set 2: byte 2 bit 0, all of byte 3 */
126 0, /* LSU set 3 */
127 0x00000010, /* VMX set 3: byte 0 bit 4 */
128 0, /* BFP set 1 */
129 0x00000022, /* BFP set 2: byte 0 bits 1, 5 */
130 0, 0
131};
132
133/*
134 * Returns 1 if event counts things relating to marked instructions
135 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
136 */
137static int power6_marked_instr_event(u64 event)
138{
139 int pmc, psel, ptype;
140 int bit, byte, unit;
141 u32 mask;
142
143 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
144 psel = (event & PM_PMCSEL_MSK) >> 1; /* drop edge/level bit */
145 if (pmc >= 5)
146 return 0;
147
148 bit = -1;
149 if (psel < sizeof(direct_event_is_marked)) {
150 ptype = direct_event_is_marked[psel];
151 if (pmc == 0 || !(ptype & (1 << (pmc - 1))))
152 return 0;
153 ptype >>= 4;
154 if (ptype == 0)
155 return 1;
156 if (ptype == 1)
157 bit = 0;
158 else
159 bit = ptype ^ (pmc - 1);
160 } else if ((psel & 0x48) == 0x40)
161 bit = psel & 7;
162
163 if (!(event & PM_BUSEVENT_MSK) || bit == -1)
164 return 0;
165
166 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
167 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
168 mask = marked_bus_events[unit];
169 return (mask >> (byte * 8 + bit)) & 1;
170}
171
172/*
173 * Assign PMC numbers and compute MMCR1 value for a set of events
174 */
175static int p6_compute_mmcr(u64 event[], int n_ev,
176 unsigned int hwc[], u64 mmcr[])
177{
178 u64 mmcr1 = 0;
179 u64 mmcra = 0;
180 int i;
181 unsigned int pmc, ev, b, u, s, psel;
182 unsigned int ttmset = 0;
183 unsigned int pmc_inuse = 0;
184
185 if (n_ev > 6)
186 return -1;
187 for (i = 0; i < n_ev; ++i) {
188 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
189 if (pmc) {
190 if (pmc_inuse & (1 << (pmc - 1)))
191 return -1; /* collision! */
192 pmc_inuse |= 1 << (pmc - 1);
193 }
194 }
195 for (i = 0; i < n_ev; ++i) {
196 ev = event[i];
197 pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK;
198 if (pmc) {
199 --pmc;
200 } else {
201 /* can go on any PMC; find a free one */
202 for (pmc = 0; pmc < 4; ++pmc)
203 if (!(pmc_inuse & (1 << pmc)))
204 break;
205 if (pmc >= 4)
206 return -1;
207 pmc_inuse |= 1 << pmc;
208 }
209 hwc[i] = pmc;
210 psel = ev & PM_PMCSEL_MSK;
211 if (ev & PM_BUSEVENT_MSK) {
212 /* this event uses the event bus */
213 b = (ev >> PM_BYTE_SH) & PM_BYTE_MSK;
214 u = (ev >> PM_UNIT_SH) & PM_UNIT_MSK;
215 /* check for conflict on this byte of event bus */
216 if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u)
217 return -1;
218 mmcr1 |= (u64)u << MMCR1_TTMSEL_SH(b);
219 ttmset |= 1 << b;
220 if (u == 5) {
221 /* Nest events have a further mux */
222 s = (ev >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
223 if ((ttmset & 0x10) &&
224 MMCR1_NESTSEL(mmcr1) != s)
225 return -1;
226 ttmset |= 0x10;
227 mmcr1 |= (u64)s << MMCR1_NESTSEL_SH;
228 }
229 if (0x30 <= psel && psel <= 0x3d) {
230 /* these need the PMCx_ADDR_SEL bits */
231 if (b >= 2)
232 mmcr1 |= MMCR1_PMC1_ADDR_SEL >> pmc;
233 }
234 /* bus select values are different for PMC3/4 */
235 if (pmc >= 2 && (psel & 0x90) == 0x80)
236 psel ^= 0x20;
237 }
238 if (ev & PM_LLA) {
239 mmcr1 |= MMCR1_PMC1_LLA >> pmc;
240 if (ev & PM_LLAV)
241 mmcr1 |= MMCR1_PMC1_LLA_VALUE >> pmc;
242 }
243 if (power6_marked_instr_event(event[i]))
244 mmcra |= MMCRA_SAMPLE_ENABLE;
245 if (pmc < 4)
246 mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc);
247 }
248 mmcr[0] = 0;
249 if (pmc_inuse & 1)
250 mmcr[0] = MMCR0_PMC1CE;
251 if (pmc_inuse & 0xe)
252 mmcr[0] |= MMCR0_PMCjCE;
253 mmcr[1] = mmcr1;
254 mmcr[2] = mmcra;
255 return 0;
256}
257
258/*
259 * Layout of constraint bits:
260 *
261 * 0-1 add field: number of uses of PMC1 (max 1)
262 * 2-3, 4-5, 6-7, 8-9, 10-11: ditto for PMC2, 3, 4, 5, 6
263 * 12-15 add field: number of uses of PMC1-4 (max 4)
264 * 16-19 select field: unit on byte 0 of event bus
265 * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3
266 * 32-34 select field: nest (subunit) event selector
267 */
268static int p6_get_constraint(u64 event, u64 *maskp, u64 *valp)
269{
270 int pmc, byte, sh, subunit;
271 u64 mask = 0, value = 0;
272
273 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
274 if (pmc) {
275 if (pmc > 4 && !(event == 0x500009 || event == 0x600005))
276 return -1;
277 sh = (pmc - 1) * 2;
278 mask |= 2 << sh;
279 value |= 1 << sh;
280 }
281 if (event & PM_BUSEVENT_MSK) {
282 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
283 sh = byte * 4 + (16 - PM_UNIT_SH);
284 mask |= PM_UNIT_MSKS << sh;
285 value |= (u64)(event & PM_UNIT_MSKS) << sh;
286 if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) {
287 subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
288 mask |= (u64)PM_SUBUNIT_MSK << 32;
289 value |= (u64)subunit << 32;
290 }
291 }
292 if (pmc <= 4) {
293 mask |= 0x8000; /* add field for count of PMC1-4 uses */
294 value |= 0x1000;
295 }
296 *maskp = mask;
297 *valp = value;
298 return 0;
299}
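These mask/value pairs are consumed by the generic powerpc scheduling code. The idea, in a deliberately simplified sketch (not the exact in-kernel algorithm; the function name is invented): every event pinned to PMC1 adds 1 to the two-bit field at bits 0-1 while its mask covers only bit 1, so as soon as two such events are summed the carry lands on a masked bit and the combination is rejected.

	static int example_pmc1_overcommit(void)
	{
		u64 sum = 0, mask = 0;

		sum += 1;		/* first event pinned to PMC1 (value 1) */
		mask |= 2;		/* ... with mask covering only bit 1 */
		sum += 1;		/* second event pinned to PMC1 */

		return (sum & mask) != 0;	/* 1: carry into bit 1 -> conflict */
	}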
300
301static int p6_limited_pmc_event(u64 event)
302{
303 int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
304
305 return pmc == 5 || pmc == 6;
306}
307
308#define MAX_ALT 4 /* at most 4 alternatives for any event */
309
310static const unsigned int event_alternatives[][MAX_ALT] = {
311 { 0x0130e8, 0x2000f6, 0x3000fc }, /* PM_PTEG_RELOAD_VALID */
312 { 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */
313 { 0x080088, 0x200054, 0x3000f0 }, /* PM_ST_MISS_L1 */
314 { 0x10000a, 0x2000f4, 0x600005 }, /* PM_RUN_CYC */
315 { 0x10000b, 0x2000f5 }, /* PM_RUN_COUNT */
316 { 0x10000e, 0x400010 }, /* PM_PURR */
317 { 0x100010, 0x4000f8 }, /* PM_FLUSH */
318 { 0x10001a, 0x200010 }, /* PM_MRK_INST_DISP */
319 { 0x100026, 0x3000f8 }, /* PM_TB_BIT_TRANS */
320 { 0x100054, 0x2000f0 }, /* PM_ST_FIN */
321 { 0x100056, 0x2000fc }, /* PM_L1_ICACHE_MISS */
322 { 0x1000f0, 0x40000a }, /* PM_INST_IMC_MATCH_CMPL */
323 { 0x1000f8, 0x200008 }, /* PM_GCT_EMPTY_CYC */
324 { 0x1000fc, 0x400006 }, /* PM_LSU_DERAT_MISS_CYC */
325 { 0x20000e, 0x400007 }, /* PM_LSU_DERAT_MISS */
326 { 0x200012, 0x300012 }, /* PM_INST_DISP */
327 { 0x2000f2, 0x3000f2 }, /* PM_INST_DISP */
328 { 0x2000f8, 0x300010 }, /* PM_EXT_INT */
329 { 0x2000fe, 0x300056 }, /* PM_DATA_FROM_L2MISS */
330 { 0x2d0030, 0x30001a }, /* PM_MRK_FPU_FIN */
331 { 0x30000a, 0x400018 }, /* PM_MRK_INST_FIN */
332 { 0x3000f6, 0x40000e }, /* PM_L1_DCACHE_RELOAD_VALID */
333 { 0x3000fe, 0x400056 }, /* PM_DATA_FROM_L3MISS */
334};
335
336/*
337 * This could be made more efficient with a binary search on
338 * a presorted list, if necessary
339 */
340static int find_alternatives_list(u64 event)
341{
342 int i, j;
343 unsigned int alt;
344
345 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
346 if (event < event_alternatives[i][0])
347 return -1;
348 for (j = 0; j < MAX_ALT; ++j) {
349 alt = event_alternatives[i][j];
350 if (!alt || event < alt)
351 break;
352 if (event == alt)
353 return i;
354 }
355 }
356 return -1;
357}
358
359static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[])
360{
361 int i, j, nlim;
362 unsigned int psel, pmc;
363 unsigned int nalt = 1;
364 u64 aevent;
365
366 alt[0] = event;
367 nlim = p6_limited_pmc_event(event);
368
369 /* check the alternatives table */
370 i = find_alternatives_list(event);
371 if (i >= 0) {
372 /* copy out alternatives from list */
373 for (j = 0; j < MAX_ALT; ++j) {
374 aevent = event_alternatives[i][j];
375 if (!aevent)
376 break;
377 if (aevent != event)
378 alt[nalt++] = aevent;
379 nlim += p6_limited_pmc_event(aevent);
380 }
381
382 } else {
383 /* Check for alternative ways of computing sum events */
384 /* PMCSEL 0x32 counter N == PMCSEL 0x34 counter 5-N */
385 psel = event & (PM_PMCSEL_MSK & ~1); /* ignore edge bit */
386 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
387 if (pmc && (psel == 0x32 || psel == 0x34))
388 alt[nalt++] = ((event ^ 0x6) & ~PM_PMC_MSKS) |
389 ((5 - pmc) << PM_PMC_SH);
390
391 /* PMCSEL 0x38 counter N == PMCSEL 0x3a counter N+/-2 */
392 if (pmc && (psel == 0x38 || psel == 0x3a))
393 alt[nalt++] = ((event ^ 0x2) & ~PM_PMC_MSKS) |
394 ((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH);
395 }
396
397 if (flags & PPMU_ONLY_COUNT_RUN) {
398 /*
399 * We're only counting in RUN state,
400 * so PM_CYC is equivalent to PM_RUN_CYC,
401 * PM_INST_CMPL === PM_RUN_INST_CMPL, PM_PURR === PM_RUN_PURR.
402 * This doesn't include alternatives that don't provide
403 * any extra flexibility in assigning PMCs (e.g.
404 * 0x10000a for PM_RUN_CYC vs. 0x1e for PM_CYC).
405 * Note that even with these additional alternatives
406 * we never end up with more than 4 alternatives for any event.
407 */
408 j = nalt;
409 for (i = 0; i < nalt; ++i) {
410 switch (alt[i]) {
411 case 0x1e: /* PM_CYC */
412 alt[j++] = 0x600005; /* PM_RUN_CYC */
413 ++nlim;
414 break;
415 case 0x10000a: /* PM_RUN_CYC */
416 alt[j++] = 0x1e; /* PM_CYC */
417 break;
418 case 2: /* PM_INST_CMPL */
419 alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */
420 ++nlim;
421 break;
422 case 0x500009: /* PM_RUN_INST_CMPL */
423 alt[j++] = 2; /* PM_INST_CMPL */
424 break;
425 case 0x10000e: /* PM_PURR */
426 alt[j++] = 0x4000f4; /* PM_RUN_PURR */
427 break;
428 case 0x4000f4: /* PM_RUN_PURR */
429 alt[j++] = 0x10000e; /* PM_PURR */
430 break;
431 }
432 }
433 nalt = j;
434 }
435
436 if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
437 /* remove the limited PMC events */
438 j = 0;
439 for (i = 0; i < nalt; ++i) {
440 if (!p6_limited_pmc_event(alt[i])) {
441 alt[j] = alt[i];
442 ++j;
443 }
444 }
445 nalt = j;
446 } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
447 /* remove all but the limited PMC events */
448 j = 0;
449 for (i = 0; i < nalt; ++i) {
450 if (p6_limited_pmc_event(alt[i])) {
451 alt[j] = alt[i];
452 ++j;
453 }
454 }
455 nalt = j;
456 }
457
458 return nalt;
459}
460
461static void p6_disable_pmc(unsigned int pmc, u64 mmcr[])
462{
463 /* Set PMCxSEL to 0 to disable PMCx */
464 if (pmc <= 3)
465 mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
466}
467
468static int power6_generic_events[] = {
469 [PERF_COUNT_HW_CPU_CYCLES] = 0x1e,
470 [PERF_COUNT_HW_INSTRUCTIONS] = 2,
471 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x280030, /* LD_REF_L1 */
472 [PERF_COUNT_HW_CACHE_MISSES] = 0x30000c, /* LD_MISS_L1 */
473 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x410a0, /* BR_PRED */
474 [PERF_COUNT_HW_BRANCH_MISSES] = 0x400052, /* BR_MPRED */
475};
476
477#define C(x) PERF_COUNT_HW_CACHE_##x
478
479/*
480 * Table of generalized cache-related events.
481 * 0 means not supported, -1 means nonsensical, other values
482 * are event codes.
483 * The "DTLB" and "ITLB" events relate to the DERAT and IERAT.
484 */
485static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
486 [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
487 [C(OP_READ)] = { 0x80082, 0x80080 },
488 [C(OP_WRITE)] = { 0x80086, 0x80088 },
489 [C(OP_PREFETCH)] = { 0x810a4, 0 },
490 },
491 [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
492 [C(OP_READ)] = { 0, 0x100056 },
493 [C(OP_WRITE)] = { -1, -1 },
494 [C(OP_PREFETCH)] = { 0x4008c, 0 },
495 },
496 [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
497 [C(OP_READ)] = { 0x150730, 0x250532 },
498 [C(OP_WRITE)] = { 0x250432, 0x150432 },
499 [C(OP_PREFETCH)] = { 0x810a6, 0 },
500 },
501 [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
502 [C(OP_READ)] = { 0, 0x20000e },
503 [C(OP_WRITE)] = { -1, -1 },
504 [C(OP_PREFETCH)] = { -1, -1 },
505 },
506 [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
507 [C(OP_READ)] = { 0, 0x420ce },
508 [C(OP_WRITE)] = { -1, -1 },
509 [C(OP_PREFETCH)] = { -1, -1 },
510 },
511 [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
512 [C(OP_READ)] = { 0x430e6, 0x400052 },
513 [C(OP_WRITE)] = { -1, -1 },
514 [C(OP_PREFETCH)] = { -1, -1 },
515 },
516};
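For reference, a PERF_TYPE_HW_CACHE request reaches this table through attr.config, which packs the cache id, the operation and the result in successive bytes; the helper name and the specific error returns below are only illustrative of how the generic powerpc code resolves such a request against the table:

	static int example_resolve_cache_event(u64 config)
	{
		unsigned int type   = config & 0xff;		/* C(L1D), C(LL), ... */
		unsigned int op     = (config >> 8) & 0xff;	/* C(OP_READ), ... */
		unsigned int result = (config >> 16) & 0xff;	/* C(RESULT_ACCESS) or C(RESULT_MISS) */
		int ev;

		if (type >= C(MAX) || op >= C(OP_MAX) || result >= C(RESULT_MAX))
			return -EINVAL;
		ev = power6_cache_events[type][op][result];
		if (ev == -1)		/* nonsensical on this CPU */
			return -EINVAL;
		if (ev == 0)		/* not supported */
			return -EOPNOTSUPP;
		return ev;		/* raw event code to program */
	}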
517
518struct power_pmu power6_pmu = {
519 .n_counter = 6,
520 .max_alternatives = MAX_ALT,
521 .add_fields = 0x1555,
522 .test_adder = 0x3000,
523 .compute_mmcr = p6_compute_mmcr,
524 .get_constraint = p6_get_constraint,
525 .get_alternatives = p6_get_alternatives,
526 .disable_pmc = p6_disable_pmc,
527 .limited_pmc_event = p6_limited_pmc_event,
528 .flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR,
529 .n_generic = ARRAY_SIZE(power6_generic_events),
530 .generic_events = power6_generic_events,
531 .cache_events = &power6_cache_events,
532};
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
new file mode 100644
index 000000000000..b3f7d1216bae
--- /dev/null
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -0,0 +1,357 @@
1/*
2 * Performance counter support for POWER7 processors.
3 *
4 * Copyright 2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/perf_counter.h>
13#include <asm/reg.h>
14
15/*
16 * Bits in event code for POWER7
17 */
18#define PM_PMC_SH 16 /* PMC number (1-based) for direct events */
19#define PM_PMC_MSK 0xf
20#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
21#define PM_UNIT_SH 12 /* TTMMUX number and setting - unit select */
22#define PM_UNIT_MSK 0xf
23#define PM_COMBINE_SH 11 /* Combined event bit */
24#define PM_COMBINE_MSK 1
25#define PM_COMBINE_MSKS 0x800
26#define PM_L2SEL_SH 8 /* L2 event select */
27#define PM_L2SEL_MSK 7
28#define PM_PMCSEL_MSK 0xff
29
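To make the field layout concrete, a raw event code such as 0x2003c (a made-up value, used only to show the split) decodes as PMC2, no TTM unit, no combine, L2SEL 0 and PMCSEL 0x3c; the function below is a hypothetical sketch of that decode:

	static void example_decode(void)
	{
		u64 ev = 0x2003c;	/* made-up event code, illustration only */
		unsigned int pmc     = (ev >> PM_PMC_SH) & PM_PMC_MSK;		/* 2: pinned to PMC2 */
		unsigned int unit    = (ev >> PM_UNIT_SH) & PM_UNIT_MSK;	/* 0: no TTM unit */
		unsigned int combine = (ev >> PM_COMBINE_SH) & PM_COMBINE_MSK;	/* 0 */
		unsigned int l2sel   = (ev >> PM_L2SEL_SH) & PM_L2SEL_MSK;	/* 0 */
		unsigned int psel    = ev & PM_PMCSEL_MSK;			/* 0x3c */
	}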
30/*
31 * Bits in MMCR1 for POWER7
32 */
33#define MMCR1_TTM0SEL_SH 60
34#define MMCR1_TTM1SEL_SH 56
35#define MMCR1_TTM2SEL_SH 52
36#define MMCR1_TTM3SEL_SH 48
37#define MMCR1_TTMSEL_MSK 0xf
38#define MMCR1_L2SEL_SH 45
39#define MMCR1_L2SEL_MSK 7
40#define MMCR1_PMC1_COMBINE_SH 35
41#define MMCR1_PMC2_COMBINE_SH 34
42#define MMCR1_PMC3_COMBINE_SH 33
43#define MMCR1_PMC4_COMBINE_SH 32
44#define MMCR1_PMC1SEL_SH 24
45#define MMCR1_PMC2SEL_SH 16
46#define MMCR1_PMC3SEL_SH 8
47#define MMCR1_PMC4SEL_SH 0
48#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
49#define MMCR1_PMCSEL_MSK 0xff
50
51/*
52 * Bits in MMCRA
53 */
54
55/*
56 * Layout of constraint bits:
57 * 6666555555555544444444443333333333222222222211111111110000000000
58 * 3210987654321098765432109876543210987654321098765432109876543210
59 * [ ><><><><><><>
60 * NC P6P5P4P3P2P1
61 *
62 * NC - number of counters
63 * 15: NC error 0x8000
64 * 12-14: number of events needing PMC1-4 0x7000
65 *
66 * P6
67 * 11: P6 error 0x800
68 * 10-11: Count of events needing PMC6
69 *
70 * P1..P5
71 * 0-9: Count of events needing PMC1..PMC5
72 */
73
74static int power7_get_constraint(u64 event, u64 *maskp, u64 *valp)
75{
76 int pmc, sh;
77 u64 mask = 0, value = 0;
78
79 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
80 if (pmc) {
81 if (pmc > 6)
82 return -1;
83 sh = (pmc - 1) * 2;
84 mask |= 2 << sh;
85 value |= 1 << sh;
86 if (pmc >= 5 && !(event == 0x500fa || event == 0x600f4))
87 return -1;
88 }
89 if (pmc < 5) {
90 /* need a counter from PMC1-4 set */
91 mask |= 0x8000;
92 value |= 0x1000;
93 }
94 *maskp = mask;
95 *valp = value;
96 return 0;
97}
98
99#define MAX_ALT 2 /* at most 2 alternatives for any event */
100
101static const unsigned int event_alternatives[][MAX_ALT] = {
102 { 0x200f2, 0x300f2 }, /* PM_INST_DISP */
103 { 0x200f4, 0x600f4 }, /* PM_RUN_CYC */
104 { 0x400fa, 0x500fa }, /* PM_RUN_INST_CMPL */
105};
106
107/*
108 * Scan the alternatives table for a match and return the
109 * index into the alternatives table if found, else -1.
110 */
111static int find_alternative(u64 event)
112{
113 int i, j;
114
115 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
116 if (event < event_alternatives[i][0])
117 break;
118 for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
119 if (event == event_alternatives[i][j])
120 return i;
121 }
122 return -1;
123}
124
125static s64 find_alternative_decode(u64 event)
126{
127 int pmc, psel;
128
129 /* this only handles the 4x decode events */
130 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
131 psel = event & PM_PMCSEL_MSK;
132 if ((pmc == 2 || pmc == 4) && (psel & ~7) == 0x40)
133 return event - (1 << PM_PMC_SH) + 8;
134 if ((pmc == 1 || pmc == 3) && (psel & ~7) == 0x48)
135 return event + (1 << PM_PMC_SH) - 8;
136 return -1;
137}
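A concrete instance of the decode-event aliasing above, with made-up event codes chosen only to exercise the two branches (PM_PMC_SH is 16, so the PMC field moves by 0x10000):

	/* PMC2, psel 0x42:  0x20042 - 0x10000 + 8  ==  0x1004a  (PMC1, psel 0x4a) */
	/* PMC1, psel 0x4a:  0x1004a + 0x10000 - 8  ==  0x20042  (PMC2, psel 0x42) */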
138
139static int power7_get_alternatives(u64 event, unsigned int flags, u64 alt[])
140{
141 int i, j, nalt = 1;
142 s64 ae;
143
144 alt[0] = event;
145 nalt = 1;
146 i = find_alternative(event);
147 if (i >= 0) {
148 for (j = 0; j < MAX_ALT; ++j) {
149 ae = event_alternatives[i][j];
150 if (ae && ae != event)
151 alt[nalt++] = ae;
152 }
153 } else {
154 ae = find_alternative_decode(event);
155 if (ae > 0)
156 alt[nalt++] = ae;
157 }
158
159 if (flags & PPMU_ONLY_COUNT_RUN) {
160 /*
161 * We're only counting in RUN state,
162 * so PM_CYC is equivalent to PM_RUN_CYC
163 * and PM_INST_CMPL === PM_RUN_INST_CMPL.
164 * This doesn't include alternatives that don't provide
165 * any extra flexibility in assigning PMCs.
166 */
167 j = nalt;
168 for (i = 0; i < nalt; ++i) {
169 switch (alt[i]) {
170 case 0x1e: /* PM_CYC */
171 alt[j++] = 0x600f4; /* PM_RUN_CYC */
172 break;
173 case 0x600f4: /* PM_RUN_CYC */
174 alt[j++] = 0x1e;
175 break;
176 case 0x2: /* PM_PPC_CMPL */
177 alt[j++] = 0x500fa; /* PM_RUN_INST_CMPL */
178 break;
179 case 0x500fa: /* PM_RUN_INST_CMPL */
180 alt[j++] = 0x2; /* PM_PPC_CMPL */
181 break;
182 }
183 }
184 nalt = j;
185 }
186
187 return nalt;
188}
189
190/*
191 * Returns 1 if event counts things relating to marked instructions
192 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
193 */
194static int power7_marked_instr_event(u64 event)
195{
196 int pmc, psel;
197 int unit;
198
199 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
200 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
201 psel = event & PM_PMCSEL_MSK & ~1; /* trim off edge/level bit */
202 if (pmc >= 5)
203 return 0;
204
205 switch (psel >> 4) {
206 case 2:
207 return pmc == 2 || pmc == 4;
208 case 3:
209 if (psel == 0x3c)
210 return pmc == 1;
211 if (psel == 0x3e)
212 return pmc != 2;
213 return 1;
214 case 4:
215 case 5:
216 return unit == 0xd;
217 case 6:
218 if (psel == 0x64)
219 return pmc >= 3;
220 case 8:
221 return unit == 0xd;
222 }
223 return 0;
224}
225
226static int power7_compute_mmcr(u64 event[], int n_ev,
227 unsigned int hwc[], u64 mmcr[])
228{
229 u64 mmcr1 = 0;
230 u64 mmcra = 0;
231 unsigned int pmc, unit, combine, l2sel, psel;
232 unsigned int pmc_inuse = 0;
233 int i;
234
235 /* First pass to count resource use */
236 for (i = 0; i < n_ev; ++i) {
237 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
238 if (pmc) {
239 if (pmc > 6)
240 return -1;
241 if (pmc_inuse & (1 << (pmc - 1)))
242 return -1;
243 pmc_inuse |= 1 << (pmc - 1);
244 }
245 }
246
247 /* Second pass: assign PMCs, set all MMCR1 fields */
248 for (i = 0; i < n_ev; ++i) {
249 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
250 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
251 combine = (event[i] >> PM_COMBINE_SH) & PM_COMBINE_MSK;
252 l2sel = (event[i] >> PM_L2SEL_SH) & PM_L2SEL_MSK;
253 psel = event[i] & PM_PMCSEL_MSK;
254 if (!pmc) {
255 /* Bus event or any-PMC direct event */
256 for (pmc = 0; pmc < 4; ++pmc) {
257 if (!(pmc_inuse & (1 << pmc)))
258 break;
259 }
260 if (pmc >= 4)
261 return -1;
262 pmc_inuse |= 1 << pmc;
263 } else {
264 /* Direct or decoded event */
265 --pmc;
266 }
267 if (pmc <= 3) {
268 mmcr1 |= (u64) unit << (MMCR1_TTM0SEL_SH - 4 * pmc);
269 mmcr1 |= (u64) combine << (MMCR1_PMC1_COMBINE_SH - pmc);
270 mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
271 if (unit == 6) /* L2 events */
272 mmcr1 |= (u64) l2sel << MMCR1_L2SEL_SH;
273 }
274 if (power7_marked_instr_event(event[i]))
275 mmcra |= MMCRA_SAMPLE_ENABLE;
276 hwc[i] = pmc;
277 }
278
279 /* Return MMCRx values */
280 mmcr[0] = 0;
281 if (pmc_inuse & 1)
282 mmcr[0] = MMCR0_PMC1CE;
283 if (pmc_inuse & 0x3e)
284 mmcr[0] |= MMCR0_PMCjCE;
285 mmcr[1] = mmcr1;
286 mmcr[2] = mmcra;
287 return 0;
288}
289
290static void power7_disable_pmc(unsigned int pmc, u64 mmcr[])
291{
292 if (pmc <= 3)
293 mmcr[1] &= ~(0xffULL << MMCR1_PMCSEL_SH(pmc));
294}
295
296static int power7_generic_events[] = {
 297 [PERF_COUNT_HW_CPU_CYCLES] = 0x1e,
 298 [PERF_COUNT_HW_INSTRUCTIONS] = 2,
 299 [PERF_COUNT_HW_CACHE_REFERENCES] = 0xc880, /* LD_REF_L1_LSU */
 300 [PERF_COUNT_HW_CACHE_MISSES] = 0x400f0, /* LD_MISS_L1 */
 301 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x10068, /* BRU_FIN */
 302 [PERF_COUNT_HW_BRANCH_MISSES] = 0x400f6, /* BR_MPRED */
303};
304
305#define C(x) PERF_COUNT_HW_CACHE_##x
306
307/*
308 * Table of generalized cache-related events.
309 * 0 means not supported, -1 means nonsensical, other values
310 * are event codes.
311 */
312static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
313 [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
314 [C(OP_READ)] = { 0x400f0, 0xc880 },
315 [C(OP_WRITE)] = { 0, 0x300f0 },
316 [C(OP_PREFETCH)] = { 0xd8b8, 0 },
317 },
318 [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
319 [C(OP_READ)] = { 0, 0x200fc },
320 [C(OP_WRITE)] = { -1, -1 },
321 [C(OP_PREFETCH)] = { 0x408a, 0 },
322 },
323 [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
324 [C(OP_READ)] = { 0x6080, 0x6084 },
325 [C(OP_WRITE)] = { 0x6082, 0x6086 },
326 [C(OP_PREFETCH)] = { 0, 0 },
327 },
328 [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
329 [C(OP_READ)] = { 0, 0x300fc },
330 [C(OP_WRITE)] = { -1, -1 },
331 [C(OP_PREFETCH)] = { -1, -1 },
332 },
333 [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
334 [C(OP_READ)] = { 0, 0x400fc },
335 [C(OP_WRITE)] = { -1, -1 },
336 [C(OP_PREFETCH)] = { -1, -1 },
337 },
338 [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
339 [C(OP_READ)] = { 0x10068, 0x400f6 },
340 [C(OP_WRITE)] = { -1, -1 },
341 [C(OP_PREFETCH)] = { -1, -1 },
342 },
343};
344
345struct power_pmu power7_pmu = {
346 .n_counter = 6,
347 .max_alternatives = MAX_ALT + 1,
348 .add_fields = 0x1555ull,
349 .test_adder = 0x3000ull,
350 .compute_mmcr = power7_compute_mmcr,
351 .get_constraint = power7_get_constraint,
352 .get_alternatives = power7_get_alternatives,
353 .disable_pmc = power7_disable_pmc,
354 .n_generic = ARRAY_SIZE(power7_generic_events),
355 .generic_events = power7_generic_events,
356 .cache_events = &power7_cache_events,
357};
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
new file mode 100644
index 000000000000..ba0a357a89f4
--- /dev/null
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -0,0 +1,482 @@
1/*
2 * Performance counter support for PPC970-family processors.
3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/string.h>
12#include <linux/perf_counter.h>
13#include <asm/reg.h>
14
15/*
16 * Bits in event code for PPC970
17 */
18#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */
19#define PM_PMC_MSK 0xf
20#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */
21#define PM_UNIT_MSK 0xf
22#define PM_SPCSEL_SH 6
23#define PM_SPCSEL_MSK 3
24#define PM_BYTE_SH 4 /* Byte number of event bus to use */
25#define PM_BYTE_MSK 3
26#define PM_PMCSEL_MSK 0xf
27
28/* Values in PM_UNIT field */
29#define PM_NONE 0
30#define PM_FPU 1
31#define PM_VPU 2
32#define PM_ISU 3
33#define PM_IFU 4
34#define PM_IDU 5
35#define PM_STS 6
36#define PM_LSU0 7
37#define PM_LSU1U 8
38#define PM_LSU1L 9
39#define PM_LASTUNIT 9
40
41/*
42 * Bits in MMCR0 for PPC970
43 */
44#define MMCR0_PMC1SEL_SH 8
45#define MMCR0_PMC2SEL_SH 1
46#define MMCR_PMCSEL_MSK 0x1f
47
48/*
49 * Bits in MMCR1 for PPC970
50 */
51#define MMCR1_TTM0SEL_SH 62
52#define MMCR1_TTM1SEL_SH 59
53#define MMCR1_TTM3SEL_SH 53
54#define MMCR1_TTMSEL_MSK 3
55#define MMCR1_TD_CP_DBG0SEL_SH 50
56#define MMCR1_TD_CP_DBG1SEL_SH 48
57#define MMCR1_TD_CP_DBG2SEL_SH 46
58#define MMCR1_TD_CP_DBG3SEL_SH 44
59#define MMCR1_PMC1_ADDER_SEL_SH 39
60#define MMCR1_PMC2_ADDER_SEL_SH 38
61#define MMCR1_PMC6_ADDER_SEL_SH 37
62#define MMCR1_PMC5_ADDER_SEL_SH 36
63#define MMCR1_PMC8_ADDER_SEL_SH 35
64#define MMCR1_PMC7_ADDER_SEL_SH 34
65#define MMCR1_PMC3_ADDER_SEL_SH 33
66#define MMCR1_PMC4_ADDER_SEL_SH 32
67#define MMCR1_PMC3SEL_SH 27
68#define MMCR1_PMC4SEL_SH 22
69#define MMCR1_PMC5SEL_SH 17
70#define MMCR1_PMC6SEL_SH 12
71#define MMCR1_PMC7SEL_SH 7
72#define MMCR1_PMC8SEL_SH 2
73
74static short mmcr1_adder_bits[8] = {
75 MMCR1_PMC1_ADDER_SEL_SH,
76 MMCR1_PMC2_ADDER_SEL_SH,
77 MMCR1_PMC3_ADDER_SEL_SH,
78 MMCR1_PMC4_ADDER_SEL_SH,
79 MMCR1_PMC5_ADDER_SEL_SH,
80 MMCR1_PMC6_ADDER_SEL_SH,
81 MMCR1_PMC7_ADDER_SEL_SH,
82 MMCR1_PMC8_ADDER_SEL_SH
83};
84
85/*
86 * Bits in MMCRA
87 */
88
89/*
90 * Layout of constraint bits:
91 * 6666555555555544444444443333333333222222222211111111110000000000
92 * 3210987654321098765432109876543210987654321098765432109876543210
93 * <><><>[ >[ >[ >< >< >< >< ><><><><><><><><>
94 * SPT0T1 UC PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8
95 *
96 * SP - SPCSEL constraint
97 * 48-49: SPCSEL value 0x3_0000_0000_0000
98 *
99 * T0 - TTM0 constraint
100 * 46-47: TTM0SEL value (0=FPU, 2=IFU, 3=VPU) 0xC000_0000_0000
101 *
102 * T1 - TTM1 constraint
103 * 44-45: TTM1SEL value (0=IDU, 3=STS) 0x3000_0000_0000
104 *
105 * UC - unit constraint: can't have all three of FPU|IFU|VPU, ISU, IDU|STS
106 * 43: UC3 error 0x0800_0000_0000
107 * 42: FPU|IFU|VPU events needed 0x0400_0000_0000
108 * 41: ISU events needed 0x0200_0000_0000
109 * 40: IDU|STS events needed 0x0100_0000_0000
110 *
111 * PS1
112 * 39: PS1 error 0x0080_0000_0000
113 * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
114 *
115 * PS2
116 * 35: PS2 error 0x0008_0000_0000
117 * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
118 *
119 * B0
120 * 28-31: Byte 0 event source 0xf000_0000
121 * Encoding as for the event code
122 *
123 * B1, B2, B3
124 * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
125 *
126 * P1
127 * 15: P1 error 0x8000
128 * 14-15: Count of events needing PMC1
129 *
130 * P2..P8
131 * 0-13: Count of events needing PMC2..PMC8
132 */
133
134static unsigned char direct_marked_event[8] = {
135 (1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
136 (1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
137 (1<<3) | (1<<5), /* PMC3: PM_MRK_ST_CMPL_INT, PM_MRK_VMX_FIN */
138 (1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
139 (1<<4) | (1<<5), /* PMC5: PM_GRP_MRK, PM_MRK_GRP_TIMEO */
140 (1<<3) | (1<<4) | (1<<5),
141 /* PMC6: PM_MRK_ST_STS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
142 (1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
143 (1<<4) /* PMC8: PM_MRK_LSU_FIN */
144};
145
146/*
147 * Returns 1 if event counts things relating to marked instructions
148 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
149 */
150static int p970_marked_instr_event(u64 event)
151{
152 int pmc, psel, unit, byte, bit;
153 unsigned int mask;
154
155 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
156 psel = event & PM_PMCSEL_MSK;
157 if (pmc) {
158 if (direct_marked_event[pmc - 1] & (1 << psel))
159 return 1;
160 if (psel == 0) /* add events */
161 bit = (pmc <= 4)? pmc - 1: 8 - pmc;
162 else if (psel == 7 || psel == 13) /* decode events */
163 bit = 4;
164 else
165 return 0;
166 } else
167 bit = psel;
168
169 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
170 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
171 mask = 0;
172 switch (unit) {
173 case PM_VPU:
 174 mask = 0x4c; break; /* byte 0 bits 2,3,6 */
175 case PM_LSU0:
176 /* byte 2 bits 0,2,3,4,6; all of byte 1 */
 177 mask = 0x085dff00; break;
178 case PM_LSU1L:
179 mask = 0x50 << 24; /* byte 3 bits 4,6 */
180 break;
181 }
182 return (mask >> (byte * 8 + bit)) & 1;
183}
184
185/* Masks and values for using events from the various units */
186static u64 unit_cons[PM_LASTUNIT+1][2] = {
187 [PM_FPU] = { 0xc80000000000ull, 0x040000000000ull },
188 [PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull },
189 [PM_ISU] = { 0x080000000000ull, 0x020000000000ull },
190 [PM_IFU] = { 0xc80000000000ull, 0x840000000000ull },
191 [PM_IDU] = { 0x380000000000ull, 0x010000000000ull },
192 [PM_STS] = { 0x380000000000ull, 0x310000000000ull },
193};
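These masks and values implement the UC ("can't have all three") rule from the layout comment by arithmetic rather than explicit checks: each exclusive class contributes one of bits 40-42, and ppc970_pmu's test_adder below contributes a further 1 << 40, so scheduling events from all three classes carries into bit 43, the UC3 error bit that every unit's mask covers. A rough sketch of that arithmetic (illustrative only, not the scheduler's actual loop):

	u64 needed;

	needed  = 0x040000000000ull;	/* FPU|IFU|VPU events present (bit 42) */
	needed += 0x020000000000ull;	/* ISU events present (bit 41) */
	needed += 0x010000000000ull;	/* IDU|STS events present (bit 40) */
	needed += 0x010000000000ull;	/* UC term from test_adder (0x013300000000) */
	/* needed == 0x080000000000: bit 43 (UC3 error) is now set and collides
	 * with every unit's mask above, so the mix is rejected; with only two of
	 * the three classes the sum stays below bit 43. */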
194
195static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp)
196{
197 int pmc, byte, unit, sh, spcsel;
198 u64 mask = 0, value = 0;
199 int grp = -1;
200
201 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
202 if (pmc) {
203 if (pmc > 8)
204 return -1;
205 sh = (pmc - 1) * 2;
206 mask |= 2 << sh;
207 value |= 1 << sh;
208 grp = ((pmc - 1) >> 1) & 1;
209 }
210 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
211 if (unit) {
212 if (unit > PM_LASTUNIT)
213 return -1;
214 mask |= unit_cons[unit][0];
215 value |= unit_cons[unit][1];
216 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
217 /*
218 * Bus events on bytes 0 and 2 can be counted
219 * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
220 */
221 if (!pmc)
222 grp = byte & 1;
223 /* Set byte lane select field */
224 mask |= 0xfULL << (28 - 4 * byte);
225 value |= (u64)unit << (28 - 4 * byte);
226 }
227 if (grp == 0) {
228 /* increment PMC1/2/5/6 field */
229 mask |= 0x8000000000ull;
230 value |= 0x1000000000ull;
231 } else if (grp == 1) {
232 /* increment PMC3/4/7/8 field */
233 mask |= 0x800000000ull;
234 value |= 0x100000000ull;
235 }
236 spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
237 if (spcsel) {
238 mask |= 3ull << 48;
239 value |= (u64)spcsel << 48;
240 }
241 *maskp = mask;
242 *valp = value;
243 return 0;
244}
245
246static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[])
247{
248 alt[0] = event;
249
250 /* 2 alternatives for LSU empty */
251 if (event == 0x2002 || event == 0x3002) {
252 alt[1] = event ^ 0x1000;
253 return 2;
254 }
255
256 return 1;
257}
258
259static int p970_compute_mmcr(u64 event[], int n_ev,
260 unsigned int hwc[], u64 mmcr[])
261{
262 u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0;
263 unsigned int pmc, unit, byte, psel;
264 unsigned int ttm, grp;
265 unsigned int pmc_inuse = 0;
266 unsigned int pmc_grp_use[2];
267 unsigned char busbyte[4];
268 unsigned char unituse[16];
269 unsigned char unitmap[] = { 0, 0<<3, 3<<3, 1<<3, 2<<3, 0|4, 3|4 };
270 unsigned char ttmuse[2];
271 unsigned char pmcsel[8];
272 int i;
273 int spcsel;
274
275 if (n_ev > 8)
276 return -1;
277
278 /* First pass to count resource use */
279 pmc_grp_use[0] = pmc_grp_use[1] = 0;
280 memset(busbyte, 0, sizeof(busbyte));
281 memset(unituse, 0, sizeof(unituse));
282 for (i = 0; i < n_ev; ++i) {
283 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
284 if (pmc) {
285 if (pmc_inuse & (1 << (pmc - 1)))
286 return -1;
287 pmc_inuse |= 1 << (pmc - 1);
288 /* count 1/2/5/6 vs 3/4/7/8 use */
289 ++pmc_grp_use[((pmc - 1) >> 1) & 1];
290 }
291 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
292 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
293 if (unit) {
294 if (unit > PM_LASTUNIT)
295 return -1;
296 if (!pmc)
297 ++pmc_grp_use[byte & 1];
298 if (busbyte[byte] && busbyte[byte] != unit)
299 return -1;
300 busbyte[byte] = unit;
301 unituse[unit] = 1;
302 }
303 }
304 if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
305 return -1;
306
307 /*
308 * Assign resources and set multiplexer selects.
309 *
310 * PM_ISU can go either on TTM0 or TTM1, but that's the only
311 * choice we have to deal with.
312 */
313 if (unituse[PM_ISU] &
314 (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_VPU]))
315 unitmap[PM_ISU] = 2 | 4; /* move ISU to TTM1 */
316 /* Set TTM[01]SEL fields. */
317 ttmuse[0] = ttmuse[1] = 0;
318 for (i = PM_FPU; i <= PM_STS; ++i) {
319 if (!unituse[i])
320 continue;
321 ttm = unitmap[i];
322 ++ttmuse[(ttm >> 2) & 1];
323 mmcr1 |= (u64)(ttm & ~4) << MMCR1_TTM1SEL_SH;
324 }
325 /* Check only one unit per TTMx */
326 if (ttmuse[0] > 1 || ttmuse[1] > 1)
327 return -1;
328
329 /* Set byte lane select fields and TTM3SEL. */
330 for (byte = 0; byte < 4; ++byte) {
331 unit = busbyte[byte];
332 if (!unit)
333 continue;
334 if (unit <= PM_STS)
335 ttm = (unitmap[unit] >> 2) & 1;
336 else if (unit == PM_LSU0)
337 ttm = 2;
338 else {
339 ttm = 3;
340 if (unit == PM_LSU1L && byte >= 2)
341 mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
342 }
343 mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
344 }
345
346 /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
347 memset(pmcsel, 0x8, sizeof(pmcsel)); /* 8 means don't count */
348 for (i = 0; i < n_ev; ++i) {
349 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
350 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
351 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
352 psel = event[i] & PM_PMCSEL_MSK;
353 if (!pmc) {
354 /* Bus event or any-PMC direct event */
355 if (unit)
356 psel |= 0x10 | ((byte & 2) << 2);
357 else
358 psel |= 8;
359 for (pmc = 0; pmc < 8; ++pmc) {
360 if (pmc_inuse & (1 << pmc))
361 continue;
362 grp = (pmc >> 1) & 1;
363 if (unit) {
364 if (grp == (byte & 1))
365 break;
366 } else if (pmc_grp_use[grp] < 4) {
367 ++pmc_grp_use[grp];
368 break;
369 }
370 }
371 pmc_inuse |= 1 << pmc;
372 } else {
373 /* Direct event */
374 --pmc;
375 if (psel == 0 && (byte & 2))
376 /* add events on higher-numbered bus */
377 mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
378 }
379 pmcsel[pmc] = psel;
380 hwc[i] = pmc;
381 spcsel = (event[i] >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
382 mmcr1 |= spcsel;
383 if (p970_marked_instr_event(event[i]))
384 mmcra |= MMCRA_SAMPLE_ENABLE;
385 }
386 for (pmc = 0; pmc < 2; ++pmc)
387 mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc);
388 for (; pmc < 8; ++pmc)
389 mmcr1 |= (u64)pmcsel[pmc] << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
390 if (pmc_inuse & 1)
391 mmcr0 |= MMCR0_PMC1CE;
392 if (pmc_inuse & 0xfe)
393 mmcr0 |= MMCR0_PMCjCE;
394
395 mmcra |= 0x2000; /* mark only one IOP per PPC instruction */
396
397 /* Return MMCRx values */
398 mmcr[0] = mmcr0;
399 mmcr[1] = mmcr1;
400 mmcr[2] = mmcra;
401 return 0;
402}
403
404static void p970_disable_pmc(unsigned int pmc, u64 mmcr[])
405{
406 int shift, i;
407
408 if (pmc <= 1) {
409 shift = MMCR0_PMC1SEL_SH - 7 * pmc;
410 i = 0;
411 } else {
412 shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2);
413 i = 1;
414 }
415 /*
416 * Setting the PMCxSEL field to 0x08 disables PMC x.
417 */
418 mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift);
419}
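For example, derived only from the shift definitions above:

	/* pmc == 4 (PMC5): shift = MMCR1_PMC3SEL_SH - 5 * (4 - 2) = 27 - 10 = 17 == MMCR1_PMC5SEL_SH */
	/* pmc == 1 (PMC2): shift = MMCR0_PMC1SEL_SH - 7 * 1       =  8 -  7 =  1 == MMCR0_PMC2SEL_SH */

In both cases the selected field is rewritten to the "don't count" selector 0x08.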
420
421static int ppc970_generic_events[] = {
422 [PERF_COUNT_HW_CPU_CYCLES] = 7,
423 [PERF_COUNT_HW_INSTRUCTIONS] = 1,
424 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x8810, /* PM_LD_REF_L1 */
425 [PERF_COUNT_HW_CACHE_MISSES] = 0x3810, /* PM_LD_MISS_L1 */
426 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x431, /* PM_BR_ISSUED */
427 [PERF_COUNT_HW_BRANCH_MISSES] = 0x327, /* PM_GRP_BR_MPRED */
428};
429
430#define C(x) PERF_COUNT_HW_CACHE_##x
431
432/*
433 * Table of generalized cache-related events.
434 * 0 means not supported, -1 means nonsensical, other values
435 * are event codes.
436 */
437static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
438 [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
439 [C(OP_READ)] = { 0x8810, 0x3810 },
440 [C(OP_WRITE)] = { 0x7810, 0x813 },
441 [C(OP_PREFETCH)] = { 0x731, 0 },
442 },
443 [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
444 [C(OP_READ)] = { 0, 0 },
445 [C(OP_WRITE)] = { -1, -1 },
446 [C(OP_PREFETCH)] = { 0, 0 },
447 },
448 [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
449 [C(OP_READ)] = { 0, 0 },
450 [C(OP_WRITE)] = { 0, 0 },
451 [C(OP_PREFETCH)] = { 0x733, 0 },
452 },
453 [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
454 [C(OP_READ)] = { 0, 0x704 },
455 [C(OP_WRITE)] = { -1, -1 },
456 [C(OP_PREFETCH)] = { -1, -1 },
457 },
458 [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
459 [C(OP_READ)] = { 0, 0x700 },
460 [C(OP_WRITE)] = { -1, -1 },
461 [C(OP_PREFETCH)] = { -1, -1 },
462 },
463 [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
464 [C(OP_READ)] = { 0x431, 0x327 },
465 [C(OP_WRITE)] = { -1, -1 },
466 [C(OP_PREFETCH)] = { -1, -1 },
467 },
468};
469
470struct power_pmu ppc970_pmu = {
471 .n_counter = 8,
472 .max_alternatives = 2,
473 .add_fields = 0x001100005555ull,
474 .test_adder = 0x013300000000ull,
475 .compute_mmcr = p970_compute_mmcr,
476 .get_constraint = p970_get_constraint,
477 .get_alternatives = p970_get_alternatives,
478 .disable_pmc = p970_disable_pmc,
479 .n_generic = ARRAY_SIZE(ppc970_generic_events),
480 .generic_events = ppc970_generic_events,
481 .cache_events = &ppc970_cache_events,
482};
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 76993941cac9..5beffc8f481e 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -29,6 +29,7 @@
29#include <linux/module.h> 29#include <linux/module.h>
30#include <linux/kprobes.h> 30#include <linux/kprobes.h>
31#include <linux/kdebug.h> 31#include <linux/kdebug.h>
32#include <linux/perf_counter.h>
32 33
33#include <asm/firmware.h> 34#include <asm/firmware.h>
34#include <asm/page.h> 35#include <asm/page.h>
@@ -170,6 +171,8 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
170 die("Weird page fault", regs, SIGSEGV); 171 die("Weird page fault", regs, SIGSEGV);
171 } 172 }
172 173
174 perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
175
173 /* When running in the kernel we expect faults to occur only to 176 /* When running in the kernel we expect faults to occur only to
174 * addresses in user space. All other faults represent errors in the 177 * addresses in user space. All other faults represent errors in the
175 * kernel and should generate an OOPS. Unfortunately, in the case of an 178 * kernel and should generate an OOPS. Unfortunately, in the case of an
@@ -309,6 +312,8 @@ good_area:
309 } 312 }
310 if (ret & VM_FAULT_MAJOR) { 313 if (ret & VM_FAULT_MAJOR) {
311 current->maj_flt++; 314 current->maj_flt++;
315 perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
316 regs, address);
312#ifdef CONFIG_PPC_SMLPAR 317#ifdef CONFIG_PPC_SMLPAR
313 if (firmware_has_feature(FW_FEATURE_CMO)) { 318 if (firmware_has_feature(FW_FEATURE_CMO)) {
314 preempt_disable(); 319 preempt_disable();
@@ -316,8 +321,11 @@ good_area:
316 preempt_enable(); 321 preempt_enable();
317 } 322 }
318#endif 323#endif
319 } else 324 } else {
320 current->min_flt++; 325 current->min_flt++;
326 perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
327 regs, address);
328 }
321 up_read(&mm->mmap_sem); 329 up_read(&mm->mmap_sem);
322 return 0; 330 return 0;
323 331
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 9da795e49337..732ee93a8e98 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -1,6 +1,7 @@
1config PPC64 1config PPC64
2 bool "64-bit kernel" 2 bool "64-bit kernel"
3 default n 3 default n
4 select HAVE_PERF_COUNTERS
4 help 5 help
5 This option selects whether a 32-bit or a 64-bit kernel 6 This option selects whether a 32-bit or a 64-bit kernel
6 will be built. 7 will be built.
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index aafae3b140de..68f5578fe38e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -739,6 +739,7 @@ config X86_UP_IOAPIC
739config X86_LOCAL_APIC 739config X86_LOCAL_APIC
740 def_bool y 740 def_bool y
741 depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC 741 depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
742 select HAVE_PERF_COUNTERS if (!M386 && !M486)
742 743
743config X86_IO_APIC 744config X86_IO_APIC
744 def_bool y 745 def_bool y
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index dcef387ddc36..e590261ba059 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -825,10 +825,11 @@ ia32_sys_call_table:
825 .quad compat_sys_signalfd4 825 .quad compat_sys_signalfd4
826 .quad sys_eventfd2 826 .quad sys_eventfd2
827 .quad sys_epoll_create1 827 .quad sys_epoll_create1
828 .quad sys_dup3 /* 330 */ 828 .quad sys_dup3 /* 330 */
829 .quad sys_pipe2 829 .quad sys_pipe2
830 .quad sys_inotify_init1 830 .quad sys_inotify_init1
831 .quad compat_sys_preadv 831 .quad compat_sys_preadv
832 .quad compat_sys_pwritev 832 .quad compat_sys_pwritev
833 .quad compat_sys_rt_tgsigqueueinfo /* 335 */ 833 .quad compat_sys_rt_tgsigqueueinfo /* 335 */
834 .quad sys_perf_counter_open
834ia32_syscall_end: 835ia32_syscall_end:
diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
index 85b46fba4229..aff9f1fcdcd7 100644
--- a/arch/x86/include/asm/atomic_32.h
+++ b/arch/x86/include/asm/atomic_32.h
@@ -247,5 +247,241 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
247#define smp_mb__before_atomic_inc() barrier() 247#define smp_mb__before_atomic_inc() barrier()
248#define smp_mb__after_atomic_inc() barrier() 248#define smp_mb__after_atomic_inc() barrier()
249 249
 250/* A 64-bit atomic type */
251
252typedef struct {
253 unsigned long long counter;
254} atomic64_t;
255
256#define ATOMIC64_INIT(val) { (val) }
257
258/**
259 * atomic64_read - read atomic64 variable
260 * @v: pointer of type atomic64_t
261 *
262 * Atomically reads the value of @v.
263 * Doesn't imply a read memory barrier.
264 */
265#define __atomic64_read(ptr) ((ptr)->counter)
266
267static inline unsigned long long
268cmpxchg8b(unsigned long long *ptr, unsigned long long old, unsigned long long new)
269{
270 asm volatile(
271
272 LOCK_PREFIX "cmpxchg8b (%[ptr])\n"
273
274 : "=A" (old)
275
276 : [ptr] "D" (ptr),
277 "A" (old),
278 "b" (ll_low(new)),
279 "c" (ll_high(new))
280
281 : "memory");
282
283 return old;
284}
285
286static inline unsigned long long
287atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val,
288 unsigned long long new_val)
289{
290 return cmpxchg8b(&ptr->counter, old_val, new_val);
291}
292
293/**
294 * atomic64_xchg - xchg atomic64 variable
295 * @ptr: pointer to type atomic64_t
296 * @new_val: value to assign
298 *
299 * Atomically xchgs the value of @ptr to @new_val and returns
300 * the old value.
301 */
302
303static inline unsigned long long
304atomic64_xchg(atomic64_t *ptr, unsigned long long new_val)
305{
306 unsigned long long old_val;
307
308 do {
 309 old_val = __atomic64_read(ptr);
310 } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val);
311
312 return old_val;
313}
314
315/**
316 * atomic64_set - set atomic64 variable
317 * @ptr: pointer to type atomic64_t
318 * @new_val: value to assign
319 *
320 * Atomically sets the value of @ptr to @new_val.
321 */
322static inline void atomic64_set(atomic64_t *ptr, unsigned long long new_val)
323{
324 atomic64_xchg(ptr, new_val);
325}
326
327/**
328 * atomic64_read - read atomic64 variable
329 * @ptr: pointer to type atomic64_t
330 *
331 * Atomically reads the value of @ptr and returns it.
332 */
333static inline unsigned long long atomic64_read(atomic64_t *ptr)
334{
335 unsigned long long curr_val;
336
337 do {
338 curr_val = __atomic64_read(ptr);
339 } while (atomic64_cmpxchg(ptr, curr_val, curr_val) != curr_val);
340
341 return curr_val;
342}
343
344/**
345 * atomic64_add_return - add and return
346 * @delta: integer value to add
347 * @ptr: pointer to type atomic64_t
348 *
349 * Atomically adds @delta to @ptr and returns @delta + *@ptr
350 */
351static inline unsigned long long
352atomic64_add_return(unsigned long long delta, atomic64_t *ptr)
353{
354 unsigned long long old_val, new_val;
355
356 do {
 357 old_val = __atomic64_read(ptr);
358 new_val = old_val + delta;
359
360 } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val);
361
362 return new_val;
363}
364
 365static inline unsigned long long atomic64_sub_return(unsigned long long delta, atomic64_t *ptr)
366{
367 return atomic64_add_return(-delta, ptr);
368}
369
 370static inline unsigned long long atomic64_inc_return(atomic64_t *ptr)
371{
372 return atomic64_add_return(1, ptr);
373}
374
 375static inline unsigned long long atomic64_dec_return(atomic64_t *ptr)
376{
377 return atomic64_sub_return(1, ptr);
378}
379
380/**
381 * atomic64_add - add integer to atomic64 variable
382 * @delta: integer value to add
383 * @ptr: pointer to type atomic64_t
384 *
385 * Atomically adds @delta to @ptr.
386 */
387static inline void atomic64_add(unsigned long long delta, atomic64_t *ptr)
388{
389 atomic64_add_return(delta, ptr);
390}
391
392/**
393 * atomic64_sub - subtract the atomic64 variable
394 * @delta: integer value to subtract
395 * @ptr: pointer to type atomic64_t
396 *
397 * Atomically subtracts @delta from @ptr.
398 */
399static inline void atomic64_sub(unsigned long long delta, atomic64_t *ptr)
400{
401 atomic64_add(-delta, ptr);
402}
403
404/**
405 * atomic64_sub_and_test - subtract value from variable and test result
406 * @delta: integer value to subtract
407 * @ptr: pointer to type atomic64_t
408 *
409 * Atomically subtracts @delta from @ptr and returns
410 * true if the result is zero, or false for all
411 * other cases.
412 */
413static inline int
414atomic64_sub_and_test(unsigned long long delta, atomic64_t *ptr)
415{
416 unsigned long long old_val = atomic64_sub_return(delta, ptr);
417
418 return old_val == 0;
419}
420
421/**
422 * atomic64_inc - increment atomic64 variable
423 * @ptr: pointer to type atomic64_t
424 *
425 * Atomically increments @ptr by 1.
426 */
427static inline void atomic64_inc(atomic64_t *ptr)
428{
429 atomic64_add(1, ptr);
430}
431
432/**
433 * atomic64_dec - decrement atomic64 variable
434 * @ptr: pointer to type atomic64_t
435 *
436 * Atomically decrements @ptr by 1.
437 */
438static inline void atomic64_dec(atomic64_t *ptr)
439{
440 atomic64_sub(1, ptr);
441}
442
443/**
444 * atomic64_dec_and_test - decrement and test
445 * @ptr: pointer to type atomic64_t
446 *
447 * Atomically decrements @ptr by 1 and
448 * returns true if the result is 0, or false for all other
449 * cases.
450 */
451static inline int atomic64_dec_and_test(atomic64_t *ptr)
452{
453 return atomic64_sub_and_test(1, ptr);
454}
455
456/**
457 * atomic64_inc_and_test - increment and test
458 * @ptr: pointer to type atomic64_t
459 *
460 * Atomically increments @ptr by 1
461 * and returns true if the result is zero, or false for all
462 * other cases.
463 */
464static inline int atomic64_inc_and_test(atomic64_t *ptr)
465{
466 return atomic64_sub_and_test(-1, ptr);
467}
468
469/**
470 * atomic64_add_negative - add and test if negative
471 * @delta: integer value to add
472 * @ptr: pointer to type atomic64_t
473 *
474 * Atomically adds @delta to @ptr and returns true
475 * if the result is negative, or false when
476 * result is greater than or equal to zero.
477 */
478static inline int
479atomic64_add_negative(unsigned long long delta, atomic64_t *ptr)
480{
481 long long old_val = atomic64_add_return(delta, ptr);
482
483 return old_val < 0;
484}
485
250#include <asm-generic/atomic.h> 486#include <asm-generic/atomic.h>
251#endif /* _ASM_X86_ATOMIC_32_H */ 487#endif /* _ASM_X86_ATOMIC_32_H */
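A minimal usage sketch of the new atomic64_t primitives: a 64-bit statistics counter that 32-bit x86 code can update and read without taking a lock. The counter and helper names here are hypothetical; only the atomic64_* calls come from the header above.

	static atomic64_t example_bytes = ATOMIC64_INIT(0);

	static void example_account(unsigned long long nbytes)
	{
		atomic64_add(nbytes, &example_bytes);	/* cmpxchg8b loop internally */
	}

	static unsigned long long example_snapshot(void)
	{
		return atomic64_read(&example_bytes);	/* atomic 64-bit read on 32-bit x86 */
	}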
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index c2e6bedaf258..d750a10ccad6 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -49,7 +49,7 @@ BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
49BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) 49BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
50 50
51#ifdef CONFIG_PERF_COUNTERS 51#ifdef CONFIG_PERF_COUNTERS
52BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR) 52BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR)
53#endif 53#endif
54 54
55#ifdef CONFIG_X86_MCE_P4THERMAL 55#ifdef CONFIG_X86_MCE_P4THERMAL
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 37555e52f980..9ebc5c255032 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -13,6 +13,8 @@ typedef struct {
13 unsigned int irq_spurious_count; 13 unsigned int irq_spurious_count;
14#endif 14#endif
15 unsigned int generic_irqs; /* arch dependent */ 15 unsigned int generic_irqs; /* arch dependent */
16 unsigned int apic_perf_irqs;
17 unsigned int apic_pending_irqs;
16#ifdef CONFIG_SMP 18#ifdef CONFIG_SMP
17 unsigned int irq_resched_count; 19 unsigned int irq_resched_count;
18 unsigned int irq_call_count; 20 unsigned int irq_call_count;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 3bd1777a4c8b..6df45f639666 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -29,6 +29,8 @@
29extern void apic_timer_interrupt(void); 29extern void apic_timer_interrupt(void);
30extern void generic_interrupt(void); 30extern void generic_interrupt(void);
31extern void error_interrupt(void); 31extern void error_interrupt(void);
32extern void perf_pending_interrupt(void);
33
32extern void spurious_interrupt(void); 34extern void spurious_interrupt(void);
33extern void thermal_interrupt(void); 35extern void thermal_interrupt(void);
34extern void reschedule_interrupt(void); 36extern void reschedule_interrupt(void);
diff --git a/arch/x86/include/asm/intel_arch_perfmon.h b/arch/x86/include/asm/intel_arch_perfmon.h
deleted file mode 100644
index fa0fd068bc2e..000000000000
--- a/arch/x86/include/asm/intel_arch_perfmon.h
+++ /dev/null
@@ -1,31 +0,0 @@
1#ifndef _ASM_X86_INTEL_ARCH_PERFMON_H
2#define _ASM_X86_INTEL_ARCH_PERFMON_H
3
4#define MSR_ARCH_PERFMON_PERFCTR0 0xc1
5#define MSR_ARCH_PERFMON_PERFCTR1 0xc2
6
7#define MSR_ARCH_PERFMON_EVENTSEL0 0x186
8#define MSR_ARCH_PERFMON_EVENTSEL1 0x187
9
10#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22)
11#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
12#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
13#define ARCH_PERFMON_EVENTSEL_USR (1 << 16)
14
15#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c)
16#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
17#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX (0)
18#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
19 (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
20
21union cpuid10_eax {
22 struct {
23 unsigned int version_id:8;
24 unsigned int num_counters:8;
25 unsigned int bit_width:8;
26 unsigned int mask_length:8;
27 } split;
28 unsigned int full;
29};
30
31#endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 910b5a3d6751..e997be98c9b9 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -108,14 +108,14 @@
108#define LOCAL_TIMER_VECTOR 0xef 108#define LOCAL_TIMER_VECTOR 0xef
109 109
110/* 110/*
111 * Performance monitoring interrupt vector: 111 * Generic system vector for platform specific use
112 */ 112 */
113#define LOCAL_PERF_VECTOR 0xee 113#define GENERIC_INTERRUPT_VECTOR 0xed
114 114
115/* 115/*
116 * Generic system vector for platform specific use 116 * Performance monitoring pending work vector:
117 */ 117 */
118#define GENERIC_INTERRUPT_VECTOR 0xed 118#define LOCAL_PENDING_VECTOR 0xec
119 119
120/* 120/*
121 * First APIC vector available to drivers: (vectors 0x30-0xee) we 121 * First APIC vector available to drivers: (vectors 0x30-0xee) we
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
new file mode 100644
index 000000000000..876ed97147b3
--- /dev/null
+++ b/arch/x86/include/asm/perf_counter.h
@@ -0,0 +1,100 @@
1#ifndef _ASM_X86_PERF_COUNTER_H
2#define _ASM_X86_PERF_COUNTER_H
3
4/*
5 * Performance counter hw details:
6 */
7
8#define X86_PMC_MAX_GENERIC 8
9#define X86_PMC_MAX_FIXED 3
10
11#define X86_PMC_IDX_GENERIC 0
12#define X86_PMC_IDX_FIXED 32
13#define X86_PMC_IDX_MAX 64
14
15#define MSR_ARCH_PERFMON_PERFCTR0 0xc1
16#define MSR_ARCH_PERFMON_PERFCTR1 0xc2
17
18#define MSR_ARCH_PERFMON_EVENTSEL0 0x186
19#define MSR_ARCH_PERFMON_EVENTSEL1 0x187
20
21#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22)
22#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
23#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
24#define ARCH_PERFMON_EVENTSEL_USR (1 << 16)
25
26/*
27 * Includes eventsel and unit mask as well:
28 */
29#define ARCH_PERFMON_EVENT_MASK 0xffff
30
31#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
32#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
33#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0
34#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
35 (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
36
37#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6
38
39/*
40 * Intel "Architectural Performance Monitoring" CPUID
41 * detection/enumeration details:
42 */
43union cpuid10_eax {
44 struct {
45 unsigned int version_id:8;
46 unsigned int num_counters:8;
47 unsigned int bit_width:8;
48 unsigned int mask_length:8;
49 } split;
50 unsigned int full;
51};
52
53union cpuid10_edx {
54 struct {
55 unsigned int num_counters_fixed:4;
56 unsigned int reserved:28;
57 } split;
58 unsigned int full;
59};
60
61
62/*
63 * Fixed-purpose performance counters:
64 */
65
66/*
67 * All 3 fixed-mode PMCs are configured via this single MSR:
68 */
69#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d
70
71/*
72 * The counts are available in three separate MSRs:
73 */
74
75/* Instr_Retired.Any: */
76#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309
77#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0)
78
79/* CPU_CLK_Unhalted.Core: */
80#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a
81#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1)
82
83/* CPU_CLK_Unhalted.Ref: */
84#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
85#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2)
86
87extern void set_perf_counter_pending(void);
88
89#define clear_perf_counter_pending() do { } while (0)
90#define test_perf_counter_pending() (0)
91
92#ifdef CONFIG_PERF_COUNTERS
93extern void init_hw_perf_counters(void);
94extern void perf_counters_lapic_init(void);
95#else
96static inline void init_hw_perf_counters(void) { }
97static inline void perf_counters_lapic_init(void) { }
98#endif
99
100#endif /* _ASM_X86_PERF_COUNTER_H */
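The cpuid10_eax/cpuid10_edx unions are meant to be filled from CPUID leaf 0xA. A rough sketch of how a caller might probe them, using the standard cpuid() helper from <asm/processor.h> (the function name is invented and the sequence is illustrative, not the driver's actual init path):

	static void example_probe_arch_perfmon(void)
	{
		union cpuid10_eax eax;
		union cpuid10_edx edx;
		unsigned int ebx, unused;

		cpuid(10, &eax.full, &ebx, &unused, &edx.full);
		/*
		 * eax.split.version_id         : architectural PerfMon version
		 * eax.split.num_counters       : generic counters per logical CPU
		 * eax.split.bit_width          : width of those counters
		 * edx.split.num_counters_fixed : fixed-function counters (v2 and later)
		 */
	}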
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 708dae61262d..732a30706153 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -341,6 +341,7 @@
341#define __NR_preadv 333 341#define __NR_preadv 333
342#define __NR_pwritev 334 342#define __NR_pwritev 334
343#define __NR_rt_tgsigqueueinfo 335 343#define __NR_rt_tgsigqueueinfo 335
344#define __NR_perf_counter_open 336
344 345
345#ifdef __KERNEL__ 346#ifdef __KERNEL__
346 347
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 4e2b05404400..900e1617e672 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -659,7 +659,8 @@ __SYSCALL(__NR_preadv, sys_preadv)
659__SYSCALL(__NR_pwritev, sys_pwritev) 659__SYSCALL(__NR_pwritev, sys_pwritev)
660#define __NR_rt_tgsigqueueinfo 297 660#define __NR_rt_tgsigqueueinfo 297
661__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) 661__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
662 662#define __NR_perf_counter_open 298
663__SYSCALL(__NR_perf_counter_open, sys_perf_counter_open)
663 664
664#ifndef __NO_STUBS 665#ifndef __NO_STUBS
665#define __ARCH_WANT_OLD_READDIR 666#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index a4c9cf0bf70b..076d3881f3da 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -14,6 +14,7 @@
14 * Mikael Pettersson : PM converted to driver model. 14 * Mikael Pettersson : PM converted to driver model.
15 */ 15 */
16 16
17#include <linux/perf_counter.h>
17#include <linux/kernel_stat.h> 18#include <linux/kernel_stat.h>
18#include <linux/mc146818rtc.h> 19#include <linux/mc146818rtc.h>
19#include <linux/acpi_pmtmr.h> 20#include <linux/acpi_pmtmr.h>
@@ -34,6 +35,7 @@
34#include <linux/smp.h> 35#include <linux/smp.h>
35#include <linux/mm.h> 36#include <linux/mm.h>
36 37
38#include <asm/perf_counter.h>
37#include <asm/pgalloc.h> 39#include <asm/pgalloc.h>
38#include <asm/atomic.h> 40#include <asm/atomic.h>
39#include <asm/mpspec.h> 41#include <asm/mpspec.h>
@@ -1187,6 +1189,7 @@ void __cpuinit setup_local_APIC(void)
1187 apic_write(APIC_ESR, 0); 1189 apic_write(APIC_ESR, 0);
1188 } 1190 }
1189#endif 1191#endif
1192 perf_counters_lapic_init();
1190 1193
1191 preempt_disable(); 1194 preempt_disable();
1192 1195
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 4e242f9a06e4..3efcb2b96a15 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -1,5 +1,5 @@
1# 1#
2# Makefile for x86-compatible CPU details and quirks 2# Makefile for x86-compatible CPU details, features and quirks
3# 3#
4 4
5# Don't trace early stages of a secondary CPU boot 5# Don't trace early stages of a secondary CPU boot
@@ -23,11 +23,13 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
23obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o 23obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
24obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o 24obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
25 25
26obj-$(CONFIG_X86_MCE) += mcheck/ 26obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
27obj-$(CONFIG_MTRR) += mtrr/
28obj-$(CONFIG_CPU_FREQ) += cpufreq/
29 27
30obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o 28obj-$(CONFIG_X86_MCE) += mcheck/
29obj-$(CONFIG_MTRR) += mtrr/
30obj-$(CONFIG_CPU_FREQ) += cpufreq/
31
32obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
31 33
32quiet_cmd_mkcapflags = MKCAP $@ 34quiet_cmd_mkcapflags = MKCAP $@
33 cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ 35 cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b0517aa2bd3b..3ffdcfa9abdf 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -13,6 +13,7 @@
13#include <linux/io.h> 13#include <linux/io.h>
14 14
15#include <asm/stackprotector.h> 15#include <asm/stackprotector.h>
16#include <asm/perf_counter.h>
16#include <asm/mmu_context.h> 17#include <asm/mmu_context.h>
17#include <asm/hypervisor.h> 18#include <asm/hypervisor.h>
18#include <asm/processor.h> 19#include <asm/processor.h>
@@ -874,6 +875,7 @@ void __init identify_boot_cpu(void)
874#else 875#else
875 vgetcpu_set_mode(); 876 vgetcpu_set_mode();
876#endif 877#endif
878 init_hw_perf_counters();
877} 879}
878 880
879void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) 881void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
new file mode 100644
index 000000000000..895c82e78455
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -0,0 +1,1704 @@
1/*
2 * Performance counter x86 architecture code
3 *
4 * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
5 * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
6 * Copyright (C) 2009 Jaswinder Singh Rajput
7 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
8 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
9 *
10 * For licencing details see kernel-base/COPYING
11 */
12
13#include <linux/perf_counter.h>
14#include <linux/capability.h>
15#include <linux/notifier.h>
16#include <linux/hardirq.h>
17#include <linux/kprobes.h>
18#include <linux/module.h>
19#include <linux/kdebug.h>
20#include <linux/sched.h>
21#include <linux/uaccess.h>
22
23#include <asm/apic.h>
24#include <asm/stacktrace.h>
25#include <asm/nmi.h>
26
27static u64 perf_counter_mask __read_mostly;
28
29struct cpu_hw_counters {
30 struct perf_counter *counters[X86_PMC_IDX_MAX];
31 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
32 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
33 unsigned long interrupts;
34 int enabled;
35};
36
37/*
38 * struct x86_pmu - generic x86 pmu
39 */
40struct x86_pmu {
41 const char *name;
42 int version;
43 int (*handle_irq)(struct pt_regs *);
44 void (*disable_all)(void);
45 void (*enable_all)(void);
46 void (*enable)(struct hw_perf_counter *, int);
47 void (*disable)(struct hw_perf_counter *, int);
48 unsigned eventsel;
49 unsigned perfctr;
50 u64 (*event_map)(int);
51 u64 (*raw_event)(u64);
52 int max_events;
53 int num_counters;
54 int num_counters_fixed;
55 int counter_bits;
56 u64 counter_mask;
57 u64 max_period;
58 u64 intel_ctrl;
59};
60
61static struct x86_pmu x86_pmu __read_mostly;
62
63static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
64 .enabled = 1,
65};
66
67/*
68 * Intel PerfMon v3. Used on Core2 and later.
69 */
70static const u64 intel_perfmon_event_map[] =
71{
72 [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
73 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
74 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
75 [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
76 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
77 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
78 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
79};
80
81static u64 intel_pmu_event_map(int event)
82{
83 return intel_perfmon_event_map[event];
84}
85
86/*
87 * Generalized hw caching related event table, filled
88 * in on a per model basis. A value of 0 means
89 * 'not supported', -1 means 'event makes no sense on
90 * this CPU', any other value means the raw event
91 * ID.
92 */
93
94#define C(x) PERF_COUNT_HW_CACHE_##x
95
96static u64 __read_mostly hw_cache_event_ids
97 [PERF_COUNT_HW_CACHE_MAX]
98 [PERF_COUNT_HW_CACHE_OP_MAX]
99 [PERF_COUNT_HW_CACHE_RESULT_MAX];
100
101static const u64 nehalem_hw_cache_event_ids
102 [PERF_COUNT_HW_CACHE_MAX]
103 [PERF_COUNT_HW_CACHE_OP_MAX]
104 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
105{
106 [ C(L1D) ] = {
107 [ C(OP_READ) ] = {
108 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
109 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
110 },
111 [ C(OP_WRITE) ] = {
112 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
113 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
114 },
115 [ C(OP_PREFETCH) ] = {
116 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
117 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
118 },
119 },
120 [ C(L1I ) ] = {
121 [ C(OP_READ) ] = {
122 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
123 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
124 },
125 [ C(OP_WRITE) ] = {
126 [ C(RESULT_ACCESS) ] = -1,
127 [ C(RESULT_MISS) ] = -1,
128 },
129 [ C(OP_PREFETCH) ] = {
130 [ C(RESULT_ACCESS) ] = 0x0,
131 [ C(RESULT_MISS) ] = 0x0,
132 },
133 },
134 [ C(LL ) ] = {
135 [ C(OP_READ) ] = {
136 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
137 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
138 },
139 [ C(OP_WRITE) ] = {
140 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
141 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
142 },
143 [ C(OP_PREFETCH) ] = {
144 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
145 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
146 },
147 },
148 [ C(DTLB) ] = {
149 [ C(OP_READ) ] = {
150 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
151 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
152 },
153 [ C(OP_WRITE) ] = {
154 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
155 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
156 },
157 [ C(OP_PREFETCH) ] = {
158 [ C(RESULT_ACCESS) ] = 0x0,
159 [ C(RESULT_MISS) ] = 0x0,
160 },
161 },
162 [ C(ITLB) ] = {
163 [ C(OP_READ) ] = {
164 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
165 [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
166 },
167 [ C(OP_WRITE) ] = {
168 [ C(RESULT_ACCESS) ] = -1,
169 [ C(RESULT_MISS) ] = -1,
170 },
171 [ C(OP_PREFETCH) ] = {
172 [ C(RESULT_ACCESS) ] = -1,
173 [ C(RESULT_MISS) ] = -1,
174 },
175 },
176 [ C(BPU ) ] = {
177 [ C(OP_READ) ] = {
178 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
179 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
180 },
181 [ C(OP_WRITE) ] = {
182 [ C(RESULT_ACCESS) ] = -1,
183 [ C(RESULT_MISS) ] = -1,
184 },
185 [ C(OP_PREFETCH) ] = {
186 [ C(RESULT_ACCESS) ] = -1,
187 [ C(RESULT_MISS) ] = -1,
188 },
189 },
190};
191
192static const u64 core2_hw_cache_event_ids
193 [PERF_COUNT_HW_CACHE_MAX]
194 [PERF_COUNT_HW_CACHE_OP_MAX]
195 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
196{
197 [ C(L1D) ] = {
198 [ C(OP_READ) ] = {
199 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
200 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
201 },
202 [ C(OP_WRITE) ] = {
203 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
204 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
205 },
206 [ C(OP_PREFETCH) ] = {
207 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
208 [ C(RESULT_MISS) ] = 0,
209 },
210 },
211 [ C(L1I ) ] = {
212 [ C(OP_READ) ] = {
213 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
214 [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
215 },
216 [ C(OP_WRITE) ] = {
217 [ C(RESULT_ACCESS) ] = -1,
218 [ C(RESULT_MISS) ] = -1,
219 },
220 [ C(OP_PREFETCH) ] = {
221 [ C(RESULT_ACCESS) ] = 0,
222 [ C(RESULT_MISS) ] = 0,
223 },
224 },
225 [ C(LL ) ] = {
226 [ C(OP_READ) ] = {
227 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
228 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
229 },
230 [ C(OP_WRITE) ] = {
231 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
232 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
233 },
234 [ C(OP_PREFETCH) ] = {
235 [ C(RESULT_ACCESS) ] = 0,
236 [ C(RESULT_MISS) ] = 0,
237 },
238 },
239 [ C(DTLB) ] = {
240 [ C(OP_READ) ] = {
241 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
242 [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
243 },
244 [ C(OP_WRITE) ] = {
245 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
246 [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
247 },
248 [ C(OP_PREFETCH) ] = {
249 [ C(RESULT_ACCESS) ] = 0,
250 [ C(RESULT_MISS) ] = 0,
251 },
252 },
253 [ C(ITLB) ] = {
254 [ C(OP_READ) ] = {
255 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
256 [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
257 },
258 [ C(OP_WRITE) ] = {
259 [ C(RESULT_ACCESS) ] = -1,
260 [ C(RESULT_MISS) ] = -1,
261 },
262 [ C(OP_PREFETCH) ] = {
263 [ C(RESULT_ACCESS) ] = -1,
264 [ C(RESULT_MISS) ] = -1,
265 },
266 },
267 [ C(BPU ) ] = {
268 [ C(OP_READ) ] = {
269 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
270 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
271 },
272 [ C(OP_WRITE) ] = {
273 [ C(RESULT_ACCESS) ] = -1,
274 [ C(RESULT_MISS) ] = -1,
275 },
276 [ C(OP_PREFETCH) ] = {
277 [ C(RESULT_ACCESS) ] = -1,
278 [ C(RESULT_MISS) ] = -1,
279 },
280 },
281};
282
283static const u64 atom_hw_cache_event_ids
284 [PERF_COUNT_HW_CACHE_MAX]
285 [PERF_COUNT_HW_CACHE_OP_MAX]
286 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
287{
288 [ C(L1D) ] = {
289 [ C(OP_READ) ] = {
290 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
291 [ C(RESULT_MISS) ] = 0,
292 },
293 [ C(OP_WRITE) ] = {
294 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
295 [ C(RESULT_MISS) ] = 0,
296 },
297 [ C(OP_PREFETCH) ] = {
298 [ C(RESULT_ACCESS) ] = 0x0,
299 [ C(RESULT_MISS) ] = 0,
300 },
301 },
302 [ C(L1I ) ] = {
303 [ C(OP_READ) ] = {
304 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
305 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
306 },
307 [ C(OP_WRITE) ] = {
308 [ C(RESULT_ACCESS) ] = -1,
309 [ C(RESULT_MISS) ] = -1,
310 },
311 [ C(OP_PREFETCH) ] = {
312 [ C(RESULT_ACCESS) ] = 0,
313 [ C(RESULT_MISS) ] = 0,
314 },
315 },
316 [ C(LL ) ] = {
317 [ C(OP_READ) ] = {
318 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
319 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
320 },
321 [ C(OP_WRITE) ] = {
322 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
323 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
324 },
325 [ C(OP_PREFETCH) ] = {
326 [ C(RESULT_ACCESS) ] = 0,
327 [ C(RESULT_MISS) ] = 0,
328 },
329 },
330 [ C(DTLB) ] = {
331 [ C(OP_READ) ] = {
332 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
333 [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
334 },
335 [ C(OP_WRITE) ] = {
336 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
337 [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
338 },
339 [ C(OP_PREFETCH) ] = {
340 [ C(RESULT_ACCESS) ] = 0,
341 [ C(RESULT_MISS) ] = 0,
342 },
343 },
344 [ C(ITLB) ] = {
345 [ C(OP_READ) ] = {
346 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
347 [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
348 },
349 [ C(OP_WRITE) ] = {
350 [ C(RESULT_ACCESS) ] = -1,
351 [ C(RESULT_MISS) ] = -1,
352 },
353 [ C(OP_PREFETCH) ] = {
354 [ C(RESULT_ACCESS) ] = -1,
355 [ C(RESULT_MISS) ] = -1,
356 },
357 },
358 [ C(BPU ) ] = {
359 [ C(OP_READ) ] = {
360 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
361 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
362 },
363 [ C(OP_WRITE) ] = {
364 [ C(RESULT_ACCESS) ] = -1,
365 [ C(RESULT_MISS) ] = -1,
366 },
367 [ C(OP_PREFETCH) ] = {
368 [ C(RESULT_ACCESS) ] = -1,
369 [ C(RESULT_MISS) ] = -1,
370 },
371 },
372};
373
374static u64 intel_pmu_raw_event(u64 event)
375{
376#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
377#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
378#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
379#define CORE_EVNTSEL_INV_MASK 0x00800000ULL
380#define CORE_EVNTSEL_COUNTER_MASK 0xFF000000ULL
381
382#define CORE_EVNTSEL_MASK \
383 (CORE_EVNTSEL_EVENT_MASK | \
384 CORE_EVNTSEL_UNIT_MASK | \
385 CORE_EVNTSEL_EDGE_MASK | \
386 CORE_EVNTSEL_INV_MASK | \
387 CORE_EVNTSEL_COUNTER_MASK)
388
389 return event & CORE_EVNTSEL_MASK;
390}
391
392static const u64 amd_0f_hw_cache_event_ids
393 [PERF_COUNT_HW_CACHE_MAX]
394 [PERF_COUNT_HW_CACHE_OP_MAX]
395 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
396{
397 [ C(L1D) ] = {
398 [ C(OP_READ) ] = {
399 [ C(RESULT_ACCESS) ] = 0,
400 [ C(RESULT_MISS) ] = 0,
401 },
402 [ C(OP_WRITE) ] = {
403 [ C(RESULT_ACCESS) ] = 0,
404 [ C(RESULT_MISS) ] = 0,
405 },
406 [ C(OP_PREFETCH) ] = {
407 [ C(RESULT_ACCESS) ] = 0,
408 [ C(RESULT_MISS) ] = 0,
409 },
410 },
411 [ C(L1I ) ] = {
412 [ C(OP_READ) ] = {
413 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
414 [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
415 },
416 [ C(OP_WRITE) ] = {
417 [ C(RESULT_ACCESS) ] = -1,
418 [ C(RESULT_MISS) ] = -1,
419 },
420 [ C(OP_PREFETCH) ] = {
421 [ C(RESULT_ACCESS) ] = 0,
422 [ C(RESULT_MISS) ] = 0,
423 },
424 },
425 [ C(LL ) ] = {
426 [ C(OP_READ) ] = {
427 [ C(RESULT_ACCESS) ] = 0,
428 [ C(RESULT_MISS) ] = 0,
429 },
430 [ C(OP_WRITE) ] = {
431 [ C(RESULT_ACCESS) ] = 0,
432 [ C(RESULT_MISS) ] = 0,
433 },
434 [ C(OP_PREFETCH) ] = {
435 [ C(RESULT_ACCESS) ] = 0,
436 [ C(RESULT_MISS) ] = 0,
437 },
438 },
439 [ C(DTLB) ] = {
440 [ C(OP_READ) ] = {
441 [ C(RESULT_ACCESS) ] = 0,
442 [ C(RESULT_MISS) ] = 0,
443 },
444 [ C(OP_WRITE) ] = {
445 [ C(RESULT_ACCESS) ] = 0,
446 [ C(RESULT_MISS) ] = 0,
447 },
448 [ C(OP_PREFETCH) ] = {
449 [ C(RESULT_ACCESS) ] = 0,
450 [ C(RESULT_MISS) ] = 0,
451 },
452 },
453 [ C(ITLB) ] = {
454 [ C(OP_READ) ] = {
 455 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */
456 [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */
457 },
458 [ C(OP_WRITE) ] = {
459 [ C(RESULT_ACCESS) ] = -1,
460 [ C(RESULT_MISS) ] = -1,
461 },
462 [ C(OP_PREFETCH) ] = {
463 [ C(RESULT_ACCESS) ] = -1,
464 [ C(RESULT_MISS) ] = -1,
465 },
466 },
467 [ C(BPU ) ] = {
468 [ C(OP_READ) ] = {
469 [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
470 [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
471 },
472 [ C(OP_WRITE) ] = {
473 [ C(RESULT_ACCESS) ] = -1,
474 [ C(RESULT_MISS) ] = -1,
475 },
476 [ C(OP_PREFETCH) ] = {
477 [ C(RESULT_ACCESS) ] = -1,
478 [ C(RESULT_MISS) ] = -1,
479 },
480 },
481};
482
483/*
484 * AMD Performance Monitor K7 and later.
485 */
486static const u64 amd_perfmon_event_map[] =
487{
488 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
489 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
490 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
491 [PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
492 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
493 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
494};
495
496static u64 amd_pmu_event_map(int event)
497{
498 return amd_perfmon_event_map[event];
499}
500
501static u64 amd_pmu_raw_event(u64 event)
502{
503#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL
504#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL
505#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL
506#define K7_EVNTSEL_INV_MASK 0x000800000ULL
507#define K7_EVNTSEL_COUNTER_MASK 0x0FF000000ULL
508
509#define K7_EVNTSEL_MASK \
510 (K7_EVNTSEL_EVENT_MASK | \
511 K7_EVNTSEL_UNIT_MASK | \
512 K7_EVNTSEL_EDGE_MASK | \
513 K7_EVNTSEL_INV_MASK | \
514 K7_EVNTSEL_COUNTER_MASK)
515
516 return event & K7_EVNTSEL_MASK;
517}
518
519/*
520 * Propagate counter elapsed time into the generic counter.
521 * Can only be executed on the CPU where the counter is active.
522 * Returns the delta events processed.
523 */
524static u64
525x86_perf_counter_update(struct perf_counter *counter,
526 struct hw_perf_counter *hwc, int idx)
527{
528 int shift = 64 - x86_pmu.counter_bits;
529 u64 prev_raw_count, new_raw_count;
530 s64 delta;
531
532 /*
533 * Careful: an NMI might modify the previous counter value.
534 *
535 * Our tactic to handle this is to first atomically read and
536 * exchange a new raw count - then add that new-prev delta
537 * count to the generic counter atomically:
538 */
539again:
540 prev_raw_count = atomic64_read(&hwc->prev_count);
541 rdmsrl(hwc->counter_base + idx, new_raw_count);
542
543 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
544 new_raw_count) != prev_raw_count)
545 goto again;
546
547 /*
548 * Now we have the new raw value and have updated the prev
549 * timestamp already. We can now calculate the elapsed delta
550 * (counter-)time and add that to the generic counter.
551 *
552 * Careful, not all hw sign-extends above the physical width
553 * of the count.
554 */
555 delta = (new_raw_count << shift) - (prev_raw_count << shift);
556 delta >>= shift;
557
558 atomic64_add(delta, &counter->count);
559 atomic64_sub(delta, &hwc->period_left);
560
561 return new_raw_count;
562}
563
564static atomic_t active_counters;
565static DEFINE_MUTEX(pmc_reserve_mutex);
566
567static bool reserve_pmc_hardware(void)
568{
569 int i;
570
571 if (nmi_watchdog == NMI_LOCAL_APIC)
572 disable_lapic_nmi_watchdog();
573
574 for (i = 0; i < x86_pmu.num_counters; i++) {
575 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
576 goto perfctr_fail;
577 }
578
579 for (i = 0; i < x86_pmu.num_counters; i++) {
580 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
581 goto eventsel_fail;
582 }
583
584 return true;
585
586eventsel_fail:
587 for (i--; i >= 0; i--)
588 release_evntsel_nmi(x86_pmu.eventsel + i);
589
590 i = x86_pmu.num_counters;
591
592perfctr_fail:
593 for (i--; i >= 0; i--)
594 release_perfctr_nmi(x86_pmu.perfctr + i);
595
596 if (nmi_watchdog == NMI_LOCAL_APIC)
597 enable_lapic_nmi_watchdog();
598
599 return false;
600}
601
602static void release_pmc_hardware(void)
603{
604 int i;
605
606 for (i = 0; i < x86_pmu.num_counters; i++) {
607 release_perfctr_nmi(x86_pmu.perfctr + i);
608 release_evntsel_nmi(x86_pmu.eventsel + i);
609 }
610
611 if (nmi_watchdog == NMI_LOCAL_APIC)
612 enable_lapic_nmi_watchdog();
613}
614
615static void hw_perf_counter_destroy(struct perf_counter *counter)
616{
617 if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) {
618 release_pmc_hardware();
619 mutex_unlock(&pmc_reserve_mutex);
620 }
621}
622
623static inline int x86_pmu_initialized(void)
624{
625 return x86_pmu.handle_irq != NULL;
626}
627
628static inline int
629set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
630{
631 unsigned int cache_type, cache_op, cache_result;
632 u64 config, val;
633
634 config = attr->config;
635
636 cache_type = (config >> 0) & 0xff;
637 if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
638 return -EINVAL;
639
640 cache_op = (config >> 8) & 0xff;
641 if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
642 return -EINVAL;
643
644 cache_result = (config >> 16) & 0xff;
645 if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
646 return -EINVAL;
647
648 val = hw_cache_event_ids[cache_type][cache_op][cache_result];
649
650 if (val == 0)
651 return -ENOENT;
652
653 if (val == -1)
654 return -EINVAL;
655
656 hwc->config |= val;
657
658 return 0;
659}
660
661/*
662 * Setup the hardware configuration for a given attr_type
663 */
664static int __hw_perf_counter_init(struct perf_counter *counter)
665{
666 struct perf_counter_attr *attr = &counter->attr;
667 struct hw_perf_counter *hwc = &counter->hw;
668 int err;
669
670 if (!x86_pmu_initialized())
671 return -ENODEV;
672
673 err = 0;
674 if (!atomic_inc_not_zero(&active_counters)) {
675 mutex_lock(&pmc_reserve_mutex);
676 if (atomic_read(&active_counters) == 0 && !reserve_pmc_hardware())
677 err = -EBUSY;
678 else
679 atomic_inc(&active_counters);
680 mutex_unlock(&pmc_reserve_mutex);
681 }
682 if (err)
683 return err;
684
685 /*
686 * Generate PMC IRQs:
687 * (keep 'enabled' bit clear for now)
688 */
689 hwc->config = ARCH_PERFMON_EVENTSEL_INT;
690
691 /*
692 * Count user and OS events unless requested not to.
693 */
694 if (!attr->exclude_user)
695 hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
696 if (!attr->exclude_kernel)
697 hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
698
699 if (!hwc->sample_period) {
700 hwc->sample_period = x86_pmu.max_period;
701 hwc->last_period = hwc->sample_period;
702 atomic64_set(&hwc->period_left, hwc->sample_period);
703 }
704
705 counter->destroy = hw_perf_counter_destroy;
706
707 /*
 708 * Raw event type provides the config in the event structure
709 */
710 if (attr->type == PERF_TYPE_RAW) {
711 hwc->config |= x86_pmu.raw_event(attr->config);
712 return 0;
713 }
714
715 if (attr->type == PERF_TYPE_HW_CACHE)
716 return set_ext_hw_attr(hwc, attr);
717
718 if (attr->config >= x86_pmu.max_events)
719 return -EINVAL;
720 /*
721 * The generic map:
722 */
723 hwc->config |= x86_pmu.event_map(attr->config);
724
725 return 0;
726}
727
728static void intel_pmu_disable_all(void)
729{
730 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
731}
732
733static void amd_pmu_disable_all(void)
734{
735 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
736 int idx;
737
738 if (!cpuc->enabled)
739 return;
740
741 cpuc->enabled = 0;
742 /*
743 * ensure we write the disable before we start disabling the
744 * counters proper, so that amd_pmu_enable_counter() does the
745 * right thing.
746 */
747 barrier();
748
749 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
750 u64 val;
751
752 if (!test_bit(idx, cpuc->active_mask))
753 continue;
754 rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
755 if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
756 continue;
757 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
758 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
759 }
760}
761
762void hw_perf_disable(void)
763{
764 if (!x86_pmu_initialized())
765 return;
766 return x86_pmu.disable_all();
767}
768
769static void intel_pmu_enable_all(void)
770{
771 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
772}
773
774static void amd_pmu_enable_all(void)
775{
776 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
777 int idx;
778
779 if (cpuc->enabled)
780 return;
781
782 cpuc->enabled = 1;
783 barrier();
784
785 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
786 u64 val;
787
788 if (!test_bit(idx, cpuc->active_mask))
789 continue;
790 rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
791 if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
792 continue;
793 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
794 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
795 }
796}
797
798void hw_perf_enable(void)
799{
800 if (!x86_pmu_initialized())
801 return;
802 x86_pmu.enable_all();
803}
804
805static inline u64 intel_pmu_get_status(void)
806{
807 u64 status;
808
809 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
810
811 return status;
812}
813
814static inline void intel_pmu_ack_status(u64 ack)
815{
816 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
817}
818
819static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
820{
821 int err;
822 err = checking_wrmsrl(hwc->config_base + idx,
823 hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
824}
825
826static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
827{
828 int err;
829 err = checking_wrmsrl(hwc->config_base + idx,
830 hwc->config);
831}
832
833static inline void
834intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
835{
836 int idx = __idx - X86_PMC_IDX_FIXED;
837 u64 ctrl_val, mask;
838 int err;
839
840 mask = 0xfULL << (idx * 4);
841
842 rdmsrl(hwc->config_base, ctrl_val);
843 ctrl_val &= ~mask;
844 err = checking_wrmsrl(hwc->config_base, ctrl_val);
845}
846
847static inline void
848intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
849{
850 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
851 intel_pmu_disable_fixed(hwc, idx);
852 return;
853 }
854
855 x86_pmu_disable_counter(hwc, idx);
856}
857
858static inline void
859amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
860{
861 x86_pmu_disable_counter(hwc, idx);
862}
863
864static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
865
866/*
867 * Set the next IRQ period, based on the hwc->period_left value.
868 * To be called with the counter disabled in hw:
869 */
870static int
871x86_perf_counter_set_period(struct perf_counter *counter,
872 struct hw_perf_counter *hwc, int idx)
873{
874 s64 left = atomic64_read(&hwc->period_left);
875 s64 period = hwc->sample_period;
876 int err, ret = 0;
877
878 /*
 879 * If we are way outside a reasonable range then just skip forward:
880 */
881 if (unlikely(left <= -period)) {
882 left = period;
883 atomic64_set(&hwc->period_left, left);
884 hwc->last_period = period;
885 ret = 1;
886 }
887
888 if (unlikely(left <= 0)) {
889 left += period;
890 atomic64_set(&hwc->period_left, left);
891 hwc->last_period = period;
892 ret = 1;
893 }
894 /*
 895 * Quirk: certain CPUs don't like it if just 1 event is left:
896 */
897 if (unlikely(left < 2))
898 left = 2;
899
900 if (left > x86_pmu.max_period)
901 left = x86_pmu.max_period;
902
903 per_cpu(prev_left[idx], smp_processor_id()) = left;
904
905 /*
906 * The hw counter starts counting from this counter offset,
 907 * mark it to be able to extract future deltas:
908 */
909 atomic64_set(&hwc->prev_count, (u64)-left);
910
911 err = checking_wrmsrl(hwc->counter_base + idx,
912 (u64)(-left) & x86_pmu.counter_mask);
913
914 return ret;
915}
916
917static inline void
918intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
919{
920 int idx = __idx - X86_PMC_IDX_FIXED;
921 u64 ctrl_val, bits, mask;
922 int err;
923
924 /*
925 * Enable IRQ generation (0x8),
926 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
927 * if requested:
928 */
929 bits = 0x8ULL;
930 if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
931 bits |= 0x2;
932 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
933 bits |= 0x1;
934 bits <<= (idx * 4);
935 mask = 0xfULL << (idx * 4);
936
937 rdmsrl(hwc->config_base, ctrl_val);
938 ctrl_val &= ~mask;
939 ctrl_val |= bits;
940 err = checking_wrmsrl(hwc->config_base, ctrl_val);
941}
942
943static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
944{
945 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
946 intel_pmu_enable_fixed(hwc, idx);
947 return;
948 }
949
950 x86_pmu_enable_counter(hwc, idx);
951}
952
953static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
954{
955 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
956
957 if (cpuc->enabled)
958 x86_pmu_enable_counter(hwc, idx);
959 else
960 x86_pmu_disable_counter(hwc, idx);
961}
962
963static int
964fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
965{
966 unsigned int event;
967
968 if (!x86_pmu.num_counters_fixed)
969 return -1;
970
971 event = hwc->config & ARCH_PERFMON_EVENT_MASK;
972
973 if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
974 return X86_PMC_IDX_FIXED_INSTRUCTIONS;
975 if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
976 return X86_PMC_IDX_FIXED_CPU_CYCLES;
977 if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
978 return X86_PMC_IDX_FIXED_BUS_CYCLES;
979
980 return -1;
981}
982
983/*
984 * Find a PMC slot for the freshly enabled / scheduled in counter:
985 */
986static int x86_pmu_enable(struct perf_counter *counter)
987{
988 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
989 struct hw_perf_counter *hwc = &counter->hw;
990 int idx;
991
992 idx = fixed_mode_idx(counter, hwc);
993 if (idx >= 0) {
994 /*
995 * Try to get the fixed counter, if that is already taken
996 * then try to get a generic counter:
997 */
998 if (test_and_set_bit(idx, cpuc->used_mask))
999 goto try_generic;
1000
1001 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
1002 /*
1003 * We set it so that counter_base + idx in wrmsr/rdmsr maps to
1004 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
1005 */
1006 hwc->counter_base =
1007 MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
1008 hwc->idx = idx;
1009 } else {
1010 idx = hwc->idx;
1011 /* Try to get the previous generic counter again */
1012 if (test_and_set_bit(idx, cpuc->used_mask)) {
1013try_generic:
1014 idx = find_first_zero_bit(cpuc->used_mask,
1015 x86_pmu.num_counters);
1016 if (idx == x86_pmu.num_counters)
1017 return -EAGAIN;
1018
1019 set_bit(idx, cpuc->used_mask);
1020 hwc->idx = idx;
1021 }
1022 hwc->config_base = x86_pmu.eventsel;
1023 hwc->counter_base = x86_pmu.perfctr;
1024 }
1025
1026 perf_counters_lapic_init();
1027
1028 x86_pmu.disable(hwc, idx);
1029
1030 cpuc->counters[idx] = counter;
1031 set_bit(idx, cpuc->active_mask);
1032
1033 x86_perf_counter_set_period(counter, hwc, idx);
1034 x86_pmu.enable(hwc, idx);
1035
1036 return 0;
1037}
1038
1039static void x86_pmu_unthrottle(struct perf_counter *counter)
1040{
1041 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
1042 struct hw_perf_counter *hwc = &counter->hw;
1043
1044 if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
1045 cpuc->counters[hwc->idx] != counter))
1046 return;
1047
1048 x86_pmu.enable(hwc, hwc->idx);
1049}
1050
1051void perf_counter_print_debug(void)
1052{
1053 u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
1054 struct cpu_hw_counters *cpuc;
1055 unsigned long flags;
1056 int cpu, idx;
1057
1058 if (!x86_pmu.num_counters)
1059 return;
1060
1061 local_irq_save(flags);
1062
1063 cpu = smp_processor_id();
1064 cpuc = &per_cpu(cpu_hw_counters, cpu);
1065
1066 if (x86_pmu.version >= 2) {
1067 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
1068 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
1069 rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
1070 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
1071
1072 pr_info("\n");
1073 pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
1074 pr_info("CPU#%d: status: %016llx\n", cpu, status);
1075 pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
1076 pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
1077 }
1078 pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask);
1079
1080 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1081 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
1082 rdmsrl(x86_pmu.perfctr + idx, pmc_count);
1083
1084 prev_left = per_cpu(prev_left[idx], cpu);
1085
1086 pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
1087 cpu, idx, pmc_ctrl);
1088 pr_info("CPU#%d: gen-PMC%d count: %016llx\n",
1089 cpu, idx, pmc_count);
1090 pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
1091 cpu, idx, prev_left);
1092 }
1093 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
1094 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
1095
1096 pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
1097 cpu, idx, pmc_count);
1098 }
1099 local_irq_restore(flags);
1100}
1101
1102static void x86_pmu_disable(struct perf_counter *counter)
1103{
1104 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
1105 struct hw_perf_counter *hwc = &counter->hw;
1106 int idx = hwc->idx;
1107
1108 /*
1109 * Must be done before we disable, otherwise the nmi handler
1110 * could reenable again:
1111 */
1112 clear_bit(idx, cpuc->active_mask);
1113 x86_pmu.disable(hwc, idx);
1114
1115 /*
1116 * Make sure the cleared pointer becomes visible before we
1117 * (potentially) free the counter:
1118 */
1119 barrier();
1120
1121 /*
1122 * Drain the remaining delta count out of a counter
1123 * that we are disabling:
1124 */
1125 x86_perf_counter_update(counter, hwc, idx);
1126 cpuc->counters[idx] = NULL;
1127 clear_bit(idx, cpuc->used_mask);
1128}
1129
1130/*
1131 * Save and restart an expired counter. Called by NMI contexts,
1132 * so it has to be careful about preempting normal counter ops:
1133 */
1134static int intel_pmu_save_and_restart(struct perf_counter *counter)
1135{
1136 struct hw_perf_counter *hwc = &counter->hw;
1137 int idx = hwc->idx;
1138 int ret;
1139
1140 x86_perf_counter_update(counter, hwc, idx);
1141 ret = x86_perf_counter_set_period(counter, hwc, idx);
1142
1143 if (counter->state == PERF_COUNTER_STATE_ACTIVE)
1144 intel_pmu_enable_counter(hwc, idx);
1145
1146 return ret;
1147}
1148
1149static void intel_pmu_reset(void)
1150{
1151 unsigned long flags;
1152 int idx;
1153
1154 if (!x86_pmu.num_counters)
1155 return;
1156
1157 local_irq_save(flags);
1158
1159 printk("clearing PMU state on CPU#%d\n", smp_processor_id());
1160
1161 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1162 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
1163 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
1164 }
1165 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
1166 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
1167 }
1168
1169 local_irq_restore(flags);
1170}
1171
1172
1173/*
1174 * This handler is triggered by the local APIC, so the APIC IRQ handling
1175 * rules apply:
1176 */
1177static int intel_pmu_handle_irq(struct pt_regs *regs)
1178{
1179 struct perf_sample_data data;
1180 struct cpu_hw_counters *cpuc;
1181 int bit, cpu, loops;
1182 u64 ack, status;
1183
1184 data.regs = regs;
1185 data.addr = 0;
1186
1187 cpu = smp_processor_id();
1188 cpuc = &per_cpu(cpu_hw_counters, cpu);
1189
1190 perf_disable();
1191 status = intel_pmu_get_status();
1192 if (!status) {
1193 perf_enable();
1194 return 0;
1195 }
1196
1197 loops = 0;
1198again:
1199 if (++loops > 100) {
1200 WARN_ONCE(1, "perfcounters: irq loop stuck!\n");
1201 perf_counter_print_debug();
1202 intel_pmu_reset();
1203 perf_enable();
1204 return 1;
1205 }
1206
1207 inc_irq_stat(apic_perf_irqs);
1208 ack = status;
1209 for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
1210 struct perf_counter *counter = cpuc->counters[bit];
1211
1212 clear_bit(bit, (unsigned long *) &status);
1213 if (!test_bit(bit, cpuc->active_mask))
1214 continue;
1215
1216 if (!intel_pmu_save_and_restart(counter))
1217 continue;
1218
1219 if (perf_counter_overflow(counter, 1, &data))
1220 intel_pmu_disable_counter(&counter->hw, bit);
1221 }
1222
1223 intel_pmu_ack_status(ack);
1224
1225 /*
1226 * Repeat if there is more work to be done:
1227 */
1228 status = intel_pmu_get_status();
1229 if (status)
1230 goto again;
1231
1232 perf_enable();
1233
1234 return 1;
1235}
1236
1237static int amd_pmu_handle_irq(struct pt_regs *regs)
1238{
1239 struct perf_sample_data data;
1240 struct cpu_hw_counters *cpuc;
1241 struct perf_counter *counter;
1242 struct hw_perf_counter *hwc;
1243 int cpu, idx, handled = 0;
1244 u64 val;
1245
1246 data.regs = regs;
1247 data.addr = 0;
1248
1249 cpu = smp_processor_id();
1250 cpuc = &per_cpu(cpu_hw_counters, cpu);
1251
1252 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1253 if (!test_bit(idx, cpuc->active_mask))
1254 continue;
1255
1256 counter = cpuc->counters[idx];
1257 hwc = &counter->hw;
1258
1259 val = x86_perf_counter_update(counter, hwc, idx);
1260 if (val & (1ULL << (x86_pmu.counter_bits - 1)))
1261 continue;
1262
1263 /*
1264 * counter overflow
1265 */
1266 handled = 1;
1267 data.period = counter->hw.last_period;
1268
1269 if (!x86_perf_counter_set_period(counter, hwc, idx))
1270 continue;
1271
1272 if (perf_counter_overflow(counter, 1, &data))
1273 amd_pmu_disable_counter(hwc, idx);
1274 }
1275
1276 if (handled)
1277 inc_irq_stat(apic_perf_irqs);
1278
1279 return handled;
1280}
1281
1282void smp_perf_pending_interrupt(struct pt_regs *regs)
1283{
1284 irq_enter();
1285 ack_APIC_irq();
1286 inc_irq_stat(apic_pending_irqs);
1287 perf_counter_do_pending();
1288 irq_exit();
1289}
1290
1291void set_perf_counter_pending(void)
1292{
1293 apic->send_IPI_self(LOCAL_PENDING_VECTOR);
1294}
1295
1296void perf_counters_lapic_init(void)
1297{
1298 if (!x86_pmu_initialized())
1299 return;
1300
1301 /*
1302 * Always use NMI for PMU
1303 */
1304 apic_write(APIC_LVTPC, APIC_DM_NMI);
1305}
1306
1307static int __kprobes
1308perf_counter_nmi_handler(struct notifier_block *self,
1309 unsigned long cmd, void *__args)
1310{
1311 struct die_args *args = __args;
1312 struct pt_regs *regs;
1313
1314 if (!atomic_read(&active_counters))
1315 return NOTIFY_DONE;
1316
1317 switch (cmd) {
1318 case DIE_NMI:
1319 case DIE_NMI_IPI:
1320 break;
1321
1322 default:
1323 return NOTIFY_DONE;
1324 }
1325
1326 regs = args->regs;
1327
1328 apic_write(APIC_LVTPC, APIC_DM_NMI);
1329 /*
1330 * Can't rely on the handled return value to say it was our NMI, two
1331 * counters could trigger 'simultaneously' raising two back-to-back NMIs.
1332 *
1333 * If the first NMI handles both, the latter will be empty and daze
1334 * the CPU.
1335 */
1336 x86_pmu.handle_irq(regs);
1337
1338 return NOTIFY_STOP;
1339}
1340
1341static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
1342 .notifier_call = perf_counter_nmi_handler,
1343 .next = NULL,
1344 .priority = 1
1345};
1346
1347static struct x86_pmu intel_pmu = {
1348 .name = "Intel",
1349 .handle_irq = intel_pmu_handle_irq,
1350 .disable_all = intel_pmu_disable_all,
1351 .enable_all = intel_pmu_enable_all,
1352 .enable = intel_pmu_enable_counter,
1353 .disable = intel_pmu_disable_counter,
1354 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
1355 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
1356 .event_map = intel_pmu_event_map,
1357 .raw_event = intel_pmu_raw_event,
1358 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
1359 /*
1360 * Intel PMCs cannot be accessed sanely above 32 bit width,
1361 * so we install an artificial 1<<31 period regardless of
1362 * the generic counter period:
1363 */
1364 .max_period = (1ULL << 31) - 1,
1365};
1366
1367static struct x86_pmu amd_pmu = {
1368 .name = "AMD",
1369 .handle_irq = amd_pmu_handle_irq,
1370 .disable_all = amd_pmu_disable_all,
1371 .enable_all = amd_pmu_enable_all,
1372 .enable = amd_pmu_enable_counter,
1373 .disable = amd_pmu_disable_counter,
1374 .eventsel = MSR_K7_EVNTSEL0,
1375 .perfctr = MSR_K7_PERFCTR0,
1376 .event_map = amd_pmu_event_map,
1377 .raw_event = amd_pmu_raw_event,
1378 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
1379 .num_counters = 4,
1380 .counter_bits = 48,
1381 .counter_mask = (1ULL << 48) - 1,
1382 /* use highest bit to detect overflow */
1383 .max_period = (1ULL << 47) - 1,
1384};
1385
1386static int intel_pmu_init(void)
1387{
1388 union cpuid10_edx edx;
1389 union cpuid10_eax eax;
1390 unsigned int unused;
1391 unsigned int ebx;
1392 int version;
1393
1394 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
1395 return -ENODEV;
1396
1397 /*
1398 * Check whether the Architectural PerfMon supports
1399 * Branch Misses Retired Event or not.
1400 */
1401 cpuid(10, &eax.full, &ebx, &unused, &edx.full);
1402 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
1403 return -ENODEV;
1404
1405 version = eax.split.version_id;
1406 if (version < 2)
1407 return -ENODEV;
1408
1409 x86_pmu = intel_pmu;
1410 x86_pmu.version = version;
1411 x86_pmu.num_counters = eax.split.num_counters;
1412 x86_pmu.counter_bits = eax.split.bit_width;
1413 x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1;
1414
1415 /*
1416 * Quirk: v2 perfmon does not report fixed-purpose counters, so
1417 * assume at least 3 counters:
1418 */
1419 x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
1420
1421 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
1422
1423 /*
1424 * Install the hw-cache-events table:
1425 */
1426 switch (boot_cpu_data.x86_model) {
1427 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
1428 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
1429 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
1430 case 29: /* six-core 45 nm xeon "Dunnington" */
1431 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
1432 sizeof(hw_cache_event_ids));
1433
1434 pr_cont("Core2 events, ");
1435 break;
1436 default:
1437 case 26:
1438 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
1439 sizeof(hw_cache_event_ids));
1440
1441 pr_cont("Nehalem/Corei7 events, ");
1442 break;
1443 case 28:
1444 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
1445 sizeof(hw_cache_event_ids));
1446
1447 pr_cont("Atom events, ");
1448 break;
1449 }
1450 return 0;
1451}
1452
1453static int amd_pmu_init(void)
1454{
1455 x86_pmu = amd_pmu;
1456
1457 switch (boot_cpu_data.x86) {
1458 case 0x0f:
1459 case 0x10:
1460 case 0x11:
1461 memcpy(hw_cache_event_ids, amd_0f_hw_cache_event_ids,
1462 sizeof(hw_cache_event_ids));
1463
1464 pr_cont("AMD Family 0f/10/11 events, ");
1465 break;
1466 }
1467 return 0;
1468}
1469
1470void __init init_hw_perf_counters(void)
1471{
1472 int err;
1473
1474 pr_info("Performance Counters: ");
1475
1476 switch (boot_cpu_data.x86_vendor) {
1477 case X86_VENDOR_INTEL:
1478 err = intel_pmu_init();
1479 break;
1480 case X86_VENDOR_AMD:
1481 err = amd_pmu_init();
1482 break;
1483 default:
1484 return;
1485 }
1486 if (err != 0) {
1487 pr_cont("no PMU driver, software counters only.\n");
1488 return;
1489 }
1490
1491 pr_cont("%s PMU driver.\n", x86_pmu.name);
1492
1493 if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
1494 x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
1495 WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
1496 x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
1497 }
1498 perf_counter_mask = (1 << x86_pmu.num_counters) - 1;
1499 perf_max_counters = x86_pmu.num_counters;
1500
1501 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
1502 x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
1503 WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
1504 x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
1505 }
1506
1507 perf_counter_mask |=
1508 ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
1509
1510 perf_counters_lapic_init();
1511 register_die_notifier(&perf_counter_nmi_notifier);
1512
1513 pr_info("... version: %d\n", x86_pmu.version);
1514 pr_info("... bit width: %d\n", x86_pmu.counter_bits);
1515 pr_info("... generic counters: %d\n", x86_pmu.num_counters);
1516 pr_info("... value mask: %016Lx\n", x86_pmu.counter_mask);
1517 pr_info("... max period: %016Lx\n", x86_pmu.max_period);
1518 pr_info("... fixed-purpose counters: %d\n", x86_pmu.num_counters_fixed);
1519 pr_info("... counter mask: %016Lx\n", perf_counter_mask);
1520}
1521
1522static inline void x86_pmu_read(struct perf_counter *counter)
1523{
1524 x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
1525}
1526
1527static const struct pmu pmu = {
1528 .enable = x86_pmu_enable,
1529 .disable = x86_pmu_disable,
1530 .read = x86_pmu_read,
1531 .unthrottle = x86_pmu_unthrottle,
1532};
1533
1534const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
1535{
1536 int err;
1537
1538 err = __hw_perf_counter_init(counter);
1539 if (err)
1540 return ERR_PTR(err);
1541
1542 return &pmu;
1543}
1544
1545/*
1546 * callchain support
1547 */
1548
1549static inline
1550void callchain_store(struct perf_callchain_entry *entry, unsigned long ip)
1551{
1552 if (entry->nr < MAX_STACK_DEPTH)
1553 entry->ip[entry->nr++] = ip;
1554}
1555
1556static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry);
1557static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry);
1558
1559
1560static void
1561backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
1562{
1563 /* Ignore warnings */
1564}
1565
1566static void backtrace_warning(void *data, char *msg)
1567{
1568 /* Ignore warnings */
1569}
1570
1571static int backtrace_stack(void *data, char *name)
1572{
1573 /* Don't bother with IRQ stacks for now */
1574 return -1;
1575}
1576
1577static void backtrace_address(void *data, unsigned long addr, int reliable)
1578{
1579 struct perf_callchain_entry *entry = data;
1580
1581 if (reliable)
1582 callchain_store(entry, addr);
1583}
1584
1585static const struct stacktrace_ops backtrace_ops = {
1586 .warning = backtrace_warning,
1587 .warning_symbol = backtrace_warning_symbol,
1588 .stack = backtrace_stack,
1589 .address = backtrace_address,
1590};
1591
1592static void
1593perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
1594{
1595 unsigned long bp;
1596 char *stack;
1597 int nr = entry->nr;
1598
1599 callchain_store(entry, instruction_pointer(regs));
1600
1601 stack = ((char *)regs + sizeof(struct pt_regs));
1602#ifdef CONFIG_FRAME_POINTER
1603 bp = frame_pointer(regs);
1604#else
1605 bp = 0;
1606#endif
1607
1608 dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry);
1609
1610 entry->kernel = entry->nr - nr;
1611}
1612
1613
1614struct stack_frame {
1615 const void __user *next_fp;
1616 unsigned long return_address;
1617};
1618
1619static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
1620{
1621 int ret;
1622
1623 if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
1624 return 0;
1625
1626 ret = 1;
1627 pagefault_disable();
1628 if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
1629 ret = 0;
1630 pagefault_enable();
1631
1632 return ret;
1633}
1634
1635static void
1636perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
1637{
1638 struct stack_frame frame;
1639 const void __user *fp;
1640 int nr = entry->nr;
1641
1642 regs = (struct pt_regs *)current->thread.sp0 - 1;
1643 fp = (void __user *)regs->bp;
1644
1645 callchain_store(entry, regs->ip);
1646
1647 while (entry->nr < MAX_STACK_DEPTH) {
1648 frame.next_fp = NULL;
1649 frame.return_address = 0;
1650
1651 if (!copy_stack_frame(fp, &frame))
1652 break;
1653
1654 if ((unsigned long)fp < user_stack_pointer(regs))
1655 break;
1656
1657 callchain_store(entry, frame.return_address);
1658 fp = frame.next_fp;
1659 }
1660
1661 entry->user = entry->nr - nr;
1662}
1663
1664static void
1665perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
1666{
1667 int is_user;
1668
1669 if (!regs)
1670 return;
1671
1672 is_user = user_mode(regs);
1673
1674 if (!current || current->pid == 0)
1675 return;
1676
1677 if (is_user && current->state != TASK_RUNNING)
1678 return;
1679
1680 if (!is_user)
1681 perf_callchain_kernel(regs, entry);
1682
1683 if (current->mm)
1684 perf_callchain_user(regs, entry);
1685}
1686
1687struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1688{
1689 struct perf_callchain_entry *entry;
1690
1691 if (in_nmi())
1692 entry = &__get_cpu_var(nmi_entry);
1693 else
1694 entry = &__get_cpu_var(irq_entry);
1695
1696 entry->nr = 0;
1697 entry->hv = 0;
1698 entry->kernel = 0;
1699 entry->user = 0;
1700
1701 perf_do_callchain(regs, entry);
1702
1703 return entry;
1704}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index f6c70a164e32..d6f5b9fbde32 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -19,8 +19,8 @@
19#include <linux/nmi.h> 19#include <linux/nmi.h>
20#include <linux/kprobes.h> 20#include <linux/kprobes.h>
21 21
22#include <asm/genapic.h> 22#include <asm/apic.h>
23#include <asm/intel_arch_perfmon.h> 23#include <asm/perf_counter.h>
24 24
25struct nmi_watchdog_ctlblk { 25struct nmi_watchdog_ctlblk {
26 unsigned int cccr_msr; 26 unsigned int cccr_msr;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1c17d7c751a4..a4742a340d8d 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1012,6 +1012,11 @@ apicinterrupt ERROR_APIC_VECTOR \
1012apicinterrupt SPURIOUS_APIC_VECTOR \ 1012apicinterrupt SPURIOUS_APIC_VECTOR \
1013 spurious_interrupt smp_spurious_interrupt 1013 spurious_interrupt smp_spurious_interrupt
1014 1014
1015#ifdef CONFIG_PERF_COUNTERS
1016apicinterrupt LOCAL_PENDING_VECTOR \
1017 perf_pending_interrupt smp_perf_pending_interrupt
1018#endif
1019
1015/* 1020/*
1016 * Exception entry points. 1021 * Exception entry points.
1017 */ 1022 */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 9a391bbb8ba8..38287b5f116e 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -62,6 +62,14 @@ static int show_other_interrupts(struct seq_file *p, int prec)
62 for_each_online_cpu(j) 62 for_each_online_cpu(j)
63 seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); 63 seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
64 seq_printf(p, " Spurious interrupts\n"); 64 seq_printf(p, " Spurious interrupts\n");
65 seq_printf(p, "%*s: ", prec, "CNT");
66 for_each_online_cpu(j)
67 seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
68 seq_printf(p, " Performance counter interrupts\n");
69 seq_printf(p, "%*s: ", prec, "PND");
70 for_each_online_cpu(j)
71 seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
72 seq_printf(p, " Performance pending work\n");
65#endif 73#endif
66 if (generic_interrupt_extension) { 74 if (generic_interrupt_extension) {
67 seq_printf(p, "%*s: ", prec, "PLT"); 75 seq_printf(p, "%*s: ", prec, "PLT");
@@ -165,6 +173,8 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
165#ifdef CONFIG_X86_LOCAL_APIC 173#ifdef CONFIG_X86_LOCAL_APIC
166 sum += irq_stats(cpu)->apic_timer_irqs; 174 sum += irq_stats(cpu)->apic_timer_irqs;
167 sum += irq_stats(cpu)->irq_spurious_count; 175 sum += irq_stats(cpu)->irq_spurious_count;
176 sum += irq_stats(cpu)->apic_perf_irqs;
177 sum += irq_stats(cpu)->apic_pending_irqs;
168#endif 178#endif
169 if (generic_interrupt_extension) 179 if (generic_interrupt_extension)
170 sum += irq_stats(cpu)->generic_irqs; 180 sum += irq_stats(cpu)->generic_irqs;
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 2e08b10ad51a..267c6624c77f 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -181,10 +181,15 @@ static void __init apic_intr_init(void)
181{ 181{
182 smp_intr_init(); 182 smp_intr_init();
183 183
184#ifdef CONFIG_X86_64 184#ifdef CONFIG_X86_THERMAL_VECTOR
185 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); 185 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
186#endif
187#ifdef CONFIG_X86_THRESHOLD
186 alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); 188 alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
187#endif 189#endif
190#if defined(CONFIG_X86_NEW_MCE) && defined(CONFIG_X86_LOCAL_APIC)
191 alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt);
192#endif
188 193
189#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) 194#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
190 /* self generated IPI for local APIC timer */ 195 /* self generated IPI for local APIC timer */
@@ -199,18 +204,10 @@ static void __init apic_intr_init(void)
199 204
200 /* Performance monitoring interrupts: */ 205 /* Performance monitoring interrupts: */
201# ifdef CONFIG_PERF_COUNTERS 206# ifdef CONFIG_PERF_COUNTERS
202 alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt);
203 alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); 207 alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
204# endif 208# endif
205 209
206#endif 210#endif
207
208#ifdef CONFIG_X86_32
209#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
210 /* thermal monitor LVT interrupt */
211 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
212#endif
213#endif
214} 211}
215 212
216/** 213/**
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 14425166b8e3..0a813b17b172 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -6,7 +6,6 @@
6 * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes 6 * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
7 * 2000-2002 x86-64 support by Andi Kleen 7 * 2000-2002 x86-64 support by Andi Kleen
8 */ 8 */
9
10#include <linux/sched.h> 9#include <linux/sched.h>
11#include <linux/mm.h> 10#include <linux/mm.h>
12#include <linux/smp.h> 11#include <linux/smp.h>
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 734f92c02dde..d51321ddafda 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -335,3 +335,4 @@ ENTRY(sys_call_table)
335 .long sys_preadv 335 .long sys_preadv
336 .long sys_pwritev 336 .long sys_pwritev
337 .long sys_rt_tgsigqueueinfo /* 335 */ 337 .long sys_rt_tgsigqueueinfo /* 335 */
338 .long sys_perf_counter_open
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ede024531f8f..07d60c870ce2 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -942,8 +942,13 @@ void __init trap_init(void)
942#endif 942#endif
943 set_intr_gate(19, &simd_coprocessor_error); 943 set_intr_gate(19, &simd_coprocessor_error);
944 944
945 /* Reserve all the builtin and the syscall vector: */
946 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
947 set_bit(i, used_vectors);
948
945#ifdef CONFIG_IA32_EMULATION 949#ifdef CONFIG_IA32_EMULATION
946 set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); 950 set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
951 set_bit(IA32_SYSCALL_VECTOR, used_vectors);
947#endif 952#endif
948 953
949#ifdef CONFIG_X86_32 954#ifdef CONFIG_X86_32
@@ -960,14 +965,9 @@ void __init trap_init(void)
960 } 965 }
961 966
962 set_system_trap_gate(SYSCALL_VECTOR, &system_call); 967 set_system_trap_gate(SYSCALL_VECTOR, &system_call);
968 set_bit(SYSCALL_VECTOR, used_vectors);
963#endif 969#endif
964 970
965 /* Reserve all the builtin and the syscall vector: */
966 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
967 set_bit(i, used_vectors);
968
969 set_bit(IA32_SYSCALL_VECTOR, used_vectors);
970
971 /* 971 /*
972 * Should be a barrier for any external CPU state: 972 * Should be a barrier for any external CPU state:
973 */ 973 */
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 5ec7ae366615..c6acc6326374 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -10,6 +10,7 @@
10#include <linux/bootmem.h> /* max_low_pfn */ 10#include <linux/bootmem.h> /* max_low_pfn */
11#include <linux/kprobes.h> /* __kprobes, ... */ 11#include <linux/kprobes.h> /* __kprobes, ... */
12#include <linux/mmiotrace.h> /* kmmio_handler, ... */ 12#include <linux/mmiotrace.h> /* kmmio_handler, ... */
13#include <linux/perf_counter.h> /* perf_swcounter_event */
13 14
14#include <asm/traps.h> /* dotraplinkage, ... */ 15#include <asm/traps.h> /* dotraplinkage, ... */
15#include <asm/pgalloc.h> /* pgd_*(), ... */ 16#include <asm/pgalloc.h> /* pgd_*(), ... */
@@ -1013,6 +1014,8 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
1013 if (unlikely(error_code & PF_RSVD)) 1014 if (unlikely(error_code & PF_RSVD))
1014 pgtable_bad(regs, error_code, address); 1015 pgtable_bad(regs, error_code, address);
1015 1016
1017 perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
1018
1016 /* 1019 /*
1017 * If we're in an interrupt, have no user context or are running 1020 * If we're in an interrupt, have no user context or are running
1018 * in an atomic region then we must not take the fault: 1021 * in an atomic region then we must not take the fault:
@@ -1106,10 +1109,15 @@ good_area:
1106 return; 1109 return;
1107 } 1110 }
1108 1111
1109 if (fault & VM_FAULT_MAJOR) 1112 if (fault & VM_FAULT_MAJOR) {
1110 tsk->maj_flt++; 1113 tsk->maj_flt++;
1111 else 1114 perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
1115 regs, address);
1116 } else {
1112 tsk->min_flt++; 1117 tsk->min_flt++;
1118 perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
1119 regs, address);
1120 }
1113 1121
1114 check_v8086_mode(regs, address, tsk); 1122 check_v8086_mode(regs, address, tsk);
1115 1123
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 3b285e656e27..b07dd8d0b321 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -40,8 +40,9 @@ static int profile_exceptions_notify(struct notifier_block *self,
40 40
41 switch (val) { 41 switch (val) {
42 case DIE_NMI: 42 case DIE_NMI:
43 if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu))) 43 case DIE_NMI_IPI:
44 ret = NOTIFY_STOP; 44 model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu));
45 ret = NOTIFY_STOP;
45 break; 46 break;
46 default: 47 default:
47 break; 48 break;
@@ -134,7 +135,7 @@ static void nmi_cpu_setup(void *dummy)
134static struct notifier_block profile_exceptions_nb = { 135static struct notifier_block profile_exceptions_nb = {
135 .notifier_call = profile_exceptions_notify, 136 .notifier_call = profile_exceptions_notify,
136 .next = NULL, 137 .next = NULL,
137 .priority = 0 138 .priority = 2
138}; 139};
139 140
140static int nmi_setup(void) 141static int nmi_setup(void)
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 10131fbdaada..4da7230b3d17 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -18,7 +18,7 @@
18#include <asm/msr.h> 18#include <asm/msr.h>
19#include <asm/apic.h> 19#include <asm/apic.h>
20#include <asm/nmi.h> 20#include <asm/nmi.h>
21#include <asm/intel_arch_perfmon.h> 21#include <asm/perf_counter.h>
22 22
23#include "op_x86_model.h" 23#include "op_x86_model.h"
24#include "op_counter.h" 24#include "op_counter.h"
@@ -136,6 +136,13 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
136 u64 val; 136 u64 val;
137 int i; 137 int i;
138 138
139 /*
140 * This can happen if perf counters are in use when
141 * we steal the die notifier NMI.
142 */
143 if (unlikely(!reset_value))
144 goto out;
145
139 for (i = 0 ; i < num_counters; ++i) { 146 for (i = 0 ; i < num_counters; ++i) {
140 if (!reset_value[i]) 147 if (!reset_value[i])
141 continue; 148 continue;
@@ -146,6 +153,7 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
146 } 153 }
147 } 154 }
148 155
156out:
149 /* Only P6 based Pentium M need to re-unmask the apic vector but it 157 /* Only P6 based Pentium M need to re-unmask the apic vector but it
150 * doesn't hurt other P6 variant */ 158 * doesn't hurt other P6 variant */
151 apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); 159 apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 1241f118ab56..58bc00f68b12 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -338,6 +338,8 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
338 } 338 }
339 } 339 }
340 340
341 current->mm->context.vdso = (void *)addr;
342
341 if (compat_uses_vma || !compat) { 343 if (compat_uses_vma || !compat) {
342 /* 344 /*
343 * MAYWRITE to allow gdb to COW and set breakpoints 345 * MAYWRITE to allow gdb to COW and set breakpoints
@@ -358,11 +360,13 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
358 goto up_fail; 360 goto up_fail;
359 } 361 }
360 362
361 current->mm->context.vdso = (void *)addr;
362 current_thread_info()->sysenter_return = 363 current_thread_info()->sysenter_return =
363 VDSO32_SYMBOL(addr, SYSENTER_RETURN); 364 VDSO32_SYMBOL(addr, SYSENTER_RETURN);
364 365
365 up_fail: 366 up_fail:
367 if (ret)
368 current->mm->context.vdso = NULL;
369
366 up_write(&mm->mmap_sem); 370 up_write(&mm->mmap_sem);
367 371
368 return ret; 372 return ret;
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index cac083386e03..21e1aeb9f3ea 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -116,15 +116,18 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
116 goto up_fail; 116 goto up_fail;
117 } 117 }
118 118
119 current->mm->context.vdso = (void *)addr;
120
119 ret = install_special_mapping(mm, addr, vdso_size, 121 ret = install_special_mapping(mm, addr, vdso_size,
120 VM_READ|VM_EXEC| 122 VM_READ|VM_EXEC|
121 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| 123 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
122 VM_ALWAYSDUMP, 124 VM_ALWAYSDUMP,
123 vdso_pages); 125 vdso_pages);
124 if (ret) 126 if (ret) {
127 current->mm->context.vdso = NULL;
125 goto up_fail; 128 goto up_fail;
129 }
126 130
127 current->mm->context.vdso = (void *)addr;
128up_fail: 131up_fail:
129 up_write(&mm->mmap_sem); 132 up_write(&mm->mmap_sem);
130 return ret; 133 return ret;