Diffstat (limited to 'arch')
-rw-r--r--  arch/powerpc/include/asm/hw_irq.h  39
-rw-r--r--  arch/powerpc/include/asm/paca.h  1
-rw-r--r--  arch/powerpc/include/asm/perf_counter.h  72
-rw-r--r--  arch/powerpc/include/asm/systbl.h  1
-rw-r--r--  arch/powerpc/include/asm/unistd.h  3
-rw-r--r--  arch/powerpc/kernel/Makefile  2
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c  1
-rw-r--r--  arch/powerpc/kernel/entry_64.S  9
-rw-r--r--  arch/powerpc/kernel/irq.c  5
-rw-r--r--  arch/powerpc/kernel/perf_counter.c  846
-rw-r--r--  arch/powerpc/kernel/power4-pmu.c  557
-rw-r--r--  arch/powerpc/kernel/power5+-pmu.c  452
-rw-r--r--  arch/powerpc/kernel/power5-pmu.c  475
-rw-r--r--  arch/powerpc/kernel/power6-pmu.c  283
-rw-r--r--  arch/powerpc/kernel/ppc970-pmu.c  375
-rw-r--r--  arch/powerpc/mm/fault.c  8
-rw-r--r--  arch/powerpc/platforms/Kconfig.cputype  1
-rw-r--r--  arch/x86/Kconfig  1
-rw-r--r--  arch/x86/ia32/ia32entry.S  3
-rw-r--r--  arch/x86/include/asm/atomic_32.h  236
-rw-r--r--  arch/x86/include/asm/entry_arch.h  1
-rw-r--r--  arch/x86/include/asm/hardirq.h  2
-rw-r--r--  arch/x86/include/asm/hw_irq.h  3
-rw-r--r--  arch/x86/include/asm/intel_arch_perfmon.h  31
-rw-r--r--  arch/x86/include/asm/irq_vectors.h  5
-rw-r--r--  arch/x86/include/asm/perf_counter.h  100
-rw-r--r--  arch/x86/include/asm/unistd_32.h  1
-rw-r--r--  arch/x86/include/asm/unistd_64.h  3
-rw-r--r--  arch/x86/kernel/apic/apic.c  4
-rw-r--r--  arch/x86/kernel/cpu/Makefile  12
-rw-r--r--  arch/x86/kernel/cpu/amd.c  4
-rw-r--r--  arch/x86/kernel/cpu/common.c  2
-rw-r--r--  arch/x86/kernel/cpu/perf_counter.c  1213
-rw-r--r--  arch/x86/kernel/cpu/perfctr-watchdog.c  4
-rw-r--r--  arch/x86/kernel/entry_64.S  7
-rw-r--r--  arch/x86/kernel/irq.c  10
-rw-r--r--  arch/x86/kernel/irqinit_32.c  60
-rw-r--r--  arch/x86/kernel/irqinit_64.c  13
-rw-r--r--  arch/x86/kernel/signal.c  1
-rw-r--r--  arch/x86/kernel/syscall_table_32.S  1
-rw-r--r--  arch/x86/kernel/traps.c  15
-rw-r--r--  arch/x86/mm/fault.c  10
-rw-r--r--  arch/x86/oprofile/nmi_int.c  7
-rw-r--r--  arch/x86/oprofile/op_model_ppro.c  10
44 files changed, 4805 insertions, 84 deletions
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index b7e034b0a6dd..20a44d0c9fdd 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -131,5 +131,44 @@ static inline int irqs_disabled_flags(unsigned long flags)
  */
 struct irq_chip;

+#ifdef CONFIG_PERF_COUNTERS
+static inline unsigned long test_perf_counter_pending(void)
+{
+        unsigned long x;
+
+        asm volatile("lbz %0,%1(13)"
+                : "=r" (x)
+                : "i" (offsetof(struct paca_struct, perf_counter_pending)));
+        return x;
+}
+
+static inline void set_perf_counter_pending(void)
+{
+        asm volatile("stb %0,%1(13)" : :
+                "r" (1),
+                "i" (offsetof(struct paca_struct, perf_counter_pending)));
+}
+
+static inline void clear_perf_counter_pending(void)
+{
+        asm volatile("stb %0,%1(13)" : :
+                "r" (0),
+                "i" (offsetof(struct paca_struct, perf_counter_pending)));
+}
+
+extern void perf_counter_do_pending(void);
+
+#else
+
+static inline unsigned long test_perf_counter_pending(void)
+{
+        return 0;
+}
+
+static inline void set_perf_counter_pending(void) {}
+static inline void clear_perf_counter_pending(void) {}
+static inline void perf_counter_do_pending(void) {}
+#endif /* CONFIG_PERF_COUNTERS */
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_HW_IRQ_H */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 082b3aedf145..6ef055723019 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -99,6 +99,7 @@ struct paca_struct {
         u8 soft_enabled;                /* irq soft-enable flag */
         u8 hard_enabled;                /* set if irqs are enabled in MSR */
         u8 io_sync;                     /* writel() needs spin_unlock sync */
+        u8 perf_counter_pending;        /* PM interrupt while soft-disabled */

         /* Stuff for accurate time accounting */
         u64 user_time;                  /* accumulated usermode TB ticks */
diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h
new file mode 100644
index 000000000000..9d7ff6d7fb56
--- /dev/null
+++ b/arch/powerpc/include/asm/perf_counter.h
@@ -0,0 +1,72 @@
1/*
2 * Performance counter support - PowerPC-specific definitions.
3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/types.h>
12
13#define MAX_HWCOUNTERS 8
14#define MAX_EVENT_ALTERNATIVES 8
15
16/*
17 * This struct provides the constants and functions needed to
18 * describe the PMU on a particular POWER-family CPU.
19 */
20struct power_pmu {
21 int n_counter;
22 int max_alternatives;
23 u64 add_fields;
24 u64 test_adder;
25 int (*compute_mmcr)(unsigned int events[], int n_ev,
26 unsigned int hwc[], u64 mmcr[]);
27 int (*get_constraint)(unsigned int event, u64 *mskp, u64 *valp);
28 int (*get_alternatives)(unsigned int event, unsigned int alt[]);
29 void (*disable_pmc)(unsigned int pmc, u64 mmcr[]);
30 int n_generic;
31 int *generic_events;
32};
33
34extern struct power_pmu *ppmu;
35
36/*
37 * The power_pmu.get_constraint function returns a 64-bit value and
38 * a 64-bit mask that express the constraints between this event and
39 * other events.
40 *
41 * The value and mask are divided up into (non-overlapping) bitfields
42 * of three different types:
43 *
44 * Select field: this expresses the constraint that some set of bits
45 * in MMCR* needs to be set to a specific value for this event. For a
46 * select field, the mask contains 1s in every bit of the field, and
47 * the value contains a unique value for each possible setting of the
48 * MMCR* bits. The constraint checking code will ensure that two events
49 * that set the same field in their masks have the same value in their
50 * value dwords.
51 *
52 * Add field: this expresses the constraint that there can be at most
53 * N events in a particular class. A field of k bits can be used for
54 * N <= 2^(k-1) - 1. The mask has the most significant bit of the field
55 * set (and the other bits 0), and the value has only the least significant
56 * bit of the field set. In addition, the 'add_fields' and 'test_adder'
57 * in the struct power_pmu for this processor come into play. The
58 * add_fields value contains 1 in the LSB of the field, and the
59 * test_adder contains 2^(k-1) - 1 - N in the field.
60 *
61 * NAND field: this expresses the constraint that you may not have events
62 * in all of a set of classes. (For example, on PPC970, you can't select
63 * events from the FPU, ISU and IDU simultaneously, although any two are
64 * possible.) For N classes, the field is N+1 bits wide, and each class
65 * is assigned one bit from the least-significant N bits. The mask has
66 * only the most-significant bit set, and the value has only the bit
67 * for the event's class set. The test_adder has the least significant
68 * bit set in the field.
69 *
70 * If an event is not subject to the constraint expressed by a particular
71 * field, then it will have 0 in both the mask and value for that field.
72 */
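
The add-field arithmetic described above is compact but non-obvious, so here is a minimal, hypothetical userspace sketch (not part of this patch) that models a single add field: a 3-bit field placed at an invented bit position 8, allowing at most N = 2 events of one class. The field position, k and N are made up for illustration; the nv/test_adder check mirrors the one in power_check_constraints() in arch/powerpc/kernel/perf_counter.c, which this patch adds later.

#include <stdio.h>
#include <stdint.h>

#define FIELD_SH        8                               /* made-up position of the field */
#define ADD_FIELDS      (1ULL << FIELD_SH)              /* 1 in the LSB of the field */
#define TEST_ADDER      ((uint64_t)(3 - 2) << FIELD_SH) /* 2^(k-1)-1 - N, with k = 3, N = 2 */

/* every event in this class carries the same constraint mask/value pair */
static const uint64_t ev_mask  = 4ULL << FIELD_SH;      /* MSB of the field */
static const uint64_t ev_value = 1ULL << FIELD_SH;      /* LSB of the field */

int main(void)
{
        uint64_t value = 0, mask = 0, nv;
        int i;

        for (i = 1; i <= 3; i++) {
                /* same accumulate-and-test arithmetic as power_check_constraints() */
                nv = (value | ev_value) + (value & ev_value & ADD_FIELDS);
                if ((((nv + TEST_ADDER) ^ value) & mask) ||
                    (((nv + TEST_ADDER) ^ ev_value) & ev_mask)) {
                        printf("event %d rejected: class already has N events\n", i);
                        break;
                }
                value = nv;
                mask |= ev_mask;
                printf("event %d accepted, field now counts %llu\n",
                       i, (unsigned long long)(value >> FIELD_SH));
        }
        return 0;
}

Run, this accepts the first two events and rejects the third: with a count of 3 in the field, adding test_adder (here 1) carries into the field's most significant bit, which is exactly the bit the mask watches.
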
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index fe166491e9dc..affa8caed7eb 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -322,3 +322,4 @@ SYSCALL_SPU(epoll_create1)
 SYSCALL_SPU(dup3)
 SYSCALL_SPU(pipe2)
 SYSCALL(inotify_init1)
+SYSCALL_SPU(perf_counter_open)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index e07d0c76ed77..7cef5afe89d8 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -341,10 +341,11 @@
 #define __NR_dup3               316
 #define __NR_pipe2              317
 #define __NR_inotify_init1      318
+#define __NR_perf_counter_open  319

 #ifdef __KERNEL__

-#define __NR_syscalls           319
+#define __NR_syscalls           320

 #define __NR__exit __NR_exit
 #define NR_syscalls __NR_syscalls
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 71901fbda4a5..9ba1bb731fcc 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -94,6 +94,8 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o

 obj-$(CONFIG_DYNAMIC_FTRACE)            += ftrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER)     += ftrace.o
+obj-$(CONFIG_PERF_COUNTERS)             += perf_counter.o power4-pmu.o ppc970-pmu.o \
+                                           power5-pmu.o power5+-pmu.o power6-pmu.o

 obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o

diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 1e40bc053946..e981d1ce1914 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -131,6 +131,7 @@ int main(void)
         DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
         DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
         DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));
+        DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_counter_pending));
         DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
         DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
         DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index abfc32330479..43e073477c34 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -526,6 +526,15 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
 2:
         TRACE_AND_RESTORE_IRQ(r5);

+#ifdef CONFIG_PERF_COUNTERS
+        /* check paca->perf_counter_pending if we're enabling ints */
+        lbz     r3,PACAPERFPEND(r13)
+        and.    r3,r3,r5
+        beq     27f
+        bl      .perf_counter_do_pending
+27:
+#endif /* CONFIG_PERF_COUNTERS */
+
         /* extract EE bit and use it to restore paca->hard_enabled */
         ld      r3,_MSR(r1)
         rldicl  r4,r3,49,63             /* r0 = (r3 >> 15) & 1 */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 5576147e57b6..2cd471f92fe6 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -135,6 +135,11 @@ notrace void raw_local_irq_restore(unsigned long en)
                 iseries_handle_interrupts();
         }

+        if (test_perf_counter_pending()) {
+                clear_perf_counter_pending();
+                perf_counter_do_pending();
+        }
+
         /*
          * if (get_paca()->hard_enabled) return;
          * But again we need to take care that gcc gets hard_enabled directly
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
new file mode 100644
index 000000000000..f88c35d0710a
--- /dev/null
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -0,0 +1,846 @@
1/*
2 * Performance counter support - powerpc architecture code
3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/sched.h>
13#include <linux/perf_counter.h>
14#include <linux/percpu.h>
15#include <linux/hardirq.h>
16#include <asm/reg.h>
17#include <asm/pmc.h>
18#include <asm/machdep.h>
19#include <asm/firmware.h>
20
21struct cpu_hw_counters {
22 int n_counters;
23 int n_percpu;
24 int disabled;
25 int n_added;
26 struct perf_counter *counter[MAX_HWCOUNTERS];
27 unsigned int events[MAX_HWCOUNTERS];
28 u64 mmcr[3];
29 u8 pmcs_enabled;
30};
31DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
32
33struct power_pmu *ppmu;
34
35/*
36 * Normally, to ignore kernel events we set the FCS (freeze counters
37 * in supervisor mode) bit in MMCR0, but if the kernel runs with the
38 * hypervisor bit set in the MSR, or if we are running on a processor
39 * where the hypervisor bit is forced to 1 (as on Apple G5 processors),
40 * then we need to use the FCHV bit to ignore kernel events.
41 */
42static unsigned int freeze_counters_kernel = MMCR0_FCS;
43
44static void perf_counter_interrupt(struct pt_regs *regs);
45
46void perf_counter_print_debug(void)
47{
48}
49
50/*
51 * Read one performance monitor counter (PMC).
52 */
53static unsigned long read_pmc(int idx)
54{
55 unsigned long val;
56
57 switch (idx) {
58 case 1:
59 val = mfspr(SPRN_PMC1);
60 break;
61 case 2:
62 val = mfspr(SPRN_PMC2);
63 break;
64 case 3:
65 val = mfspr(SPRN_PMC3);
66 break;
67 case 4:
68 val = mfspr(SPRN_PMC4);
69 break;
70 case 5:
71 val = mfspr(SPRN_PMC5);
72 break;
73 case 6:
74 val = mfspr(SPRN_PMC6);
75 break;
76 case 7:
77 val = mfspr(SPRN_PMC7);
78 break;
79 case 8:
80 val = mfspr(SPRN_PMC8);
81 break;
82 default:
83 printk(KERN_ERR "oops trying to read PMC%d\n", idx);
84 val = 0;
85 }
86 return val;
87}
88
89/*
90 * Write one PMC.
91 */
92static void write_pmc(int idx, unsigned long val)
93{
94 switch (idx) {
95 case 1:
96 mtspr(SPRN_PMC1, val);
97 break;
98 case 2:
99 mtspr(SPRN_PMC2, val);
100 break;
101 case 3:
102 mtspr(SPRN_PMC3, val);
103 break;
104 case 4:
105 mtspr(SPRN_PMC4, val);
106 break;
107 case 5:
108 mtspr(SPRN_PMC5, val);
109 break;
110 case 6:
111 mtspr(SPRN_PMC6, val);
112 break;
113 case 7:
114 mtspr(SPRN_PMC7, val);
115 break;
116 case 8:
117 mtspr(SPRN_PMC8, val);
118 break;
119 default:
120 printk(KERN_ERR "oops trying to write PMC%d\n", idx);
121 }
122}
123
124/*
125 * Check if a set of events can all go on the PMU at once.
126 * If they can't, this will look at alternative codes for the events
127 * and see if any combination of alternative codes is feasible.
128 * The feasible set is returned in event[].
129 */
130static int power_check_constraints(unsigned int event[], int n_ev)
131{
132 u64 mask, value, nv;
133 unsigned int alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
134 u64 amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
135 u64 avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
136 u64 smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS];
137 int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS];
138 int i, j;
139 u64 addf = ppmu->add_fields;
140 u64 tadd = ppmu->test_adder;
141
142 if (n_ev > ppmu->n_counter)
143 return -1;
144
145 /* First see if the events will go on as-is */
146 for (i = 0; i < n_ev; ++i) {
147 alternatives[i][0] = event[i];
148 if (ppmu->get_constraint(event[i], &amasks[i][0],
149 &avalues[i][0]))
150 return -1;
151 choice[i] = 0;
152 }
153 value = mask = 0;
154 for (i = 0; i < n_ev; ++i) {
155 nv = (value | avalues[i][0]) + (value & avalues[i][0] & addf);
156 if ((((nv + tadd) ^ value) & mask) != 0 ||
157 (((nv + tadd) ^ avalues[i][0]) & amasks[i][0]) != 0)
158 break;
159 value = nv;
160 mask |= amasks[i][0];
161 }
162 if (i == n_ev)
163 return 0; /* all OK */
164
165 /* doesn't work, gather alternatives... */
166 if (!ppmu->get_alternatives)
167 return -1;
168 for (i = 0; i < n_ev; ++i) {
169 n_alt[i] = ppmu->get_alternatives(event[i], alternatives[i]);
170 for (j = 1; j < n_alt[i]; ++j)
171 ppmu->get_constraint(alternatives[i][j],
172 &amasks[i][j], &avalues[i][j]);
173 }
174
175 /* enumerate all possibilities and see if any will work */
176 i = 0;
177 j = -1;
178 value = mask = nv = 0;
179 while (i < n_ev) {
180 if (j >= 0) {
181 /* we're backtracking, restore context */
182 value = svalues[i];
183 mask = smasks[i];
184 j = choice[i];
185 }
186 /*
187 * See if any alternative k for event i,
188 * where k > j, will satisfy the constraints.
189 */
190 while (++j < n_alt[i]) {
191 nv = (value | avalues[i][j]) +
192 (value & avalues[i][j] & addf);
193 if ((((nv + tadd) ^ value) & mask) == 0 &&
194 (((nv + tadd) ^ avalues[i][j])
195 & amasks[i][j]) == 0)
196 break;
197 }
198 if (j >= n_alt[i]) {
199 /*
200 * No feasible alternative, backtrack
201 * to event i-1 and continue enumerating its
202 * alternatives from where we got up to.
203 */
204 if (--i < 0)
205 return -1;
206 } else {
207 /*
208 * Found a feasible alternative for event i,
209 * remember where we got up to with this event,
210 * go on to the next event, and start with
211 * the first alternative for it.
212 */
213 choice[i] = j;
214 svalues[i] = value;
215 smasks[i] = mask;
216 value = nv;
217 mask |= amasks[i][j];
218 ++i;
219 j = -1;
220 }
221 }
222
223 /* OK, we have a feasible combination, tell the caller the solution */
224 for (i = 0; i < n_ev; ++i)
225 event[i] = alternatives[i][choice[i]];
226 return 0;
227}
228
229/*
230 * Check if newly-added counters have consistent settings for
231 * exclude_{user,kernel,hv} with each other and any previously
232 * added counters.
233 */
234static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new)
235{
236 int eu, ek, eh;
237 int i, n;
238 struct perf_counter *counter;
239
240 n = n_prev + n_new;
241 if (n <= 1)
242 return 0;
243
244 eu = ctrs[0]->hw_event.exclude_user;
245 ek = ctrs[0]->hw_event.exclude_kernel;
246 eh = ctrs[0]->hw_event.exclude_hv;
247 if (n_prev == 0)
248 n_prev = 1;
249 for (i = n_prev; i < n; ++i) {
250 counter = ctrs[i];
251 if (counter->hw_event.exclude_user != eu ||
252 counter->hw_event.exclude_kernel != ek ||
253 counter->hw_event.exclude_hv != eh)
254 return -EAGAIN;
255 }
256 return 0;
257}
258
259static void power_perf_read(struct perf_counter *counter)
260{
261 long val, delta, prev;
262
263 if (!counter->hw.idx)
264 return;
265 /*
266 * Performance monitor interrupts come even when interrupts
267 * are soft-disabled, as long as interrupts are hard-enabled.
268 * Therefore we treat them like NMIs.
269 */
270 do {
271 prev = atomic64_read(&counter->hw.prev_count);
272 barrier();
273 val = read_pmc(counter->hw.idx);
274 } while (atomic64_cmpxchg(&counter->hw.prev_count, prev, val) != prev);
275
276 /* The counters are only 32 bits wide */
277 delta = (val - prev) & 0xfffffffful;
278 atomic64_add(delta, &counter->count);
279 atomic64_sub(delta, &counter->hw.period_left);
280}
281
282/*
283 * Disable all counters to prevent PMU interrupts and to allow
284 * counters to be added or removed.
285 */
286u64 hw_perf_save_disable(void)
287{
288 struct cpu_hw_counters *cpuhw;
289 unsigned long ret;
290 unsigned long flags;
291
292 local_irq_save(flags);
293 cpuhw = &__get_cpu_var(cpu_hw_counters);
294
295 ret = cpuhw->disabled;
296 if (!ret) {
297 cpuhw->disabled = 1;
298 cpuhw->n_added = 0;
299
300 /*
301 * Check if we ever enabled the PMU on this cpu.
302 */
303 if (!cpuhw->pmcs_enabled) {
304 if (ppc_md.enable_pmcs)
305 ppc_md.enable_pmcs();
306 cpuhw->pmcs_enabled = 1;
307 }
308
309 /*
310 * Set the 'freeze counters' bit.
311 * The barrier is to make sure the mtspr has been
312 * executed and the PMU has frozen the counters
313 * before we return.
314 */
315 mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
316 mb();
317 }
318 local_irq_restore(flags);
319 return ret;
320}
321
322/*
323 * Re-enable all counters if disable == 0.
324 * If we were previously disabled and counters were added, then
325 * put the new config on the PMU.
326 */
327void hw_perf_restore(u64 disable)
328{
329 struct perf_counter *counter;
330 struct cpu_hw_counters *cpuhw;
331 unsigned long flags;
332 long i;
333 unsigned long val;
334 s64 left;
335 unsigned int hwc_index[MAX_HWCOUNTERS];
336
337 if (disable)
338 return;
339 local_irq_save(flags);
340 cpuhw = &__get_cpu_var(cpu_hw_counters);
341 cpuhw->disabled = 0;
342
343 /*
344 * If we didn't change anything, or only removed counters,
345 * no need to recalculate MMCR* settings and reset the PMCs.
346 * Just reenable the PMU with the current MMCR* settings
347 * (possibly updated for removal of counters).
348 */
349 if (!cpuhw->n_added) {
350 mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
351 mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
352 mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
353 if (cpuhw->n_counters == 0)
354 get_lppaca()->pmcregs_in_use = 0;
355 goto out;
356 }
357
358 /*
359 * Compute MMCR* values for the new set of counters
360 */
361 if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_counters, hwc_index,
362 cpuhw->mmcr)) {
363 /* shouldn't ever get here */
364 printk(KERN_ERR "oops compute_mmcr failed\n");
365 goto out;
366 }
367
368 /*
369 * Add in MMCR0 freeze bits corresponding to the
370 * hw_event.exclude_* bits for the first counter.
371 * We have already checked that all counters have the
372 * same values for these bits as the first counter.
373 */
374 counter = cpuhw->counter[0];
375 if (counter->hw_event.exclude_user)
376 cpuhw->mmcr[0] |= MMCR0_FCP;
377 if (counter->hw_event.exclude_kernel)
378 cpuhw->mmcr[0] |= freeze_counters_kernel;
379 if (counter->hw_event.exclude_hv)
380 cpuhw->mmcr[0] |= MMCR0_FCHV;
381
382 /*
383 * Write the new configuration to MMCR* with the freeze
384 * bit set and set the hardware counters to their initial values.
385 * Then unfreeze the counters.
386 */
387 get_lppaca()->pmcregs_in_use = 1;
388 mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
389 mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
390 mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
391 | MMCR0_FC);
392
393 /*
394 * Read off any pre-existing counters that need to move
395 * to another PMC.
396 */
397 for (i = 0; i < cpuhw->n_counters; ++i) {
398 counter = cpuhw->counter[i];
399 if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) {
400 power_perf_read(counter);
401 write_pmc(counter->hw.idx, 0);
402 counter->hw.idx = 0;
403 }
404 }
405
406 /*
407 * Initialize the PMCs for all the new and moved counters.
408 */
409 for (i = 0; i < cpuhw->n_counters; ++i) {
410 counter = cpuhw->counter[i];
411 if (counter->hw.idx)
412 continue;
413 val = 0;
414 if (counter->hw_event.irq_period) {
415 left = atomic64_read(&counter->hw.period_left);
416 if (left < 0x80000000L)
417 val = 0x80000000L - left;
418 }
419 atomic64_set(&counter->hw.prev_count, val);
420 counter->hw.idx = hwc_index[i] + 1;
421 write_pmc(counter->hw.idx, val);
422 perf_counter_update_userpage(counter);
423 }
424 mb();
425 cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
426 mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
427
428 out:
429 local_irq_restore(flags);
430}
431
432static int collect_events(struct perf_counter *group, int max_count,
433 struct perf_counter *ctrs[], unsigned int *events)
434{
435 int n = 0;
436 struct perf_counter *counter;
437
438 if (!is_software_counter(group)) {
439 if (n >= max_count)
440 return -1;
441 ctrs[n] = group;
442 events[n++] = group->hw.config;
443 }
444 list_for_each_entry(counter, &group->sibling_list, list_entry) {
445 if (!is_software_counter(counter) &&
446 counter->state != PERF_COUNTER_STATE_OFF) {
447 if (n >= max_count)
448 return -1;
449 ctrs[n] = counter;
450 events[n++] = counter->hw.config;
451 }
452 }
453 return n;
454}
455
456static void counter_sched_in(struct perf_counter *counter, int cpu)
457{
458 counter->state = PERF_COUNTER_STATE_ACTIVE;
459 counter->oncpu = cpu;
460 counter->tstamp_running += counter->ctx->time_now -
461 counter->tstamp_stopped;
462 if (is_software_counter(counter))
463 counter->hw_ops->enable(counter);
464}
465
466/*
467 * Called to enable a whole group of counters.
468 * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
469 * Assumes the caller has disabled interrupts and has
470 * frozen the PMU with hw_perf_save_disable.
471 */
472int hw_perf_group_sched_in(struct perf_counter *group_leader,
473 struct perf_cpu_context *cpuctx,
474 struct perf_counter_context *ctx, int cpu)
475{
476 struct cpu_hw_counters *cpuhw;
477 long i, n, n0;
478 struct perf_counter *sub;
479
480 cpuhw = &__get_cpu_var(cpu_hw_counters);
481 n0 = cpuhw->n_counters;
482 n = collect_events(group_leader, ppmu->n_counter - n0,
483 &cpuhw->counter[n0], &cpuhw->events[n0]);
484 if (n < 0)
485 return -EAGAIN;
486 if (check_excludes(cpuhw->counter, n0, n))
487 return -EAGAIN;
488 if (power_check_constraints(cpuhw->events, n + n0))
489 return -EAGAIN;
490 cpuhw->n_counters = n0 + n;
491 cpuhw->n_added += n;
492
493 /*
494 * OK, this group can go on; update counter states etc.,
495 * and enable any software counters
496 */
497 for (i = n0; i < n0 + n; ++i)
498 cpuhw->counter[i]->hw.config = cpuhw->events[i];
499 cpuctx->active_oncpu += n;
500 n = 1;
501 counter_sched_in(group_leader, cpu);
502 list_for_each_entry(sub, &group_leader->sibling_list, list_entry) {
503 if (sub->state != PERF_COUNTER_STATE_OFF) {
504 counter_sched_in(sub, cpu);
505 ++n;
506 }
507 }
508 ctx->nr_active += n;
509
510 return 1;
511}
512
513/*
514 * Add a counter to the PMU.
515 * If all counters are not already frozen, then we disable and
516 * re-enable the PMU in order to get hw_perf_restore to do the
517 * actual work of reconfiguring the PMU.
518 */
519static int power_perf_enable(struct perf_counter *counter)
520{
521 struct cpu_hw_counters *cpuhw;
522 unsigned long flags;
523 u64 pmudis;
524 int n0;
525 int ret = -EAGAIN;
526
527 local_irq_save(flags);
528 pmudis = hw_perf_save_disable();
529
530 /*
531 * Add the counter to the list (if there is room)
532 * and check whether the total set is still feasible.
533 */
534 cpuhw = &__get_cpu_var(cpu_hw_counters);
535 n0 = cpuhw->n_counters;
536 if (n0 >= ppmu->n_counter)
537 goto out;
538 cpuhw->counter[n0] = counter;
539 cpuhw->events[n0] = counter->hw.config;
540 if (check_excludes(cpuhw->counter, n0, 1))
541 goto out;
542 if (power_check_constraints(cpuhw->events, n0 + 1))
543 goto out;
544
545 counter->hw.config = cpuhw->events[n0];
546 ++cpuhw->n_counters;
547 ++cpuhw->n_added;
548
549 ret = 0;
550 out:
551 hw_perf_restore(pmudis);
552 local_irq_restore(flags);
553 return ret;
554}
555
556/*
557 * Remove a counter from the PMU.
558 */
559static void power_perf_disable(struct perf_counter *counter)
560{
561 struct cpu_hw_counters *cpuhw;
562 long i;
563 u64 pmudis;
564 unsigned long flags;
565
566 local_irq_save(flags);
567 pmudis = hw_perf_save_disable();
568
569 power_perf_read(counter);
570
571 cpuhw = &__get_cpu_var(cpu_hw_counters);
572 for (i = 0; i < cpuhw->n_counters; ++i) {
573 if (counter == cpuhw->counter[i]) {
574 while (++i < cpuhw->n_counters)
575 cpuhw->counter[i-1] = cpuhw->counter[i];
576 --cpuhw->n_counters;
577 ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
578 write_pmc(counter->hw.idx, 0);
579 counter->hw.idx = 0;
580 perf_counter_update_userpage(counter);
581 break;
582 }
583 }
584 if (cpuhw->n_counters == 0) {
585 /* disable exceptions if no counters are running */
586 cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
587 }
588
589 hw_perf_restore(pmudis);
590 local_irq_restore(flags);
591}
592
593struct hw_perf_counter_ops power_perf_ops = {
594 .enable = power_perf_enable,
595 .disable = power_perf_disable,
596 .read = power_perf_read
597};
598
599/* Number of perf_counters counting hardware events */
600static atomic_t num_counters;
601/* Used to avoid races in calling reserve/release_pmc_hardware */
602static DEFINE_MUTEX(pmc_reserve_mutex);
603
604/*
605 * Release the PMU if this is the last perf_counter.
606 */
607static void hw_perf_counter_destroy(struct perf_counter *counter)
608{
609 if (!atomic_add_unless(&num_counters, -1, 1)) {
610 mutex_lock(&pmc_reserve_mutex);
611 if (atomic_dec_return(&num_counters) == 0)
612 release_pmc_hardware();
613 mutex_unlock(&pmc_reserve_mutex);
614 }
615}
616
617const struct hw_perf_counter_ops *
618hw_perf_counter_init(struct perf_counter *counter)
619{
620 unsigned long ev;
621 struct perf_counter *ctrs[MAX_HWCOUNTERS];
622 unsigned int events[MAX_HWCOUNTERS];
623 int n;
624 int err;
625
626 if (!ppmu)
627 return ERR_PTR(-ENXIO);
628 if ((s64)counter->hw_event.irq_period < 0)
629 return ERR_PTR(-EINVAL);
630 if (!perf_event_raw(&counter->hw_event)) {
631 ev = perf_event_id(&counter->hw_event);
632 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
633 return ERR_PTR(-EOPNOTSUPP);
634 ev = ppmu->generic_events[ev];
635 } else {
636 ev = perf_event_config(&counter->hw_event);
637 }
638 counter->hw.config_base = ev;
639 counter->hw.idx = 0;
640
641 /*
642 * If we are not running on a hypervisor, force the
643 * exclude_hv bit to 0 so that we don't care what
644 * the user set it to.
645 */
646 if (!firmware_has_feature(FW_FEATURE_LPAR))
647 counter->hw_event.exclude_hv = 0;
648
649 /*
650 * If this is in a group, check if it can go on with all the
651 * other hardware counters in the group. We assume the counter
652 * hasn't been linked into its leader's sibling list at this point.
653 */
654 n = 0;
655 if (counter->group_leader != counter) {
656 n = collect_events(counter->group_leader, ppmu->n_counter - 1,
657 ctrs, events);
658 if (n < 0)
659 return ERR_PTR(-EINVAL);
660 }
661 events[n] = ev;
662 ctrs[n] = counter;
663 if (check_excludes(ctrs, n, 1))
664 return ERR_PTR(-EINVAL);
665 if (power_check_constraints(events, n + 1))
666 return ERR_PTR(-EINVAL);
667
668 counter->hw.config = events[n];
669 atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
670
671 /*
672 * See if we need to reserve the PMU.
673 * If no counters are currently in use, then we have to take a
674 * mutex to ensure that we don't race with another task doing
675 * reserve_pmc_hardware or release_pmc_hardware.
676 */
677 err = 0;
678 if (!atomic_inc_not_zero(&num_counters)) {
679 mutex_lock(&pmc_reserve_mutex);
680 if (atomic_read(&num_counters) == 0 &&
681 reserve_pmc_hardware(perf_counter_interrupt))
682 err = -EBUSY;
683 else
684 atomic_inc(&num_counters);
685 mutex_unlock(&pmc_reserve_mutex);
686 }
687 counter->destroy = hw_perf_counter_destroy;
688
689 if (err)
690 return ERR_PTR(err);
691 return &power_perf_ops;
692}
693
694/*
695 * A counter has overflowed; update its count and record
696 * things if requested. Note that interrupts are hard-disabled
697 * here so there is no possibility of being interrupted.
698 */
699static void record_and_restart(struct perf_counter *counter, long val,
700 struct pt_regs *regs)
701{
702 s64 prev, delta, left;
703 int record = 0;
704
705 /* we don't have to worry about interrupts here */
706 prev = atomic64_read(&counter->hw.prev_count);
707 delta = (val - prev) & 0xfffffffful;
708 atomic64_add(delta, &counter->count);
709
710 /*
711 * See if the total period for this counter has expired,
712 * and update for the next period.
713 */
714 val = 0;
715 left = atomic64_read(&counter->hw.period_left) - delta;
716 if (counter->hw_event.irq_period) {
717 if (left <= 0) {
718 left += counter->hw_event.irq_period;
719 if (left <= 0)
720 left = counter->hw_event.irq_period;
721 record = 1;
722 }
723 if (left < 0x80000000L)
724 val = 0x80000000L - left;
725 }
726 write_pmc(counter->hw.idx, val);
727 atomic64_set(&counter->hw.prev_count, val);
728 atomic64_set(&counter->hw.period_left, left);
729 perf_counter_update_userpage(counter);
730
731 /*
732 * Finally record data if requested.
733 */
734 if (record)
735 perf_counter_overflow(counter, 1, regs);
736}
737
738/*
739 * Performance monitor interrupt stuff
740 */
741static void perf_counter_interrupt(struct pt_regs *regs)
742{
743 int i;
744 struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
745 struct perf_counter *counter;
746 long val;
747 int found = 0;
748
749 for (i = 0; i < cpuhw->n_counters; ++i) {
750 counter = cpuhw->counter[i];
751 val = read_pmc(counter->hw.idx);
752 if ((int)val < 0) {
753 /* counter has overflowed */
754 found = 1;
755 record_and_restart(counter, val, regs);
756 }
757 }
758
759 /*
760 * In case we didn't find and reset the counter that caused
761 * the interrupt, scan all counters and reset any that are
762 * negative, to avoid getting continual interrupts.
763 * Any that we processed in the previous loop will not be negative.
764 */
765 if (!found) {
766 for (i = 0; i < ppmu->n_counter; ++i) {
767 val = read_pmc(i + 1);
768 if ((int)val < 0)
769 write_pmc(i + 1, 0);
770 }
771 }
772
773 /*
774 * Reset MMCR0 to its normal value. This will set PMXE and
775 * clear FC (freeze counters) and PMAO (perf mon alert occurred)
776 * and thus allow interrupts to occur again.
777 * XXX might want to use MSR.PM to keep the counters frozen until
778 * we get back out of this interrupt.
779 */
780 mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
781
782 /*
783 * If we need a wakeup, check whether interrupts were soft-enabled
784 * when we took the interrupt. If they were, we can wake stuff up
785 * immediately; otherwise we'll have do the wakeup when interrupts
786 * get soft-enabled.
787 */
788 if (test_perf_counter_pending() && regs->softe) {
789 irq_enter();
790 clear_perf_counter_pending();
791 perf_counter_do_pending();
792 irq_exit();
793 }
794}
795
796void hw_perf_counter_setup(int cpu)
797{
798 struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu);
799
800 memset(cpuhw, 0, sizeof(*cpuhw));
801 cpuhw->mmcr[0] = MMCR0_FC;
802}
803
804extern struct power_pmu power4_pmu;
805extern struct power_pmu ppc970_pmu;
806extern struct power_pmu power5_pmu;
807extern struct power_pmu power5p_pmu;
808extern struct power_pmu power6_pmu;
809
810static int init_perf_counters(void)
811{
812 unsigned long pvr;
813
814 /* XXX should get this from cputable */
815 pvr = mfspr(SPRN_PVR);
816 switch (PVR_VER(pvr)) {
817 case PV_POWER4:
818 case PV_POWER4p:
819 ppmu = &power4_pmu;
820 break;
821 case PV_970:
822 case PV_970FX:
823 case PV_970MP:
824 ppmu = &ppc970_pmu;
825 break;
826 case PV_POWER5:
827 ppmu = &power5_pmu;
828 break;
829 case PV_POWER5p:
830 ppmu = &power5p_pmu;
831 break;
832 case 0x3e:
833 ppmu = &power6_pmu;
834 break;
835 }
836
837 /*
838 * Use FCHV to ignore kernel events if MSR.HV is set.
839 */
840 if (mfmsr() & MSR_HV)
841 freeze_counters_kernel = MMCR0_FCHV;
842
843 return 0;
844}
845
846arch_initcall(init_perf_counters);
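
Two small pieces of arithmetic in the file above are easy to misread, so here is a hypothetical standalone sketch (not from the patch) of both: the wrap-safe 32-bit delta used by power_perf_read() and record_and_restart(), and the way record_and_restart() picks a PMC start value so the counter overflows (bit 31 set, raising a PMI) after "left" more events. The concrete numbers are invented for illustration.

#include <stdio.h>

int main(void)
{
        /* wrap-safe delta: the PMCs are only 32 bits wide */
        unsigned long prev = 0xfffffff0ul;      /* last value read from the PMC */
        unsigned long val  = 0x00000010ul;      /* the PMC has wrapped past zero */
        long delta = (val - prev) & 0xfffffffful;
        long left = 100000;                     /* invented sampling period */
        unsigned long start = 0;

        printf("delta = %ld events\n", delta);  /* prints 32, despite the wrap */

        /* start value so the PMC goes negative after "left" more events */
        if (left < 0x80000000L)
                start = 0x80000000L - left;
        printf("PMC start value = 0x%lx\n", start);
        return 0;
}
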
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
new file mode 100644
index 000000000000..1407b19ab619
--- /dev/null
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -0,0 +1,557 @@
1/*
2 * Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors.
3 *
4 * Copyright 2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/perf_counter.h>
13#include <asm/reg.h>
14
15/*
16 * Bits in event code for POWER4
17 */
18#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */
19#define PM_PMC_MSK 0xf
20#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */
21#define PM_UNIT_MSK 0xf
22#define PM_LOWER_SH 6
23#define PM_LOWER_MSK 1
24#define PM_LOWER_MSKS 0x40
25#define PM_BYTE_SH 4 /* Byte number of event bus to use */
26#define PM_BYTE_MSK 3
27#define PM_PMCSEL_MSK 7
28
29/*
30 * Unit code values
31 */
32#define PM_FPU 1
33#define PM_ISU1 2
34#define PM_IFU 3
35#define PM_IDU0 4
36#define PM_ISU1_ALT 6
37#define PM_ISU2 7
38#define PM_IFU_ALT 8
39#define PM_LSU0 9
40#define PM_LSU1 0xc
41#define PM_GPS 0xf
42
43/*
44 * Bits in MMCR0 for POWER4
45 */
46#define MMCR0_PMC1SEL_SH 8
47#define MMCR0_PMC2SEL_SH 1
48#define MMCR_PMCSEL_MSK 0x1f
49
50/*
51 * Bits in MMCR1 for POWER4
52 */
53#define MMCR1_TTM0SEL_SH 62
54#define MMCR1_TTC0SEL_SH 61
55#define MMCR1_TTM1SEL_SH 59
56#define MMCR1_TTC1SEL_SH 58
57#define MMCR1_TTM2SEL_SH 56
58#define MMCR1_TTC2SEL_SH 55
59#define MMCR1_TTM3SEL_SH 53
60#define MMCR1_TTC3SEL_SH 52
61#define MMCR1_TTMSEL_MSK 3
62#define MMCR1_TD_CP_DBG0SEL_SH 50
63#define MMCR1_TD_CP_DBG1SEL_SH 48
64#define MMCR1_TD_CP_DBG2SEL_SH 46
65#define MMCR1_TD_CP_DBG3SEL_SH 44
66#define MMCR1_DEBUG0SEL_SH 43
67#define MMCR1_DEBUG1SEL_SH 42
68#define MMCR1_DEBUG2SEL_SH 41
69#define MMCR1_DEBUG3SEL_SH 40
70#define MMCR1_PMC1_ADDER_SEL_SH 39
71#define MMCR1_PMC2_ADDER_SEL_SH 38
72#define MMCR1_PMC6_ADDER_SEL_SH 37
73#define MMCR1_PMC5_ADDER_SEL_SH 36
74#define MMCR1_PMC8_ADDER_SEL_SH 35
75#define MMCR1_PMC7_ADDER_SEL_SH 34
76#define MMCR1_PMC3_ADDER_SEL_SH 33
77#define MMCR1_PMC4_ADDER_SEL_SH 32
78#define MMCR1_PMC3SEL_SH 27
79#define MMCR1_PMC4SEL_SH 22
80#define MMCR1_PMC5SEL_SH 17
81#define MMCR1_PMC6SEL_SH 12
82#define MMCR1_PMC7SEL_SH 7
83#define MMCR1_PMC8SEL_SH 2 /* note bit 0 is in MMCRA for GP */
84
85static short mmcr1_adder_bits[8] = {
86 MMCR1_PMC1_ADDER_SEL_SH,
87 MMCR1_PMC2_ADDER_SEL_SH,
88 MMCR1_PMC3_ADDER_SEL_SH,
89 MMCR1_PMC4_ADDER_SEL_SH,
90 MMCR1_PMC5_ADDER_SEL_SH,
91 MMCR1_PMC6_ADDER_SEL_SH,
92 MMCR1_PMC7_ADDER_SEL_SH,
93 MMCR1_PMC8_ADDER_SEL_SH
94};
95
96/*
97 * Bits in MMCRA
98 */
99#define MMCRA_PMC8SEL0_SH 17 /* PMC8SEL bit 0 for GP */
100
101/*
102 * Layout of constraint bits:
103 * 6666555555555544444444443333333333222222222211111111110000000000
104 * 3210987654321098765432109876543210987654321098765432109876543210
105 * |[ >[ >[ >|||[ >[ >< >< >< >< ><><><><><><><><>
106 * | UC1 UC2 UC3 ||| PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8
107 * \SMPL ||\TTC3SEL
108 * |\TTC_IFU_SEL
109 * \TTM2SEL0
110 *
111 * SMPL - SAMPLE_ENABLE constraint
112 * 56: SAMPLE_ENABLE value 0x0100_0000_0000_0000
113 *
114 * UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2
115 * 55: UC1 error 0x0080_0000_0000_0000
116 * 54: FPU events needed 0x0040_0000_0000_0000
117 * 53: ISU1 events needed 0x0020_0000_0000_0000
118 * 52: IDU0|ISU2 events needed 0x0010_0000_0000_0000
119 *
120 * UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0
121 * 51: UC2 error 0x0008_0000_0000_0000
122 * 50: FPU events needed 0x0004_0000_0000_0000
123 * 49: IFU events needed 0x0002_0000_0000_0000
124 * 48: LSU0 events needed 0x0001_0000_0000_0000
125 *
126 * UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1
127 * 47: UC3 error 0x8000_0000_0000
128 * 46: LSU0 events needed 0x4000_0000_0000
129 * 45: IFU events needed 0x2000_0000_0000
130 * 44: IDU0|ISU2 events needed 0x1000_0000_0000
131 * 43: ISU1 events needed 0x0800_0000_0000
132 *
133 * TTM2SEL0
134 * 42: 0 = IDU0 events needed
135 * 1 = ISU2 events needed 0x0400_0000_0000
136 *
137 * TTC_IFU_SEL
138 * 41: 0 = IFU.U events needed
139 * 1 = IFU.L events needed 0x0200_0000_0000
140 *
141 * TTC3SEL
142 * 40: 0 = LSU1.U events needed
143 * 1 = LSU1.L events needed 0x0100_0000_0000
144 *
145 * PS1
146 * 39: PS1 error 0x0080_0000_0000
147 * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
148 *
149 * PS2
150 * 35: PS2 error 0x0008_0000_0000
151 * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
152 *
153 * B0
154 * 28-31: Byte 0 event source 0xf000_0000
155 * 1 = FPU
156 * 2 = ISU1
157 * 3 = IFU
158 * 4 = IDU0
159 * 7 = ISU2
160 * 9 = LSU0
161 * c = LSU1
162 * f = GPS
163 *
164 * B1, B2, B3
165 * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
166 *
167 * P8
168 * 15: P8 error 0x8000
169 * 14-15: Count of events needing PMC8
170 *
171 * P1..P7
172 * 0-13: Count of events needing PMC1..PMC7
173 *
174 * Note: this doesn't allow events using IFU.U to be combined with events
175 * using IFU.L, though that is feasible (using TTM0 and TTM2). However
176 * there are no listed events for IFU.L (they are debug events not
177 * verified for performance monitoring) so this shouldn't cause a
178 * problem.
179 */
180
181static struct unitinfo {
182 u64 value, mask;
183 int unit;
184 int lowerbit;
185} p4_unitinfo[16] = {
186 [PM_FPU] = { 0x44000000000000ull, 0x88000000000000ull, PM_FPU, 0 },
187 [PM_ISU1] = { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 },
188 [PM_ISU1_ALT] =
189 { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 },
190 [PM_IFU] = { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 },
191 [PM_IFU_ALT] =
192 { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 },
193 [PM_IDU0] = { 0x10100000000000ull, 0x80840000000000ull, PM_IDU0, 1 },
194 [PM_ISU2] = { 0x10140000000000ull, 0x80840000000000ull, PM_ISU2, 0 },
195 [PM_LSU0] = { 0x01400000000000ull, 0x08800000000000ull, PM_LSU0, 0 },
196 [PM_LSU1] = { 0x00000000000000ull, 0x00010000000000ull, PM_LSU1, 40 },
197 [PM_GPS] = { 0x00000000000000ull, 0x00000000000000ull, PM_GPS, 0 }
198};
199
200static unsigned char direct_marked_event[8] = {
201 (1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
202 (1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
203 (1<<3), /* PMC3: PM_MRK_ST_CMPL_INT */
204 (1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
205 (1<<4) | (1<<5), /* PMC5: PM_MRK_GRP_TIMEO */
206 (1<<3) | (1<<4) | (1<<5),
207 /* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
208 (1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
209 (1<<4), /* PMC8: PM_MRK_LSU_FIN */
210};
211
212/*
213 * Returns 1 if event counts things relating to marked instructions
214 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
215 */
216static int p4_marked_instr_event(unsigned int event)
217{
218 int pmc, psel, unit, byte, bit;
219 unsigned int mask;
220
221 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
222 psel = event & PM_PMCSEL_MSK;
223 if (pmc) {
224 if (direct_marked_event[pmc - 1] & (1 << psel))
225 return 1;
226 if (psel == 0) /* add events */
227 bit = (pmc <= 4)? pmc - 1: 8 - pmc;
228 else if (psel == 6) /* decode events */
229 bit = 4;
230 else
231 return 0;
232 } else
233 bit = psel;
234
235 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
236 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
237 mask = 0;
238 switch (unit) {
239 case PM_LSU1:
240 if (event & PM_LOWER_MSKS)
241 mask = 1 << 28; /* byte 7 bit 4 */
242 else
243 mask = 6 << 24; /* byte 3 bits 1 and 2 */
244 break;
245 case PM_LSU0:
246 /* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */
247 mask = 0x083dff00;
248 }
249 return (mask >> (byte * 8 + bit)) & 1;
250}
251
252static int p4_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
253{
254 int pmc, byte, unit, lower, sh;
255 u64 mask = 0, value = 0;
256 int grp = -1;
257
258 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
259 if (pmc) {
260 if (pmc > 8)
261 return -1;
262 sh = (pmc - 1) * 2;
263 mask |= 2 << sh;
264 value |= 1 << sh;
265 grp = ((pmc - 1) >> 1) & 1;
266 }
267 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
268 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
269 if (unit) {
270 lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK;
271
272 /*
273 * Bus events on bytes 0 and 2 can be counted
274 * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
275 */
276 if (!pmc)
277 grp = byte & 1;
278
279 if (!p4_unitinfo[unit].unit)
280 return -1;
281 mask |= p4_unitinfo[unit].mask;
282 value |= p4_unitinfo[unit].value;
283 sh = p4_unitinfo[unit].lowerbit;
284 if (sh > 1)
285 value |= (u64)lower << sh;
286 else if (lower != sh)
287 return -1;
288 unit = p4_unitinfo[unit].unit;
289
290 /* Set byte lane select field */
291 mask |= 0xfULL << (28 - 4 * byte);
292 value |= (u64)unit << (28 - 4 * byte);
293 }
294 if (grp == 0) {
295 /* increment PMC1/2/5/6 field */
296 mask |= 0x8000000000ull;
297 value |= 0x1000000000ull;
298 } else {
299 /* increment PMC3/4/7/8 field */
300 mask |= 0x800000000ull;
301 value |= 0x100000000ull;
302 }
303
304 /* Marked instruction events need sample_enable set */
305 if (p4_marked_instr_event(event)) {
306 mask |= 1ull << 56;
307 value |= 1ull << 56;
308 }
309
310 /* PMCSEL=6 decode events on byte 2 need sample_enable clear */
311 if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2)
312 mask |= 1ull << 56;
313
314 *maskp = mask;
315 *valp = value;
316 return 0;
317}
318
319static unsigned int ppc_inst_cmpl[] = {
320 0x1001, 0x4001, 0x6001, 0x7001, 0x8001
321};
322
323static int p4_get_alternatives(unsigned int event, unsigned int alt[])
324{
325 int i, j, na;
326
327 alt[0] = event;
328 na = 1;
329
330 /* 2 possibilities for PM_GRP_DISP_REJECT */
331 if (event == 0x8003 || event == 0x0224) {
332 alt[1] = event ^ (0x8003 ^ 0x0224);
333 return 2;
334 }
335
336 /* 2 possibilities for PM_ST_MISS_L1 */
337 if (event == 0x0c13 || event == 0x0c23) {
338 alt[1] = event ^ (0x0c13 ^ 0x0c23);
339 return 2;
340 }
341
342 /* several possibilities for PM_INST_CMPL */
343 for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) {
344 if (event == ppc_inst_cmpl[i]) {
345 for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j)
346 if (j != i)
347 alt[na++] = ppc_inst_cmpl[j];
348 break;
349 }
350 }
351
352 return na;
353}
354
355static int p4_compute_mmcr(unsigned int event[], int n_ev,
356 unsigned int hwc[], u64 mmcr[])
357{
358 u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0;
359 unsigned int pmc, unit, byte, psel, lower;
360 unsigned int ttm, grp;
361 unsigned int pmc_inuse = 0;
362 unsigned int pmc_grp_use[2];
363 unsigned char busbyte[4];
364 unsigned char unituse[16];
365 unsigned int unitlower = 0;
366 int i;
367
368 if (n_ev > 8)
369 return -1;
370
371 /* First pass to count resource use */
372 pmc_grp_use[0] = pmc_grp_use[1] = 0;
373 memset(busbyte, 0, sizeof(busbyte));
374 memset(unituse, 0, sizeof(unituse));
375 for (i = 0; i < n_ev; ++i) {
376 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
377 if (pmc) {
378 if (pmc_inuse & (1 << (pmc - 1)))
379 return -1;
380 pmc_inuse |= 1 << (pmc - 1);
381 /* count 1/2/5/6 vs 3/4/7/8 use */
382 ++pmc_grp_use[((pmc - 1) >> 1) & 1];
383 }
384 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
385 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
386 lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK;
387 if (unit) {
388 if (!pmc)
389 ++pmc_grp_use[byte & 1];
390 if (unit == 6 || unit == 8)
391 /* map alt ISU1/IFU codes: 6->2, 8->3 */
392 unit = (unit >> 1) - 1;
393 if (busbyte[byte] && busbyte[byte] != unit)
394 return -1;
395 busbyte[byte] = unit;
396 lower <<= unit;
397 if (unituse[unit] && lower != (unitlower & lower))
398 return -1;
399 unituse[unit] = 1;
400 unitlower |= lower;
401 }
402 }
403 if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
404 return -1;
405
406 /*
407 * Assign resources and set multiplexer selects.
408 *
409 * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,10 on TTM2.
410 * Each TTMx can only select one unit, but since
411 * units 2 and 6 are both ISU1, and 3 and 8 are both IFU,
412 * we have some choices.
413 */
414 if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) {
415 unituse[6] = 1; /* Move 2 to 6 */
416 unituse[2] = 0;
417 }
418 if (unituse[3] & (unituse[1] | unituse[2])) {
419 unituse[8] = 1; /* Move 3 to 8 */
420 unituse[3] = 0;
421 unitlower = (unitlower & ~8) | ((unitlower & 8) << 5);
422 }
423 /* Check only one unit per TTMx */
424 if (unituse[1] + unituse[2] + unituse[3] > 1 ||
425 unituse[4] + unituse[6] + unituse[7] > 1 ||
426 unituse[8] + unituse[9] > 1 ||
427 (unituse[5] | unituse[10] | unituse[11] |
428 unituse[13] | unituse[14]))
429 return -1;
430
431 /* Set TTMxSEL fields. Note, units 1-3 => TTM0SEL codes 0-2 */
432 mmcr1 |= (u64)(unituse[3] * 2 + unituse[2]) << MMCR1_TTM0SEL_SH;
433 mmcr1 |= (u64)(unituse[7] * 3 + unituse[6] * 2) << MMCR1_TTM1SEL_SH;
434 mmcr1 |= (u64)unituse[9] << MMCR1_TTM2SEL_SH;
435
436 /* Set TTCxSEL fields. */
437 if (unitlower & 0xe)
438 mmcr1 |= 1ull << MMCR1_TTC0SEL_SH;
439 if (unitlower & 0xf0)
440 mmcr1 |= 1ull << MMCR1_TTC1SEL_SH;
441 if (unitlower & 0xf00)
442 mmcr1 |= 1ull << MMCR1_TTC2SEL_SH;
443 if (unitlower & 0x7000)
444 mmcr1 |= 1ull << MMCR1_TTC3SEL_SH;
445
446 /* Set byte lane select fields. */
447 for (byte = 0; byte < 4; ++byte) {
448 unit = busbyte[byte];
449 if (!unit)
450 continue;
451 if (unit == 0xf) {
452 /* special case for GPS */
453 mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte);
454 } else {
455 if (!unituse[unit])
456 ttm = unit - 1; /* 2->1, 3->2 */
457 else
458 ttm = unit >> 2;
459 mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2*byte);
460 }
461 }
462
463 /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
464 for (i = 0; i < n_ev; ++i) {
465 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
466 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
467 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
468 psel = event[i] & PM_PMCSEL_MSK;
469 if (!pmc) {
470 /* Bus event or 00xxx direct event (off or cycles) */
471 if (unit)
472 psel |= 0x10 | ((byte & 2) << 2);
473 for (pmc = 0; pmc < 8; ++pmc) {
474 if (pmc_inuse & (1 << pmc))
475 continue;
476 grp = (pmc >> 1) & 1;
477 if (unit) {
478 if (grp == (byte & 1))
479 break;
480 } else if (pmc_grp_use[grp] < 4) {
481 ++pmc_grp_use[grp];
482 break;
483 }
484 }
485 pmc_inuse |= 1 << pmc;
486 } else {
487 /* Direct event */
488 --pmc;
489 if (psel == 0 && (byte & 2))
490 /* add events on higher-numbered bus */
491 mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
492 else if (psel == 6 && byte == 3)
493 /* seem to need to set sample_enable here */
494 mmcra |= MMCRA_SAMPLE_ENABLE;
495 psel |= 8;
496 }
497 if (pmc <= 1)
498 mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc);
499 else
500 mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
501 if (pmc == 7) /* PMC8 */
502 mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH;
503 hwc[i] = pmc;
504 if (p4_marked_instr_event(event[i]))
505 mmcra |= MMCRA_SAMPLE_ENABLE;
506 }
507
508 if (pmc_inuse & 1)
509 mmcr0 |= MMCR0_PMC1CE;
510 if (pmc_inuse & 0xfe)
511 mmcr0 |= MMCR0_PMCjCE;
512
513 mmcra |= 0x2000; /* mark only one IOP per PPC instruction */
514
515 /* Return MMCRx values */
516 mmcr[0] = mmcr0;
517 mmcr[1] = mmcr1;
518 mmcr[2] = mmcra;
519 return 0;
520}
521
522static void p4_disable_pmc(unsigned int pmc, u64 mmcr[])
523{
524 /*
525 * Setting the PMCxSEL field to 0 disables PMC x.
526 * (Note that pmc is 0-based here, not 1-based.)
527 */
528 if (pmc <= 1) {
529 mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc));
530 } else {
531 mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)));
532 if (pmc == 7)
533 mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH);
534 }
535}
536
537static int p4_generic_events[] = {
538 [PERF_COUNT_CPU_CYCLES] = 7,
539 [PERF_COUNT_INSTRUCTIONS] = 0x1001,
540 [PERF_COUNT_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */
541 [PERF_COUNT_CACHE_MISSES] = 0x3c10, /* PM_LD_MISS_L1 */
542 [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x330, /* PM_BR_ISSUED */
543 [PERF_COUNT_BRANCH_MISSES] = 0x331, /* PM_BR_MPRED_CR */
544};
545
546struct power_pmu power4_pmu = {
547 .n_counter = 8,
548 .max_alternatives = 5,
549 .add_fields = 0x0000001100005555ull,
550 .test_adder = 0x0011083300000000ull,
551 .compute_mmcr = p4_compute_mmcr,
552 .get_constraint = p4_get_constraint,
553 .get_alternatives = p4_get_alternatives,
554 .disable_pmc = p4_disable_pmc,
555 .n_generic = ARRAY_SIZE(p4_generic_events),
556 .generic_events = p4_generic_events,
557};
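
As a reading aid, here is a hypothetical standalone sketch (not part of the patch) that decodes two of the p4_generic_events codes above into their PMC / unit / byte / PMCSEL fields. The shift and mask values are copied from the PM_* definitions at the top of power4-pmu.c; the interpretation in the comments follows that same file.

#include <stdio.h>

#define PM_PMC_SH       12      /* PMC number (1-based) for direct events */
#define PM_PMC_MSK      0xf
#define PM_UNIT_SH      8       /* TTMMUX number and setting - unit select */
#define PM_UNIT_MSK     0xf
#define PM_BYTE_SH      4       /* byte of the event bus to use */
#define PM_BYTE_MSK     3
#define PM_PMCSEL_MSK   7

static void decode(unsigned int event)
{
        unsigned int pmc  = (event >> PM_PMC_SH) & PM_PMC_MSK;
        unsigned int unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
        unsigned int byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
        unsigned int psel = event & PM_PMCSEL_MSK;

        printf("event 0x%x: pmc=%u unit=%u byte=%u pmcsel=%u\n",
               event, pmc, unit, byte, psel);
}

int main(void)
{
        decode(0x1001); /* PM_INST_CMPL: direct event on PMC1, PMCSEL 1 */
        decode(0x8c10); /* PM_LD_REF_L1: PMC8, bus event from unit 0xc (LSU1) */
        return 0;
}
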
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
new file mode 100644
index 000000000000..cec21ea65b0e
--- /dev/null
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -0,0 +1,452 @@
1/*
2 * Performance counter support for POWER5+/++ (not POWER5) processors.
3 *
4 * Copyright 2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/perf_counter.h>
13#include <asm/reg.h>
14
15/*
16 * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3)
17 */
18#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
19#define PM_PMC_MSK 0xf
20#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
21#define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */
22#define PM_UNIT_MSK 0xf
23#define PM_BYTE_SH 12 /* Byte number of event bus to use */
24#define PM_BYTE_MSK 7
25#define PM_GRS_SH 8 /* Storage subsystem mux select */
26#define PM_GRS_MSK 7
27#define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */
28#define PM_PMCSEL_MSK 0x7f
29
30/* Values in PM_UNIT field */
31#define PM_FPU 0
32#define PM_ISU0 1
33#define PM_IFU 2
34#define PM_ISU1 3
35#define PM_IDU 4
36#define PM_ISU0_ALT 6
37#define PM_GRS 7
38#define PM_LSU0 8
39#define PM_LSU1 0xc
40#define PM_LASTUNIT 0xc
41
42/*
43 * Bits in MMCR1 for POWER5+
44 */
45#define MMCR1_TTM0SEL_SH 62
46#define MMCR1_TTM1SEL_SH 60
47#define MMCR1_TTM2SEL_SH 58
48#define MMCR1_TTM3SEL_SH 56
49#define MMCR1_TTMSEL_MSK 3
50#define MMCR1_TD_CP_DBG0SEL_SH 54
51#define MMCR1_TD_CP_DBG1SEL_SH 52
52#define MMCR1_TD_CP_DBG2SEL_SH 50
53#define MMCR1_TD_CP_DBG3SEL_SH 48
54#define MMCR1_GRS_L2SEL_SH 46
55#define MMCR1_GRS_L2SEL_MSK 3
56#define MMCR1_GRS_L3SEL_SH 44
57#define MMCR1_GRS_L3SEL_MSK 3
58#define MMCR1_GRS_MCSEL_SH 41
59#define MMCR1_GRS_MCSEL_MSK 7
60#define MMCR1_GRS_FABSEL_SH 39
61#define MMCR1_GRS_FABSEL_MSK 3
62#define MMCR1_PMC1_ADDER_SEL_SH 35
63#define MMCR1_PMC2_ADDER_SEL_SH 34
64#define MMCR1_PMC3_ADDER_SEL_SH 33
65#define MMCR1_PMC4_ADDER_SEL_SH 32
66#define MMCR1_PMC1SEL_SH 25
67#define MMCR1_PMC2SEL_SH 17
68#define MMCR1_PMC3SEL_SH 9
69#define MMCR1_PMC4SEL_SH 1
70#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
71#define MMCR1_PMCSEL_MSK 0x7f
72
73/*
74 * Bits in MMCRA
75 */
76
77/*
78 * Layout of constraint bits:
79 * 6666555555555544444444443333333333222222222211111111110000000000
80 * 3210987654321098765432109876543210987654321098765432109876543210
81 * [ ><><>< ><> <><>[ > < >< >< >< ><><><><>
82 * NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P4P3P2P1
83 *
84 * NC - number of counters
85 * 51: NC error 0x0008_0000_0000_0000
86 * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000
87 *
88 * G0..G3 - GRS mux constraints
89 * 46-47: GRS_L2SEL value
90 * 44-45: GRS_L3SEL value
91 * 41-43: GRS_MCSEL value
92 * 39-40: GRS_FABSEL value
93 * Note that these match up with their bit positions in MMCR1
94 *
95 * T0 - TTM0 constraint
96 * 36-37: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0x30_0000_0000
97 *
98 * T1 - TTM1 constraint
99 * 34-35: TTM1SEL value (0=IDU, 3=GRS) 0x0c_0000_0000
100 *
101 * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS
102 * 33: UC3 error 0x02_0000_0000
103 * 32: FPU|IFU|ISU1 events needed 0x01_0000_0000
104 * 31: ISU0 events needed 0x00_8000_0000
105 * 30: IDU|GRS events needed 0x00_4000_0000
106 *
107 * B0
108 * 20-23: Byte 0 event source 0x00f0_0000
109 * Encoding as for the event code
110 *
111 * B1, B2, B3
112 * 16-19, 12-15, 8-11: Byte 1, 2, 3 event sources
113 *
114 * P4
115 * 7: P4 error 0x80
116 * 6-7: Count of events needing PMC4
117 *
118 * P1..P3
119 * 0-5: Count of events needing PMC1..PMC3
120 */
121
122static const int grsel_shift[8] = {
123 MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
124 MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
125 MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
126};
127
128/* Masks and values for using events from the various units */
129static u64 unit_cons[PM_LASTUNIT+1][2] = {
130 [PM_FPU] = { 0x3200000000ull, 0x0100000000ull },
131 [PM_ISU0] = { 0x0200000000ull, 0x0080000000ull },
132 [PM_ISU1] = { 0x3200000000ull, 0x3100000000ull },
133 [PM_IFU] = { 0x3200000000ull, 0x2100000000ull },
134 [PM_IDU] = { 0x0e00000000ull, 0x0040000000ull },
135 [PM_GRS] = { 0x0e00000000ull, 0x0c40000000ull },
136};
137
138static int power5p_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
139{
140 int pmc, byte, unit, sh;
141 int bit, fmask;
142 u64 mask = 0, value = 0;
143
144 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
145 if (pmc) {
146 if (pmc > 4)
147 return -1;
148 sh = (pmc - 1) * 2;
149 mask |= 2 << sh;
150 value |= 1 << sh;
151 }
152 if (event & PM_BUSEVENT_MSK) {
153 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
154 if (unit > PM_LASTUNIT)
155 return -1;
156 if (unit == PM_ISU0_ALT)
157 unit = PM_ISU0;
158 mask |= unit_cons[unit][0];
159 value |= unit_cons[unit][1];
160 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
161 if (byte >= 4) {
162 if (unit != PM_LSU1)
163 return -1;
164 /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
165 ++unit;
166 byte &= 3;
167 }
168 if (unit == PM_GRS) {
169 bit = event & 7;
170 fmask = (bit == 6)? 7: 3;
171 sh = grsel_shift[bit];
172 mask |= (u64)fmask << sh;
173 value |= (u64)((event >> PM_GRS_SH) & fmask) << sh;
174 }
175 /* Set byte lane select field */
176 mask |= 0xfULL << (20 - 4 * byte);
177 value |= (u64)unit << (20 - 4 * byte);
178 }
179 mask |= 0x8000000000000ull;
180 value |= 0x1000000000000ull;
181 *maskp = mask;
182 *valp = value;
183 return 0;
184}
185
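[Editor's illustration, not part of the patch.] The (mask, value) pair built above encodes both mux-select requirements and counter-usage counts. Below is a minimal sketch of the select-field half of the idea, assuming nothing beyond two such pairs; it is not the kernel's scheduler, which combines whole pairs through the add_fields/test_adder arithmetic and also catches counter-usage conflicts.

static int select_fields_compatible(u64 mask_a, u64 val_a,
				    u64 mask_b, u64 val_b)
{
	u64 overlap = mask_a & mask_b;

	/*
	 * Two events can share the PMU only if they request the same
	 * mux settings wherever their constraint masks overlap; the
	 * counter-count (adder) fields are not checked here.
	 */
	return (val_a & overlap) == (val_b & overlap);
}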
186#define MAX_ALT 3 /* at most 3 alternatives for any event */
187
188static const unsigned int event_alternatives[][MAX_ALT] = {
189 { 0x100c0, 0x40001f }, /* PM_GCT_FULL_CYC */
190 { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */
191 { 0x230e2, 0x323087 }, /* PM_BR_PRED_CR */
192 { 0x230e3, 0x223087, 0x3230a0 }, /* PM_BR_PRED_TA */
193 { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */
194 { 0x800c4, 0xc20e0 }, /* PM_DTLB_MISS */
195 { 0xc50c6, 0xc60e0 }, /* PM_MRK_DTLB_MISS */
196 { 0x100009, 0x200009 }, /* PM_INST_CMPL */
197 { 0x200015, 0x300015 }, /* PM_LSU_LMQ_SRQ_EMPTY_CYC */
198 { 0x300009, 0x400009 }, /* PM_INST_DISP */
199};
200
201/*
202 * Scan the alternatives table for a match and return the
203 * index into the alternatives table if found, else -1.
204 */
205static int find_alternative(unsigned int event)
206{
207 int i, j;
208
209 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
210 if (event < event_alternatives[i][0])
211 break;
212 for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
213 if (event == event_alternatives[i][j])
214 return i;
215 }
216 return -1;
217}
218
219static const unsigned char bytedecode_alternatives[4][4] = {
220 /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 },
221 /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e },
222 /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 },
223 /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e }
224};
225
226/*
227 * Some direct events for decodes of event bus byte 3 have alternative
228 * PMCSEL values on other counters. This returns the alternative
229 * event code for those that do, or -1 otherwise. This also handles
230 * alternative PMCSEL values for add events.
231 */
232static int find_alternative_bdecode(unsigned int event)
233{
234 int pmc, altpmc, pp, j;
235
236 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
237 if (pmc == 0 || pmc > 4)
238 return -1;
239 altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */
240 pp = event & PM_PMCSEL_MSK;
241 for (j = 0; j < 4; ++j) {
242 if (bytedecode_alternatives[pmc - 1][j] == pp) {
243 return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
244 (altpmc << PM_PMC_SH) |
245 bytedecode_alternatives[altpmc - 1][j];
246 }
247 }
248
249 /* new decode alternatives for power5+ */
250 if (pmc == 1 && (pp == 0x0d || pp == 0x0e))
251 return event + (2 << PM_PMC_SH) + (0x2e - 0x0d);
252 if (pmc == 3 && (pp == 0x2e || pp == 0x2f))
253 return event - (2 << PM_PMC_SH) - (0x2e - 0x0d);
254
255 /* alternative add event encodings */
256 if (pp == 0x10 || pp == 0x28)
257 return ((event ^ (0x10 ^ 0x28)) & ~PM_PMC_MSKS) |
258 (altpmc << PM_PMC_SH);
259
260 return -1;
261}
262
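[Editor's worked example, derived from the table and rules above.] A byte-3 decode event coded for PMC 1 with PMCSEL 0x21 gains an equivalent form on PMC 4 with PMCSEL 0x07 (column 0 of the table), and by the POWER5+-specific rule a PMC 1 event with PMCSEL 0x0d maps to PMC 3 with PMCSEL 0x2e.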
263static int power5p_get_alternatives(unsigned int event, unsigned int alt[])
264{
265 int i, j, ae, nalt = 1;
266
267 alt[0] = event;
268 nalt = 1;
269 i = find_alternative(event);
270 if (i >= 0) {
271 for (j = 0; j < MAX_ALT; ++j) {
272 ae = event_alternatives[i][j];
273 if (ae && ae != event)
274 alt[nalt++] = ae;
275 }
276 } else {
277 ae = find_alternative_bdecode(event);
278 if (ae > 0)
279 alt[nalt++] = ae;
280 }
281 return nalt;
282}
283
284static int power5p_compute_mmcr(unsigned int event[], int n_ev,
285 unsigned int hwc[], u64 mmcr[])
286{
287 u64 mmcr1 = 0;
288 unsigned int pmc, unit, byte, psel;
289 unsigned int ttm;
290 int i, isbus, bit, grsel;
291 unsigned int pmc_inuse = 0;
292 unsigned char busbyte[4];
293 unsigned char unituse[16];
294 int ttmuse;
295
296 if (n_ev > 4)
297 return -1;
298
299 /* First pass to count resource use */
300 memset(busbyte, 0, sizeof(busbyte));
301 memset(unituse, 0, sizeof(unituse));
302 for (i = 0; i < n_ev; ++i) {
303 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
304 if (pmc) {
305 if (pmc > 4)
306 return -1;
307 if (pmc_inuse & (1 << (pmc - 1)))
308 return -1;
309 pmc_inuse |= 1 << (pmc - 1);
310 }
311 if (event[i] & PM_BUSEVENT_MSK) {
312 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
313 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
314 if (unit > PM_LASTUNIT)
315 return -1;
316 if (unit == PM_ISU0_ALT)
317 unit = PM_ISU0;
318 if (byte >= 4) {
319 if (unit != PM_LSU1)
320 return -1;
321 ++unit;
322 byte &= 3;
323 }
324 if (busbyte[byte] && busbyte[byte] != unit)
325 return -1;
326 busbyte[byte] = unit;
327 unituse[unit] = 1;
328 }
329 }
330
331 /*
332 * Assign resources and set multiplexer selects.
333 *
334 * PM_ISU0 can go either on TTM0 or TTM1, but that's the only
335 * choice we have to deal with.
336 */
337 if (unituse[PM_ISU0] &
338 (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
339 unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */
340 unituse[PM_ISU0] = 0;
341 }
342 /* Set TTM[01]SEL fields. */
343 ttmuse = 0;
344 for (i = PM_FPU; i <= PM_ISU1; ++i) {
345 if (!unituse[i])
346 continue;
347 if (ttmuse++)
348 return -1;
349 mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH;
350 }
351 ttmuse = 0;
352 for (; i <= PM_GRS; ++i) {
353 if (!unituse[i])
354 continue;
355 if (ttmuse++)
356 return -1;
357 mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH;
358 }
359 if (ttmuse > 1)
360 return -1;
361
362 /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
363 for (byte = 0; byte < 4; ++byte) {
364 unit = busbyte[byte];
365 if (!unit)
366 continue;
367 if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
368 /* get ISU0 through TTM1 rather than TTM0 */
369 unit = PM_ISU0_ALT;
370 } else if (unit == PM_LSU1 + 1) {
371 /* select lower word of LSU1 for this byte */
372 mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
373 }
374 ttm = unit >> 2;
375 mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
376 }
377
378 /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
379 for (i = 0; i < n_ev; ++i) {
380 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
381 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
382 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
383 psel = event[i] & PM_PMCSEL_MSK;
384 isbus = event[i] & PM_BUSEVENT_MSK;
385 if (!pmc) {
386 /* Bus event or any-PMC direct event */
387 for (pmc = 0; pmc < 4; ++pmc) {
388 if (!(pmc_inuse & (1 << pmc)))
389 break;
390 }
391 if (pmc >= 4)
392 return -1;
393 pmc_inuse |= 1 << pmc;
394 } else {
395 /* Direct event */
396 --pmc;
397 if (isbus && (byte & 2) &&
398 (psel == 8 || psel == 0x10 || psel == 0x28))
399 /* add events on higher-numbered bus */
400 mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
401 }
402 if (isbus && unit == PM_GRS) {
403 bit = psel & 7;
404 grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
405 mmcr1 |= (u64)grsel << grsel_shift[bit];
406 }
407 if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1))
408 /* select alternate byte lane */
409 psel |= 0x10;
410 if (pmc <= 3)
411 mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
412 hwc[i] = pmc;
413 }
414
415 /* Return MMCRx values */
416 mmcr[0] = 0;
417 if (pmc_inuse & 1)
418 mmcr[0] = MMCR0_PMC1CE;
419 if (pmc_inuse & 0x3e)
420 mmcr[0] |= MMCR0_PMCjCE;
421 mmcr[1] = mmcr1;
422 mmcr[2] = 0;
423 return 0;
424}
425
426static void power5p_disable_pmc(unsigned int pmc, u64 mmcr[])
427{
428 if (pmc <= 3)
429 mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
430}
431
432static int power5p_generic_events[] = {
433 [PERF_COUNT_CPU_CYCLES] = 0xf,
434 [PERF_COUNT_INSTRUCTIONS] = 0x100009,
435 [PERF_COUNT_CACHE_REFERENCES] = 0x1c10a8, /* LD_REF_L1 */
436 [PERF_COUNT_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */
437 [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */
438 [PERF_COUNT_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */
439};
440
441struct power_pmu power5p_pmu = {
442 .n_counter = 4,
443 .max_alternatives = MAX_ALT,
444 .add_fields = 0x7000000000055ull,
445 .test_adder = 0x3000040000000ull,
446 .compute_mmcr = power5p_compute_mmcr,
447 .get_constraint = power5p_get_constraint,
448 .get_alternatives = power5p_get_alternatives,
449 .disable_pmc = power5p_disable_pmc,
450 .n_generic = ARRAY_SIZE(power5p_generic_events),
451 .generic_events = power5p_generic_events,
452};
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
new file mode 100644
index 000000000000..379ed1087cca
--- /dev/null
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -0,0 +1,475 @@
1/*
2 * Performance counter support for POWER5 (not POWER5+) processors.
3 *
4 * Copyright 2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/perf_counter.h>
13#include <asm/reg.h>
14
15/*
16 * Bits in event code for POWER5 (not POWER5+)
17 */
18#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
19#define PM_PMC_MSK 0xf
20#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
21#define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */
22#define PM_UNIT_MSK 0xf
23#define PM_BYTE_SH 12 /* Byte number of event bus to use */
24#define PM_BYTE_MSK 7
25#define PM_GRS_SH 8 /* Storage subsystem mux select */
26#define PM_GRS_MSK 7
27#define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */
28#define PM_PMCSEL_MSK 0x7f
29
30/* Values in PM_UNIT field */
31#define PM_FPU 0
32#define PM_ISU0 1
33#define PM_IFU 2
34#define PM_ISU1 3
35#define PM_IDU 4
36#define PM_ISU0_ALT 6
37#define PM_GRS 7
38#define PM_LSU0 8
39#define PM_LSU1 0xc
40#define PM_LASTUNIT 0xc
41
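[Editor's illustration, not part of the patch.] As a worked example of this encoding, the BR_ISSUED code 0x230e4 used in power5_generic_events further down decodes to PMC field 0 (any counter), unit 2 (PM_IFU), event-bus byte 3 and PMCSEL 0x64, with PM_BUSEVENT_MSK set. The helper below only applies the masks and shifts defined above:

static inline void power5_decode_example(void)
{
	unsigned int ev   = 0x230e4;				/* BR_ISSUED */
	unsigned int pmc  = (ev >> PM_PMC_SH) & PM_PMC_MSK;	/* 0: any PMC */
	unsigned int unit = (ev >> PM_UNIT_SH) & PM_UNIT_MSK;	/* 2 = PM_IFU */
	unsigned int byte = (ev >> PM_BYTE_SH) & PM_BYTE_MSK;	/* 3 */
	unsigned int psel = ev & PM_PMCSEL_MSK;			/* 0x64 */

	/* ev & PM_BUSEVENT_MSK is non-zero, so this is a bus event */
	(void)pmc; (void)unit; (void)byte; (void)psel;
}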
42/*
43 * Bits in MMCR1 for POWER5
44 */
45#define MMCR1_TTM0SEL_SH 62
46#define MMCR1_TTM1SEL_SH 60
47#define MMCR1_TTM2SEL_SH 58
48#define MMCR1_TTM3SEL_SH 56
49#define MMCR1_TTMSEL_MSK 3
50#define MMCR1_TD_CP_DBG0SEL_SH 54
51#define MMCR1_TD_CP_DBG1SEL_SH 52
52#define MMCR1_TD_CP_DBG2SEL_SH 50
53#define MMCR1_TD_CP_DBG3SEL_SH 48
54#define MMCR1_GRS_L2SEL_SH 46
55#define MMCR1_GRS_L2SEL_MSK 3
56#define MMCR1_GRS_L3SEL_SH 44
57#define MMCR1_GRS_L3SEL_MSK 3
58#define MMCR1_GRS_MCSEL_SH 41
59#define MMCR1_GRS_MCSEL_MSK 7
60#define MMCR1_GRS_FABSEL_SH 39
61#define MMCR1_GRS_FABSEL_MSK 3
62#define MMCR1_PMC1_ADDER_SEL_SH 35
63#define MMCR1_PMC2_ADDER_SEL_SH 34
64#define MMCR1_PMC3_ADDER_SEL_SH 33
65#define MMCR1_PMC4_ADDER_SEL_SH 32
66#define MMCR1_PMC1SEL_SH 25
67#define MMCR1_PMC2SEL_SH 17
68#define MMCR1_PMC3SEL_SH 9
69#define MMCR1_PMC4SEL_SH 1
70#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
71#define MMCR1_PMCSEL_MSK 0x7f
72
73/*
74 * Bits in MMCRA
75 */
76
77/*
78 * Layout of constraint bits:
79 * 6666555555555544444444443333333333222222222211111111110000000000
80 * 3210987654321098765432109876543210987654321098765432109876543210
81 * <><>[ ><><>< ><> [ >[ >[ >< >< >< >< ><><><><><><>
82 * T0T1 NC G0G1G2 G3 UC PS1PS2 B0 B1 B2 B3 P6P5P4P3P2P1
83 *
84 * T0 - TTM0 constraint
85 * 54-55: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0xc0_0000_0000_0000
86 *
87 * T1 - TTM1 constraint
88 * 52-53: TTM1SEL value (0=IDU, 3=GRS) 0x30_0000_0000_0000
89 *
90 * NC - number of counters
91 * 51: NC error 0x0008_0000_0000_0000
92 * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000
93 *
94 * G0..G3 - GRS mux constraints
95 * 46-47: GRS_L2SEL value
96 * 44-45: GRS_L3SEL value
97 * 41-43: GRS_MCSEL value
98 * 39-40: GRS_FABSEL value
99 * Note that these match up with their bit positions in MMCR1
100 *
101 * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS
102 * 37: UC3 error 0x20_0000_0000
103 * 36: FPU|IFU|ISU1 events needed 0x10_0000_0000
104 * 35: ISU0 events needed 0x08_0000_0000
105 * 34: IDU|GRS events needed 0x04_0000_0000
106 *
107 * PS1
108 * 33: PS1 error 0x2_0000_0000
109 * 31-32: count of events needing PMC1/2 0x1_8000_0000
110 *
111 * PS2
112 * 30: PS2 error 0x4000_0000
113 * 28-29: count of events needing PMC3/4 0x3000_0000
114 *
115 * B0
116 * 24-27: Byte 0 event source 0x0f00_0000
117 * Encoding as for the event code
118 *
119 * B1, B2, B3
120 * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources
121 *
122 * P1..P6
123 * 0-11: Count of events needing PMC1..PMC6
124 */
125
126static const int grsel_shift[8] = {
127 MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
128 MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
129 MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
130};
131
132/* Masks and values for using events from the various units */
133static u64 unit_cons[PM_LASTUNIT+1][2] = {
134 [PM_FPU] = { 0xc0002000000000ull, 0x00001000000000ull },
135 [PM_ISU0] = { 0x00002000000000ull, 0x00000800000000ull },
136 [PM_ISU1] = { 0xc0002000000000ull, 0xc0001000000000ull },
137 [PM_IFU] = { 0xc0002000000000ull, 0x80001000000000ull },
138 [PM_IDU] = { 0x30002000000000ull, 0x00000400000000ull },
139 [PM_GRS] = { 0x30002000000000ull, 0x30000400000000ull },
140};
141
142static int power5_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
143{
144 int pmc, byte, unit, sh;
145 int bit, fmask;
146 u64 mask = 0, value = 0;
147 int grp = -1;
148
149 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
150 if (pmc) {
151 if (pmc > 6)
152 return -1;
153 sh = (pmc - 1) * 2;
154 mask |= 2 << sh;
155 value |= 1 << sh;
156 if (pmc <= 4)
157 grp = (pmc - 1) >> 1;
158 else if (event != 0x500009 && event != 0x600005)
159 return -1;
160 }
161 if (event & PM_BUSEVENT_MSK) {
162 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
163 if (unit > PM_LASTUNIT)
164 return -1;
165 if (unit == PM_ISU0_ALT)
166 unit = PM_ISU0;
167 mask |= unit_cons[unit][0];
168 value |= unit_cons[unit][1];
169 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
170 if (byte >= 4) {
171 if (unit != PM_LSU1)
172 return -1;
173 /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
174 ++unit;
175 byte &= 3;
176 }
177 if (unit == PM_GRS) {
178 bit = event & 7;
179 fmask = (bit == 6)? 7: 3;
180 sh = grsel_shift[bit];
181 mask |= (u64)fmask << sh;
182 value |= (u64)((event >> PM_GRS_SH) & fmask) << sh;
183 }
184 /*
185 * Bus events on bytes 0 and 2 can be counted
186 * on PMC1/2; bytes 1 and 3 on PMC3/4.
187 */
188 if (!pmc)
189 grp = byte & 1;
190 /* Set byte lane select field */
191 mask |= 0xfULL << (24 - 4 * byte);
192 value |= (u64)unit << (24 - 4 * byte);
193 }
194 if (grp == 0) {
195 /* increment PMC1/2 field */
196 mask |= 0x200000000ull;
197 value |= 0x080000000ull;
198 } else if (grp == 1) {
199 /* increment PMC3/4 field */
200 mask |= 0x40000000ull;
201 value |= 0x10000000ull;
202 }
203 if (pmc < 5) {
204 /* need a counter from PMC1-4 set */
205 mask |= 0x8000000000000ull;
206 value |= 0x1000000000000ull;
207 }
208 *maskp = mask;
209 *valp = value;
210 return 0;
211}
212
213#define MAX_ALT 3 /* at most 3 alternatives for any event */
214
215static const unsigned int event_alternatives[][MAX_ALT] = {
216 { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */
217 { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */
218 { 0x100005, 0x600005 }, /* PM_RUN_CYC */
219 { 0x100009, 0x200009, 0x500009 }, /* PM_INST_CMPL */
220 { 0x300009, 0x400009 }, /* PM_INST_DISP */
221};
222
223/*
224 * Scan the alternatives table for a match and return the
225 * index into the alternatives table if found, else -1.
226 */
227static int find_alternative(unsigned int event)
228{
229 int i, j;
230
231 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
232 if (event < event_alternatives[i][0])
233 break;
234 for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
235 if (event == event_alternatives[i][j])
236 return i;
237 }
238 return -1;
239}
240
241static const unsigned char bytedecode_alternatives[4][4] = {
242 /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 },
243 /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e },
244 /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 },
245 /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e }
246};
247
248/*
249 * Some direct events for decodes of event bus byte 3 have alternative
250 * PMCSEL values on other counters. This returns the alternative
251 * event code for those that do, or -1 otherwise.
252 */
253static int find_alternative_bdecode(unsigned int event)
254{
255 int pmc, altpmc, pp, j;
256
257 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
258 if (pmc == 0 || pmc > 4)
259 return -1;
260 altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */
261 pp = event & PM_PMCSEL_MSK;
262 for (j = 0; j < 4; ++j) {
263 if (bytedecode_alternatives[pmc - 1][j] == pp) {
264 return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
265 (altpmc << PM_PMC_SH) |
266 bytedecode_alternatives[altpmc - 1][j];
267 }
268 }
269 return -1;
270}
271
272static int power5_get_alternatives(unsigned int event, unsigned int alt[])
273{
274 int i, j, ae, nalt = 1;
275
276 alt[0] = event;
277 nalt = 1;
278 i = find_alternative(event);
279 if (i >= 0) {
280 for (j = 0; j < MAX_ALT; ++j) {
281 ae = event_alternatives[i][j];
282 if (ae && ae != event)
283 alt[nalt++] = ae;
284 }
285 } else {
286 ae = find_alternative_bdecode(event);
287 if (ae > 0)
288 alt[nalt++] = ae;
289 }
290 return nalt;
291}
292
293static int power5_compute_mmcr(unsigned int event[], int n_ev,
294 unsigned int hwc[], u64 mmcr[])
295{
296 u64 mmcr1 = 0;
297 unsigned int pmc, unit, byte, psel;
298 unsigned int ttm, grp;
299 int i, isbus, bit, grsel;
300 unsigned int pmc_inuse = 0;
301 unsigned int pmc_grp_use[2];
302 unsigned char busbyte[4];
303 unsigned char unituse[16];
304 int ttmuse;
305
306 if (n_ev > 6)
307 return -1;
308
309 /* First pass to count resource use */
310 pmc_grp_use[0] = pmc_grp_use[1] = 0;
311 memset(busbyte, 0, sizeof(busbyte));
312 memset(unituse, 0, sizeof(unituse));
313 for (i = 0; i < n_ev; ++i) {
314 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
315 if (pmc) {
316 if (pmc > 6)
317 return -1;
318 if (pmc_inuse & (1 << (pmc - 1)))
319 return -1;
320 pmc_inuse |= 1 << (pmc - 1);
321 /* count 1/2 vs 3/4 use */
322 if (pmc <= 4)
323 ++pmc_grp_use[(pmc - 1) >> 1];
324 }
325 if (event[i] & PM_BUSEVENT_MSK) {
326 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
327 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
328 if (unit > PM_LASTUNIT)
329 return -1;
330 if (unit == PM_ISU0_ALT)
331 unit = PM_ISU0;
332 if (byte >= 4) {
333 if (unit != PM_LSU1)
334 return -1;
335 ++unit;
336 byte &= 3;
337 }
338 if (!pmc)
339 ++pmc_grp_use[byte & 1];
340 if (busbyte[byte] && busbyte[byte] != unit)
341 return -1;
342 busbyte[byte] = unit;
343 unituse[unit] = 1;
344 }
345 }
346 if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2)
347 return -1;
348
349 /*
350 * Assign resources and set multiplexer selects.
351 *
352 * PM_ISU0 can go either on TTM0 or TTM1, but that's the only
353 * choice we have to deal with.
354 */
355 if (unituse[PM_ISU0] &
356 (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
357 unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */
358 unituse[PM_ISU0] = 0;
359 }
360 /* Set TTM[01]SEL fields. */
361 ttmuse = 0;
362 for (i = PM_FPU; i <= PM_ISU1; ++i) {
363 if (!unituse[i])
364 continue;
365 if (ttmuse++)
366 return -1;
367 mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH;
368 }
369 ttmuse = 0;
370 for (; i <= PM_GRS; ++i) {
371 if (!unituse[i])
372 continue;
373 if (ttmuse++)
374 return -1;
375 mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH;
376 }
377 if (ttmuse > 1)
378 return -1;
379
380 /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
381 for (byte = 0; byte < 4; ++byte) {
382 unit = busbyte[byte];
383 if (!unit)
384 continue;
385 if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
386 /* get ISU0 through TTM1 rather than TTM0 */
387 unit = PM_ISU0_ALT;
388 } else if (unit == PM_LSU1 + 1) {
389 /* select lower word of LSU1 for this byte */
390 mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
391 }
392 ttm = unit >> 2;
393 mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
394 }
395
396 /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
397 for (i = 0; i < n_ev; ++i) {
398 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
399 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
400 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
401 psel = event[i] & PM_PMCSEL_MSK;
402 isbus = event[i] & PM_BUSEVENT_MSK;
403 if (!pmc) {
404 /* Bus event or any-PMC direct event */
405 for (pmc = 0; pmc < 4; ++pmc) {
406 if (pmc_inuse & (1 << pmc))
407 continue;
408 grp = (pmc >> 1) & 1;
409 if (isbus) {
410 if (grp == (byte & 1))
411 break;
412 } else if (pmc_grp_use[grp] < 2) {
413 ++pmc_grp_use[grp];
414 break;
415 }
416 }
417 pmc_inuse |= 1 << pmc;
418 } else if (pmc <= 4) {
419 /* Direct event */
420 --pmc;
421 if ((psel == 8 || psel == 0x10) && isbus && (byte & 2))
422 /* add events on higher-numbered bus */
423 mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
424 } else {
425 /* Instructions or run cycles on PMC5/6 */
426 --pmc;
427 }
428 if (isbus && unit == PM_GRS) {
429 bit = psel & 7;
430 grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
431 mmcr1 |= (u64)grsel << grsel_shift[bit];
432 }
433 if (pmc <= 3)
434 mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
435 hwc[i] = pmc;
436 }
437
438 /* Return MMCRx values */
439 mmcr[0] = 0;
440 if (pmc_inuse & 1)
441 mmcr[0] = MMCR0_PMC1CE;
442 if (pmc_inuse & 0x3e)
443 mmcr[0] |= MMCR0_PMCjCE;
444 mmcr[1] = mmcr1;
445 mmcr[2] = 0;
446 return 0;
447}
448
449static void power5_disable_pmc(unsigned int pmc, u64 mmcr[])
450{
451 if (pmc <= 3)
452 mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
453}
454
455static int power5_generic_events[] = {
456 [PERF_COUNT_CPU_CYCLES] = 0xf,
457 [PERF_COUNT_INSTRUCTIONS] = 0x100009,
458 [PERF_COUNT_CACHE_REFERENCES] = 0x4c1090, /* LD_REF_L1 */
459 [PERF_COUNT_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */
460 [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */
461 [PERF_COUNT_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */
462};
463
464struct power_pmu power5_pmu = {
465 .n_counter = 6,
466 .max_alternatives = MAX_ALT,
467 .add_fields = 0x7000090000555ull,
468 .test_adder = 0x3000490000000ull,
469 .compute_mmcr = power5_compute_mmcr,
470 .get_constraint = power5_get_constraint,
471 .get_alternatives = power5_get_alternatives,
472 .disable_pmc = power5_disable_pmc,
473 .n_generic = ARRAY_SIZE(power5_generic_events),
474 .generic_events = power5_generic_events,
475};
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
new file mode 100644
index 000000000000..b1f61f3c97bb
--- /dev/null
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -0,0 +1,283 @@
1/*
2 * Performance counter support for POWER6 processors.
3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/perf_counter.h>
13#include <asm/reg.h>
14
15/*
16 * Bits in event code for POWER6
17 */
18#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
19#define PM_PMC_MSK 0x7
20#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
21#define PM_UNIT_SH 16 /* Unit the event comes from (TTMxSEL encoding) */
22#define PM_UNIT_MSK 0xf
23#define PM_UNIT_MSKS (PM_UNIT_MSK << PM_UNIT_SH)
24#define PM_LLAV 0x8000 /* Load lookahead match value */
25#define PM_LLA 0x4000 /* Load lookahead match enable */
26#define PM_BYTE_SH 12 /* Byte of event bus to use */
27#define PM_BYTE_MSK 3
28#define PM_SUBUNIT_SH 8 /* Subunit event comes from (NEST_SEL enc.) */
29#define PM_SUBUNIT_MSK 7
30#define PM_SUBUNIT_MSKS (PM_SUBUNIT_MSK << PM_SUBUNIT_SH)
31#define PM_PMCSEL_MSK 0xff /* PMCxSEL value */
32#define PM_BUSEVENT_MSK 0xf3700
33
34/*
35 * Bits in MMCR1 for POWER6
36 */
37#define MMCR1_TTM0SEL_SH 60
38#define MMCR1_TTMSEL_SH(n) (MMCR1_TTM0SEL_SH - (n) * 4)
39#define MMCR1_TTMSEL_MSK 0xf
40#define MMCR1_TTMSEL(m, n) (((m) >> MMCR1_TTMSEL_SH(n)) & MMCR1_TTMSEL_MSK)
41#define MMCR1_NESTSEL_SH 45
42#define MMCR1_NESTSEL_MSK 0x7
43#define MMCR1_NESTSEL(m) (((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK)
44#define MMCR1_PMC1_LLA ((u64)1 << 44)
45#define MMCR1_PMC1_LLA_VALUE ((u64)1 << 39)
46#define MMCR1_PMC1_ADDR_SEL ((u64)1 << 35)
47#define MMCR1_PMC1SEL_SH 24
48#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
49#define MMCR1_PMCSEL_MSK 0xff
50
51/*
52 * Assign PMC numbers and compute MMCR1 value for a set of events
53 */
54static int p6_compute_mmcr(unsigned int event[], int n_ev,
55 unsigned int hwc[], u64 mmcr[])
56{
57 u64 mmcr1 = 0;
58 int i;
59 unsigned int pmc, ev, b, u, s, psel;
60 unsigned int ttmset = 0;
61 unsigned int pmc_inuse = 0;
62
63 if (n_ev > 4)
64 return -1;
65 for (i = 0; i < n_ev; ++i) {
66 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
67 if (pmc) {
68 if (pmc_inuse & (1 << (pmc - 1)))
69 return -1; /* collision! */
70 pmc_inuse |= 1 << (pmc - 1);
71 }
72 }
73 for (i = 0; i < n_ev; ++i) {
74 ev = event[i];
75 pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK;
76 if (pmc) {
77 --pmc;
78 } else {
79 /* can go on any PMC; find a free one */
80 for (pmc = 0; pmc < 4; ++pmc)
81 if (!(pmc_inuse & (1 << pmc)))
82 break;
83 pmc_inuse |= 1 << pmc;
84 }
85 hwc[i] = pmc;
86 psel = ev & PM_PMCSEL_MSK;
87 if (ev & PM_BUSEVENT_MSK) {
88 /* this event uses the event bus */
89 b = (ev >> PM_BYTE_SH) & PM_BYTE_MSK;
90 u = (ev >> PM_UNIT_SH) & PM_UNIT_MSK;
91 /* check for conflict on this byte of event bus */
92 if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u)
93 return -1;
94 mmcr1 |= (u64)u << MMCR1_TTMSEL_SH(b);
95 ttmset |= 1 << b;
96 if (u == 5) {
97 /* Nest events have a further mux */
98 s = (ev >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
99 if ((ttmset & 0x10) &&
100 MMCR1_NESTSEL(mmcr1) != s)
101 return -1;
102 ttmset |= 0x10;
103 mmcr1 |= (u64)s << MMCR1_NESTSEL_SH;
104 }
105 if (0x30 <= psel && psel <= 0x3d) {
106 /* these need the PMCx_ADDR_SEL bits */
107 if (b >= 2)
108 mmcr1 |= MMCR1_PMC1_ADDR_SEL >> pmc;
109 }
110 /* bus select values are different for PMC3/4 */
111 if (pmc >= 2 && (psel & 0x90) == 0x80)
112 psel ^= 0x20;
113 }
114 if (ev & PM_LLA) {
115 mmcr1 |= MMCR1_PMC1_LLA >> pmc;
116 if (ev & PM_LLAV)
117 mmcr1 |= MMCR1_PMC1_LLA_VALUE >> pmc;
118 }
119 mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc);
120 }
121 mmcr[0] = 0;
122 if (pmc_inuse & 1)
123 mmcr[0] = MMCR0_PMC1CE;
124 if (pmc_inuse & 0xe)
125 mmcr[0] |= MMCR0_PMCjCE;
126 mmcr[1] = mmcr1;
127 mmcr[2] = 0;
128 return 0;
129}
130
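[Editor's usage sketch, not part of the patch.] The wrapper below is hypothetical; the real caller is the generic powerpc perf_counter core, which this patch adds but which lies outside this hunk. It feeds two raw POWER6 codes taken from power6_generic_events below and reads back the per-event PMC assignment and register images.

static int p6_setup_example(void)
{
	unsigned int events[2] = { 0x1e, 0x2 };	/* CPU cycles, instructions */
	unsigned int hwc[2];
	u64 mmcr[3];

	if (p6_compute_mmcr(events, 2, hwc, mmcr))
		return -1;	/* no valid assignment */

	/*
	 * hwc[i] is the 0-based PMC chosen for events[i]; mmcr[0..2]
	 * are the MMCR0/MMCR1/MMCRA images to program before enabling.
	 */
	return 0;
}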
131/*
132 * Layout of constraint bits:
133 *
134 * 0-1 add field: number of uses of PMC1 (max 1)
135 * 2-3, 4-5, 6-7: ditto for PMC2, 3, 4
136 * 8-10 select field: nest (subunit) event selector
137 * 16-19 select field: unit on byte 0 of event bus
138 * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3
139 */
140static int p6_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
141{
142 int pmc, byte, sh;
143 unsigned int mask = 0, value = 0;
144
145 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
146 if (pmc) {
147 if (pmc > 4)
148 return -1;
149 sh = (pmc - 1) * 2;
150 mask |= 2 << sh;
151 value |= 1 << sh;
152 }
153 if (event & PM_BUSEVENT_MSK) {
154 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
155 sh = byte * 4;
156 mask |= PM_UNIT_MSKS << sh;
157 value |= (event & PM_UNIT_MSKS) << sh;
158 if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) {
159 mask |= PM_SUBUNIT_MSKS;
160 value |= event & PM_SUBUNIT_MSKS;
161 }
162 }
163 *maskp = mask;
164 *valp = value;
165 return 0;
166}
167
168#define MAX_ALT 4 /* at most 4 alternatives for any event */
169
170static const unsigned int event_alternatives[][MAX_ALT] = {
171 { 0x0130e8, 0x2000f6, 0x3000fc }, /* PM_PTEG_RELOAD_VALID */
172 { 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */
173 { 0x080088, 0x200054, 0x3000f0 }, /* PM_ST_MISS_L1 */
174 { 0x10000a, 0x2000f4 }, /* PM_RUN_CYC */
175 { 0x10000b, 0x2000f5 }, /* PM_RUN_COUNT */
176 { 0x10000e, 0x400010 }, /* PM_PURR */
177 { 0x100010, 0x4000f8 }, /* PM_FLUSH */
178 { 0x10001a, 0x200010 }, /* PM_MRK_INST_DISP */
179 { 0x100026, 0x3000f8 }, /* PM_TB_BIT_TRANS */
180 { 0x100054, 0x2000f0 }, /* PM_ST_FIN */
181 { 0x100056, 0x2000fc }, /* PM_L1_ICACHE_MISS */
182 { 0x1000f0, 0x40000a }, /* PM_INST_IMC_MATCH_CMPL */
183 { 0x1000f8, 0x200008 }, /* PM_GCT_EMPTY_CYC */
184 { 0x1000fc, 0x400006 }, /* PM_LSU_DERAT_MISS_CYC */
185 { 0x20000e, 0x400007 }, /* PM_LSU_DERAT_MISS */
186 { 0x200012, 0x300012 }, /* PM_INST_DISP */
187 { 0x2000f2, 0x3000f2 }, /* PM_INST_DISP */
188 { 0x2000f8, 0x300010 }, /* PM_EXT_INT */
189 { 0x2000fe, 0x300056 }, /* PM_DATA_FROM_L2MISS */
190 { 0x2d0030, 0x30001a }, /* PM_MRK_FPU_FIN */
191 { 0x30000a, 0x400018 }, /* PM_MRK_INST_FIN */
192 { 0x3000f6, 0x40000e }, /* PM_L1_DCACHE_RELOAD_VALID */
193 { 0x3000fe, 0x400056 }, /* PM_DATA_FROM_L3MISS */
194};
195
196/*
197 * This could be made more efficient with a binary search on
198 * a presorted list, if necessary
199 */
200static int find_alternatives_list(unsigned int event)
201{
202 int i, j;
203 unsigned int alt;
204
205 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
206 if (event < event_alternatives[i][0])
207 return -1;
208 for (j = 0; j < MAX_ALT; ++j) {
209 alt = event_alternatives[i][j];
210 if (!alt || event < alt)
211 break;
212 if (event == alt)
213 return i;
214 }
215 }
216 return -1;
217}
218
219static int p6_get_alternatives(unsigned int event, unsigned int alt[])
220{
221 int i, j;
222 unsigned int aevent, psel, pmc;
223 unsigned int nalt = 1;
224
225 alt[0] = event;
226
227 /* check the alternatives table */
228 i = find_alternatives_list(event);
229 if (i >= 0) {
230 /* copy out alternatives from list */
231 for (j = 0; j < MAX_ALT; ++j) {
232 aevent = event_alternatives[i][j];
233 if (!aevent)
234 break;
235 if (aevent != event)
236 alt[nalt++] = aevent;
237 }
238
239 } else {
240 /* Check for alternative ways of computing sum events */
241 /* PMCSEL 0x32 counter N == PMCSEL 0x34 counter 5-N */
242 psel = event & (PM_PMCSEL_MSK & ~1); /* ignore edge bit */
243 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
244 if (pmc && (psel == 0x32 || psel == 0x34))
245 alt[nalt++] = ((event ^ 0x6) & ~PM_PMC_MSKS) |
246 ((5 - pmc) << PM_PMC_SH);
247
248 /* PMCSEL 0x38 counter N == PMCSEL 0x3a counter N+/-2 */
249 if (pmc && (psel == 0x38 || psel == 0x3a))
250 alt[nalt++] = ((event ^ 0x2) & ~PM_PMC_MSKS) |
251 ((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH);
252 }
253
254 return nalt;
255}
256
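[Editor's worked example, derived from the transformations above.] A sum event with PMC field 1 and PMCSEL 0x32 picks up the alternative with PMC field 5 - 1 = 4 and PMCSEL 0x32 ^ 0x6 = 0x34; likewise a PMCSEL 0x38 event on PMC 1 gains the PMCSEL 0x3a form on PMC 3.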
257static void p6_disable_pmc(unsigned int pmc, u64 mmcr[])
258{
259 /* Set PMCxSEL to 0 to disable PMCx */
260 mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
261}
262
263static int power6_generic_events[] = {
264 [PERF_COUNT_CPU_CYCLES] = 0x1e,
265 [PERF_COUNT_INSTRUCTIONS] = 2,
266 [PERF_COUNT_CACHE_REFERENCES] = 0x280030, /* LD_REF_L1 */
267 [PERF_COUNT_CACHE_MISSES] = 0x30000c, /* LD_MISS_L1 */
268 [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x410a0, /* BR_PRED */
269 [PERF_COUNT_BRANCH_MISSES] = 0x400052, /* BR_MPRED */
270};
271
272struct power_pmu power6_pmu = {
273 .n_counter = 4,
274 .max_alternatives = MAX_ALT,
275 .add_fields = 0x55,
276 .test_adder = 0,
277 .compute_mmcr = p6_compute_mmcr,
278 .get_constraint = p6_get_constraint,
279 .get_alternatives = p6_get_alternatives,
280 .disable_pmc = p6_disable_pmc,
281 .n_generic = ARRAY_SIZE(power6_generic_events),
282 .generic_events = power6_generic_events,
283};
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
new file mode 100644
index 000000000000..c3256580be1a
--- /dev/null
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -0,0 +1,375 @@
1/*
2 * Performance counter support for PPC970-family processors.
3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/string.h>
12#include <linux/perf_counter.h>
13#include <asm/reg.h>
14
15/*
16 * Bits in event code for PPC970
17 */
18#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */
19#define PM_PMC_MSK 0xf
20#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */
21#define PM_UNIT_MSK 0xf
22#define PM_BYTE_SH 4 /* Byte number of event bus to use */
23#define PM_BYTE_MSK 3
24#define PM_PMCSEL_MSK 0xf
25
26/* Values in PM_UNIT field */
27#define PM_NONE 0
28#define PM_FPU 1
29#define PM_VPU 2
30#define PM_ISU 3
31#define PM_IFU 4
32#define PM_IDU 5
33#define PM_STS 6
34#define PM_LSU0 7
35#define PM_LSU1U 8
36#define PM_LSU1L 9
37#define PM_LASTUNIT 9
38
39/*
40 * Bits in MMCR0 for PPC970
41 */
42#define MMCR0_PMC1SEL_SH 8
43#define MMCR0_PMC2SEL_SH 1
44#define MMCR_PMCSEL_MSK 0x1f
45
46/*
47 * Bits in MMCR1 for PPC970
48 */
49#define MMCR1_TTM0SEL_SH 62
50#define MMCR1_TTM1SEL_SH 59
51#define MMCR1_TTM3SEL_SH 53
52#define MMCR1_TTMSEL_MSK 3
53#define MMCR1_TD_CP_DBG0SEL_SH 50
54#define MMCR1_TD_CP_DBG1SEL_SH 48
55#define MMCR1_TD_CP_DBG2SEL_SH 46
56#define MMCR1_TD_CP_DBG3SEL_SH 44
57#define MMCR1_PMC1_ADDER_SEL_SH 39
58#define MMCR1_PMC2_ADDER_SEL_SH 38
59#define MMCR1_PMC6_ADDER_SEL_SH 37
60#define MMCR1_PMC5_ADDER_SEL_SH 36
61#define MMCR1_PMC8_ADDER_SEL_SH 35
62#define MMCR1_PMC7_ADDER_SEL_SH 34
63#define MMCR1_PMC3_ADDER_SEL_SH 33
64#define MMCR1_PMC4_ADDER_SEL_SH 32
65#define MMCR1_PMC3SEL_SH 27
66#define MMCR1_PMC4SEL_SH 22
67#define MMCR1_PMC5SEL_SH 17
68#define MMCR1_PMC6SEL_SH 12
69#define MMCR1_PMC7SEL_SH 7
70#define MMCR1_PMC8SEL_SH 2
71
72static short mmcr1_adder_bits[8] = {
73 MMCR1_PMC1_ADDER_SEL_SH,
74 MMCR1_PMC2_ADDER_SEL_SH,
75 MMCR1_PMC3_ADDER_SEL_SH,
76 MMCR1_PMC4_ADDER_SEL_SH,
77 MMCR1_PMC5_ADDER_SEL_SH,
78 MMCR1_PMC6_ADDER_SEL_SH,
79 MMCR1_PMC7_ADDER_SEL_SH,
80 MMCR1_PMC8_ADDER_SEL_SH
81};
82
83/*
84 * Bits in MMCRA
85 */
86
87/*
88 * Layout of constraint bits:
89 * 6666555555555544444444443333333333222222222211111111110000000000
90 * 3210987654321098765432109876543210987654321098765432109876543210
91 * <><>[ >[ >[ >< >< >< >< ><><><><><><><><>
92 * T0T1 UC PS1 PS2 B0 B1 B2 B3 P8P7P6P5P4P3P2P1
93 *
94 * T0 - TTM0 constraint
95 * 46-47: TTM0SEL value (0=FPU, 2=IFU, 3=VPU) 0xC000_0000_0000
96 *
97 * T1 - TTM1 constraint
98 * 44-45: TTM1SEL value (0=IDU, 3=STS) 0x3000_0000_0000
99 *
100 * UC - unit constraint: can't have all three of FPU|IFU|VPU, ISU, IDU|STS
101 * 43: UC3 error 0x0800_0000_0000
102 * 42: FPU|IFU|VPU events needed 0x0400_0000_0000
103 * 41: ISU events needed 0x0200_0000_0000
104 * 40: IDU|STS events needed 0x0100_0000_0000
105 *
106 * PS1
107 * 39: PS1 error 0x0080_0000_0000
108 * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
109 *
110 * PS2
111 * 35: PS2 error 0x0008_0000_0000
112 * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
113 *
114 * B0
115 * 28-31: Byte 0 event source 0xf000_0000
116 * Encoding as for the event code
117 *
118 * B1, B2, B3
119 * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
120 *
121 * P8
122 * 15: P8 error 0x8000
123 * 14-15: Count of events needing PMC8
124 *
125 * P1..P7
126 * 0-13: Count of events needing PMC1..PMC7
127 */
128
129/* Masks and values for using events from the various units */
130static u64 unit_cons[PM_LASTUNIT+1][2] = {
131 [PM_FPU] = { 0xc80000000000ull, 0x040000000000ull },
132 [PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull },
133 [PM_ISU] = { 0x080000000000ull, 0x020000000000ull },
134 [PM_IFU] = { 0xc80000000000ull, 0x840000000000ull },
135 [PM_IDU] = { 0x380000000000ull, 0x010000000000ull },
136 [PM_STS] = { 0x380000000000ull, 0x310000000000ull },
137};
138
139static int p970_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
140{
141 int pmc, byte, unit, sh;
142 u64 mask = 0, value = 0;
143 int grp = -1;
144
145 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
146 if (pmc) {
147 if (pmc > 8)
148 return -1;
149 sh = (pmc - 1) * 2;
150 mask |= 2 << sh;
151 value |= 1 << sh;
152 grp = ((pmc - 1) >> 1) & 1;
153 }
154 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
155 if (unit) {
156 if (unit > PM_LASTUNIT)
157 return -1;
158 mask |= unit_cons[unit][0];
159 value |= unit_cons[unit][1];
160 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
161 /*
162 * Bus events on bytes 0 and 2 can be counted
163 * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
164 */
165 if (!pmc)
166 grp = byte & 1;
167 /* Set byte lane select field */
168 mask |= 0xfULL << (28 - 4 * byte);
169 value |= (u64)unit << (28 - 4 * byte);
170 }
171 if (grp == 0) {
172 /* increment PMC1/2/5/6 field */
173 mask |= 0x8000000000ull;
174 value |= 0x1000000000ull;
175 } else if (grp == 1) {
176 /* increment PMC3/4/7/8 field */
177 mask |= 0x800000000ull;
178 value |= 0x100000000ull;
179 }
180 *maskp = mask;
181 *valp = value;
182 return 0;
183}
184
185static int p970_get_alternatives(unsigned int event, unsigned int alt[])
186{
187 alt[0] = event;
188
189 /* 2 alternatives for LSU empty */
190 if (event == 0x2002 || event == 0x3002) {
191 alt[1] = event ^ 0x1000;
192 return 2;
193 }
194
195 return 1;
196}
197
198static int p970_compute_mmcr(unsigned int event[], int n_ev,
199 unsigned int hwc[], u64 mmcr[])
200{
201 u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0;
202 unsigned int pmc, unit, byte, psel;
203 unsigned int ttm, grp;
204 unsigned int pmc_inuse = 0;
205 unsigned int pmc_grp_use[2];
206 unsigned char busbyte[4];
207 unsigned char unituse[16];
208 unsigned char unitmap[] = { 0, 0<<3, 3<<3, 1<<3, 2<<3, 0|4, 3|4 };
209 unsigned char ttmuse[2];
210 unsigned char pmcsel[8];
211 int i;
212
213 if (n_ev > 8)
214 return -1;
215
216 /* First pass to count resource use */
217 pmc_grp_use[0] = pmc_grp_use[1] = 0;
218 memset(busbyte, 0, sizeof(busbyte));
219 memset(unituse, 0, sizeof(unituse));
220 for (i = 0; i < n_ev; ++i) {
221 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
222 if (pmc) {
223 if (pmc_inuse & (1 << (pmc - 1)))
224 return -1;
225 pmc_inuse |= 1 << (pmc - 1);
226 /* count 1/2/5/6 vs 3/4/7/8 use */
227 ++pmc_grp_use[((pmc - 1) >> 1) & 1];
228 }
229 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
230 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
231 if (unit) {
232 if (unit > PM_LASTUNIT)
233 return -1;
234 if (!pmc)
235 ++pmc_grp_use[byte & 1];
236 if (busbyte[byte] && busbyte[byte] != unit)
237 return -1;
238 busbyte[byte] = unit;
239 unituse[unit] = 1;
240 }
241 }
242 if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
243 return -1;
244
245 /*
246 * Assign resources and set multiplexer selects.
247 *
248 * PM_ISU can go either on TTM0 or TTM1, but that's the only
249 * choice we have to deal with.
250 */
251 if (unituse[PM_ISU] &
252 (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_VPU]))
253 unitmap[PM_ISU] = 2 | 4; /* move ISU to TTM1 */
254 /* Set TTM[01]SEL fields. */
255 ttmuse[0] = ttmuse[1] = 0;
256 for (i = PM_FPU; i <= PM_STS; ++i) {
257 if (!unituse[i])
258 continue;
259 ttm = unitmap[i];
260 ++ttmuse[(ttm >> 2) & 1];
261 mmcr1 |= (u64)(ttm & ~4) << MMCR1_TTM1SEL_SH;
262 }
263 /* Check only one unit per TTMx */
264 if (ttmuse[0] > 1 || ttmuse[1] > 1)
265 return -1;
266
267 /* Set byte lane select fields and TTM3SEL. */
268 for (byte = 0; byte < 4; ++byte) {
269 unit = busbyte[byte];
270 if (!unit)
271 continue;
272 if (unit <= PM_STS)
273 ttm = (unitmap[unit] >> 2) & 1;
274 else if (unit == PM_LSU0)
275 ttm = 2;
276 else {
277 ttm = 3;
278 if (unit == PM_LSU1L && byte >= 2)
279 mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
280 }
281 mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
282 }
283
284 /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
285 memset(pmcsel, 0x8, sizeof(pmcsel)); /* 8 means don't count */
286 for (i = 0; i < n_ev; ++i) {
287 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
288 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
289 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
290 psel = event[i] & PM_PMCSEL_MSK;
291 if (!pmc) {
292 /* Bus event or any-PMC direct event */
293 if (unit)
294 psel |= 0x10 | ((byte & 2) << 2);
295 else
296 psel |= 8;
297 for (pmc = 0; pmc < 8; ++pmc) {
298 if (pmc_inuse & (1 << pmc))
299 continue;
300 grp = (pmc >> 1) & 1;
301 if (unit) {
302 if (grp == (byte & 1))
303 break;
304 } else if (pmc_grp_use[grp] < 4) {
305 ++pmc_grp_use[grp];
306 break;
307 }
308 }
309 pmc_inuse |= 1 << pmc;
310 } else {
311 /* Direct event */
312 --pmc;
313 if (psel == 0 && (byte & 2))
314 /* add events on higher-numbered bus */
315 mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
316 }
317 pmcsel[pmc] = psel;
318 hwc[i] = pmc;
319 }
320 for (pmc = 0; pmc < 2; ++pmc)
321 mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc);
322 for (; pmc < 8; ++pmc)
323 mmcr1 |= (u64)pmcsel[pmc] << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
324 if (pmc_inuse & 1)
325 mmcr0 |= MMCR0_PMC1CE;
326 if (pmc_inuse & 0xfe)
327 mmcr0 |= MMCR0_PMCjCE;
328
329 mmcra |= 0x2000; /* mark only one IOP per PPC instruction */
330
331 /* Return MMCRx values */
332 mmcr[0] = mmcr0;
333 mmcr[1] = mmcr1;
334 mmcr[2] = mmcra;
335 return 0;
336}
337
338static void p970_disable_pmc(unsigned int pmc, u64 mmcr[])
339{
340 int shift, i;
341
342 if (pmc <= 1) {
343 shift = MMCR0_PMC1SEL_SH - 7 * pmc;
344 i = 0;
345 } else {
346 shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2);
347 i = 1;
348 }
349 /*
350 * Setting the PMCxSEL field to 0x08 disables PMC x.
351 */
352 mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift);
353}
354
355static int ppc970_generic_events[] = {
356 [PERF_COUNT_CPU_CYCLES] = 7,
357 [PERF_COUNT_INSTRUCTIONS] = 1,
358 [PERF_COUNT_CACHE_REFERENCES] = 0x8810, /* PM_LD_REF_L1 */
359 [PERF_COUNT_CACHE_MISSES] = 0x3810, /* PM_LD_MISS_L1 */
360 [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x431, /* PM_BR_ISSUED */
361 [PERF_COUNT_BRANCH_MISSES] = 0x327, /* PM_GRP_BR_MPRED */
362};
363
364struct power_pmu ppc970_pmu = {
365 .n_counter = 8,
366 .max_alternatives = 2,
367 .add_fields = 0x001100005555ull,
368 .test_adder = 0x013300000000ull,
369 .compute_mmcr = p970_compute_mmcr,
370 .get_constraint = p970_get_constraint,
371 .get_alternatives = p970_get_alternatives,
372 .disable_pmc = p970_disable_pmc,
373 .n_generic = ARRAY_SIZE(ppc970_generic_events),
374 .generic_events = ppc970_generic_events,
375};
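[Editor's illustration, not part of the patch.] Tying these per-CPU tables together, here is a hedged sketch of how a generic event index could be mapped to a raw code and screened through one of the power_pmu structures above. The helper name is invented, only fields visible in the initializers above are used, and the real glue lives in the generic perf_counter code added elsewhere in this patch.

static int map_and_check(struct power_pmu *pmu, int generic_idx,
			 unsigned int *eventp, u64 *maskp, u64 *valp)
{
	if (generic_idx < 0 || generic_idx >= pmu->n_generic)
		return -1;

	/* translate the generic index into this CPU's raw event code */
	*eventp = pmu->generic_events[generic_idx];

	/* ask the CPU backend for the event's scheduling constraint */
	return pmu->get_constraint(*eventp, maskp, valp);
}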
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 76993941cac9..17bbf6f91fbe 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -29,6 +29,7 @@
29#include <linux/module.h> 29#include <linux/module.h>
30#include <linux/kprobes.h> 30#include <linux/kprobes.h>
31#include <linux/kdebug.h> 31#include <linux/kdebug.h>
32#include <linux/perf_counter.h>
32 33
33#include <asm/firmware.h> 34#include <asm/firmware.h>
34#include <asm/page.h> 35#include <asm/page.h>
@@ -170,6 +171,8 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
170 die("Weird page fault", regs, SIGSEGV); 171 die("Weird page fault", regs, SIGSEGV);
171 } 172 }
172 173
174 perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs);
175
173 /* When running in the kernel we expect faults to occur only to 176 /* When running in the kernel we expect faults to occur only to
174 * addresses in user space. All other faults represent errors in the 177 * addresses in user space. All other faults represent errors in the
175 * kernel and should generate an OOPS. Unfortunately, in the case of an 178 * kernel and should generate an OOPS. Unfortunately, in the case of an
@@ -309,6 +312,7 @@ good_area:
309 } 312 }
310 if (ret & VM_FAULT_MAJOR) { 313 if (ret & VM_FAULT_MAJOR) {
311 current->maj_flt++; 314 current->maj_flt++;
315 perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs);
312#ifdef CONFIG_PPC_SMLPAR 316#ifdef CONFIG_PPC_SMLPAR
313 if (firmware_has_feature(FW_FEATURE_CMO)) { 317 if (firmware_has_feature(FW_FEATURE_CMO)) {
314 preempt_disable(); 318 preempt_disable();
@@ -316,8 +320,10 @@ good_area:
316 preempt_enable(); 320 preempt_enable();
317 } 321 }
318#endif 322#endif
319 } else 323 } else {
320 current->min_flt++; 324 current->min_flt++;
325 perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs);
326 }
321 up_read(&mm->mmap_sem); 327 up_read(&mm->mmap_sem);
322 return 0; 328 return 0;
323 329
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 9da795e49337..732ee93a8e98 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -1,6 +1,7 @@
1config PPC64 1config PPC64
2 bool "64-bit kernel" 2 bool "64-bit kernel"
3 default n 3 default n
4 select HAVE_PERF_COUNTERS
4 help 5 help
5 This option selects whether a 32-bit or a 64-bit kernel 6 This option selects whether a 32-bit or a 64-bit kernel
6 will be built. 7 will be built.
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4b3408206091..6da24fc6a09e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -728,6 +728,7 @@ config X86_UP_IOAPIC
728config X86_LOCAL_APIC 728config X86_LOCAL_APIC
729 def_bool y 729 def_bool y
730 depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC 730 depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
731 select HAVE_PERF_COUNTERS if (!M386 && !M486)
731 732
732config X86_IO_APIC 733config X86_IO_APIC
733 def_bool y 734 def_bool y
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index a505202086e8..19c61ef6ab57 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -825,9 +825,10 @@ ia32_sys_call_table:
825 .quad compat_sys_signalfd4 825 .quad compat_sys_signalfd4
826 .quad sys_eventfd2 826 .quad sys_eventfd2
827 .quad sys_epoll_create1 827 .quad sys_epoll_create1
828 .quad sys_dup3 /* 330 */ 828 .quad sys_dup3 /* 330 */
829 .quad sys_pipe2 829 .quad sys_pipe2
830 .quad sys_inotify_init1 830 .quad sys_inotify_init1
831 .quad compat_sys_preadv 831 .quad compat_sys_preadv
832 .quad compat_sys_pwritev 832 .quad compat_sys_pwritev
833 .quad sys_perf_counter_open
833ia32_syscall_end: 834ia32_syscall_end:
diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
index 85b46fba4229..aff9f1fcdcd7 100644
--- a/arch/x86/include/asm/atomic_32.h
+++ b/arch/x86/include/asm/atomic_32.h
@@ -247,5 +247,241 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
247#define smp_mb__before_atomic_inc() barrier() 247#define smp_mb__before_atomic_inc() barrier()
248#define smp_mb__after_atomic_inc() barrier() 248#define smp_mb__after_atomic_inc() barrier()
249 249
250/* A 64-bit atomic type */
251
252typedef struct {
253 unsigned long long counter;
254} atomic64_t;
255
256#define ATOMIC64_INIT(val) { (val) }
257
258/**
259 * __atomic64_read - read atomic64 variable
260 * @ptr: pointer of type atomic64_t
261 *
262 * Reads the value of @ptr non-atomically.
263 * Doesn't imply a read memory barrier.
264 */
265#define __atomic64_read(ptr) ((ptr)->counter)
266
267static inline unsigned long long
268cmpxchg8b(unsigned long long *ptr, unsigned long long old, unsigned long long new)
269{
270 asm volatile(
271
272 LOCK_PREFIX "cmpxchg8b (%[ptr])\n"
273
274 : "=A" (old)
275
276 : [ptr] "D" (ptr),
277 "A" (old),
278 "b" (ll_low(new)),
279 "c" (ll_high(new))
280
281 : "memory");
282
283 return old;
284}
285
286static inline unsigned long long
287atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val,
288 unsigned long long new_val)
289{
290 return cmpxchg8b(&ptr->counter, old_val, new_val);
291}
292
293/**
294 * atomic64_xchg - xchg atomic64 variable
295 * @ptr: pointer to type atomic64_t
296 * @new_val: value to assign
298 *
299 * Atomically xchgs the value of @ptr to @new_val and returns
300 * the old value.
301 */
302
303static inline unsigned long long
304atomic64_xchg(atomic64_t *ptr, unsigned long long new_val)
305{
306 unsigned long long old_val;
307
308 do {
309 old_val = __atomic64_read(ptr);
310 } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val);
311
312 return old_val;
313}
314
315/**
316 * atomic64_set - set atomic64 variable
317 * @ptr: pointer to type atomic64_t
318 * @new_val: value to assign
319 *
320 * Atomically sets the value of @ptr to @new_val.
321 */
322static inline void atomic64_set(atomic64_t *ptr, unsigned long long new_val)
323{
324 atomic64_xchg(ptr, new_val);
325}
326
327/**
328 * atomic64_read - read atomic64 variable
329 * @ptr: pointer to type atomic64_t
330 *
331 * Atomically reads the value of @ptr and returns it.
332 */
333static inline unsigned long long atomic64_read(atomic64_t *ptr)
334{
335 unsigned long long curr_val;
336
337 do {
338 curr_val = __atomic64_read(ptr);
339 } while (atomic64_cmpxchg(ptr, curr_val, curr_val) != curr_val);
340
341 return curr_val;
342}
343
344/**
345 * atomic64_add_return - add and return
346 * @delta: integer value to add
347 * @ptr: pointer to type atomic64_t
348 *
349 * Atomically adds @delta to @ptr and returns @delta + *@ptr
350 */
351static inline unsigned long long
352atomic64_add_return(unsigned long long delta, atomic64_t *ptr)
353{
354 unsigned long long old_val, new_val;
355
356 do {
357 old_val = __atomic64_read(ptr);
358 new_val = old_val + delta;
359
360 } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val);
361
362 return new_val;
363}
364
365static inline long atomic64_sub_return(unsigned long long delta, atomic64_t *ptr)
366{
367 return atomic64_add_return(-delta, ptr);
368}
369
370static inline long atomic64_inc_return(atomic64_t *ptr)
371{
372 return atomic64_add_return(1, ptr);
373}
374
375static inline long atomic64_dec_return(atomic64_t *ptr)
376{
377 return atomic64_sub_return(1, ptr);
378}
379
380/**
381 * atomic64_add - add integer to atomic64 variable
382 * @delta: integer value to add
383 * @ptr: pointer to type atomic64_t
384 *
385 * Atomically adds @delta to @ptr.
386 */
387static inline void atomic64_add(unsigned long long delta, atomic64_t *ptr)
388{
389 atomic64_add_return(delta, ptr);
390}
391
392/**
393 * atomic64_sub - subtract the atomic64 variable
394 * @delta: integer value to subtract
395 * @ptr: pointer to type atomic64_t
396 *
397 * Atomically subtracts @delta from @ptr.
398 */
399static inline void atomic64_sub(unsigned long long delta, atomic64_t *ptr)
400{
401 atomic64_add(-delta, ptr);
402}
403
404/**
405 * atomic64_sub_and_test - subtract value from variable and test result
406 * @delta: integer value to subtract
407 * @ptr: pointer to type atomic64_t
408 *
409 * Atomically subtracts @delta from @ptr and returns
410 * true if the result is zero, or false for all
411 * other cases.
412 */
413static inline int
414atomic64_sub_and_test(unsigned long long delta, atomic64_t *ptr)
415{
416 unsigned long long old_val = atomic64_sub_return(delta, ptr);
417
418 return old_val == 0;
419}
420
421/**
422 * atomic64_inc - increment atomic64 variable
423 * @ptr: pointer to type atomic64_t
424 *
425 * Atomically increments @ptr by 1.
426 */
427static inline void atomic64_inc(atomic64_t *ptr)
428{
429 atomic64_add(1, ptr);
430}
431
432/**
433 * atomic64_dec - decrement atomic64 variable
434 * @ptr: pointer to type atomic64_t
435 *
436 * Atomically decrements @ptr by 1.
437 */
438static inline void atomic64_dec(atomic64_t *ptr)
439{
440 atomic64_sub(1, ptr);
441}
442
443/**
444 * atomic64_dec_and_test - decrement and test
445 * @ptr: pointer to type atomic64_t
446 *
447 * Atomically decrements @ptr by 1 and
448 * returns true if the result is 0, or false for all other
449 * cases.
450 */
451static inline int atomic64_dec_and_test(atomic64_t *ptr)
452{
453 return atomic64_sub_and_test(1, ptr);
454}
455
456/**
457 * atomic64_inc_and_test - increment and test
458 * @ptr: pointer to type atomic64_t
459 *
460 * Atomically increments @ptr by 1
461 * and returns true if the result is zero, or false for all
462 * other cases.
463 */
464static inline int atomic64_inc_and_test(atomic64_t *ptr)
465{
466 return atomic64_sub_and_test(-1, ptr);
467}
468
469/**
470 * atomic64_add_negative - add and test if negative
471 * @delta: integer value to add
472 * @ptr: pointer to type atomic64_t
473 *
474 * Atomically adds @delta to @ptr and returns true
475 * if the result is negative, or false when
476 * result is greater than or equal to zero.
477 */
478static inline int
479atomic64_add_negative(unsigned long long delta, atomic64_t *ptr)
480{
481 long long old_val = atomic64_add_return(delta, ptr);
482
483 return old_val < 0;
484}
485
250#include <asm-generic/atomic.h> 486#include <asm-generic/atomic.h>
251#endif /* _ASM_X86_ATOMIC_32_H */ 487#endif /* _ASM_X86_ATOMIC_32_H */
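[Editor's illustration, not part of the patch.] The atomic64 additions above all reduce to the same read-compute-cmpxchg8b retry loop. As a hedged sketch of that pattern outside the kernel, here it is in portable C11 rather than the cmpxchg8b wrapper above; the function name is made up for this sketch.

#include <stdatomic.h>

static unsigned long long
add_return_sketch(_Atomic unsigned long long *p, unsigned long long delta)
{
	unsigned long long old = atomic_load(p);

	/* retry until no other thread changed *p between load and CAS */
	while (!atomic_compare_exchange_weak(p, &old, old + delta))
		;	/* on failure, old has been reloaded from *p */

	return old + delta;
}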
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index c2e6bedaf258..fe24d2802490 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -50,6 +50,7 @@ BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
50 50
51#ifdef CONFIG_PERF_COUNTERS 51#ifdef CONFIG_PERF_COUNTERS
52BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR) 52BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR)
53BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR)
53#endif 54#endif
54 55
55#ifdef CONFIG_X86_MCE_P4THERMAL 56#ifdef CONFIG_X86_MCE_P4THERMAL
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 039db6aa8e02..f5ebe2aaca4b 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -13,6 +13,8 @@ typedef struct {
13 unsigned int irq_spurious_count; 13 unsigned int irq_spurious_count;
14#endif 14#endif
15 unsigned int generic_irqs; /* arch dependent */ 15 unsigned int generic_irqs; /* arch dependent */
16 unsigned int apic_perf_irqs;
17 unsigned int apic_pending_irqs;
16#ifdef CONFIG_SMP 18#ifdef CONFIG_SMP
17 unsigned int irq_resched_count; 19 unsigned int irq_resched_count;
18 unsigned int irq_call_count; 20 unsigned int irq_call_count;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index b762ea49bd70..7309c0ad6902 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -29,6 +29,9 @@
29extern void apic_timer_interrupt(void); 29extern void apic_timer_interrupt(void);
30extern void generic_interrupt(void); 30extern void generic_interrupt(void);
31extern void error_interrupt(void); 31extern void error_interrupt(void);
32extern void perf_counter_interrupt(void);
33extern void perf_pending_interrupt(void);
34
32extern void spurious_interrupt(void); 35extern void spurious_interrupt(void);
33extern void thermal_interrupt(void); 36extern void thermal_interrupt(void);
34extern void reschedule_interrupt(void); 37extern void reschedule_interrupt(void);
diff --git a/arch/x86/include/asm/intel_arch_perfmon.h b/arch/x86/include/asm/intel_arch_perfmon.h
deleted file mode 100644
index fa0fd068bc2e..000000000000
--- a/arch/x86/include/asm/intel_arch_perfmon.h
+++ /dev/null
@@ -1,31 +0,0 @@
1#ifndef _ASM_X86_INTEL_ARCH_PERFMON_H
2#define _ASM_X86_INTEL_ARCH_PERFMON_H
3
4#define MSR_ARCH_PERFMON_PERFCTR0 0xc1
5#define MSR_ARCH_PERFMON_PERFCTR1 0xc2
6
7#define MSR_ARCH_PERFMON_EVENTSEL0 0x186
8#define MSR_ARCH_PERFMON_EVENTSEL1 0x187
9
10#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22)
11#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
12#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
13#define ARCH_PERFMON_EVENTSEL_USR (1 << 16)
14
15#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c)
16#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
17#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX (0)
18#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
19 (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
20
21union cpuid10_eax {
22 struct {
23 unsigned int version_id:8;
24 unsigned int num_counters:8;
25 unsigned int bit_width:8;
26 unsigned int mask_length:8;
27 } split;
28 unsigned int full;
29};
30
31#endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 3cbd79bbb47c..545bb811ccb5 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -117,6 +117,11 @@
117#define GENERIC_INTERRUPT_VECTOR 0xed 117#define GENERIC_INTERRUPT_VECTOR 0xed
118 118
119/* 119/*
120 * Performance monitoring pending work vector:
121 */
122#define LOCAL_PENDING_VECTOR 0xec
123
124/*
120 * First APIC vector available to drivers: (vectors 0x30-0xee) we 125 * First APIC vector available to drivers: (vectors 0x30-0xee) we
121 * start at 0x31(0x41) to spread out vectors evenly between priority 126 * start at 0x31(0x41) to spread out vectors evenly between priority
122 * levels. (0x80 is the syscall vector) 127 * levels. (0x80 is the syscall vector)
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
new file mode 100644
index 000000000000..d08dd52cb8ff
--- /dev/null
+++ b/arch/x86/include/asm/perf_counter.h
@@ -0,0 +1,100 @@
1#ifndef _ASM_X86_PERF_COUNTER_H
2#define _ASM_X86_PERF_COUNTER_H
3
4/*
5 * Performance counter hw details:
6 */
7
8#define X86_PMC_MAX_GENERIC 8
9#define X86_PMC_MAX_FIXED 3
10
11#define X86_PMC_IDX_GENERIC 0
12#define X86_PMC_IDX_FIXED 32
13#define X86_PMC_IDX_MAX 64
14
15#define MSR_ARCH_PERFMON_PERFCTR0 0xc1
16#define MSR_ARCH_PERFMON_PERFCTR1 0xc2
17
18#define MSR_ARCH_PERFMON_EVENTSEL0 0x186
19#define MSR_ARCH_PERFMON_EVENTSEL1 0x187
20
21#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22)
22#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
23#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
24#define ARCH_PERFMON_EVENTSEL_USR (1 << 16)
25
26/*
27 * Includes eventsel and unit mask as well:
28 */
29#define ARCH_PERFMON_EVENT_MASK 0xffff
30
31#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
32#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
33#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0
34#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
35 (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
36
37#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6
38
39/*
40 * Intel "Architectural Performance Monitoring" CPUID
41 * detection/enumeration details:
42 */
43union cpuid10_eax {
44 struct {
45 unsigned int version_id:8;
46 unsigned int num_counters:8;
47 unsigned int bit_width:8;
48 unsigned int mask_length:8;
49 } split;
50 unsigned int full;
51};
52
53union cpuid10_edx {
54 struct {
55 unsigned int num_counters_fixed:4;
56 unsigned int reserved:28;
57 } split;
58 unsigned int full;
59};
60
61
62/*
63 * Fixed-purpose performance counters:
64 */
65
66/*
67 * All 3 fixed-mode PMCs are configured via this single MSR:
68 */
69#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d
70
71/*
72 * The counts are available in three separate MSRs:
73 */
74
75/* Instr_Retired.Any: */
76#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309
77#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0)
78
79/* CPU_CLK_Unhalted.Core: */
80#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a
81#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1)
82
83/* CPU_CLK_Unhalted.Ref: */
84#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
85#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2)
86
87extern void set_perf_counter_pending(void);
88
89#define clear_perf_counter_pending() do { } while (0)
90#define test_perf_counter_pending() (0)
91
92#ifdef CONFIG_PERF_COUNTERS
93extern void init_hw_perf_counters(void);
94extern void perf_counters_lapic_init(int nmi);
95#else
96static inline void init_hw_perf_counters(void) { }
97static inline void perf_counters_lapic_init(int nmi) { }
98#endif
99
100#endif /* _ASM_X86_PERF_COUNTER_H */
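The cpuid10_eax/cpuid10_edx unions above describe CPUID leaf 0xA, which is how pmc_intel_init() later in this patch discovers the PMU. A rough userspace sketch of the same decoding follows; GCC's cpuid.h and its __get_cpuid() helper are assumptions of the sketch, not part of this diff:

#include <stdio.h>
#include <cpuid.h>	/* assumed: GCC's __get_cpuid() helper */

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(10, &eax, &ebx, &ecx, &edx))
		return 1;

	/* Field layout mirrors union cpuid10_eax / cpuid10_edx above: */
	printf("version_id:         %u\n", eax & 0xff);
	printf("num_counters:       %u\n", (eax >> 8) & 0xff);
	printf("bit_width:          %u\n", (eax >> 16) & 0xff);
	printf("mask_length:        %u\n", (eax >> 24) & 0xff);
	printf("num_counters_fixed: %u\n", edx & 0xf);
	return 0;
}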
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 6e72d74cf8dc..0b4d8c2b157d 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -340,6 +340,7 @@
340#define __NR_inotify_init1 332 340#define __NR_inotify_init1 332
341#define __NR_preadv 333 341#define __NR_preadv 333
342#define __NR_pwritev 334 342#define __NR_pwritev 334
343#define __NR_perf_counter_open 333
343 344
344#ifdef __KERNEL__ 345#ifdef __KERNEL__
345 346
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index f81829462325..d9aad876ad76 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -657,7 +657,8 @@ __SYSCALL(__NR_inotify_init1, sys_inotify_init1)
657__SYSCALL(__NR_preadv, sys_preadv) 657__SYSCALL(__NR_preadv, sys_preadv)
658#define __NR_pwritev 296 658#define __NR_pwritev 296
659__SYSCALL(__NR_pwritev, sys_pwritev) 659__SYSCALL(__NR_pwritev, sys_pwritev)
660 660#define __NR_perf_counter_open 295
661__SYSCALL(__NR_perf_counter_open, sys_perf_counter_open)
661 662
662#ifndef __NO_STUBS 663#ifndef __NO_STUBS
663#define __ARCH_WANT_OLD_READDIR 664#define __ARCH_WANT_OLD_READDIR
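These two hunks only reserve the syscall numbers; sys_perf_counter_open() itself lives in the core perf code outside this diff. A hypothetical userspace sketch of reaching it is below -- the perf_counter_hw_event structure comes from linux/perf_counter.h (not shown here) and the exact argument list is an assumption, so treat the field usage and parameters as illustrative only:

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_counter.h>	/* assumed to provide struct perf_counter_hw_event */

int main(void)
{
	struct perf_counter_hw_event hw_event;
	long fd;

	/*
	 * A zeroed event selects generic event id 0, which the event maps
	 * in perf_counter.c translate to CPU cycles (0x003c on Intel,
	 * 0x0076 on AMD).
	 */
	memset(&hw_event, 0, sizeof(hw_event));

	/* Assumed argument order: event description, pid (0 = current task),
	 * cpu (-1 = any), group fd (-1 = new group), flags. */
	fd = syscall(__NR_perf_counter_open, &hw_event, 0, -1, -1, 0);

	return fd < 0;
}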
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 098ec84b8c00..fb504f843e58 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -34,6 +34,7 @@
34#include <linux/smp.h> 34#include <linux/smp.h>
35#include <linux/mm.h> 35#include <linux/mm.h>
36 36
37#include <asm/perf_counter.h>
37#include <asm/pgalloc.h> 38#include <asm/pgalloc.h>
38#include <asm/atomic.h> 39#include <asm/atomic.h>
39#include <asm/mpspec.h> 40#include <asm/mpspec.h>
@@ -755,6 +756,8 @@ static void local_apic_timer_interrupt(void)
755 inc_irq_stat(apic_timer_irqs); 756 inc_irq_stat(apic_timer_irqs);
756 757
757 evt->event_handler(evt); 758 evt->event_handler(evt);
759
760 perf_counter_unthrottle();
758} 761}
759 762
760/* 763/*
@@ -1127,6 +1130,7 @@ void __cpuinit setup_local_APIC(void)
1127 apic_write(APIC_ESR, 0); 1130 apic_write(APIC_ESR, 0);
1128 } 1131 }
1129#endif 1132#endif
1133 perf_counters_lapic_init(0);
1130 1134
1131 preempt_disable(); 1135 preempt_disable();
1132 1136
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 4e242f9a06e4..3efcb2b96a15 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -1,5 +1,5 @@
1# 1#
2# Makefile for x86-compatible CPU details and quirks 2# Makefile for x86-compatible CPU details, features and quirks
3# 3#
4 4
5# Don't trace early stages of a secondary CPU boot 5# Don't trace early stages of a secondary CPU boot
@@ -23,11 +23,13 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
23obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o 23obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
24obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o 24obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
25 25
26obj-$(CONFIG_X86_MCE) += mcheck/ 26obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
27obj-$(CONFIG_MTRR) += mtrr/
28obj-$(CONFIG_CPU_FREQ) += cpufreq/
29 27
30obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o 28obj-$(CONFIG_X86_MCE) += mcheck/
29obj-$(CONFIG_MTRR) += mtrr/
30obj-$(CONFIG_CPU_FREQ) += cpufreq/
31
32obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
31 33
32quiet_cmd_mkcapflags = MKCAP $@ 34quiet_cmd_mkcapflags = MKCAP $@
33 cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ 35 cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7e4a459daa64..fd69c514ca2a 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -420,6 +420,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
420 if (c->x86 >= 6) 420 if (c->x86 >= 6)
421 set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); 421 set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
422 422
 423 /* Enable performance counters for K7 and later */
424 if (c->x86 > 6 && c->x86 <= 0x11)
425 set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
426
423 if (!c->x86_model_id[0]) { 427 if (!c->x86_model_id[0]) {
424 switch (c->x86) { 428 switch (c->x86) {
425 case 0xf: 429 case 0xf:
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c4f667896c28..a86769efe0df 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -13,6 +13,7 @@
13#include <linux/io.h> 13#include <linux/io.h>
14 14
15#include <asm/stackprotector.h> 15#include <asm/stackprotector.h>
16#include <asm/perf_counter.h>
16#include <asm/mmu_context.h> 17#include <asm/mmu_context.h>
17#include <asm/hypervisor.h> 18#include <asm/hypervisor.h>
18#include <asm/processor.h> 19#include <asm/processor.h>
@@ -854,6 +855,7 @@ void __init identify_boot_cpu(void)
854#else 855#else
855 vgetcpu_set_mode(); 856 vgetcpu_set_mode();
856#endif 857#endif
858 init_hw_perf_counters();
857} 859}
858 860
859void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) 861void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
new file mode 100644
index 000000000000..1116a41bc7b5
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -0,0 +1,1213 @@
1/*
2 * Performance counter x86 architecture code
3 *
4 * Copyright(C) 2008 Thomas Gleixner <tglx@linutronix.de>
5 * Copyright(C) 2008 Red Hat, Inc., Ingo Molnar
6 * Copyright(C) 2009 Jaswinder Singh Rajput
7 *
8 * For licencing details see kernel-base/COPYING
9 */
10
11#include <linux/perf_counter.h>
12#include <linux/capability.h>
13#include <linux/notifier.h>
14#include <linux/hardirq.h>
15#include <linux/kprobes.h>
16#include <linux/module.h>
17#include <linux/kdebug.h>
18#include <linux/sched.h>
19#include <linux/uaccess.h>
20
21#include <asm/apic.h>
22#include <asm/stacktrace.h>
23#include <asm/nmi.h>
24
25static bool perf_counters_initialized __read_mostly;
26
27/*
28 * Number of (generic) HW counters:
29 */
30static int nr_counters_generic __read_mostly;
31static u64 perf_counter_mask __read_mostly;
32static u64 counter_value_mask __read_mostly;
33static int counter_value_bits __read_mostly;
34
35static int nr_counters_fixed __read_mostly;
36
37struct cpu_hw_counters {
38 struct perf_counter *counters[X86_PMC_IDX_MAX];
39 unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
40 unsigned long interrupts;
41 u64 throttle_ctrl;
42 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
43 int enabled;
44};
45
46/*
47 * struct pmc_x86_ops - performance counter x86 ops
48 */
49struct pmc_x86_ops {
50 u64 (*save_disable_all)(void);
51 void (*restore_all)(u64);
52 u64 (*get_status)(u64);
53 void (*ack_status)(u64);
54 void (*enable)(int, u64);
55 void (*disable)(int, u64);
56 unsigned eventsel;
57 unsigned perfctr;
58 u64 (*event_map)(int);
59 u64 (*raw_event)(u64);
60 int max_events;
61};
62
63static struct pmc_x86_ops *pmc_ops __read_mostly;
64
65static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
66 .enabled = 1,
67};
68
69static __read_mostly int intel_perfmon_version;
70
71/*
72 * Intel PerfMon v3. Used on Core2 and later.
73 */
74static const u64 intel_perfmon_event_map[] =
75{
76 [PERF_COUNT_CPU_CYCLES] = 0x003c,
77 [PERF_COUNT_INSTRUCTIONS] = 0x00c0,
78 [PERF_COUNT_CACHE_REFERENCES] = 0x4f2e,
79 [PERF_COUNT_CACHE_MISSES] = 0x412e,
80 [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x00c4,
81 [PERF_COUNT_BRANCH_MISSES] = 0x00c5,
82 [PERF_COUNT_BUS_CYCLES] = 0x013c,
83};
84
85static u64 pmc_intel_event_map(int event)
86{
87 return intel_perfmon_event_map[event];
88}
89
90static u64 pmc_intel_raw_event(u64 event)
91{
92#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
93#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
94#define CORE_EVNTSEL_COUNTER_MASK 0xFF000000ULL
95
96#define CORE_EVNTSEL_MASK \
97 (CORE_EVNTSEL_EVENT_MASK | \
98 CORE_EVNTSEL_UNIT_MASK | \
99 CORE_EVNTSEL_COUNTER_MASK)
100
101 return event & CORE_EVNTSEL_MASK;
102}
103
104/*
105 * AMD Performance Monitor K7 and later.
106 */
107static const u64 amd_perfmon_event_map[] =
108{
109 [PERF_COUNT_CPU_CYCLES] = 0x0076,
110 [PERF_COUNT_INSTRUCTIONS] = 0x00c0,
111 [PERF_COUNT_CACHE_REFERENCES] = 0x0080,
112 [PERF_COUNT_CACHE_MISSES] = 0x0081,
113 [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x00c4,
114 [PERF_COUNT_BRANCH_MISSES] = 0x00c5,
115};
116
117static u64 pmc_amd_event_map(int event)
118{
119 return amd_perfmon_event_map[event];
120}
121
122static u64 pmc_amd_raw_event(u64 event)
123{
124#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL
125#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL
126#define K7_EVNTSEL_COUNTER_MASK 0x0FF000000ULL
127
128#define K7_EVNTSEL_MASK \
129 (K7_EVNTSEL_EVENT_MASK | \
130 K7_EVNTSEL_UNIT_MASK | \
131 K7_EVNTSEL_COUNTER_MASK)
132
133 return event & K7_EVNTSEL_MASK;
134}
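pmc_intel_raw_event() and pmc_amd_raw_event() only sanitize a user-supplied raw config; the value itself packs the event select into bits 0-7 and the unit mask into bits 8-15. A small illustrative helper (the macro names are mine, not from this patch):

#define EVNTSEL_EVENT(e)	((e) & 0x000000ffULL)
#define EVNTSEL_UMASK(u)	(((u) << 8) & 0x0000ff00ULL)

/*
 * Last-level cache misses on Intel: event 0x2e, unit mask 0x41,
 * i.e. 0x412e -- the same value intel_perfmon_event_map[] uses for
 * PERF_COUNT_CACHE_MISSES.
 */
static inline unsigned long long raw_llc_misses(void)
{
	return EVNTSEL_EVENT(0x2e) | EVNTSEL_UMASK(0x41);
}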
135
136/*
137 * Propagate counter elapsed time into the generic counter.
138 * Can only be executed on the CPU where the counter is active.
 139 * Adds the delta of events processed to the generic counter.
140 */
141static void
142x86_perf_counter_update(struct perf_counter *counter,
143 struct hw_perf_counter *hwc, int idx)
144{
145 u64 prev_raw_count, new_raw_count, delta;
146
147 /*
148 * Careful: an NMI might modify the previous counter value.
149 *
150 * Our tactic to handle this is to first atomically read and
151 * exchange a new raw count - then add that new-prev delta
152 * count to the generic counter atomically:
153 */
154again:
155 prev_raw_count = atomic64_read(&hwc->prev_count);
156 rdmsrl(hwc->counter_base + idx, new_raw_count);
157
158 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
159 new_raw_count) != prev_raw_count)
160 goto again;
161
162 /*
163 * Now we have the new raw value and have updated the prev
164 * timestamp already. We can now calculate the elapsed delta
165 * (counter-)time and add that to the generic counter.
166 *
167 * Careful, not all hw sign-extends above the physical width
168 * of the count, so we do that by clipping the delta to 32 bits:
169 */
170 delta = (u64)(u32)((s32)new_raw_count - (s32)prev_raw_count);
171
172 atomic64_add(delta, &counter->count);
173 atomic64_sub(delta, &hwc->period_left);
174}
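The clipping to 32 bits above can be counter-intuitive; here is a minimal standalone check of the same arithmetic (a userspace sketch using stdint types in place of the kernel's u64/s32):

#include <stdint.h>
#include <assert.h>

/* Same expression as in x86_perf_counter_update(). */
static uint64_t clip_delta(uint64_t prev, uint64_t cur)
{
	return (uint64_t)(uint32_t)((int32_t)cur - (int32_t)prev);
}

int main(void)
{
	/*
	 * A counter whose low 32 bits wrap from 0xffffffff to 0x0000000f
	 * still yields 16 events, even though the raw values moved
	 * "backwards" when read as 64-bit numbers.
	 */
	assert(clip_delta(0xffffffffULL, 0x0000000fULL) == 0x10);
	return 0;
}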
175
176static atomic_t num_counters;
177static DEFINE_MUTEX(pmc_reserve_mutex);
178
179static bool reserve_pmc_hardware(void)
180{
181 int i;
182
183 if (nmi_watchdog == NMI_LOCAL_APIC)
184 disable_lapic_nmi_watchdog();
185
186 for (i = 0; i < nr_counters_generic; i++) {
187 if (!reserve_perfctr_nmi(pmc_ops->perfctr + i))
188 goto perfctr_fail;
189 }
190
191 for (i = 0; i < nr_counters_generic; i++) {
192 if (!reserve_evntsel_nmi(pmc_ops->eventsel + i))
193 goto eventsel_fail;
194 }
195
196 return true;
197
198eventsel_fail:
199 for (i--; i >= 0; i--)
200 release_evntsel_nmi(pmc_ops->eventsel + i);
201
202 i = nr_counters_generic;
203
204perfctr_fail:
205 for (i--; i >= 0; i--)
206 release_perfctr_nmi(pmc_ops->perfctr + i);
207
208 if (nmi_watchdog == NMI_LOCAL_APIC)
209 enable_lapic_nmi_watchdog();
210
211 return false;
212}
213
214static void release_pmc_hardware(void)
215{
216 int i;
217
218 for (i = 0; i < nr_counters_generic; i++) {
219 release_perfctr_nmi(pmc_ops->perfctr + i);
220 release_evntsel_nmi(pmc_ops->eventsel + i);
221 }
222
223 if (nmi_watchdog == NMI_LOCAL_APIC)
224 enable_lapic_nmi_watchdog();
225}
226
227static void hw_perf_counter_destroy(struct perf_counter *counter)
228{
229 if (atomic_dec_and_mutex_lock(&num_counters, &pmc_reserve_mutex)) {
230 release_pmc_hardware();
231 mutex_unlock(&pmc_reserve_mutex);
232 }
233}
234
235/*
236 * Setup the hardware configuration for a given hw_event_type
237 */
238static int __hw_perf_counter_init(struct perf_counter *counter)
239{
240 struct perf_counter_hw_event *hw_event = &counter->hw_event;
241 struct hw_perf_counter *hwc = &counter->hw;
242 int err;
243
244 if (unlikely(!perf_counters_initialized))
245 return -EINVAL;
246
247 err = 0;
248 if (atomic_inc_not_zero(&num_counters)) {
249 mutex_lock(&pmc_reserve_mutex);
250 if (atomic_read(&num_counters) == 0 && !reserve_pmc_hardware())
251 err = -EBUSY;
252 else
253 atomic_inc(&num_counters);
254 mutex_unlock(&pmc_reserve_mutex);
255 }
256 if (err)
257 return err;
258
259 /*
260 * Generate PMC IRQs:
261 * (keep 'enabled' bit clear for now)
262 */
263 hwc->config = ARCH_PERFMON_EVENTSEL_INT;
264
265 /*
266 * Count user and OS events unless requested not to.
267 */
268 if (!hw_event->exclude_user)
269 hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
270 if (!hw_event->exclude_kernel)
271 hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
272
273 /*
274 * If privileged enough, allow NMI events:
275 */
276 hwc->nmi = 0;
277 if (capable(CAP_SYS_ADMIN) && hw_event->nmi)
278 hwc->nmi = 1;
279
280 hwc->irq_period = hw_event->irq_period;
281 /*
282 * Intel PMCs cannot be accessed sanely above 32 bit width,
283 * so we install an artificial 1<<31 period regardless of
284 * the generic counter period:
285 */
286 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
287 if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF)
288 hwc->irq_period = 0x7FFFFFFF;
289
290 atomic64_set(&hwc->period_left, hwc->irq_period);
291
292 /*
293 * Raw event type provide the config in the event structure
294 */
295 if (perf_event_raw(hw_event)) {
296 hwc->config |= pmc_ops->raw_event(perf_event_config(hw_event));
297 } else {
298 if (perf_event_id(hw_event) >= pmc_ops->max_events)
299 return -EINVAL;
300 /*
301 * The generic map:
302 */
303 hwc->config |= pmc_ops->event_map(perf_event_id(hw_event));
304 }
305
306 counter->destroy = hw_perf_counter_destroy;
307
308 return 0;
309}
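To make the config assembly in __hw_perf_counter_init() concrete: for a non-raw CPU-cycles event that excludes kernel-mode counting on an Intel CPU, hwc->config ends up as the OR of the interrupt-enable bit, the user-mode bit and the architectural event code. A quick check of that arithmetic (the local macro names are illustrative):

#include <stdint.h>
#include <assert.h>

#define EVENTSEL_INT	(1 << 20)	/* ARCH_PERFMON_EVENTSEL_INT */
#define EVENTSEL_USR	(1 << 16)	/* ARCH_PERFMON_EVENTSEL_USR */

int main(void)
{
	/* exclude_kernel = 1, exclude_user = 0, event = CPU cycles (0x003c): */
	uint64_t config = EVENTSEL_INT | EVENTSEL_USR | 0x003c;

	assert(config == 0x11003cULL);
	return 0;
}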
310
311static u64 pmc_intel_save_disable_all(void)
312{
313 u64 ctrl;
314
315 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
316 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
317
318 return ctrl;
319}
320
321static u64 pmc_amd_save_disable_all(void)
322{
323 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
324 int enabled, idx;
325
326 enabled = cpuc->enabled;
327 cpuc->enabled = 0;
328 /*
329 * ensure we write the disable before we start disabling the
 330 * counters proper, so that pmc_amd_enable() does the right thing.
331 */
332 barrier();
333
334 for (idx = 0; idx < nr_counters_generic; idx++) {
335 u64 val;
336
337 rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
338 if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) {
339 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
340 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
341 }
342 }
343
344 return enabled;
345}
346
347u64 hw_perf_save_disable(void)
348{
349 if (unlikely(!perf_counters_initialized))
350 return 0;
351
352 return pmc_ops->save_disable_all();
353}
354/*
355 * Exported because of ACPI idle
356 */
357EXPORT_SYMBOL_GPL(hw_perf_save_disable);
358
359static void pmc_intel_restore_all(u64 ctrl)
360{
361 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
362}
363
364static void pmc_amd_restore_all(u64 ctrl)
365{
366 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
367 int idx;
368
369 cpuc->enabled = ctrl;
370 barrier();
371 if (!ctrl)
372 return;
373
374 for (idx = 0; idx < nr_counters_generic; idx++) {
375 if (test_bit(idx, cpuc->active_mask)) {
376 u64 val;
377
378 rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
379 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
380 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
381 }
382 }
383}
384
385void hw_perf_restore(u64 ctrl)
386{
387 if (unlikely(!perf_counters_initialized))
388 return;
389
390 pmc_ops->restore_all(ctrl);
391}
392/*
393 * Exported because of ACPI idle
394 */
395EXPORT_SYMBOL_GPL(hw_perf_restore);
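hw_perf_save_disable() and hw_perf_restore() are meant to bracket sections that reprogram the PMU; the core perf code and the ACPI idle path (both outside this diff) use them roughly as in this kernel-context sketch:

/* Illustrative only -- kernel context, not a standalone program: */
static void reprogram_pmu_example(void)
{
	u64 ctrl;

	ctrl = hw_perf_save_disable();	/* PMU globally off, state captured */
	/* ... add, remove or reprogram counters here ... */
	hw_perf_restore(ctrl);		/* previous global enable state restored */
}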
396
397static u64 pmc_intel_get_status(u64 mask)
398{
399 u64 status;
400
401 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
402
403 return status;
404}
405
406static u64 pmc_amd_get_status(u64 mask)
407{
408 u64 status = 0;
409 int idx;
410
411 for (idx = 0; idx < nr_counters_generic; idx++) {
412 s64 val;
413
414 if (!(mask & (1 << idx)))
415 continue;
416
417 rdmsrl(MSR_K7_PERFCTR0 + idx, val);
418 val <<= (64 - counter_value_bits);
419 if (val >= 0)
420 status |= (1 << idx);
421 }
422
423 return status;
424}
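The shift-and-sign trick in pmc_amd_get_status() relies on counters being programmed to start at -period (see __hw_perf_counter_set_period() below), so the top implemented bit -- bit 47 for counter_value_bits = 48 -- stays set until the counter wraps past zero. An equivalent standalone test:

#include <stdint.h>
#include <assert.h>

/* Equivalent to ((s64)raw << (64 - 48)) >= 0, without the signed shift. */
static int amd_counter_overflowed(uint64_t raw48)
{
	return !(raw48 & (1ULL << 47));
}

int main(void)
{
	assert(!amd_counter_overflowed(0xffffffffff00ULL));	/* still counting up  */
	assert(amd_counter_overflowed(0x000000000010ULL));	/* wrapped past zero  */
	return 0;
}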
425
426static u64 hw_perf_get_status(u64 mask)
427{
428 if (unlikely(!perf_counters_initialized))
429 return 0;
430
431 return pmc_ops->get_status(mask);
432}
433
434static void pmc_intel_ack_status(u64 ack)
435{
436 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
437}
438
439static void pmc_amd_ack_status(u64 ack)
440{
441}
442
443static void hw_perf_ack_status(u64 ack)
444{
445 if (unlikely(!perf_counters_initialized))
446 return;
447
448 pmc_ops->ack_status(ack);
449}
450
451static void pmc_intel_enable(int idx, u64 config)
452{
453 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx,
454 config | ARCH_PERFMON_EVENTSEL0_ENABLE);
455}
456
457static void pmc_amd_enable(int idx, u64 config)
458{
459 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
460
461 set_bit(idx, cpuc->active_mask);
462 if (cpuc->enabled)
463 config |= ARCH_PERFMON_EVENTSEL0_ENABLE;
464
465 wrmsrl(MSR_K7_EVNTSEL0 + idx, config);
466}
467
468static void hw_perf_enable(int idx, u64 config)
469{
470 if (unlikely(!perf_counters_initialized))
471 return;
472
473 pmc_ops->enable(idx, config);
474}
475
476static void pmc_intel_disable(int idx, u64 config)
477{
478 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, config);
479}
480
481static void pmc_amd_disable(int idx, u64 config)
482{
483 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
484
485 clear_bit(idx, cpuc->active_mask);
486 wrmsrl(MSR_K7_EVNTSEL0 + idx, config);
487
488}
489
490static void hw_perf_disable(int idx, u64 config)
491{
492 if (unlikely(!perf_counters_initialized))
493 return;
494
495 pmc_ops->disable(idx, config);
496}
497
498static inline void
499__pmc_fixed_disable(struct perf_counter *counter,
500 struct hw_perf_counter *hwc, unsigned int __idx)
501{
502 int idx = __idx - X86_PMC_IDX_FIXED;
503 u64 ctrl_val, mask;
504 int err;
505
506 mask = 0xfULL << (idx * 4);
507
508 rdmsrl(hwc->config_base, ctrl_val);
509 ctrl_val &= ~mask;
510 err = checking_wrmsrl(hwc->config_base, ctrl_val);
511}
512
513static inline void
514__pmc_generic_disable(struct perf_counter *counter,
515 struct hw_perf_counter *hwc, unsigned int idx)
516{
517 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
518 __pmc_fixed_disable(counter, hwc, idx);
519 else
520 hw_perf_disable(idx, hwc->config);
521}
522
523static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
524
525/*
526 * Set the next IRQ period, based on the hwc->period_left value.
527 * To be called with the counter disabled in hw:
528 */
529static void
530__hw_perf_counter_set_period(struct perf_counter *counter,
531 struct hw_perf_counter *hwc, int idx)
532{
533 s64 left = atomic64_read(&hwc->period_left);
534 s64 period = hwc->irq_period;
535 int err;
536
537 /*
 538 * If we are way outside a reasonable range then just skip forward:
539 */
540 if (unlikely(left <= -period)) {
541 left = period;
542 atomic64_set(&hwc->period_left, left);
543 }
544
545 if (unlikely(left <= 0)) {
546 left += period;
547 atomic64_set(&hwc->period_left, left);
548 }
549
550 per_cpu(prev_left[idx], smp_processor_id()) = left;
551
552 /*
553 * The hw counter starts counting from this counter offset,
 554 * mark it to be able to extract future deltas:
555 */
556 atomic64_set(&hwc->prev_count, (u64)-left);
557
558 err = checking_wrmsrl(hwc->counter_base + idx,
559 (u64)(-left) & counter_value_mask);
560}
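The (-left) & counter_value_mask write above is what makes the hardware fire after exactly 'left' more events: the counter is started that far below its overflow point. A quick standalone check of the arithmetic, assuming a 40-bit counter width purely for illustration:

#include <stdint.h>
#include <assert.h>

int main(void)
{
	uint64_t mask = (1ULL << 40) - 1;	/* e.g. a 40-bit wide counter */
	int64_t left = 1000;			/* events until the next PMI  */
	uint64_t programmed = (uint64_t)(-left) & mask;

	/* The counter increments (mask - programmed + 1) times before it
	 * overflows -- exactly 'left' events: */
	assert(mask - programmed + 1 == (uint64_t)left);
	return 0;
}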
561
562static inline void
563__pmc_fixed_enable(struct perf_counter *counter,
564 struct hw_perf_counter *hwc, unsigned int __idx)
565{
566 int idx = __idx - X86_PMC_IDX_FIXED;
567 u64 ctrl_val, bits, mask;
568 int err;
569
570 /*
571 * Enable IRQ generation (0x8),
572 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
573 * if requested:
574 */
575 bits = 0x8ULL;
576 if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
577 bits |= 0x2;
578 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
579 bits |= 0x1;
580 bits <<= (idx * 4);
581 mask = 0xfULL << (idx * 4);
582
583 rdmsrl(hwc->config_base, ctrl_val);
584 ctrl_val &= ~mask;
585 ctrl_val |= bits;
586 err = checking_wrmsrl(hwc->config_base, ctrl_val);
587}
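Each fixed counter owns one 4-bit field in MSR_ARCH_PERFMON_FIXED_CTR_CTRL; __pmc_fixed_enable() above builds that nibble and shifts it into place. A helper mirroring the same bit layout (illustrative, not part of the patch):

#include <stdint.h>

/* bit 0: ring-0 counting, bit 1: ring-3 counting, bit 3: PMI on overflow */
static uint64_t fixed_ctrl_bits(int idx, int count_user, int count_os)
{
	uint64_t bits = 0x8ULL;		/* always enable the interrupt */

	if (count_user)
		bits |= 0x2;
	if (count_os)
		bits |= 0x1;

	return bits << (idx * 4);	/* nibble for fixed counter 'idx' */
}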
588
589static void
590__pmc_generic_enable(struct perf_counter *counter,
591 struct hw_perf_counter *hwc, int idx)
592{
593 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
594 __pmc_fixed_enable(counter, hwc, idx);
595 else
596 hw_perf_enable(idx, hwc->config);
597}
598
599static int
600fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
601{
602 unsigned int event;
603
604 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
605 return -1;
606
607 if (unlikely(hwc->nmi))
608 return -1;
609
610 event = hwc->config & ARCH_PERFMON_EVENT_MASK;
611
612 if (unlikely(event == pmc_ops->event_map(PERF_COUNT_INSTRUCTIONS)))
613 return X86_PMC_IDX_FIXED_INSTRUCTIONS;
614 if (unlikely(event == pmc_ops->event_map(PERF_COUNT_CPU_CYCLES)))
615 return X86_PMC_IDX_FIXED_CPU_CYCLES;
616 if (unlikely(event == pmc_ops->event_map(PERF_COUNT_BUS_CYCLES)))
617 return X86_PMC_IDX_FIXED_BUS_CYCLES;
618
619 return -1;
620}
621
622/*
623 * Find a PMC slot for the freshly enabled / scheduled in counter:
624 */
625static int pmc_generic_enable(struct perf_counter *counter)
626{
627 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
628 struct hw_perf_counter *hwc = &counter->hw;
629 int idx;
630
631 idx = fixed_mode_idx(counter, hwc);
632 if (idx >= 0) {
633 /*
634 * Try to get the fixed counter, if that is already taken
635 * then try to get a generic counter:
636 */
637 if (test_and_set_bit(idx, cpuc->used))
638 goto try_generic;
639
640 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
641 /*
642 * We set it so that counter_base + idx in wrmsr/rdmsr maps to
643 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
644 */
645 hwc->counter_base =
646 MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
647 hwc->idx = idx;
648 } else {
649 idx = hwc->idx;
650 /* Try to get the previous generic counter again */
651 if (test_and_set_bit(idx, cpuc->used)) {
652try_generic:
653 idx = find_first_zero_bit(cpuc->used, nr_counters_generic);
654 if (idx == nr_counters_generic)
655 return -EAGAIN;
656
657 set_bit(idx, cpuc->used);
658 hwc->idx = idx;
659 }
660 hwc->config_base = pmc_ops->eventsel;
661 hwc->counter_base = pmc_ops->perfctr;
662 }
663
664 perf_counters_lapic_init(hwc->nmi);
665
666 __pmc_generic_disable(counter, hwc, idx);
667
668 cpuc->counters[idx] = counter;
669 /*
670 * Make it visible before enabling the hw:
671 */
672 smp_wmb();
673
674 __hw_perf_counter_set_period(counter, hwc, idx);
675 __pmc_generic_enable(counter, hwc, idx);
676
677 return 0;
678}
679
680void perf_counter_print_debug(void)
681{
682 u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
683 struct cpu_hw_counters *cpuc;
684 int cpu, idx;
685
686 if (!nr_counters_generic)
687 return;
688
689 local_irq_disable();
690
691 cpu = smp_processor_id();
692 cpuc = &per_cpu(cpu_hw_counters, cpu);
693
694 if (intel_perfmon_version >= 2) {
695 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
696 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
697 rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
698 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
699
700 pr_info("\n");
701 pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
702 pr_info("CPU#%d: status: %016llx\n", cpu, status);
703 pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
704 pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
705 }
706 pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used);
707
708 for (idx = 0; idx < nr_counters_generic; idx++) {
709 rdmsrl(pmc_ops->eventsel + idx, pmc_ctrl);
710 rdmsrl(pmc_ops->perfctr + idx, pmc_count);
711
712 prev_left = per_cpu(prev_left[idx], cpu);
713
714 pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
715 cpu, idx, pmc_ctrl);
716 pr_info("CPU#%d: gen-PMC%d count: %016llx\n",
717 cpu, idx, pmc_count);
718 pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
719 cpu, idx, prev_left);
720 }
721 for (idx = 0; idx < nr_counters_fixed; idx++) {
722 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
723
724 pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
725 cpu, idx, pmc_count);
726 }
727 local_irq_enable();
728}
729
730static void pmc_generic_disable(struct perf_counter *counter)
731{
732 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
733 struct hw_perf_counter *hwc = &counter->hw;
734 unsigned int idx = hwc->idx;
735
736 __pmc_generic_disable(counter, hwc, idx);
737
738 clear_bit(idx, cpuc->used);
739 cpuc->counters[idx] = NULL;
740 /*
741 * Make sure the cleared pointer becomes visible before we
742 * (potentially) free the counter:
743 */
744 smp_wmb();
745
746 /*
747 * Drain the remaining delta count out of a counter
748 * that we are disabling:
749 */
750 x86_perf_counter_update(counter, hwc, idx);
751}
752
753/*
754 * Save and restart an expired counter. Called by NMI contexts,
755 * so it has to be careful about preempting normal counter ops:
756 */
757static void perf_save_and_restart(struct perf_counter *counter)
758{
759 struct hw_perf_counter *hwc = &counter->hw;
760 int idx = hwc->idx;
761
762 x86_perf_counter_update(counter, hwc, idx);
763 __hw_perf_counter_set_period(counter, hwc, idx);
764
765 if (counter->state == PERF_COUNTER_STATE_ACTIVE)
766 __pmc_generic_enable(counter, hwc, idx);
767}
768
769/*
770 * Maximum interrupt frequency of 100KHz per CPU
771 */
772#define PERFMON_MAX_INTERRUPTS (100000/HZ)
773
774/*
775 * This handler is triggered by the local APIC, so the APIC IRQ handling
776 * rules apply:
777 */
778static int __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
779{
780 int bit, cpu = smp_processor_id();
781 u64 ack, status;
782 struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
783 int ret = 0;
784
785 cpuc->throttle_ctrl = hw_perf_save_disable();
786
787 status = hw_perf_get_status(cpuc->throttle_ctrl);
788 if (!status)
789 goto out;
790
791 ret = 1;
792again:
793 inc_irq_stat(apic_perf_irqs);
794 ack = status;
795 for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
796 struct perf_counter *counter = cpuc->counters[bit];
797
798 clear_bit(bit, (unsigned long *) &status);
799 if (!counter)
800 continue;
801
802 perf_save_and_restart(counter);
803 if (perf_counter_overflow(counter, nmi, regs))
804 __pmc_generic_disable(counter, &counter->hw, bit);
805 }
806
807 hw_perf_ack_status(ack);
808
809 /*
810 * Repeat if there is more work to be done:
811 */
812 status = hw_perf_get_status(cpuc->throttle_ctrl);
813 if (status)
814 goto again;
815out:
816 /*
817 * Restore - do not reenable when global enable is off or throttled:
818 */
819 if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS)
820 hw_perf_restore(cpuc->throttle_ctrl);
821
822 return ret;
823}
824
825void perf_counter_unthrottle(void)
826{
827 struct cpu_hw_counters *cpuc;
828
829 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
830 return;
831
832 if (unlikely(!perf_counters_initialized))
833 return;
834
835 cpuc = &__get_cpu_var(cpu_hw_counters);
836 if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
837 if (printk_ratelimit())
838 printk(KERN_WARNING "PERFMON: max interrupts exceeded!\n");
839 hw_perf_restore(cpuc->throttle_ctrl);
840 }
841 cpuc->interrupts = 0;
842}
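PERFMON_MAX_INTERRUPTS = 100000/HZ bounds the PMI rate to roughly 100000 interrupts per second per CPU: the budget applies between two local APIC timer ticks, and the perf_counter_unthrottle() call added to local_apic_timer_interrupt() in apic.c resets it once per tick. Worked out for common HZ values:

#include <stdio.h>

#define PERFMON_MAX_INTERRUPTS(hz)	(100000 / (hz))

int main(void)
{
	/* PMIs allowed between two timer ticks before the PMU is throttled: */
	printf("HZ=1000: %d per tick\n", PERFMON_MAX_INTERRUPTS(1000));	/* 100  */
	printf("HZ=250:  %d per tick\n", PERFMON_MAX_INTERRUPTS(250));	/* 400  */
	printf("HZ=100:  %d per tick\n", PERFMON_MAX_INTERRUPTS(100));	/* 1000 */
	return 0;
}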
843
844void smp_perf_counter_interrupt(struct pt_regs *regs)
845{
846 irq_enter();
847 apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
848 ack_APIC_irq();
849 __smp_perf_counter_interrupt(regs, 0);
850 irq_exit();
851}
852
853void smp_perf_pending_interrupt(struct pt_regs *regs)
854{
855 irq_enter();
856 ack_APIC_irq();
857 inc_irq_stat(apic_pending_irqs);
858 perf_counter_do_pending();
859 irq_exit();
860}
861
862void set_perf_counter_pending(void)
863{
864 apic->send_IPI_self(LOCAL_PENDING_VECTOR);
865}
866
867void perf_counters_lapic_init(int nmi)
868{
869 u32 apic_val;
870
871 if (!perf_counters_initialized)
872 return;
873 /*
874 * Enable the performance counter vector in the APIC LVT:
875 */
876 apic_val = apic_read(APIC_LVTERR);
877
878 apic_write(APIC_LVTERR, apic_val | APIC_LVT_MASKED);
879 if (nmi)
880 apic_write(APIC_LVTPC, APIC_DM_NMI);
881 else
882 apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
883 apic_write(APIC_LVTERR, apic_val);
884}
885
886static int __kprobes
887perf_counter_nmi_handler(struct notifier_block *self,
888 unsigned long cmd, void *__args)
889{
890 struct die_args *args = __args;
891 struct pt_regs *regs;
892 int ret;
893
894 switch (cmd) {
895 case DIE_NMI:
896 case DIE_NMI_IPI:
897 break;
898
899 default:
900 return NOTIFY_DONE;
901 }
902
903 regs = args->regs;
904
905 apic_write(APIC_LVTPC, APIC_DM_NMI);
906 ret = __smp_perf_counter_interrupt(regs, 1);
907
908 return ret ? NOTIFY_STOP : NOTIFY_OK;
909}
910
911static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
912 .notifier_call = perf_counter_nmi_handler,
913 .next = NULL,
914 .priority = 1
915};
916
917static struct pmc_x86_ops pmc_intel_ops = {
918 .save_disable_all = pmc_intel_save_disable_all,
919 .restore_all = pmc_intel_restore_all,
920 .get_status = pmc_intel_get_status,
921 .ack_status = pmc_intel_ack_status,
922 .enable = pmc_intel_enable,
923 .disable = pmc_intel_disable,
924 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
925 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
926 .event_map = pmc_intel_event_map,
927 .raw_event = pmc_intel_raw_event,
928 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
929};
930
931static struct pmc_x86_ops pmc_amd_ops = {
932 .save_disable_all = pmc_amd_save_disable_all,
933 .restore_all = pmc_amd_restore_all,
934 .get_status = pmc_amd_get_status,
935 .ack_status = pmc_amd_ack_status,
936 .enable = pmc_amd_enable,
937 .disable = pmc_amd_disable,
938 .eventsel = MSR_K7_EVNTSEL0,
939 .perfctr = MSR_K7_PERFCTR0,
940 .event_map = pmc_amd_event_map,
941 .raw_event = pmc_amd_raw_event,
942 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
943};
944
945static struct pmc_x86_ops *pmc_intel_init(void)
946{
947 union cpuid10_edx edx;
948 union cpuid10_eax eax;
949 unsigned int unused;
950 unsigned int ebx;
951
952 /*
953 * Check whether the Architectural PerfMon supports
954 * Branch Misses Retired Event or not.
955 */
956 cpuid(10, &eax.full, &ebx, &unused, &edx.full);
957 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
958 return NULL;
959
960 intel_perfmon_version = eax.split.version_id;
961 if (intel_perfmon_version < 2)
962 return NULL;
963
964 pr_info("Intel Performance Monitoring support detected.\n");
965 pr_info("... version: %d\n", intel_perfmon_version);
966 pr_info("... bit width: %d\n", eax.split.bit_width);
967 pr_info("... mask length: %d\n", eax.split.mask_length);
968
969 nr_counters_generic = eax.split.num_counters;
970 nr_counters_fixed = edx.split.num_counters_fixed;
971 counter_value_mask = (1ULL << eax.split.bit_width) - 1;
972
973 return &pmc_intel_ops;
974}
975
976static struct pmc_x86_ops *pmc_amd_init(void)
977{
978 nr_counters_generic = 4;
979 nr_counters_fixed = 0;
980 counter_value_mask = 0x0000FFFFFFFFFFFFULL;
981 counter_value_bits = 48;
982
983 pr_info("AMD Performance Monitoring support detected.\n");
984
985 return &pmc_amd_ops;
986}
987
988void __init init_hw_perf_counters(void)
989{
990 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
991 return;
992
993 switch (boot_cpu_data.x86_vendor) {
994 case X86_VENDOR_INTEL:
995 pmc_ops = pmc_intel_init();
996 break;
997 case X86_VENDOR_AMD:
998 pmc_ops = pmc_amd_init();
999 break;
1000 }
1001 if (!pmc_ops)
1002 return;
1003
1004 pr_info("... num counters: %d\n", nr_counters_generic);
1005 if (nr_counters_generic > X86_PMC_MAX_GENERIC) {
1006 nr_counters_generic = X86_PMC_MAX_GENERIC;
1007 WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
1008 nr_counters_generic, X86_PMC_MAX_GENERIC);
1009 }
1010 perf_counter_mask = (1 << nr_counters_generic) - 1;
1011 perf_max_counters = nr_counters_generic;
1012
1013 pr_info("... value mask: %016Lx\n", counter_value_mask);
1014
1015 if (nr_counters_fixed > X86_PMC_MAX_FIXED) {
1016 nr_counters_fixed = X86_PMC_MAX_FIXED;
1017 WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
1018 nr_counters_fixed, X86_PMC_MAX_FIXED);
1019 }
1020 pr_info("... fixed counters: %d\n", nr_counters_fixed);
1021
1022 perf_counter_mask |= ((1LL << nr_counters_fixed)-1) << X86_PMC_IDX_FIXED;
1023
1024 pr_info("... counter mask: %016Lx\n", perf_counter_mask);
1025 perf_counters_initialized = true;
1026
1027 perf_counters_lapic_init(0);
1028 register_die_notifier(&perf_counter_nmi_notifier);
1029}
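The two perf_counter_mask updates in init_hw_perf_counters() place the generic counters in the low bits and the fixed counters at bit 32 and up, matching the X86_PMC_IDX_* constants from perf_counter.h. For a Core 2-class CPU with 2 generic and 3 fixed counters the resulting mask works out as follows:

#include <stdint.h>
#include <assert.h>

int main(void)
{
	int nr_generic = 2, nr_fixed = 3;
	uint64_t mask;

	mask  = (1ULL << nr_generic) - 1;		/* 0x0000000000000003 */
	mask |= ((1ULL << nr_fixed) - 1) << 32;		/* 0x0000000700000000 */

	assert(mask == 0x0000000700000003ULL);
	return 0;
}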
1030
1031static void pmc_generic_read(struct perf_counter *counter)
1032{
1033 x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
1034}
1035
1036static const struct hw_perf_counter_ops x86_perf_counter_ops = {
1037 .enable = pmc_generic_enable,
1038 .disable = pmc_generic_disable,
1039 .read = pmc_generic_read,
1040};
1041
1042const struct hw_perf_counter_ops *
1043hw_perf_counter_init(struct perf_counter *counter)
1044{
1045 int err;
1046
1047 err = __hw_perf_counter_init(counter);
1048 if (err)
1049 return ERR_PTR(err);
1050
1051 return &x86_perf_counter_ops;
1052}
1053
1054/*
1055 * callchain support
1056 */
1057
1058static inline
1059void callchain_store(struct perf_callchain_entry *entry, unsigned long ip)
1060{
1061 if (entry->nr < MAX_STACK_DEPTH)
1062 entry->ip[entry->nr++] = ip;
1063}
1064
1065static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry);
1066static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry);
1067
1068
1069static void
1070backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
1071{
1072 /* Ignore warnings */
1073}
1074
1075static void backtrace_warning(void *data, char *msg)
1076{
1077 /* Ignore warnings */
1078}
1079
1080static int backtrace_stack(void *data, char *name)
1081{
1082 /* Don't bother with IRQ stacks for now */
1083 return -1;
1084}
1085
1086static void backtrace_address(void *data, unsigned long addr, int reliable)
1087{
1088 struct perf_callchain_entry *entry = data;
1089
1090 if (reliable)
1091 callchain_store(entry, addr);
1092}
1093
1094static const struct stacktrace_ops backtrace_ops = {
1095 .warning = backtrace_warning,
1096 .warning_symbol = backtrace_warning_symbol,
1097 .stack = backtrace_stack,
1098 .address = backtrace_address,
1099};
1100
1101static void
1102perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
1103{
1104 unsigned long bp;
1105 char *stack;
1106 int nr = entry->nr;
1107
1108 callchain_store(entry, instruction_pointer(regs));
1109
1110 stack = ((char *)regs + sizeof(struct pt_regs));
1111#ifdef CONFIG_FRAME_POINTER
1112 bp = frame_pointer(regs);
1113#else
1114 bp = 0;
1115#endif
1116
1117 dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry);
1118
1119 entry->kernel = entry->nr - nr;
1120}
1121
1122
1123struct stack_frame {
1124 const void __user *next_fp;
1125 unsigned long return_address;
1126};
1127
1128static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
1129{
1130 int ret;
1131
1132 if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
1133 return 0;
1134
1135 ret = 1;
1136 pagefault_disable();
1137 if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
1138 ret = 0;
1139 pagefault_enable();
1140
1141 return ret;
1142}
1143
1144static void
1145perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
1146{
1147 struct stack_frame frame;
1148 const void __user *fp;
1149 int nr = entry->nr;
1150
1151 regs = (struct pt_regs *)current->thread.sp0 - 1;
1152 fp = (void __user *)regs->bp;
1153
1154 callchain_store(entry, regs->ip);
1155
1156 while (entry->nr < MAX_STACK_DEPTH) {
1157 frame.next_fp = NULL;
1158 frame.return_address = 0;
1159
1160 if (!copy_stack_frame(fp, &frame))
1161 break;
1162
1163 if ((unsigned long)fp < user_stack_pointer(regs))
1164 break;
1165
1166 callchain_store(entry, frame.return_address);
1167 fp = frame.next_fp;
1168 }
1169
1170 entry->user = entry->nr - nr;
1171}
1172
1173static void
1174perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
1175{
1176 int is_user;
1177
1178 if (!regs)
1179 return;
1180
1181 is_user = user_mode(regs);
1182
1183 if (!current || current->pid == 0)
1184 return;
1185
1186 if (is_user && current->state != TASK_RUNNING)
1187 return;
1188
1189 if (!is_user)
1190 perf_callchain_kernel(regs, entry);
1191
1192 if (current->mm)
1193 perf_callchain_user(regs, entry);
1194}
1195
1196struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1197{
1198 struct perf_callchain_entry *entry;
1199
1200 if (in_nmi())
1201 entry = &__get_cpu_var(nmi_entry);
1202 else
1203 entry = &__get_cpu_var(irq_entry);
1204
1205 entry->nr = 0;
1206 entry->hv = 0;
1207 entry->kernel = 0;
1208 entry->user = 0;
1209
1210 perf_do_callchain(regs, entry);
1211
1212 return entry;
1213}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index f6c70a164e32..d6f5b9fbde32 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -19,8 +19,8 @@
19#include <linux/nmi.h> 19#include <linux/nmi.h>
20#include <linux/kprobes.h> 20#include <linux/kprobes.h>
21 21
22#include <asm/genapic.h> 22#include <asm/apic.h>
23#include <asm/intel_arch_perfmon.h> 23#include <asm/perf_counter.h>
24 24
25struct nmi_watchdog_ctlblk { 25struct nmi_watchdog_ctlblk {
26 unsigned int cccr_msr; 26 unsigned int cccr_msr;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index a331ec38af9e..1d46cba56fd8 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1025,6 +1025,13 @@ apicinterrupt ERROR_APIC_VECTOR \
1025apicinterrupt SPURIOUS_APIC_VECTOR \ 1025apicinterrupt SPURIOUS_APIC_VECTOR \
1026 spurious_interrupt smp_spurious_interrupt 1026 spurious_interrupt smp_spurious_interrupt
1027 1027
1028#ifdef CONFIG_PERF_COUNTERS
1029apicinterrupt LOCAL_PERF_VECTOR \
1030 perf_counter_interrupt smp_perf_counter_interrupt
1031apicinterrupt LOCAL_PENDING_VECTOR \
1032 perf_pending_interrupt smp_perf_pending_interrupt
1033#endif
1034
1028/* 1035/*
1029 * Exception entry points. 1036 * Exception entry points.
1030 */ 1037 */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3aaf7b9e3a8b..d465487da587 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -63,6 +63,14 @@ static int show_other_interrupts(struct seq_file *p, int prec)
63 for_each_online_cpu(j) 63 for_each_online_cpu(j)
64 seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); 64 seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
65 seq_printf(p, " Spurious interrupts\n"); 65 seq_printf(p, " Spurious interrupts\n");
66 seq_printf(p, "CNT: ");
67 for_each_online_cpu(j)
68 seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
69 seq_printf(p, " Performance counter interrupts\n");
70 seq_printf(p, "PND: ");
71 for_each_online_cpu(j)
72 seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
73 seq_printf(p, " Performance pending work\n");
66#endif 74#endif
67 if (generic_interrupt_extension) { 75 if (generic_interrupt_extension) {
68 seq_printf(p, "PLT: "); 76 seq_printf(p, "PLT: ");
@@ -166,6 +174,8 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
166#ifdef CONFIG_X86_LOCAL_APIC 174#ifdef CONFIG_X86_LOCAL_APIC
167 sum += irq_stats(cpu)->apic_timer_irqs; 175 sum += irq_stats(cpu)->apic_timer_irqs;
168 sum += irq_stats(cpu)->irq_spurious_count; 176 sum += irq_stats(cpu)->irq_spurious_count;
177 sum += irq_stats(cpu)->apic_perf_irqs;
178 sum += irq_stats(cpu)->apic_pending_irqs;
169#endif 179#endif
170 if (generic_interrupt_extension) 180 if (generic_interrupt_extension)
171 sum += irq_stats(cpu)->generic_irqs; 181 sum += irq_stats(cpu)->generic_irqs;
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 368b0a8836f9..3190a6b961e6 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -118,28 +118,8 @@ int vector_used_by_percpu_irq(unsigned int vector)
118 return 0; 118 return 0;
119} 119}
120 120
121/* Overridden in paravirt.c */ 121static void __init smp_intr_init(void)
122void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
123
124void __init native_init_IRQ(void)
125{ 122{
126 int i;
127
128 /* Execute any quirks before the call gates are initialised: */
129 x86_quirk_pre_intr_init();
130
131 /*
132 * Cover the whole vector space, no vector can escape
133 * us. (some of these will be overridden and become
134 * 'special' SMP interrupts)
135 */
136 for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
137 /* SYSCALL_VECTOR was reserved in trap_init. */
138 if (i != SYSCALL_VECTOR)
139 set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
140 }
141
142
143#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) 123#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
144 /* 124 /*
145 * The reschedule interrupt is a CPU-to-CPU reschedule-helper 125 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
@@ -168,6 +148,11 @@ void __init native_init_IRQ(void)
168 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); 148 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
169 set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); 149 set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
170#endif 150#endif
151}
152
153static void __init apic_intr_init(void)
154{
155 smp_intr_init();
171 156
172#ifdef CONFIG_X86_LOCAL_APIC 157#ifdef CONFIG_X86_LOCAL_APIC
173 /* self generated IPI for local APIC timer */ 158 /* self generated IPI for local APIC timer */
@@ -179,12 +164,41 @@ void __init native_init_IRQ(void)
179 /* IPI vectors for APIC spurious and error interrupts */ 164 /* IPI vectors for APIC spurious and error interrupts */
180 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); 165 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
181 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); 166 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
182#endif 167# ifdef CONFIG_PERF_COUNTERS
168 alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt);
169 alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
170# endif
183 171
184#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL) 172# ifdef CONFIG_X86_MCE_P4THERMAL
185 /* thermal monitor LVT interrupt */ 173 /* thermal monitor LVT interrupt */
186 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); 174 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
175# endif
187#endif 176#endif
177}
178
179/* Overridden in paravirt.c */
180void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
181
182void __init native_init_IRQ(void)
183{
184 int i;
185
186 /* Execute any quirks before the call gates are initialised: */
187 x86_quirk_pre_intr_init();
188
189 apic_intr_init();
190
191 /*
192 * Cover the whole vector space, no vector can escape
193 * us. (some of these will be overridden and become
194 * 'special' SMP interrupts)
195 */
196 for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
197 int vector = FIRST_EXTERNAL_VECTOR + i;
198 /* SYSCALL_VECTOR was reserved in trap_init. */
199 if (!test_bit(vector, used_vectors))
200 set_intr_gate(vector, interrupt[i]);
201 }
188 202
189 if (!acpi_ioapic) 203 if (!acpi_ioapic)
190 setup_irq(2, &irq2); 204 setup_irq(2, &irq2);
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 8cd10537fd46..53ceb26f80ff 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -152,6 +152,12 @@ static void __init apic_intr_init(void)
152 /* IPI vectors for APIC spurious and error interrupts */ 152 /* IPI vectors for APIC spurious and error interrupts */
153 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); 153 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
154 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); 154 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
155
156 /* Performance monitoring interrupt: */
157#ifdef CONFIG_PERF_COUNTERS
158 alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt);
159 alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
160#endif
155} 161}
156 162
157void __init native_init_IRQ(void) 163void __init native_init_IRQ(void)
@@ -159,6 +165,9 @@ void __init native_init_IRQ(void)
159 int i; 165 int i;
160 166
161 init_ISA_irqs(); 167 init_ISA_irqs();
168
169 apic_intr_init();
170
162 /* 171 /*
163 * Cover the whole vector space, no vector can escape 172 * Cover the whole vector space, no vector can escape
164 * us. (some of these will be overridden and become 173 * us. (some of these will be overridden and become
@@ -166,12 +175,10 @@ void __init native_init_IRQ(void)
166 */ 175 */
167 for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { 176 for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
168 int vector = FIRST_EXTERNAL_VECTOR + i; 177 int vector = FIRST_EXTERNAL_VECTOR + i;
169 if (vector != IA32_SYSCALL_VECTOR) 178 if (!test_bit(vector, used_vectors))
170 set_intr_gate(vector, interrupt[i]); 179 set_intr_gate(vector, interrupt[i]);
171 } 180 }
172 181
173 apic_intr_init();
174
175 if (!acpi_ioapic) 182 if (!acpi_ioapic)
176 setup_irq(2, &irq2); 183 setup_irq(2, &irq2);
177} 184}
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 14425166b8e3..0a813b17b172 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -6,7 +6,6 @@
6 * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes 6 * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
7 * 2000-2002 x86-64 support by Andi Kleen 7 * 2000-2002 x86-64 support by Andi Kleen
8 */ 8 */
9
10#include <linux/sched.h> 9#include <linux/sched.h>
11#include <linux/mm.h> 10#include <linux/mm.h>
12#include <linux/smp.h> 11#include <linux/smp.h>
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index ff5c8736b491..c3ebbb901379 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -332,5 +332,6 @@ ENTRY(sys_call_table)
332 .long sys_dup3 /* 330 */ 332 .long sys_dup3 /* 330 */
333 .long sys_pipe2 333 .long sys_pipe2
334 .long sys_inotify_init1 334 .long sys_inotify_init1
335 .long sys_perf_counter_open
335 .long sys_preadv 336 .long sys_preadv
336 .long sys_pwritev 337 .long sys_pwritev
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a1d288327ff0..2cc162e09c4b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -945,8 +945,13 @@ void __init trap_init(void)
945#endif 945#endif
946 set_intr_gate(19, &simd_coprocessor_error); 946 set_intr_gate(19, &simd_coprocessor_error);
947 947
948 /* Reserve all the builtin and the syscall vector: */
949 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
950 set_bit(i, used_vectors);
951
948#ifdef CONFIG_IA32_EMULATION 952#ifdef CONFIG_IA32_EMULATION
949 set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); 953 set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
954 set_bit(IA32_SYSCALL_VECTOR, used_vectors);
950#endif 955#endif
951 956
952#ifdef CONFIG_X86_32 957#ifdef CONFIG_X86_32
@@ -963,17 +968,9 @@ void __init trap_init(void)
963 } 968 }
964 969
965 set_system_trap_gate(SYSCALL_VECTOR, &system_call); 970 set_system_trap_gate(SYSCALL_VECTOR, &system_call);
966#endif
967
968 /* Reserve all the builtin and the syscall vector: */
969 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
970 set_bit(i, used_vectors);
971
972#ifdef CONFIG_X86_64
973 set_bit(IA32_SYSCALL_VECTOR, used_vectors);
974#else
975 set_bit(SYSCALL_VECTOR, used_vectors); 971 set_bit(SYSCALL_VECTOR, used_vectors);
976#endif 972#endif
973
977 /* 974 /*
978 * Should be a barrier for any external CPU state: 975 * Should be a barrier for any external CPU state:
979 */ 976 */
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a03b7279efa0..f2d3324d9215 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -27,6 +27,7 @@
27#include <linux/tty.h> 27#include <linux/tty.h>
28#include <linux/smp.h> 28#include <linux/smp.h>
29#include <linux/mm.h> 29#include <linux/mm.h>
30#include <linux/perf_counter.h>
30 31
31#include <asm-generic/sections.h> 32#include <asm-generic/sections.h>
32 33
@@ -1044,6 +1045,8 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
1044 if (unlikely(error_code & PF_RSVD)) 1045 if (unlikely(error_code & PF_RSVD))
1045 pgtable_bad(regs, error_code, address); 1046 pgtable_bad(regs, error_code, address);
1046 1047
1048 perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs);
1049
1047 /* 1050 /*
1048 * If we're in an interrupt, have no user context or are running 1051 * If we're in an interrupt, have no user context or are running
1049 * in an atomic region then we must not take the fault: 1052 * in an atomic region then we must not take the fault:
@@ -1137,10 +1140,13 @@ good_area:
1137 return; 1140 return;
1138 } 1141 }
1139 1142
1140 if (fault & VM_FAULT_MAJOR) 1143 if (fault & VM_FAULT_MAJOR) {
1141 tsk->maj_flt++; 1144 tsk->maj_flt++;
1142 else 1145 perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs);
1146 } else {
1143 tsk->min_flt++; 1147 tsk->min_flt++;
1148 perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs);
1149 }
1144 1150
1145 check_v8086_mode(regs, address, tsk); 1151 check_v8086_mode(regs, address, tsk);
1146 1152
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 202864ad49a7..c638685136e1 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -40,8 +40,9 @@ static int profile_exceptions_notify(struct notifier_block *self,
40 40
41 switch (val) { 41 switch (val) {
42 case DIE_NMI: 42 case DIE_NMI:
43 if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu))) 43 case DIE_NMI_IPI:
44 ret = NOTIFY_STOP; 44 model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu));
45 ret = NOTIFY_STOP;
45 break; 46 break;
46 default: 47 default:
47 break; 48 break;
@@ -134,7 +135,7 @@ static void nmi_cpu_setup(void *dummy)
134static struct notifier_block profile_exceptions_nb = { 135static struct notifier_block profile_exceptions_nb = {
135 .notifier_call = profile_exceptions_notify, 136 .notifier_call = profile_exceptions_notify,
136 .next = NULL, 137 .next = NULL,
137 .priority = 0 138 .priority = 2
138}; 139};
139 140
140static int nmi_setup(void) 141static int nmi_setup(void)
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 10131fbdaada..4da7230b3d17 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -18,7 +18,7 @@
18#include <asm/msr.h> 18#include <asm/msr.h>
19#include <asm/apic.h> 19#include <asm/apic.h>
20#include <asm/nmi.h> 20#include <asm/nmi.h>
21#include <asm/intel_arch_perfmon.h> 21#include <asm/perf_counter.h>
22 22
23#include "op_x86_model.h" 23#include "op_x86_model.h"
24#include "op_counter.h" 24#include "op_counter.h"
@@ -136,6 +136,13 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
136 u64 val; 136 u64 val;
137 int i; 137 int i;
138 138
139 /*
140 * This can happen if perf counters are in use when
141 * we steal the die notifier NMI.
142 */
143 if (unlikely(!reset_value))
144 goto out;
145
139 for (i = 0 ; i < num_counters; ++i) { 146 for (i = 0 ; i < num_counters; ++i) {
140 if (!reset_value[i]) 147 if (!reset_value[i])
141 continue; 148 continue;
@@ -146,6 +153,7 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
146 } 153 }
147 } 154 }
148 155
156out:
149 /* Only P6 based Pentium M need to re-unmask the apic vector but it 157 /* Only P6 based Pentium M need to re-unmask the apic vector but it
150 * doesn't hurt other P6 variant */ 158 * doesn't hurt other P6 variant */
151 apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); 159 apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);