author    Linus Torvalds <torvalds@linux-foundation.org>  2009-06-20 14:29:32 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2009-06-20 14:29:32 -0400
commit    12e24f34cb0d55efd08c18b2112507d4bf498008
tree      83b07be17b8ef45f42360a3b9159b3aaae3fbad4 /arch
parent    1eb51c33b21ffa3fceb634d1d6bcd6488c79bc26
parent    eadc84cc01e04f9f74ec2de0c9355be035c7b396
Merge branch 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (49 commits)
  perfcounter: Handle some IO return values
  perf_counter: Push perf_sample_data through the swcounter code
  perf_counter tools: Define and use our own u64, s64 etc. definitions
  perf_counter: Close race in perf_lock_task_context()
  perf_counter, x86: Improve interactions with fast-gup
  perf_counter: Simplify and fix task migration counting
  perf_counter tools: Add a data file header
  perf_counter: Update userspace callchain sampling uses
  perf_counter: Make callchain samples extensible
  perf report: Filter to parent set by default
  perf_counter tools: Handle lost events
  perf_counter: Add event overflow handling
  fs: Provide empty .set_page_dirty() aop for anon inodes
  perf_counter: tools: Makefile tweaks for 64-bit powerpc
  perf_counter: powerpc: Add processor back-end for MPC7450 family
  perf_counter: powerpc: Make powerpc perf_counter code safe for 32-bit kernels
  perf_counter: powerpc: Change how processor-specific back-ends get selected
  perf_counter: powerpc: Use unsigned long for register and constraint values
  perf_counter: powerpc: Enable use of software counters on 32-bit powerpc
  perf_counter tools: Add and use isprint()
  ...
Diffstat (limited to 'arch')
-rw-r--r--  arch/powerpc/Kconfig                     |   1
-rw-r--r--  arch/powerpc/include/asm/hw_irq.h        |   6
-rw-r--r--  arch/powerpc/include/asm/perf_counter.h  |  52
-rw-r--r--  arch/powerpc/kernel/Makefile             |   8
-rw-r--r--  arch/powerpc/kernel/mpc7450-pmu.c        | 417
-rw-r--r--  arch/powerpc/kernel/perf_counter.c       | 257
-rw-r--r--  arch/powerpc/kernel/power4-pmu.c         |  89
-rw-r--r--  arch/powerpc/kernel/power5+-pmu.c        |  95
-rw-r--r--  arch/powerpc/kernel/power5-pmu.c         |  98
-rw-r--r--  arch/powerpc/kernel/power6-pmu.c         |  72
-rw-r--r--  arch/powerpc/kernel/power7-pmu.c         |  61
-rw-r--r--  arch/powerpc/kernel/ppc970-pmu.c         |  63
-rw-r--r--  arch/powerpc/kernel/time.c               |  25
-rw-r--r--  arch/powerpc/platforms/Kconfig.cputype   |  12
-rw-r--r--  arch/x86/include/asm/perf_counter.h      |   5
-rw-r--r--  arch/x86/include/asm/pgtable_32.h        |   8
-rw-r--r--  arch/x86/include/asm/uaccess.h           |   7
-rw-r--r--  arch/x86/kernel/cpu/perf_counter.c       | 138
-rw-r--r--  arch/x86/mm/gup.c                        |  58
19 files changed, 1075 insertions(+), 397 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9fb344d5a86a..bf6cedfa05db 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -126,6 +126,7 @@ config PPC
 	select HAVE_OPROFILE
 	select HAVE_SYSCALL_WRAPPERS if PPC64
 	select GENERIC_ATOMIC64 if PPC32
+	select HAVE_PERF_COUNTERS
 
 config EARLY_PRINTK
 	bool
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index b7f8f4a87cc0..867ab8ed69b3 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -131,6 +131,8 @@ static inline int irqs_disabled_flags(unsigned long flags)
 struct irq_chip;
 
 #ifdef CONFIG_PERF_COUNTERS
+
+#ifdef CONFIG_PPC64
 static inline unsigned long test_perf_counter_pending(void)
 {
 	unsigned long x;
@@ -154,15 +156,15 @@ static inline void clear_perf_counter_pending(void)
 		"r" (0),
 		"i" (offsetof(struct paca_struct, perf_counter_pending)));
 }
+#endif /* CONFIG_PPC64 */
 
-#else
+#else  /* CONFIG_PERF_COUNTERS */
 
 static inline unsigned long test_perf_counter_pending(void)
 {
 	return 0;
 }
 
-static inline void set_perf_counter_pending(void) {}
 static inline void clear_perf_counter_pending(void) {}
 #endif /* CONFIG_PERF_COUNTERS */
 
diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h
index cc7c887705b8..8ccd4e155768 100644
--- a/arch/powerpc/include/asm/perf_counter.h
+++ b/arch/powerpc/include/asm/perf_counter.h
@@ -10,6 +10,8 @@
  */
 #include <linux/types.h>
 
+#include <asm/hw_irq.h>
+
 #define MAX_HWCOUNTERS		8
 #define MAX_EVENT_ALTERNATIVES	8
 #define MAX_LIMITED_HWCOUNTERS	2
@@ -19,27 +21,27 @@
  * describe the PMU on a particular POWER-family CPU.
  */
 struct power_pmu {
-	int	n_counter;
-	int	max_alternatives;
-	u64	add_fields;
-	u64	test_adder;
-	int	(*compute_mmcr)(u64 events[], int n_ev,
-				unsigned int hwc[], u64 mmcr[]);
-	int	(*get_constraint)(u64 event, u64 *mskp, u64 *valp);
-	int	(*get_alternatives)(u64 event, unsigned int flags,
-				u64 alt[]);
-	void	(*disable_pmc)(unsigned int pmc, u64 mmcr[]);
-	int	(*limited_pmc_event)(u64 event);
-	u32	flags;
-	int	n_generic;
-	int	*generic_events;
+	const char	*name;
+	int		n_counter;
+	int		max_alternatives;
+	unsigned long	add_fields;
+	unsigned long	test_adder;
+	int		(*compute_mmcr)(u64 events[], int n_ev,
+				unsigned int hwc[], unsigned long mmcr[]);
+	int		(*get_constraint)(u64 event, unsigned long *mskp,
+				unsigned long *valp);
+	int		(*get_alternatives)(u64 event, unsigned int flags,
+				u64 alt[]);
+	void		(*disable_pmc)(unsigned int pmc, unsigned long mmcr[]);
+	int		(*limited_pmc_event)(u64 event);
+	u32		flags;
+	int		n_generic;
+	int		*generic_events;
 	int		(*cache_events)[PERF_COUNT_HW_CACHE_MAX]
 			       [PERF_COUNT_HW_CACHE_OP_MAX]
 			       [PERF_COUNT_HW_CACHE_RESULT_MAX];
 };
 
-extern struct power_pmu *ppmu;
-
 /*
  * Values for power_pmu.flags
  */
@@ -53,15 +55,23 @@ extern struct power_pmu *ppmu;
 #define PPMU_LIMITED_PMC_REQD	2	/* have to put this on a limited PMC */
 #define PPMU_ONLY_COUNT_RUN	4	/* only counting in run state */
 
+extern int register_power_pmu(struct power_pmu *);
+
 struct pt_regs;
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
-#define perf_misc_flags(regs)	perf_misc_flags(regs)
-
 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 
 /*
- * The power_pmu.get_constraint function returns a 64-bit value and
- * a 64-bit mask that express the constraints between this event and
+ * Only override the default definitions in include/linux/perf_counter.h
+ * if we have hardware PMU support.
+ */
+#ifdef CONFIG_PPC_PERF_CTRS
+#define perf_misc_flags(regs)	perf_misc_flags(regs)
+#endif
+
+/*
+ * The power_pmu.get_constraint function returns a 32/64-bit value and
+ * a 32/64-bit mask that express the constraints between this event and
  * other events.
 *
 * The value and mask are divided up into (non-overlapping) bitfields
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 6a4fb29a0618..b73396b93905 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -97,9 +97,10 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o
 
 obj-$(CONFIG_DYNAMIC_FTRACE)		+= ftrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
-obj-$(CONFIG_PERF_COUNTERS)	+= perf_counter.o power4-pmu.o ppc970-pmu.o \
-				   power5-pmu.o power5+-pmu.o power6-pmu.o \
-				   power7-pmu.o
+obj-$(CONFIG_PPC_PERF_CTRS)	+= perf_counter.o
+obj64-$(CONFIG_PPC_PERF_CTRS)	+= power4-pmu.o ppc970-pmu.o power5-pmu.o \
+				   power5+-pmu.o power6-pmu.o power7-pmu.o
+obj32-$(CONFIG_PPC_PERF_CTRS)	+= mpc7450-pmu.o
 
 obj-$(CONFIG_8XX_MINIMAL_FPEMU)	+= softemu8xx.o
 
@@ -108,6 +109,7 @@ obj-y += iomap.o
 endif
 
 obj-$(CONFIG_PPC64)		+= $(obj64-y)
+obj-$(CONFIG_PPC32)		+= $(obj32-y)
 
 ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),)
 obj-y				+= ppc_save_regs.o
diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c
new file mode 100644
index 000000000000..75ff47fed7bf
--- /dev/null
+++ b/arch/powerpc/kernel/mpc7450-pmu.c
@@ -0,0 +1,417 @@
+/*
+ * Performance counter support for MPC7450-family processors.
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/string.h>
+#include <linux/perf_counter.h>
+#include <linux/string.h>
+#include <asm/reg.h>
+#include <asm/cputable.h>
+
+#define N_COUNTER	6	/* Number of hardware counters */
+#define MAX_ALT		3	/* Maximum number of event alternative codes */
+
+/*
+ * Bits in event code for MPC7450 family
+ */
+#define PM_THRMULT_MSKS	0x40000
+#define PM_THRESH_SH	12
+#define PM_THRESH_MSK	0x3f
+#define PM_PMC_SH	8
+#define PM_PMC_MSK	7
+#define PM_PMCSEL_MSK	0x7f
+
+/*
+ * Classify events according to how specific their PMC requirements are.
+ * Result is:
+ *	0: can go on any PMC
+ *	1: can go on PMCs 1-4
+ *	2: can go on PMCs 1,2,4
+ *	3: can go on PMCs 1 or 2
+ *	4: can only go on one PMC
+ *	-1: event code is invalid
+ */
+#define N_CLASSES	5
+
+static int mpc7450_classify_event(u32 event)
+{
+	int pmc;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	if (pmc) {
+		if (pmc > N_COUNTER)
+			return -1;
+		return 4;
+	}
+	event &= PM_PMCSEL_MSK;
+	if (event <= 1)
+		return 0;
+	if (event <= 7)
+		return 1;
+	if (event <= 13)
+		return 2;
+	if (event <= 22)
+		return 3;
+	return -1;
+}
+
+/*
+ * Events using threshold and possible threshold scale:
+ *	code	scale?	name
+ *	11e	N	PM_INSTQ_EXCEED_CYC
+ *	11f	N	PM_ALTV_IQ_EXCEED_CYC
+ *	128	Y	PM_DTLB_SEARCH_EXCEED_CYC
+ *	12b	Y	PM_LD_MISS_EXCEED_L1_CYC
+ *	220	N	PM_CQ_EXCEED_CYC
+ *	30c	N	PM_GPR_RB_EXCEED_CYC
+ *	30d	?	PM_FPR_IQ_EXCEED_CYC ?
+ *	311	Y	PM_ITLB_SEARCH_EXCEED
+ *	410	N	PM_GPR_IQ_EXCEED_CYC
+ */
+
+/*
+ * Return use of threshold and threshold scale bits:
+ * 0 = uses neither, 1 = uses threshold, 2 = uses both
+ */
+static int mpc7450_threshold_use(u32 event)
+{
+	int pmc, sel;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	sel = event & PM_PMCSEL_MSK;
+	switch (pmc) {
+	case 1:
+		if (sel == 0x1e || sel == 0x1f)
+			return 1;
+		if (sel == 0x28 || sel == 0x2b)
+			return 2;
+		break;
+	case 2:
+		if (sel == 0x20)
+			return 1;
+		break;
+	case 3:
+		if (sel == 0xc || sel == 0xd)
+			return 1;
+		if (sel == 0x11)
+			return 2;
+		break;
+	case 4:
+		if (sel == 0x10)
+			return 1;
+		break;
+	}
+	return 0;
+}
+
+/*
+ * Layout of constraint bits:
+ * 33222222222211111111110000000000
+ * 10987654321098765432109876543210
+ *  |<    ><    > < >  < ><><><><><><>
+ *  TS TV    G4    G3   G2P6P5P4P3P2P1
+ *
+ * P1 - P6
+ *	0 - 11: Count of events needing PMC1 .. PMC6
+ *
+ * G2
+ *	12 - 14: Count of events needing PMC1 or PMC2
+ *
+ * G3
+ *	16 - 18: Count of events needing PMC1, PMC2 or PMC4
+ *
+ * G4
+ *	20 - 23: Count of events needing PMC1, PMC2, PMC3 or PMC4
+ *
+ * TV
+ *	24 - 29: Threshold value requested
+ *
+ * TS
+ *	30: Threshold scale value requested
+ */
+
+static u32 pmcbits[N_COUNTER][2] = {
+	{ 0x00844002, 0x00111001 },	/* PMC1 mask, value: P1,G2,G3,G4 */
+	{ 0x00844008, 0x00111004 },	/* PMC2: P2,G2,G3,G4 */
+	{ 0x00800020, 0x00100010 },	/* PMC3: P3,G4 */
+	{ 0x00840080, 0x00110040 },	/* PMC4: P4,G3,G4 */
+	{ 0x00000200, 0x00000100 },	/* PMC5: P5 */
+	{ 0x00000800, 0x00000400 }	/* PMC6: P6 */
+};
+
+static u32 classbits[N_CLASSES - 1][2] = {
+	{ 0x00000000, 0x00000000 },	/* class 0: no constraint */
+	{ 0x00800000, 0x00100000 },	/* class 1: G4 */
+	{ 0x00040000, 0x00010000 },	/* class 2: G3 */
+	{ 0x00004000, 0x00001000 },	/* class 3: G2 */
+};
+
+static int mpc7450_get_constraint(u64 event, unsigned long *maskp,
+				  unsigned long *valp)
+{
+	int pmc, class;
+	u32 mask, value;
+	int thresh, tuse;
+
+	class = mpc7450_classify_event(event);
+	if (class < 0)
+		return -1;
+	if (class == 4) {
+		pmc = ((unsigned int)event >> PM_PMC_SH) & PM_PMC_MSK;
+		mask = pmcbits[pmc - 1][0];
+		value = pmcbits[pmc - 1][1];
+	} else {
+		mask = classbits[class][0];
+		value = classbits[class][1];
+	}
+
+	tuse = mpc7450_threshold_use(event);
+	if (tuse) {
+		thresh = ((unsigned int)event >> PM_THRESH_SH) & PM_THRESH_MSK;
+		mask |= 0x3f << 24;
+		value |= thresh << 24;
+		if (tuse == 2) {
+			mask |= 0x40000000;
+			if ((unsigned int)event & PM_THRMULT_MSKS)
+				value |= 0x40000000;
+		}
+	}
+
+	*maskp = mask;
+	*valp = value;
+	return 0;
+}
+
+static const unsigned int event_alternatives[][MAX_ALT] = {
+	{ 0x217, 0x317 },		/* PM_L1_DCACHE_MISS */
+	{ 0x418, 0x50f, 0x60f },	/* PM_SNOOP_RETRY */
+	{ 0x502, 0x602 },		/* PM_L2_HIT */
+	{ 0x503, 0x603 },		/* PM_L3_HIT */
+	{ 0x504, 0x604 },		/* PM_L2_ICACHE_MISS */
+	{ 0x505, 0x605 },		/* PM_L3_ICACHE_MISS */
+	{ 0x506, 0x606 },		/* PM_L2_DCACHE_MISS */
+	{ 0x507, 0x607 },		/* PM_L3_DCACHE_MISS */
+	{ 0x50a, 0x623 },		/* PM_LD_HIT_L3 */
+	{ 0x50b, 0x624 },		/* PM_ST_HIT_L3 */
+	{ 0x50d, 0x60d },		/* PM_L2_TOUCH_HIT */
+	{ 0x50e, 0x60e },		/* PM_L3_TOUCH_HIT */
+	{ 0x512, 0x612 },		/* PM_INT_LOCAL */
+	{ 0x513, 0x61d },		/* PM_L2_MISS */
+	{ 0x514, 0x61e },		/* PM_L3_MISS */
+};
+
+/*
+ * Scan the alternatives table for a match and return the
+ * index into the alternatives table if found, else -1.
+ */
+static int find_alternative(u32 event)
+{
+	int i, j;
+
+	for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
+		if (event < event_alternatives[i][0])
+			break;
+		for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
+			if (event == event_alternatives[i][j])
+				return i;
+	}
+	return -1;
+}
+
+static int mpc7450_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+	int i, j, nalt = 1;
+	u32 ae;
+
+	alt[0] = event;
+	nalt = 1;
+	i = find_alternative((u32)event);
+	if (i >= 0) {
+		for (j = 0; j < MAX_ALT; ++j) {
+			ae = event_alternatives[i][j];
+			if (ae && ae != (u32)event)
+				alt[nalt++] = ae;
+		}
+	}
+	return nalt;
+}
+
+/*
+ * Bitmaps of which PMCs each class can use for classes 0 - 3.
+ * Bit i is set if PMC i+1 is usable.
+ */
+static const u8 classmap[N_CLASSES] = {
+	0x3f, 0x0f, 0x0b, 0x03, 0
+};
+
+/* Bit position and width of each PMCSEL field */
+static const int pmcsel_shift[N_COUNTER] = {
+	6,	0,	27,	22,	17,	11
+};
+static const u32 pmcsel_mask[N_COUNTER] = {
+	0x7f,	0x3f,	0x1f,	0x1f,	0x1f,	0x3f
+};
+
+/*
+ * Compute MMCR0/1/2 values for a set of events.
+ */
+static int mpc7450_compute_mmcr(u64 event[], int n_ev,
+				unsigned int hwc[], unsigned long mmcr[])
+{
+	u8 event_index[N_CLASSES][N_COUNTER];
+	int n_classevent[N_CLASSES];
+	int i, j, class, tuse;
+	u32 pmc_inuse = 0, pmc_avail;
+	u32 mmcr0 = 0, mmcr1 = 0, mmcr2 = 0;
+	u32 ev, pmc, thresh;
+
+	if (n_ev > N_COUNTER)
+		return -1;
+
+	/* First pass: count usage in each class */
+	for (i = 0; i < N_CLASSES; ++i)
+		n_classevent[i] = 0;
+	for (i = 0; i < n_ev; ++i) {
+		class = mpc7450_classify_event(event[i]);
+		if (class < 0)
+			return -1;
+		j = n_classevent[class]++;
+		event_index[class][j] = i;
+	}
+
+	/* Second pass: allocate PMCs from most specific event to least */
+	for (class = N_CLASSES - 1; class >= 0; --class) {
+		for (i = 0; i < n_classevent[class]; ++i) {
+			ev = event[event_index[class][i]];
+			if (class == 4) {
+				pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK;
+				if (pmc_inuse & (1 << (pmc - 1)))
+					return -1;
+			} else {
+				/* Find a suitable PMC */
+				pmc_avail = classmap[class] & ~pmc_inuse;
+				if (!pmc_avail)
+					return -1;
+				pmc = ffs(pmc_avail);
+			}
+			pmc_inuse |= 1 << (pmc - 1);
+
+			tuse = mpc7450_threshold_use(ev);
+			if (tuse) {
+				thresh = (ev >> PM_THRESH_SH) & PM_THRESH_MSK;
+				mmcr0 |= thresh << 16;
+				if (tuse == 2 && (ev & PM_THRMULT_MSKS))
+					mmcr2 = 0x80000000;
+			}
+			ev &= pmcsel_mask[pmc - 1];
+			ev <<= pmcsel_shift[pmc - 1];
+			if (pmc <= 2)
+				mmcr0 |= ev;
+			else
+				mmcr1 |= ev;
+			hwc[event_index[class][i]] = pmc - 1;
+		}
+	}
+
+	if (pmc_inuse & 1)
+		mmcr0 |= MMCR0_PMC1CE;
+	if (pmc_inuse & 0x3e)
+		mmcr0 |= MMCR0_PMCnCE;
+
+	/* Return MMCRx values */
+	mmcr[0] = mmcr0;
+	mmcr[1] = mmcr1;
+	mmcr[2] = mmcr2;
+	return 0;
+}
+
+/*
+ * Disable counting by a PMC.
+ * Note that the pmc argument is 0-based here, not 1-based.
+ */
+static void mpc7450_disable_pmc(unsigned int pmc, unsigned long mmcr[])
+{
+	if (pmc <= 1)
+		mmcr[0] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]);
+	else
+		mmcr[1] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]);
+}
+
+static int mpc7450_generic_events[] = {
+	[PERF_COUNT_HW_CPU_CYCLES]		= 1,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= 2,
+	[PERF_COUNT_HW_CACHE_MISSES]		= 0x217, /* PM_L1_DCACHE_MISS */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x122, /* PM_BR_CMPL */
+	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x41c, /* PM_BR_MPRED */
+};
+
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static int mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {		/*	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x225	},
+		[C(OP_WRITE)] = {	0,		0x227	},
+		[C(OP_PREFETCH)] = {	0,		0	},
+	},
+	[C(L1I)] = {		/*	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x129,		0x115	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	0x634,		0	},
+	},
+	[C(LL)] = {		/*	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0	},
+		[C(OP_WRITE)] = {	0,		0	},
+		[C(OP_PREFETCH)] = {	0,		0	},
+	},
+	[C(DTLB)] = {		/*	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x312	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(ITLB)] = {		/*	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x223	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(BPU)] = {		/*	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x122,		0x41c	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+};
+
+struct power_pmu mpc7450_pmu = {
+	.name			= "MPC7450 family",
+	.n_counter		= N_COUNTER,
+	.max_alternatives	= MAX_ALT,
+	.add_fields		= 0x00111555ul,
+	.test_adder		= 0x00301000ul,
+	.compute_mmcr		= mpc7450_compute_mmcr,
+	.get_constraint		= mpc7450_get_constraint,
+	.get_alternatives	= mpc7450_get_alternatives,
+	.disable_pmc		= mpc7450_disable_pmc,
+	.n_generic		= ARRAY_SIZE(mpc7450_generic_events),
+	.generic_events		= mpc7450_generic_events,
+	.cache_events		= &mpc7450_cache_events,
+};
+
+static int init_mpc7450_pmu(void)
+{
+	if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/7450"))
+		return -ENODEV;
+
+	return register_power_pmu(&mpc7450_pmu);
+}
+
+arch_initcall(init_mpc7450_pmu);
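
The class scheme in the comment above (classes 0-4) is the heart of the MPC7450 constraint code, so here is a quick standalone illustration - not part of the patch, ordinary userspace C - that mirrors mpc7450_classify_event() on event codes taken from the tables in this file:

#include <stdio.h>

#define PM_PMC_SH     8
#define PM_PMC_MSK    7
#define PM_PMCSEL_MSK 0x7f
#define N_COUNTER     6

/* Same logic as mpc7450_classify_event() in the new file above. */
static int classify(unsigned int event)
{
	int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;

	if (pmc)                     /* explicit PMC request in bits 8-10 */
		return pmc > N_COUNTER ? -1 : 4;
	event &= PM_PMCSEL_MSK;
	if (event <= 1)  return 0;   /* any PMC */
	if (event <= 7)  return 1;   /* PMCs 1-4 */
	if (event <= 13) return 2;   /* PMCs 1, 2, 4 */
	if (event <= 22) return 3;   /* PMC 1 or 2 */
	return -1;
}

int main(void)
{
	/* 0x217 = PM_L1_DCACHE_MISS on PMC2; 0x317 is its PMC3 alternative */
	printf("0x217 -> class %d\n", classify(0x217));   /* 4: fixed PMC */
	printf("0x317 -> class %d\n", classify(0x317));   /* 4: fixed PMC */
	printf("0x002 -> class %d\n", classify(0x002));   /* 1: PMCs 1-4 */
	return 0;
}

Events with an explicit PMC field land in class 4 and can only be rescheduled by trying an alternative code from event_alternatives[] (0x217 vs 0x317 above); codes with no PMC field are constrained only by their PMCSEL range.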
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index bb202388170e..809fdf94b95f 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -29,7 +29,7 @@ struct cpu_hw_counters {
 	struct perf_counter *counter[MAX_HWCOUNTERS];
 	u64 events[MAX_HWCOUNTERS];
 	unsigned int flags[MAX_HWCOUNTERS];
-	u64 mmcr[3];
+	unsigned long mmcr[3];
 	struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS];
 	u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS];
 };
@@ -46,6 +46,115 @@ struct power_pmu *ppmu;
  */
 static unsigned int freeze_counters_kernel = MMCR0_FCS;
 
+/*
+ * 32-bit doesn't have MMCRA but does have an MMCR2,
+ * and a few other names are different.
+ */
+#ifdef CONFIG_PPC32
+
+#define MMCR0_FCHV		0
+#define MMCR0_PMCjCE		MMCR0_PMCnCE
+
+#define SPRN_MMCRA		SPRN_MMCR2
+#define MMCRA_SAMPLE_ENABLE	0
+
+static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
+{
+	return 0;
+}
+static inline void perf_set_pmu_inuse(int inuse) { }
+static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { }
+static inline u32 perf_get_misc_flags(struct pt_regs *regs)
+{
+	return 0;
+}
+static inline void perf_read_regs(struct pt_regs *regs) { }
+static inline int perf_intr_is_nmi(struct pt_regs *regs)
+{
+	return 0;
+}
+
+#endif /* CONFIG_PPC32 */
+
+/*
+ * Things that are specific to 64-bit implementations.
+ */
+#ifdef CONFIG_PPC64
+
+static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
+{
+	unsigned long mmcra = regs->dsisr;
+
+	if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) {
+		unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT;
+		if (slot > 1)
+			return 4 * (slot - 1);
+	}
+	return 0;
+}
+
+static inline void perf_set_pmu_inuse(int inuse)
+{
+	get_lppaca()->pmcregs_in_use = inuse;
+}
+
+/*
+ * The user wants a data address recorded.
+ * If we're not doing instruction sampling, give them the SDAR
+ * (sampled data address).  If we are doing instruction sampling, then
+ * only give them the SDAR if it corresponds to the instruction
+ * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC
+ * bit in MMCRA.
+ */
+static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
+{
+	unsigned long mmcra = regs->dsisr;
+	unsigned long sdsync = (ppmu->flags & PPMU_ALT_SIPR) ?
+		POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC;
+
+	if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync))
+		*addrp = mfspr(SPRN_SDAR);
+}
+
+static inline u32 perf_get_misc_flags(struct pt_regs *regs)
+{
+	unsigned long mmcra = regs->dsisr;
+
+	if (TRAP(regs) != 0xf00)
+		return 0;	/* not a PMU interrupt */
+
+	if (ppmu->flags & PPMU_ALT_SIPR) {
+		if (mmcra & POWER6_MMCRA_SIHV)
+			return PERF_EVENT_MISC_HYPERVISOR;
+		return (mmcra & POWER6_MMCRA_SIPR) ?
+			PERF_EVENT_MISC_USER : PERF_EVENT_MISC_KERNEL;
+	}
+	if (mmcra & MMCRA_SIHV)
+		return PERF_EVENT_MISC_HYPERVISOR;
+	return (mmcra & MMCRA_SIPR) ? PERF_EVENT_MISC_USER :
+		PERF_EVENT_MISC_KERNEL;
+}
+
+/*
+ * Overload regs->dsisr to store MMCRA so we only need to read it once
+ * on each interrupt.
+ */
+static inline void perf_read_regs(struct pt_regs *regs)
+{
+	regs->dsisr = mfspr(SPRN_MMCRA);
+}
+
+/*
+ * If interrupts were soft-disabled when a PMU interrupt occurs, treat
+ * it as an NMI.
+ */
+static inline int perf_intr_is_nmi(struct pt_regs *regs)
+{
+	return !regs->softe;
+}
+
+#endif /* CONFIG_PPC64 */
+
 static void perf_counter_interrupt(struct pt_regs *regs);
 
 void perf_counter_print_debug(void)
@@ -78,12 +187,14 @@ static unsigned long read_pmc(int idx)
 	case 6:
 		val = mfspr(SPRN_PMC6);
 		break;
+#ifdef CONFIG_PPC64
 	case 7:
 		val = mfspr(SPRN_PMC7);
 		break;
 	case 8:
 		val = mfspr(SPRN_PMC8);
 		break;
+#endif /* CONFIG_PPC64 */
 	default:
 		printk(KERN_ERR "oops trying to read PMC%d\n", idx);
 		val = 0;
@@ -115,12 +226,14 @@ static void write_pmc(int idx, unsigned long val)
 	case 6:
 		mtspr(SPRN_PMC6, val);
 		break;
+#ifdef CONFIG_PPC64
 	case 7:
 		mtspr(SPRN_PMC7, val);
 		break;
 	case 8:
 		mtspr(SPRN_PMC8, val);
 		break;
+#endif /* CONFIG_PPC64 */
 	default:
 		printk(KERN_ERR "oops trying to write PMC%d\n", idx);
 	}
@@ -135,15 +248,15 @@ static void write_pmc(int idx, unsigned long val)
 static int power_check_constraints(u64 event[], unsigned int cflags[],
 				   int n_ev)
 {
-	u64 mask, value, nv;
+	unsigned long mask, value, nv;
 	u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
-	u64 amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
-	u64 avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
-	u64 smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS];
+	unsigned long amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
+	unsigned long avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
+	unsigned long smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS];
 	int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS];
 	int i, j;
-	u64 addf = ppmu->add_fields;
-	u64 tadd = ppmu->test_adder;
+	unsigned long addf = ppmu->add_fields;
+	unsigned long tadd = ppmu->test_adder;
 
 	if (n_ev > ppmu->n_counter)
 		return -1;
@@ -283,7 +396,7 @@ static int check_excludes(struct perf_counter **ctrs, unsigned int cflags[],
 
 static void power_pmu_read(struct perf_counter *counter)
 {
-	long val, delta, prev;
+	s64 val, delta, prev;
 
 	if (!counter->hw.idx)
 		return;
@@ -403,14 +516,12 @@ static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0)
 void hw_perf_disable(void)
 {
 	struct cpu_hw_counters *cpuhw;
-	unsigned long ret;
 	unsigned long flags;
 
 	local_irq_save(flags);
 	cpuhw = &__get_cpu_var(cpu_hw_counters);
 
-	ret = cpuhw->disabled;
-	if (!ret) {
+	if (!cpuhw->disabled) {
 		cpuhw->disabled = 1;
 		cpuhw->n_added = 0;
 
@@ -479,7 +590,7 @@ void hw_perf_enable(void)
 		mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
 		mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
 		if (cpuhw->n_counters == 0)
-			get_lppaca()->pmcregs_in_use = 0;
+			perf_set_pmu_inuse(0);
 		goto out_enable;
 	}
 
@@ -512,7 +623,7 @@ void hw_perf_enable(void)
 	 * bit set and set the hardware counters to their initial values.
 	 * Then unfreeze the counters.
 	 */
-	get_lppaca()->pmcregs_in_use = 1;
+	perf_set_pmu_inuse(1);
 	mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
 	mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
 	mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
@@ -913,6 +1024,8 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 	case PERF_TYPE_RAW:
 		ev = counter->attr.config;
 		break;
+	default:
+		return ERR_PTR(-EINVAL);
 	}
 	counter->hw.config_base = ev;
 	counter->hw.idx = 0;
@@ -1007,13 +1120,12 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 * things if requested. Note that interrupts are hard-disabled
 * here so there is no possibility of being interrupted.
 */
-static void record_and_restart(struct perf_counter *counter, long val,
+static void record_and_restart(struct perf_counter *counter, unsigned long val,
 			       struct pt_regs *regs, int nmi)
 {
 	u64 period = counter->hw.sample_period;
 	s64 prev, delta, left;
 	int record = 0;
-	u64 addr, mmcra, sdsync;
 
 	/* we don't have to worry about interrupts here */
 	prev = atomic64_read(&counter->hw.prev_count);
@@ -1033,8 +1145,8 @@ static void record_and_restart(struct perf_counter *counter, long val,
 			left = period;
 		record = 1;
 	}
-	if (left < 0x80000000L)
-		val = 0x80000000L - left;
+	if (left < 0x80000000LL)
+		val = 0x80000000LL - left;
 	}
 
 	/*
@@ -1047,22 +1159,9 @@ static void record_and_restart(struct perf_counter *counter, long val,
 			.period	= counter->hw.last_period,
 		};
 
-		if (counter->attr.sample_type & PERF_SAMPLE_ADDR) {
-			/*
-			 * The user wants a data address recorded.
-			 * If we're not doing instruction sampling,
-			 * give them the SDAR (sampled data address).
-			 * If we are doing instruction sampling, then only
-			 * give them the SDAR if it corresponds to the
-			 * instruction pointed to by SIAR; this is indicated
-			 * by the [POWER6_]MMCRA_SDSYNC bit in MMCRA.
-			 */
-			mmcra = regs->dsisr;
-			sdsync = (ppmu->flags & PPMU_ALT_SIPR) ?
-				POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC;
-			if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync))
-				data.addr = mfspr(SPRN_SDAR);
-		}
+		if (counter->attr.sample_type & PERF_SAMPLE_ADDR)
+			perf_get_data_addr(regs, &data.addr);
+
 		if (perf_counter_overflow(counter, nmi, &data)) {
 			/*
 			 * Interrupts are coming too fast - throttle them
@@ -1088,25 +1187,12 @@ static void record_and_restart(struct perf_counter *counter, long val,
 */
 unsigned long perf_misc_flags(struct pt_regs *regs)
 {
-	unsigned long mmcra;
-
-	if (TRAP(regs) != 0xf00) {
-		/* not a PMU interrupt */
-		return user_mode(regs) ? PERF_EVENT_MISC_USER :
-			PERF_EVENT_MISC_KERNEL;
-	}
+	u32 flags = perf_get_misc_flags(regs);
 
-	mmcra = regs->dsisr;
-	if (ppmu->flags & PPMU_ALT_SIPR) {
-		if (mmcra & POWER6_MMCRA_SIHV)
-			return PERF_EVENT_MISC_HYPERVISOR;
-		return (mmcra & POWER6_MMCRA_SIPR) ? PERF_EVENT_MISC_USER :
-			PERF_EVENT_MISC_KERNEL;
-	}
-	if (mmcra & MMCRA_SIHV)
-		return PERF_EVENT_MISC_HYPERVISOR;
-	return (mmcra & MMCRA_SIPR) ? PERF_EVENT_MISC_USER :
-		PERF_EVENT_MISC_KERNEL;
+	if (flags)
+		return flags;
+	return user_mode(regs) ? PERF_EVENT_MISC_USER :
+		PERF_EVENT_MISC_KERNEL;
 }
 
 /*
@@ -1115,20 +1201,12 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
 */
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
 {
-	unsigned long mmcra;
 	unsigned long ip;
-	unsigned long slot;
 
 	if (TRAP(regs) != 0xf00)
 		return regs->nip;	/* not a PMU interrupt */
 
-	ip = mfspr(SPRN_SIAR);
-	mmcra = regs->dsisr;
-	if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) {
-		slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT;
-		if (slot > 1)
-			ip += 4 * (slot - 1);
-	}
+	ip = mfspr(SPRN_SIAR) + perf_ip_adjust(regs);
 	return ip;
 }
 
@@ -1140,7 +1218,7 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 	int i;
 	struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
 	struct perf_counter *counter;
-	long val;
+	unsigned long val;
 	int found = 0;
 	int nmi;
 
@@ -1148,16 +1226,9 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 	freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
 				mfspr(SPRN_PMC6));
 
-	/*
-	 * Overload regs->dsisr to store MMCRA so we only need to read it once.
-	 */
-	regs->dsisr = mfspr(SPRN_MMCRA);
+	perf_read_regs(regs);
 
-	/*
-	 * If interrupts were soft-disabled when this PMU interrupt
-	 * occurred, treat it as an NMI.
-	 */
-	nmi = !regs->softe;
+	nmi = perf_intr_is_nmi(regs);
 	if (nmi)
 		nmi_enter();
 	else
@@ -1214,50 +1285,22 @@ void hw_perf_counter_setup(int cpu)
 	cpuhw->mmcr[0] = MMCR0_FC;
 }
 
-extern struct power_pmu power4_pmu;
-extern struct power_pmu ppc970_pmu;
-extern struct power_pmu power5_pmu;
-extern struct power_pmu power5p_pmu;
-extern struct power_pmu power6_pmu;
-extern struct power_pmu power7_pmu;
-
-static int init_perf_counters(void)
+int register_power_pmu(struct power_pmu *pmu)
 {
-	unsigned long pvr;
-
-	/* XXX should get this from cputable */
-	pvr = mfspr(SPRN_PVR);
-	switch (PVR_VER(pvr)) {
-	case PV_POWER4:
-	case PV_POWER4p:
-		ppmu = &power4_pmu;
-		break;
-	case PV_970:
-	case PV_970FX:
-	case PV_970MP:
-		ppmu = &ppc970_pmu;
-		break;
-	case PV_POWER5:
-		ppmu = &power5_pmu;
-		break;
-	case PV_POWER5p:
-		ppmu = &power5p_pmu;
-		break;
-	case 0x3e:
-		ppmu = &power6_pmu;
-		break;
-	case 0x3f:
-		ppmu = &power7_pmu;
-		break;
-	}
+	if (ppmu)
+		return -EBUSY;		/* something's already registered */
+
+	ppmu = pmu;
+	pr_info("%s performance monitor hardware support registered\n",
+		pmu->name);
 
+#ifdef MSR_HV
 	/*
 	 * Use FCHV to ignore kernel events if MSR.HV is set.
 	 */
 	if (mfmsr() & MSR_HV)
 		freeze_counters_kernel = MMCR0_FCHV;
+#endif /* CONFIG_PPC64 */
 
 	return 0;
 }
-
-arch_initcall(init_perf_counters);
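
A note on the 0x80000000LL change in record_and_restart() above: the POWER PMCs are 32-bit counters that raise their exception when the counter goes negative (bit 31 becomes set), so the value written back is chosen to overflow after `left` more events; the LL suffix keeps the comparison and subtraction in 64 bits, since `left` is an s64 and can exceed 2^31 for large sample periods. A minimal userspace sketch of that arithmetic (an illustration, not the kernel code):

#include <stdint.h>
#include <stdio.h>

/* Pick the 32-bit PMC restart value so the counter reaches the
 * 0x80000000 interrupt threshold after `left` more events.  With a
 * plain 32-bit long, 0x80000000L - left could truncate or overflow,
 * which is what the LL suffix in the patch avoids.
 */
static uint32_t pmc_restart_value(int64_t left)
{
	if (left <= 0 || left >= 0x80000000LL)
		return 0;		/* count a full 2^31 events */
	return (uint32_t)(0x80000000LL - left);
}

int main(void)
{
	printf("%#x\n", pmc_restart_value(1000));	/* 0x7ffffc18 */
	printf("%#x\n", pmc_restart_value(1LL << 40));	/* 0 */
	return 0;
}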
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
index 07bd308a5fa7..db90b0c5c27b 100644
--- a/arch/powerpc/kernel/power4-pmu.c
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -10,7 +10,9 @@
  */
 #include <linux/kernel.h>
 #include <linux/perf_counter.h>
+#include <linux/string.h>
 #include <asm/reg.h>
+#include <asm/cputable.h>
 
 /*
  * Bits in event code for POWER4
@@ -179,22 +181,22 @@ static short mmcr1_adder_bits[8] = {
  */
 
 static struct unitinfo {
-	u64	value, mask;
+	unsigned long	value, mask;
 	int	unit;
 	int	lowerbit;
 } p4_unitinfo[16] = {
-	[PM_FPU]  = { 0x44000000000000ull, 0x88000000000000ull, PM_FPU, 0 },
-	[PM_ISU1] = { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 },
-	[PM_ISU1_ALT] =
-		    { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 },
-	[PM_IFU]  = { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 },
-	[PM_IFU_ALT] =
-		    { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 },
-	[PM_IDU0] = { 0x10100000000000ull, 0x80840000000000ull, PM_IDU0, 1 },
-	[PM_ISU2] = { 0x10140000000000ull, 0x80840000000000ull, PM_ISU2, 0 },
-	[PM_LSU0] = { 0x01400000000000ull, 0x08800000000000ull, PM_LSU0, 0 },
-	[PM_LSU1] = { 0x00000000000000ull, 0x00010000000000ull, PM_LSU1, 40 },
-	[PM_GPS]  = { 0x00000000000000ull, 0x00000000000000ull, PM_GPS, 0 }
+	[PM_FPU]  = { 0x44000000000000ul, 0x88000000000000ul, PM_FPU, 0 },
+	[PM_ISU1] = { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 },
+	[PM_ISU1_ALT] =
+		    { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 },
+	[PM_IFU]  = { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 },
+	[PM_IFU_ALT] =
+		    { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 },
+	[PM_IDU0] = { 0x10100000000000ul, 0x80840000000000ul, PM_IDU0, 1 },
+	[PM_ISU2] = { 0x10140000000000ul, 0x80840000000000ul, PM_ISU2, 0 },
+	[PM_LSU0] = { 0x01400000000000ul, 0x08800000000000ul, PM_LSU0, 0 },
+	[PM_LSU1] = { 0x00000000000000ul, 0x00010000000000ul, PM_LSU1, 40 },
+	[PM_GPS]  = { 0x00000000000000ul, 0x00000000000000ul, PM_GPS, 0 }
 };
 
 static unsigned char direct_marked_event[8] = {
@@ -249,10 +251,11 @@ static int p4_marked_instr_event(u64 event)
 	return (mask >> (byte * 8 + bit)) & 1;
 }
 
-static int p4_get_constraint(u64 event, u64 *maskp, u64 *valp)
+static int p4_get_constraint(u64 event, unsigned long *maskp,
+			     unsigned long *valp)
 {
 	int pmc, byte, unit, lower, sh;
-	u64 mask = 0, value = 0;
+	unsigned long mask = 0, value = 0;
 	int grp = -1;
 
 	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
@@ -282,14 +285,14 @@ static int p4_get_constraint(u64 event, u64 *maskp, u64 *valp)
 		value |= p4_unitinfo[unit].value;
 		sh = p4_unitinfo[unit].lowerbit;
 		if (sh > 1)
-			value |= (u64)lower << sh;
+			value |= (unsigned long)lower << sh;
 		else if (lower != sh)
 			return -1;
 		unit = p4_unitinfo[unit].unit;
 
 		/* Set byte lane select field */
 		mask |= 0xfULL << (28 - 4 * byte);
-		value |= (u64)unit << (28 - 4 * byte);
+		value |= (unsigned long)unit << (28 - 4 * byte);
 	}
 	if (grp == 0) {
 		/* increment PMC1/2/5/6 field */
@@ -353,9 +356,9 @@ static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[])
 }
 
 static int p4_compute_mmcr(u64 event[], int n_ev,
-			   unsigned int hwc[], u64 mmcr[])
+			   unsigned int hwc[], unsigned long mmcr[])
 {
-	u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0;
+	unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
 	unsigned int pmc, unit, byte, psel, lower;
 	unsigned int ttm, grp;
 	unsigned int pmc_inuse = 0;
@@ -429,9 +432,11 @@ static int p4_compute_mmcr(u64 event[], int n_ev,
 		return -1;
 
 	/* Set TTMxSEL fields. Note, units 1-3 => TTM0SEL codes 0-2 */
-	mmcr1 |= (u64)(unituse[3] * 2 + unituse[2]) << MMCR1_TTM0SEL_SH;
-	mmcr1 |= (u64)(unituse[7] * 3 + unituse[6] * 2) << MMCR1_TTM1SEL_SH;
-	mmcr1 |= (u64)unituse[9] << MMCR1_TTM2SEL_SH;
+	mmcr1 |= (unsigned long)(unituse[3] * 2 + unituse[2])
+		<< MMCR1_TTM0SEL_SH;
+	mmcr1 |= (unsigned long)(unituse[7] * 3 + unituse[6] * 2)
+		<< MMCR1_TTM1SEL_SH;
+	mmcr1 |= (unsigned long)unituse[9] << MMCR1_TTM2SEL_SH;
 
 	/* Set TTCxSEL fields. */
 	if (unitlower & 0xe)
@@ -456,7 +461,8 @@ static int p4_compute_mmcr(u64 event[], int n_ev,
 			ttm = unit - 1;		/* 2->1, 3->2 */
 		else
 			ttm = unit >> 2;
-		mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2*byte);
+		mmcr1 |= (unsigned long)ttm
+			<< (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
 		}
 	}
 
@@ -519,7 +525,7 @@ static int p4_compute_mmcr(u64 event[], int n_ev,
 	return 0;
 }
 
-static void p4_disable_pmc(unsigned int pmc, u64 mmcr[])
+static void p4_disable_pmc(unsigned int pmc, unsigned long mmcr[])
 {
 	/*
 	 * Setting the PMCxSEL field to 0 disables PMC x.
@@ -583,16 +589,27 @@ static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	},
 };
 
-struct power_pmu power4_pmu = {
-	.n_counter = 8,
-	.max_alternatives = 5,
-	.add_fields = 0x0000001100005555ull,
-	.test_adder = 0x0011083300000000ull,
-	.compute_mmcr = p4_compute_mmcr,
-	.get_constraint = p4_get_constraint,
-	.get_alternatives = p4_get_alternatives,
-	.disable_pmc = p4_disable_pmc,
-	.n_generic = ARRAY_SIZE(p4_generic_events),
-	.generic_events = p4_generic_events,
-	.cache_events = &power4_cache_events,
+static struct power_pmu power4_pmu = {
+	.name			= "POWER4/4+",
+	.n_counter		= 8,
+	.max_alternatives	= 5,
+	.add_fields		= 0x0000001100005555ul,
+	.test_adder		= 0x0011083300000000ul,
+	.compute_mmcr		= p4_compute_mmcr,
+	.get_constraint		= p4_get_constraint,
+	.get_alternatives	= p4_get_alternatives,
+	.disable_pmc		= p4_disable_pmc,
+	.n_generic		= ARRAY_SIZE(p4_generic_events),
+	.generic_events		= p4_generic_events,
+	.cache_events		= &power4_cache_events,
 };
+
+static int init_power4_pmu(void)
+{
+	if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power4"))
+		return -ENODEV;
+
+	return register_power_pmu(&power4_pmu);
+}
+
+arch_initcall(init_power4_pmu);
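
From here on, the same mechanical conversion repeats for each back-end: the struct power_pmu becomes static and gains a .name, the u64 register/constraint values become unsigned long, and a cputable-string match plus register_power_pmu() replaces the old central PVR switch. As a hypothetical skeleton (names invented for illustration; real event tables and MMCR helpers elided), a new back-end under this scheme would look like:

#include <linux/string.h>
#include <linux/perf_counter.h>
#include <asm/reg.h>
#include <asm/cputable.h>

/* ... event encoding, constraint and compute_mmcr helpers elided ... */

static struct power_pmu example_pmu = {
	.name		= "EXAMPLE",	/* hypothetical back-end */
	.n_counter	= 6,
	/* .compute_mmcr, .get_constraint, ... as in the real back-ends */
};

static int init_example_pmu(void)
{
	/* match on the cputable string, as the real back-ends do */
	if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/example"))
		return -ENODEV;

	return register_power_pmu(&example_pmu);
}

arch_initcall(init_example_pmu);

The first back-end whose initcall matches the CPU string wins; register_power_pmu() returns -EBUSY for any later registration attempt.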
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
index 41e5d2d958d4..f4adca8e98a4 100644
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -10,7 +10,9 @@
  */
 #include <linux/kernel.h>
 #include <linux/perf_counter.h>
+#include <linux/string.h>
 #include <asm/reg.h>
+#include <asm/cputable.h>
 
 /*
  * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3)
@@ -126,20 +128,21 @@ static const int grsel_shift[8] = {
 };
 
 /* Masks and values for using events from the various units */
-static u64 unit_cons[PM_LASTUNIT+1][2] = {
-	[PM_FPU] =   { 0x3200000000ull, 0x0100000000ull },
-	[PM_ISU0] =  { 0x0200000000ull, 0x0080000000ull },
-	[PM_ISU1] =  { 0x3200000000ull, 0x3100000000ull },
-	[PM_IFU] =   { 0x3200000000ull, 0x2100000000ull },
-	[PM_IDU] =   { 0x0e00000000ull, 0x0040000000ull },
-	[PM_GRS] =   { 0x0e00000000ull, 0x0c40000000ull },
+static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
+	[PM_FPU] =   { 0x3200000000ul, 0x0100000000ul },
+	[PM_ISU0] =  { 0x0200000000ul, 0x0080000000ul },
+	[PM_ISU1] =  { 0x3200000000ul, 0x3100000000ul },
+	[PM_IFU] =   { 0x3200000000ul, 0x2100000000ul },
+	[PM_IDU] =   { 0x0e00000000ul, 0x0040000000ul },
+	[PM_GRS] =   { 0x0e00000000ul, 0x0c40000000ul },
 };
 
-static int power5p_get_constraint(u64 event, u64 *maskp, u64 *valp)
+static int power5p_get_constraint(u64 event, unsigned long *maskp,
+				  unsigned long *valp)
 {
 	int pmc, byte, unit, sh;
 	int bit, fmask;
-	u64 mask = 0, value = 0;
+	unsigned long mask = 0, value = 0;
 
 	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
 	if (pmc) {
@@ -171,17 +174,18 @@ static int power5p_get_constraint(u64 event, u64 *maskp, u64 *valp)
 		bit = event & 7;
 		fmask = (bit == 6)? 7: 3;
 		sh = grsel_shift[bit];
-		mask |= (u64)fmask << sh;
-		value |= (u64)((event >> PM_GRS_SH) & fmask) << sh;
+		mask |= (unsigned long)fmask << sh;
+		value |= (unsigned long)((event >> PM_GRS_SH) & fmask)
+			<< sh;
 	}
 	/* Set byte lane select field */
-	mask |= 0xfULL << (24 - 4 * byte);
-	value |= (u64)unit << (24 - 4 * byte);
+	mask |= 0xfUL << (24 - 4 * byte);
+	value |= (unsigned long)unit << (24 - 4 * byte);
 	}
 	if (pmc < 5) {
 		/* need a counter from PMC1-4 set */
-		mask |= 0x8000000000000ull;
-		value |= 0x1000000000000ull;
+		mask |= 0x8000000000000ul;
+		value |= 0x1000000000000ul;
 	}
 	*maskp = mask;
 	*valp = value;
@@ -452,10 +456,10 @@ static int power5p_marked_instr_event(u64 event)
 }
 
 static int power5p_compute_mmcr(u64 event[], int n_ev,
-				unsigned int hwc[], u64 mmcr[])
+				unsigned int hwc[], unsigned long mmcr[])
 {
-	u64 mmcr1 = 0;
-	u64 mmcra = 0;
+	unsigned long mmcr1 = 0;
+	unsigned long mmcra = 0;
 	unsigned int pmc, unit, byte, psel;
 	unsigned int ttm;
 	int i, isbus, bit, grsel;
@@ -517,7 +521,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev,
 			continue;
 		if (ttmuse++)
 			return -1;
-		mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH;
+		mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH;
 	}
 	ttmuse = 0;
 	for (; i <= PM_GRS; ++i) {
@@ -525,7 +529,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev,
 			continue;
 		if (ttmuse++)
 			return -1;
-		mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH;
+		mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH;
 	}
 	if (ttmuse > 1)
 		return -1;
@@ -540,10 +544,11 @@ static int power5p_compute_mmcr(u64 event[], int n_ev,
 			unit = PM_ISU0_ALT;
 		} else if (unit == PM_LSU1 + 1) {
 			/* select lower word of LSU1 for this byte */
-			mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
+			mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte);
 		}
 		ttm = unit >> 2;
-		mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
+		mmcr1 |= (unsigned long)ttm
+			<< (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
 	}
 
 	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
@@ -568,7 +573,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev,
 			if (isbus && (byte & 2) &&
 			    (psel == 8 || psel == 0x10 || psel == 0x28))
 				/* add events on higher-numbered bus */
-				mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
+				mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
 		} else {
 			/* Instructions or run cycles on PMC5/6 */
 			--pmc;
@@ -576,7 +581,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev,
 		if (isbus && unit == PM_GRS) {
 			bit = psel & 7;
 			grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
-			mmcr1 |= (u64)grsel << grsel_shift[bit];
+			mmcr1 |= (unsigned long)grsel << grsel_shift[bit];
 		}
 		if (power5p_marked_instr_event(event[i]))
 			mmcra |= MMCRA_SAMPLE_ENABLE;
@@ -599,7 +604,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev,
 	return 0;
 }
 
-static void power5p_disable_pmc(unsigned int pmc, u64 mmcr[])
+static void power5p_disable_pmc(unsigned int pmc, unsigned long mmcr[])
 {
 	if (pmc <= 3)
 		mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
@@ -654,18 +659,30 @@ static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	},
 };
 
-struct power_pmu power5p_pmu = {
-	.n_counter = 6,
-	.max_alternatives = MAX_ALT,
-	.add_fields = 0x7000000000055ull,
-	.test_adder = 0x3000040000000ull,
-	.compute_mmcr = power5p_compute_mmcr,
-	.get_constraint = power5p_get_constraint,
-	.get_alternatives = power5p_get_alternatives,
-	.disable_pmc = power5p_disable_pmc,
-	.limited_pmc_event = power5p_limited_pmc_event,
-	.flags = PPMU_LIMITED_PMC5_6,
-	.n_generic = ARRAY_SIZE(power5p_generic_events),
-	.generic_events = power5p_generic_events,
-	.cache_events = &power5p_cache_events,
+static struct power_pmu power5p_pmu = {
+	.name			= "POWER5+/++",
+	.n_counter		= 6,
+	.max_alternatives	= MAX_ALT,
+	.add_fields		= 0x7000000000055ul,
+	.test_adder		= 0x3000040000000ul,
+	.compute_mmcr		= power5p_compute_mmcr,
+	.get_constraint		= power5p_get_constraint,
+	.get_alternatives	= power5p_get_alternatives,
+	.disable_pmc		= power5p_disable_pmc,
+	.limited_pmc_event	= power5p_limited_pmc_event,
+	.flags			= PPMU_LIMITED_PMC5_6,
+	.n_generic		= ARRAY_SIZE(power5p_generic_events),
+	.generic_events		= power5p_generic_events,
+	.cache_events		= &power5p_cache_events,
 };
+
+static int init_power5p_pmu(void)
+{
+	if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+")
+	    && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5++"))
+		return -ENODEV;
+
+	return register_power_pmu(&power5p_pmu);
+}
+
+arch_initcall(init_power5p_pmu);
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
index 05600b66221a..29b2c6c0e83a 100644
--- a/arch/powerpc/kernel/power5-pmu.c
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -10,7 +10,9 @@
  */
 #include <linux/kernel.h>
 #include <linux/perf_counter.h>
+#include <linux/string.h>
 #include <asm/reg.h>
+#include <asm/cputable.h>
 
 /*
  * Bits in event code for POWER5 (not POWER5++)
@@ -130,20 +132,21 @@ static const int grsel_shift[8] = {
 };
 
 /* Masks and values for using events from the various units */
-static u64 unit_cons[PM_LASTUNIT+1][2] = {
-	[PM_FPU] =   { 0xc0002000000000ull, 0x00001000000000ull },
-	[PM_ISU0] =  { 0x00002000000000ull, 0x00000800000000ull },
-	[PM_ISU1] =  { 0xc0002000000000ull, 0xc0001000000000ull },
-	[PM_IFU] =   { 0xc0002000000000ull, 0x80001000000000ull },
-	[PM_IDU] =   { 0x30002000000000ull, 0x00000400000000ull },
-	[PM_GRS] =   { 0x30002000000000ull, 0x30000400000000ull },
+static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
+	[PM_FPU] =   { 0xc0002000000000ul, 0x00001000000000ul },
+	[PM_ISU0] =  { 0x00002000000000ul, 0x00000800000000ul },
+	[PM_ISU1] =  { 0xc0002000000000ul, 0xc0001000000000ul },
+	[PM_IFU] =   { 0xc0002000000000ul, 0x80001000000000ul },
+	[PM_IDU] =   { 0x30002000000000ul, 0x00000400000000ul },
+	[PM_GRS] =   { 0x30002000000000ul, 0x30000400000000ul },
 };
 
-static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp)
+static int power5_get_constraint(u64 event, unsigned long *maskp,
+				 unsigned long *valp)
 {
 	int pmc, byte, unit, sh;
 	int bit, fmask;
-	u64 mask = 0, value = 0;
+	unsigned long mask = 0, value = 0;
 	int grp = -1;
 
 	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
@@ -178,8 +181,9 @@ static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp)
 		bit = event & 7;
 		fmask = (bit == 6)? 7: 3;
 		sh = grsel_shift[bit];
-		mask |= (u64)fmask << sh;
-		value |= (u64)((event >> PM_GRS_SH) & fmask) << sh;
+		mask |= (unsigned long)fmask << sh;
+		value |= (unsigned long)((event >> PM_GRS_SH) & fmask)
+			<< sh;
 	}
 	/*
	 * Bus events on bytes 0 and 2 can be counted
@@ -188,22 +192,22 @@ static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp)
 		if (!pmc)
 			grp = byte & 1;
 		/* Set byte lane select field */
-		mask |= 0xfULL << (24 - 4 * byte);
-		value |= (u64)unit << (24 - 4 * byte);
+		mask |= 0xfUL << (24 - 4 * byte);
+		value |= (unsigned long)unit << (24 - 4 * byte);
 	}
 	if (grp == 0) {
 		/* increment PMC1/2 field */
-		mask |= 0x200000000ull;
-		value |= 0x080000000ull;
+		mask |= 0x200000000ul;
+		value |= 0x080000000ul;
 	} else if (grp == 1) {
 		/* increment PMC3/4 field */
-		mask |= 0x40000000ull;
-		value |= 0x10000000ull;
+		mask |= 0x40000000ul;
+		value |= 0x10000000ul;
 	}
 	if (pmc < 5) {
 		/* need a counter from PMC1-4 set */
-		mask |= 0x8000000000000ull;
-		value |= 0x1000000000000ull;
+		mask |= 0x8000000000000ul;
+		value |= 0x1000000000000ul;
 	}
 	*maskp = mask;
 	*valp = value;
@@ -383,10 +387,10 @@ static int power5_marked_instr_event(u64 event)
 }
 
 static int power5_compute_mmcr(u64 event[], int n_ev,
-			       unsigned int hwc[], u64 mmcr[])
+			       unsigned int hwc[], unsigned long mmcr[])
 {
-	u64 mmcr1 = 0;
-	u64 mmcra = 0;
+	unsigned long mmcr1 = 0;
+	unsigned long mmcra = 0;
 	unsigned int pmc, unit, byte, psel;
 	unsigned int ttm, grp;
 	int i, isbus, bit, grsel;
@@ -457,7 +461,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev,
 			continue;
 		if (ttmuse++)
 			return -1;
-		mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH;
+		mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH;
 	}
 	ttmuse = 0;
 	for (; i <= PM_GRS; ++i) {
@@ -465,7 +469,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev,
 			continue;
 		if (ttmuse++)
 			return -1;
-		mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH;
+		mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH;
 	}
 	if (ttmuse > 1)
 		return -1;
@@ -480,10 +484,11 @@ static int power5_compute_mmcr(u64 event[], int n_ev,
 			unit = PM_ISU0_ALT;
 		} else if (unit == PM_LSU1 + 1) {
 			/* select lower word of LSU1 for this byte */
-			mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
+			mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte);
 		}
 		ttm = unit >> 2;
-		mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
+		mmcr1 |= (unsigned long)ttm
+			<< (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
 	}
 
 	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
@@ -513,7 +518,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev,
 			--pmc;
 			if ((psel == 8 || psel == 0x10) && isbus && (byte & 2))
 				/* add events on higher-numbered bus */
-				mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
+				mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
 		} else {
 			/* Instructions or run cycles on PMC5/6 */
 			--pmc;
@@ -521,7 +526,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev,
 		if (isbus && unit == PM_GRS) {
 			bit = psel & 7;
 			grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
-			mmcr1 |= (u64)grsel << grsel_shift[bit];
+			mmcr1 |= (unsigned long)grsel << grsel_shift[bit];
 		}
 		if (power5_marked_instr_event(event[i]))
 			mmcra |= MMCRA_SAMPLE_ENABLE;
@@ -541,7 +546,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev,
 	return 0;
 }
 
-static void power5_disable_pmc(unsigned int pmc, u64 mmcr[])
+static void power5_disable_pmc(unsigned int pmc, unsigned long mmcr[])
 {
 	if (pmc <= 3)
 		mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
@@ -596,16 +601,27 @@ static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	},
 };
 
-struct power_pmu power5_pmu = {
-	.n_counter = 6,
-	.max_alternatives = MAX_ALT,
-	.add_fields = 0x7000090000555ull,
-	.test_adder = 0x3000490000000ull,
-	.compute_mmcr = power5_compute_mmcr,
+static struct power_pmu power5_pmu = {
+	.name			= "POWER5",
+	.n_counter		= 6,
+	.max_alternatives	= MAX_ALT,
+	.add_fields		= 0x7000090000555ul,
+	.test_adder		= 0x3000490000000ul,
605 .get_constraint = power5_get_constraint, 610 .compute_mmcr = power5_compute_mmcr,
606 .get_alternatives = power5_get_alternatives, 611 .get_constraint = power5_get_constraint,
607 .disable_pmc = power5_disable_pmc, 612 .get_alternatives = power5_get_alternatives,
608 .n_generic = ARRAY_SIZE(power5_generic_events), 613 .disable_pmc = power5_disable_pmc,
609 .generic_events = power5_generic_events, 614 .n_generic = ARRAY_SIZE(power5_generic_events),
610 .cache_events = &power5_cache_events, 615 .generic_events = power5_generic_events,
616 .cache_events = &power5_cache_events,
611}; 617};
618
619static int init_power5_pmu(void)
620{
621 if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5"))
622 return -ENODEV;
623
624 return register_power_pmu(&power5_pmu);
625}
626
627arch_initcall(init_power5_pmu);
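
The power5 hunks above systematically widen the left operand of every shift before shifting, trading (u64) casts for (unsigned long) ones so the register image matches the machine word on both 32- and 64-bit kernels. The point is easy to lose in the churn: C leaves a shift of a 32-bit int by 32 or more bits undefined, so a constraint mask built without the cast can silently drop the high MMCR bits. A minimal stand-alone sketch of the hazard (not part of the patch; the field position is invented):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int fmask = 7;
	int sh = 40;	/* a grsel_shift-style field position */

	/* widen first, then shift: the high bits survive */
	uint64_t good = (uint64_t)fmask << sh;

	/* uint64_t bad = fmask << sh;  undefined: 32-bit shift by 40 */

	printf("field at bit %d: %#llx\n", sh, (unsigned long long)good);
	return 0;
}

On 64-bit builds both spellings happen to work, which is why the bug only shows up once the same source is compiled for 32-bit powerpc.
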
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
index 46f74bebcfd9..09ae5bf5bda7 100644
--- a/arch/powerpc/kernel/power6-pmu.c
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -10,7 +10,9 @@
10 */ 10 */
11#include <linux/kernel.h> 11#include <linux/kernel.h>
12#include <linux/perf_counter.h> 12#include <linux/perf_counter.h>
13#include <linux/string.h>
13#include <asm/reg.h> 14#include <asm/reg.h>
15#include <asm/cputable.h>
14 16
15/* 17/*
16 * Bits in event code for POWER6 18 * Bits in event code for POWER6
@@ -41,9 +43,9 @@
41#define MMCR1_NESTSEL_SH 45 43#define MMCR1_NESTSEL_SH 45
42#define MMCR1_NESTSEL_MSK 0x7 44#define MMCR1_NESTSEL_MSK 0x7
43#define MMCR1_NESTSEL(m) (((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK) 45#define MMCR1_NESTSEL(m) (((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK)
44#define MMCR1_PMC1_LLA ((u64)1 << 44) 46#define MMCR1_PMC1_LLA (1ul << 44)
45#define MMCR1_PMC1_LLA_VALUE ((u64)1 << 39) 47#define MMCR1_PMC1_LLA_VALUE (1ul << 39)
46#define MMCR1_PMC1_ADDR_SEL ((u64)1 << 35) 48#define MMCR1_PMC1_ADDR_SEL (1ul << 35)
47#define MMCR1_PMC1SEL_SH 24 49#define MMCR1_PMC1SEL_SH 24
48#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) 50#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
49#define MMCR1_PMCSEL_MSK 0xff 51#define MMCR1_PMCSEL_MSK 0xff
@@ -173,10 +175,10 @@ static int power6_marked_instr_event(u64 event)
173 * Assign PMC numbers and compute MMCR1 value for a set of events 175 * Assign PMC numbers and compute MMCR1 value for a set of events
174 */ 176 */
175static int p6_compute_mmcr(u64 event[], int n_ev, 177static int p6_compute_mmcr(u64 event[], int n_ev,
176 unsigned int hwc[], u64 mmcr[]) 178 unsigned int hwc[], unsigned long mmcr[])
177{ 179{
178 u64 mmcr1 = 0; 180 unsigned long mmcr1 = 0;
179 u64 mmcra = 0; 181 unsigned long mmcra = 0;
180 int i; 182 int i;
181 unsigned int pmc, ev, b, u, s, psel; 183 unsigned int pmc, ev, b, u, s, psel;
182 unsigned int ttmset = 0; 184 unsigned int ttmset = 0;
@@ -215,7 +217,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev,
215 /* check for conflict on this byte of event bus */ 217 /* check for conflict on this byte of event bus */
216 if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u) 218 if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u)
217 return -1; 219 return -1;
218 mmcr1 |= (u64)u << MMCR1_TTMSEL_SH(b); 220 mmcr1 |= (unsigned long)u << MMCR1_TTMSEL_SH(b);
219 ttmset |= 1 << b; 221 ttmset |= 1 << b;
220 if (u == 5) { 222 if (u == 5) {
221 /* Nest events have a further mux */ 223 /* Nest events have a further mux */
@@ -224,7 +226,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev,
224 MMCR1_NESTSEL(mmcr1) != s) 226 MMCR1_NESTSEL(mmcr1) != s)
225 return -1; 227 return -1;
226 ttmset |= 0x10; 228 ttmset |= 0x10;
227 mmcr1 |= (u64)s << MMCR1_NESTSEL_SH; 229 mmcr1 |= (unsigned long)s << MMCR1_NESTSEL_SH;
228 } 230 }
229 if (0x30 <= psel && psel <= 0x3d) { 231 if (0x30 <= psel && psel <= 0x3d) {
230 /* these need the PMCx_ADDR_SEL bits */ 232 /* these need the PMCx_ADDR_SEL bits */
@@ -243,7 +245,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev,
243 if (power6_marked_instr_event(event[i])) 245 if (power6_marked_instr_event(event[i]))
244 mmcra |= MMCRA_SAMPLE_ENABLE; 246 mmcra |= MMCRA_SAMPLE_ENABLE;
245 if (pmc < 4) 247 if (pmc < 4)
246 mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc); 248 mmcr1 |= (unsigned long)psel << MMCR1_PMCSEL_SH(pmc);
247 } 249 }
248 mmcr[0] = 0; 250 mmcr[0] = 0;
249 if (pmc_inuse & 1) 251 if (pmc_inuse & 1)
@@ -265,10 +267,11 @@ static int p6_compute_mmcr(u64 event[], int n_ev,
265 * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3 267 * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3
266 * 32-34 select field: nest (subunit) event selector 268 * 32-34 select field: nest (subunit) event selector
267 */ 269 */
268static int p6_get_constraint(u64 event, u64 *maskp, u64 *valp) 270static int p6_get_constraint(u64 event, unsigned long *maskp,
271 unsigned long *valp)
269{ 272{
270 int pmc, byte, sh, subunit; 273 int pmc, byte, sh, subunit;
271 u64 mask = 0, value = 0; 274 unsigned long mask = 0, value = 0;
272 275
273 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; 276 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
274 if (pmc) { 277 if (pmc) {
@@ -282,11 +285,11 @@ static int p6_get_constraint(u64 event, u64 *maskp, u64 *valp)
282 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; 285 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
283 sh = byte * 4 + (16 - PM_UNIT_SH); 286 sh = byte * 4 + (16 - PM_UNIT_SH);
284 mask |= PM_UNIT_MSKS << sh; 287 mask |= PM_UNIT_MSKS << sh;
285 value |= (u64)(event & PM_UNIT_MSKS) << sh; 288 value |= (unsigned long)(event & PM_UNIT_MSKS) << sh;
286 if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) { 289 if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) {
287 subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK; 290 subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
288 mask |= (u64)PM_SUBUNIT_MSK << 32; 291 mask |= (unsigned long)PM_SUBUNIT_MSK << 32;
289 value |= (u64)subunit << 32; 292 value |= (unsigned long)subunit << 32;
290 } 293 }
291 } 294 }
292 if (pmc <= 4) { 295 if (pmc <= 4) {
@@ -458,7 +461,7 @@ static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[])
458 return nalt; 461 return nalt;
459} 462}
460 463
461static void p6_disable_pmc(unsigned int pmc, u64 mmcr[]) 464static void p6_disable_pmc(unsigned int pmc, unsigned long mmcr[])
462{ 465{
463 /* Set PMCxSEL to 0 to disable PMCx */ 466 /* Set PMCxSEL to 0 to disable PMCx */
464 if (pmc <= 3) 467 if (pmc <= 3)
@@ -515,18 +518,29 @@ static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
515 }, 518 },
516}; 519};
517 520
518struct power_pmu power6_pmu = { 521static struct power_pmu power6_pmu = {
519 .n_counter = 6, 522 .name = "POWER6",
520 .max_alternatives = MAX_ALT, 523 .n_counter = 6,
521 .add_fields = 0x1555, 524 .max_alternatives = MAX_ALT,
522 .test_adder = 0x3000, 525 .add_fields = 0x1555,
523 .compute_mmcr = p6_compute_mmcr, 526 .test_adder = 0x3000,
524 .get_constraint = p6_get_constraint, 527 .compute_mmcr = p6_compute_mmcr,
525 .get_alternatives = p6_get_alternatives, 528 .get_constraint = p6_get_constraint,
526 .disable_pmc = p6_disable_pmc, 529 .get_alternatives = p6_get_alternatives,
527 .limited_pmc_event = p6_limited_pmc_event, 530 .disable_pmc = p6_disable_pmc,
528 .flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR, 531 .limited_pmc_event = p6_limited_pmc_event,
529 .n_generic = ARRAY_SIZE(power6_generic_events), 532 .flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR,
530 .generic_events = power6_generic_events, 533 .n_generic = ARRAY_SIZE(power6_generic_events),
531 .cache_events = &power6_cache_events, 534 .generic_events = power6_generic_events,
535 .cache_events = &power6_cache_events,
532}; 536};
537
538static int init_power6_pmu(void)
539{
540 if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6"))
541 return -ENODEV;
542
543 return register_power_pmu(&power6_pmu);
544}
545
546arch_initcall(init_power6_pmu);
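
init_power6_pmu() above shows the new selection scheme: every back-end is always built, probes cur_cpu_spec->oprofile_cpu_type at boot, and registers itself through an initcall instead of being chosen at compile time. A hedged userspace sketch of that probe-and-register pattern (the struct, the cpu_type string, and register_pmu() are invented stand-ins):

#include <errno.h>
#include <stdio.h>
#include <string.h>

struct pmu_sketch {
	const char *name;
};

static const struct pmu_sketch *registered;

static int register_pmu(const struct pmu_sketch *pmu)
{
	registered = pmu;
	printf("registered %s\n", pmu->name);
	return 0;
}

static const struct pmu_sketch power6_sketch = { .name = "POWER6" };

/* stand-in for cur_cpu_spec->oprofile_cpu_type */
static const char *cpu_type = "ppc64/power6";

static int init_power6_sketch(void)
{
	if (strcmp(cpu_type, "ppc64/power6"))
		return -ENODEV;	/* not our CPU: stay unregistered */
	return register_pmu(&power6_sketch);
}

int main(void)
{
	return init_power6_sketch() ? 1 : 0;
}
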
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
index b72e7a19d054..5d755ef7ac8f 100644
--- a/arch/powerpc/kernel/power7-pmu.c
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -10,7 +10,9 @@
10 */ 10 */
11#include <linux/kernel.h> 11#include <linux/kernel.h>
12#include <linux/perf_counter.h> 12#include <linux/perf_counter.h>
13#include <linux/string.h>
13#include <asm/reg.h> 14#include <asm/reg.h>
15#include <asm/cputable.h>
14 16
15/* 17/*
16 * Bits in event code for POWER7 18 * Bits in event code for POWER7
@@ -71,10 +73,11 @@
71 * 0-9: Count of events needing PMC1..PMC5 73 * 0-9: Count of events needing PMC1..PMC5
72 */ 74 */
73 75
74static int power7_get_constraint(u64 event, u64 *maskp, u64 *valp) 76static int power7_get_constraint(u64 event, unsigned long *maskp,
77 unsigned long *valp)
75{ 78{
76 int pmc, sh; 79 int pmc, sh;
77 u64 mask = 0, value = 0; 80 unsigned long mask = 0, value = 0;
78 81
79 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; 82 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
80 if (pmc) { 83 if (pmc) {
@@ -224,10 +227,10 @@ static int power7_marked_instr_event(u64 event)
224} 227}
225 228
226static int power7_compute_mmcr(u64 event[], int n_ev, 229static int power7_compute_mmcr(u64 event[], int n_ev,
227 unsigned int hwc[], u64 mmcr[]) 230 unsigned int hwc[], unsigned long mmcr[])
228{ 231{
229 u64 mmcr1 = 0; 232 unsigned long mmcr1 = 0;
230 u64 mmcra = 0; 233 unsigned long mmcra = 0;
231 unsigned int pmc, unit, combine, l2sel, psel; 234 unsigned int pmc, unit, combine, l2sel, psel;
232 unsigned int pmc_inuse = 0; 235 unsigned int pmc_inuse = 0;
233 int i; 236 int i;
@@ -265,11 +268,14 @@ static int power7_compute_mmcr(u64 event[], int n_ev,
265 --pmc; 268 --pmc;
266 } 269 }
267 if (pmc <= 3) { 270 if (pmc <= 3) {
268 mmcr1 |= (u64) unit << (MMCR1_TTM0SEL_SH - 4 * pmc); 271 mmcr1 |= (unsigned long) unit
269 mmcr1 |= (u64) combine << (MMCR1_PMC1_COMBINE_SH - pmc); 272 << (MMCR1_TTM0SEL_SH - 4 * pmc);
273 mmcr1 |= (unsigned long) combine
274 << (MMCR1_PMC1_COMBINE_SH - pmc);
270 mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); 275 mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
271 if (unit == 6) /* L2 events */ 276 if (unit == 6) /* L2 events */
272 mmcr1 |= (u64) l2sel << MMCR1_L2SEL_SH; 277 mmcr1 |= (unsigned long) l2sel
278 << MMCR1_L2SEL_SH;
273 } 279 }
274 if (power7_marked_instr_event(event[i])) 280 if (power7_marked_instr_event(event[i]))
275 mmcra |= MMCRA_SAMPLE_ENABLE; 281 mmcra |= MMCRA_SAMPLE_ENABLE;
@@ -287,10 +293,10 @@ static int power7_compute_mmcr(u64 event[], int n_ev,
287 return 0; 293 return 0;
288} 294}
289 295
290static void power7_disable_pmc(unsigned int pmc, u64 mmcr[]) 296static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[])
291{ 297{
292 if (pmc <= 3) 298 if (pmc <= 3)
293 mmcr[1] &= ~(0xffULL << MMCR1_PMCSEL_SH(pmc)); 299 mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
294} 300}
295 301
296static int power7_generic_events[] = { 302static int power7_generic_events[] = {
@@ -342,16 +348,27 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
342 }, 348 },
343}; 349};
344 350
345struct power_pmu power7_pmu = { 351static struct power_pmu power7_pmu = {
346 .n_counter = 6, 352 .name = "POWER7",
347 .max_alternatives = MAX_ALT + 1, 353 .n_counter = 6,
348 .add_fields = 0x1555ull, 354 .max_alternatives = MAX_ALT + 1,
349 .test_adder = 0x3000ull, 355 .add_fields = 0x1555ul,
350 .compute_mmcr = power7_compute_mmcr, 356 .test_adder = 0x3000ul,
351 .get_constraint = power7_get_constraint, 357 .compute_mmcr = power7_compute_mmcr,
352 .get_alternatives = power7_get_alternatives, 358 .get_constraint = power7_get_constraint,
353 .disable_pmc = power7_disable_pmc, 359 .get_alternatives = power7_get_alternatives,
354 .n_generic = ARRAY_SIZE(power7_generic_events), 360 .disable_pmc = power7_disable_pmc,
355 .generic_events = power7_generic_events, 361 .n_generic = ARRAY_SIZE(power7_generic_events),
356 .cache_events = &power7_cache_events, 362 .generic_events = power7_generic_events,
363 .cache_events = &power7_cache_events,
357}; 364};
365
366static int init_power7_pmu(void)
367{
368 if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power7"))
369 return -ENODEV;
370
371 return register_power_pmu(&power7_pmu);
372}
373
374arch_initcall(init_power7_pmu);
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
index ba0a357a89f4..6637c87fe70e 100644
--- a/arch/powerpc/kernel/ppc970-pmu.c
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -10,7 +10,9 @@
10 */ 10 */
11#include <linux/string.h> 11#include <linux/string.h>
12#include <linux/perf_counter.h> 12#include <linux/perf_counter.h>
13#include <asm/reg.h> 14#include <asm/reg.h>
15#include <asm/cputable.h>
14 16
15/* 17/*
16 * Bits in event code for PPC970 18 * Bits in event code for PPC970
@@ -183,7 +185,7 @@ static int p970_marked_instr_event(u64 event)
183} 185}
184 186
185/* Masks and values for using events from the various units */ 187/* Masks and values for using events from the various units */
186static u64 unit_cons[PM_LASTUNIT+1][2] = { 188static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
187 [PM_FPU] = { 0xc80000000000ull, 0x040000000000ull }, 189 [PM_FPU] = { 0xc80000000000ull, 0x040000000000ull },
188 [PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull }, 190 [PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull },
189 [PM_ISU] = { 0x080000000000ull, 0x020000000000ull }, 191 [PM_ISU] = { 0x080000000000ull, 0x020000000000ull },
@@ -192,10 +194,11 @@ static u64 unit_cons[PM_LASTUNIT+1][2] = {
192 [PM_STS] = { 0x380000000000ull, 0x310000000000ull }, 194 [PM_STS] = { 0x380000000000ull, 0x310000000000ull },
193}; 195};
194 196
195static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp) 197static int p970_get_constraint(u64 event, unsigned long *maskp,
198 unsigned long *valp)
196{ 199{
197 int pmc, byte, unit, sh, spcsel; 200 int pmc, byte, unit, sh, spcsel;
198 u64 mask = 0, value = 0; 201 unsigned long mask = 0, value = 0;
199 int grp = -1; 202 int grp = -1;
200 203
201 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; 204 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
@@ -222,7 +225,7 @@ static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp)
222 grp = byte & 1; 225 grp = byte & 1;
223 /* Set byte lane select field */ 226 /* Set byte lane select field */
224 mask |= 0xfULL << (28 - 4 * byte); 227 mask |= 0xfULL << (28 - 4 * byte);
225 value |= (u64)unit << (28 - 4 * byte); 228 value |= (unsigned long)unit << (28 - 4 * byte);
226 } 229 }
227 if (grp == 0) { 230 if (grp == 0) {
228 /* increment PMC1/2/5/6 field */ 231 /* increment PMC1/2/5/6 field */
@@ -236,7 +239,7 @@ static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp)
236 spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK; 239 spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
237 if (spcsel) { 240 if (spcsel) {
238 mask |= 3ull << 48; 241 mask |= 3ull << 48;
239 value |= (u64)spcsel << 48; 242 value |= (unsigned long)spcsel << 48;
240 } 243 }
241 *maskp = mask; 244 *maskp = mask;
242 *valp = value; 245 *valp = value;
@@ -257,9 +260,9 @@ static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[])
257} 260}
258 261
259static int p970_compute_mmcr(u64 event[], int n_ev, 262static int p970_compute_mmcr(u64 event[], int n_ev,
260 unsigned int hwc[], u64 mmcr[]) 263 unsigned int hwc[], unsigned long mmcr[])
261{ 264{
262 u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0; 265 unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
263 unsigned int pmc, unit, byte, psel; 266 unsigned int pmc, unit, byte, psel;
264 unsigned int ttm, grp; 267 unsigned int ttm, grp;
265 unsigned int pmc_inuse = 0; 268 unsigned int pmc_inuse = 0;
@@ -320,7 +323,7 @@ static int p970_compute_mmcr(u64 event[], int n_ev,
320 continue; 323 continue;
321 ttm = unitmap[i]; 324 ttm = unitmap[i];
322 ++ttmuse[(ttm >> 2) & 1]; 325 ++ttmuse[(ttm >> 2) & 1];
323 mmcr1 |= (u64)(ttm & ~4) << MMCR1_TTM1SEL_SH; 326 mmcr1 |= (unsigned long)(ttm & ~4) << MMCR1_TTM1SEL_SH;
324 } 327 }
325 /* Check only one unit per TTMx */ 328 /* Check only one unit per TTMx */
326 if (ttmuse[0] > 1 || ttmuse[1] > 1) 329 if (ttmuse[0] > 1 || ttmuse[1] > 1)
@@ -340,7 +343,8 @@ static int p970_compute_mmcr(u64 event[], int n_ev,
340 if (unit == PM_LSU1L && byte >= 2) 343 if (unit == PM_LSU1L && byte >= 2)
341 mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); 344 mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
342 } 345 }
343 mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); 346 mmcr1 |= (unsigned long)ttm
347 << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
344 } 348 }
345 349
346 /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ 350 /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
@@ -386,7 +390,8 @@ static int p970_compute_mmcr(u64 event[], int n_ev,
386 for (pmc = 0; pmc < 2; ++pmc) 390 for (pmc = 0; pmc < 2; ++pmc)
387 mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc); 391 mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc);
388 for (; pmc < 8; ++pmc) 392 for (; pmc < 8; ++pmc)
389 mmcr1 |= (u64)pmcsel[pmc] << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)); 393 mmcr1 |= (unsigned long)pmcsel[pmc]
394 << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
390 if (pmc_inuse & 1) 395 if (pmc_inuse & 1)
391 mmcr0 |= MMCR0_PMC1CE; 396 mmcr0 |= MMCR0_PMC1CE;
392 if (pmc_inuse & 0xfe) 397 if (pmc_inuse & 0xfe)
@@ -401,7 +406,7 @@ static int p970_compute_mmcr(u64 event[], int n_ev,
401 return 0; 406 return 0;
402} 407}
403 408
404static void p970_disable_pmc(unsigned int pmc, u64 mmcr[]) 409static void p970_disable_pmc(unsigned int pmc, unsigned long mmcr[])
405{ 410{
406 int shift, i; 411 int shift, i;
407 412
@@ -467,16 +472,28 @@ static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
467 }, 472 },
468}; 473};
469 474
470struct power_pmu ppc970_pmu = { 475static struct power_pmu ppc970_pmu = {
471 .n_counter = 8, 476 .name = "PPC970/FX/MP",
472 .max_alternatives = 2, 477 .n_counter = 8,
473 .add_fields = 0x001100005555ull, 478 .max_alternatives = 2,
474 .test_adder = 0x013300000000ull, 479 .add_fields = 0x001100005555ull,
475 .compute_mmcr = p970_compute_mmcr, 480 .test_adder = 0x013300000000ull,
476 .get_constraint = p970_get_constraint, 481 .compute_mmcr = p970_compute_mmcr,
477 .get_alternatives = p970_get_alternatives, 482 .get_constraint = p970_get_constraint,
478 .disable_pmc = p970_disable_pmc, 483 .get_alternatives = p970_get_alternatives,
479 .n_generic = ARRAY_SIZE(ppc970_generic_events), 484 .disable_pmc = p970_disable_pmc,
480 .generic_events = ppc970_generic_events, 485 .n_generic = ARRAY_SIZE(ppc970_generic_events),
481 .cache_events = &ppc970_cache_events, 486 .generic_events = ppc970_generic_events,
487 .cache_events = &ppc970_cache_events,
482}; 488};
489
490static int init_ppc970_pmu(void)
491{
492 if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970")
493 && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970MP"))
494 return -ENODEV;
495
496 return register_power_pmu(&ppc970_pmu);
497}
498
499arch_initcall(init_ppc970_pmu);
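
The unit_cons tables in these back-ends pair a constraint mask with a value: the mask marks the TTM and byte-lane bits an event pins down, the value gives the setting it needs. The real scheduler folds these into the add_fields/test_adder arithmetic in perf_counter.c; as a simplified, hedged illustration, two events can share the PMU only if they do not pin a common bit to different values (numbers shortened and invented):

#include <stdio.h>

/*
 * mask = bits an event pins down, value = the setting it needs;
 * two events conflict iff they pin a shared bit to different values.
 * (The kernel encodes the same idea with adder/carry tricks.)
 */
static int compatible(unsigned long m1, unsigned long v1,
		      unsigned long m2, unsigned long v2)
{
	return ((v1 ^ v2) & m1 & m2) == 0;
}

int main(void)
{
	/* shortened, invented analogues of the unit_cons pairs */
	unsigned long fpu_m = 0xc8, fpu_v = 0x04;
	unsigned long vpu_m = 0xc8, vpu_v = 0xc4;
	unsigned long isu_m = 0x08, isu_v = 0x02;

	printf("FPU+VPU: %s\n", compatible(fpu_m, fpu_v, vpu_m, vpu_v)
	       ? "ok" : "conflict");
	printf("FPU+ISU: %s\n", compatible(fpu_m, fpu_v, isu_m, isu_v)
	       ? "ok" : "conflict");
	return 0;
}
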
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 15391c2ab013..eae4511ceeac 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -53,6 +53,7 @@
53#include <linux/posix-timers.h> 53#include <linux/posix-timers.h>
54#include <linux/irq.h> 54#include <linux/irq.h>
55#include <linux/delay.h> 55#include <linux/delay.h>
56#include <linux/perf_counter.h>
56 57
57#include <asm/io.h> 58#include <asm/io.h>
58#include <asm/processor.h> 59#include <asm/processor.h>
@@ -525,6 +526,26 @@ void __init iSeries_time_init_early(void)
525} 526}
526#endif /* CONFIG_PPC_ISERIES */ 527#endif /* CONFIG_PPC_ISERIES */
527 528
529#if defined(CONFIG_PERF_COUNTERS) && defined(CONFIG_PPC32)
530DEFINE_PER_CPU(u8, perf_counter_pending);
531
532void set_perf_counter_pending(void)
533{
534 get_cpu_var(perf_counter_pending) = 1;
535 set_dec(1);
536 put_cpu_var(perf_counter_pending);
537}
538
539#define test_perf_counter_pending() __get_cpu_var(perf_counter_pending)
540#define clear_perf_counter_pending() __get_cpu_var(perf_counter_pending) = 0
541
542#else /* CONFIG_PERF_COUNTERS && CONFIG_PPC32 */
543
544#define test_perf_counter_pending() 0
545#define clear_perf_counter_pending()
546
547#endif /* CONFIG_PERF_COUNTERS && CONFIG_PPC32 */
548
528/* 549/*
529 * For iSeries shared processors, we have to let the hypervisor 550 * For iSeries shared processors, we have to let the hypervisor
530 * set the hardware decrementer. We set a virtual decrementer 551 * set the hardware decrementer. We set a virtual decrementer
@@ -551,6 +572,10 @@ void timer_interrupt(struct pt_regs * regs)
551 set_dec(DECREMENTER_MAX); 572 set_dec(DECREMENTER_MAX);
552 573
553#ifdef CONFIG_PPC32 574#ifdef CONFIG_PPC32
575 if (test_perf_counter_pending()) {
576 clear_perf_counter_pending();
577 perf_counter_do_pending();
578 }
554 if (atomic_read(&ppc_n_lost_interrupts) != 0) 579 if (atomic_read(&ppc_n_lost_interrupts) != 0)
555 do_IRQ(regs); 580 do_IRQ(regs);
556#endif 581#endif
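
The PPC32 path above cannot raise a performance-monitor interrupt directly, so set_perf_counter_pending() records the request in a per-cpu flag and programs the decrementer to expire almost immediately; timer_interrupt() then notices the flag and runs perf_counter_do_pending(). A hedged userspace analogue of the same set-flag-then-force-a-timer trick, with SIGALRM standing in for the decrementer:

#include <signal.h>
#include <stdio.h>
#include <sys/time.h>

static volatile sig_atomic_t pending;

static void timer_tick(int sig)
{
	(void)sig;
	if (pending) {
		pending = 0;	/* clear_perf_counter_pending() analogue */
		/* perf_counter_do_pending() would run here */
	}
}

static void set_pending(void)
{
	struct itimerval it = { .it_value = { .tv_usec = 1 } };

	pending = 1;				/* per-cpu flag = 1 */
	setitimer(ITIMER_REAL, &it, NULL);	/* set_dec(1) analogue */
}

int main(void)
{
	signal(SIGALRM, timer_tick);
	set_pending();
	while (pending)
		;	/* the "decrementer" fires almost at once */
	printf("deferred work ran\n");
	return 0;
}
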
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index c4192542b809..61187bec7506 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -1,7 +1,7 @@
1config PPC64 1config PPC64
2 bool "64-bit kernel" 2 bool "64-bit kernel"
3 default n 3 default n
4 select HAVE_PERF_COUNTERS 4 select PPC_HAVE_PMU_SUPPORT
5 help 5 help
6 This option selects whether a 32-bit or a 64-bit kernel 6 This option selects whether a 32-bit or a 64-bit kernel
7 will be built. 7 will be built.
@@ -78,6 +78,7 @@ config POWER4_ONLY
78config 6xx 78config 6xx
79 def_bool y 79 def_bool y
80 depends on PPC32 && PPC_BOOK3S 80 depends on PPC32 && PPC_BOOK3S
81 select PPC_HAVE_PMU_SUPPORT
81 82
82config POWER3 83config POWER3
83 bool 84 bool
@@ -246,6 +247,15 @@ config VIRT_CPU_ACCOUNTING
246 247
247 If in doubt, say Y here. 248 If in doubt, say Y here.
248 249
250config PPC_HAVE_PMU_SUPPORT
251 bool
252
253config PPC_PERF_CTRS
254 def_bool y
255 depends on PERF_COUNTERS && PPC_HAVE_PMU_SUPPORT
256 help
257 This enables the powerpc-specific perf_counter back-end.
258
249config SMP 259config SMP
250 depends on PPC_STD_MMU || FSL_BOOKE 260 depends on PPC_STD_MMU || FSL_BOOKE
251 bool "Symmetric multi-processing support" 261 bool "Symmetric multi-processing support"
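
PPC_PERF_CTRS above is a derived symbol: nothing selects it directly, it simply becomes y when PERF_COUNTERS is enabled and some platform selected PPC_HAVE_PMU_SUPPORT, and the back-end code is then compiled under it. A trivial hedged sketch of that gating effect on the C side (the function name is invented; the define would really come from autoconf.h):

#include <stdio.h>

/* pretend Kconfig produced this; it would really come from autoconf.h */
#define CONFIG_PPC_PERF_CTRS 1

#ifdef CONFIG_PPC_PERF_CTRS
static void pmu_backend_init(void)	/* invented name */
{
	printf("powerpc perf_counter back-end built in\n");
}
#else
static void pmu_backend_init(void) { }	/* compiled away to a stub */
#endif

int main(void)
{
	pmu_backend_init();
	return 0;
}
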
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
index 876ed97147b3..5fb33e160ea0 100644
--- a/arch/x86/include/asm/perf_counter.h
+++ b/arch/x86/include/asm/perf_counter.h
@@ -84,11 +84,6 @@ union cpuid10_edx {
84#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b 84#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
85#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) 85#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2)
86 86
87extern void set_perf_counter_pending(void);
88
89#define clear_perf_counter_pending() do { } while (0)
90#define test_perf_counter_pending() (0)
91
92#ifdef CONFIG_PERF_COUNTERS 87#ifdef CONFIG_PERF_COUNTERS
93extern void init_hw_perf_counters(void); 88extern void init_hw_perf_counters(void);
94extern void perf_counters_lapic_init(void); 89extern void perf_counters_lapic_init(void);
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 31bd120cf2a2..01fd9461d323 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -49,13 +49,17 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t);
49#endif 49#endif
50 50
51#if defined(CONFIG_HIGHPTE) 51#if defined(CONFIG_HIGHPTE)
52#define __KM_PTE \
53 (in_nmi() ? KM_NMI_PTE : \
54 in_irq() ? KM_IRQ_PTE : \
55 KM_PTE0)
52#define pte_offset_map(dir, address) \ 56#define pte_offset_map(dir, address) \
53 ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + \ 57 ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) + \
54 pte_index((address))) 58 pte_index((address)))
55#define pte_offset_map_nested(dir, address) \ 59#define pte_offset_map_nested(dir, address) \
56 ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \ 60 ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \
57 pte_index((address))) 61 pte_index((address)))
58#define pte_unmap(pte) kunmap_atomic((pte), KM_PTE0) 62#define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE)
59#define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1) 63#define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1)
60#else 64#else
61#define pte_offset_map(dir, address) \ 65#define pte_offset_map(dir, address) \
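
__KM_PTE exists because atomic kmap slots are per context: a PTE mapped from NMI context must not reuse the slot that an interrupted IRQ handler or task-level user may still hold, or it would clobber that mapping. A minimal sketch of the context-to-slot selection, assuming an invented context enum:

#include <stdio.h>

enum ctx { CTX_TASK, CTX_IRQ, CTX_NMI };	/* invented for the sketch */

/* one scratch slot per nesting level, like KM_PTE0/KM_IRQ_PTE/KM_NMI_PTE */
static const char *pick_slot(enum ctx c)
{
	return c == CTX_NMI ? "KM_NMI_PTE" :
	       c == CTX_IRQ ? "KM_IRQ_PTE" :
			      "KM_PTE0";
}

int main(void)
{
	printf("task maps use %s, NMI maps use %s\n",
	       pick_slot(CTX_TASK), pick_slot(CTX_NMI));
	return 0;
}
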
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index b685ece89d5c..512ee87062c2 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -25,7 +25,12 @@
25#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) 25#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
26 26
27#define KERNEL_DS MAKE_MM_SEG(-1UL) 27#define KERNEL_DS MAKE_MM_SEG(-1UL)
28#define USER_DS MAKE_MM_SEG(PAGE_OFFSET) 28
29#ifdef CONFIG_X86_32
30# define USER_DS MAKE_MM_SEG(PAGE_OFFSET)
31#else
32# define USER_DS MAKE_MM_SEG(__VIRTUAL_MASK)
33#endif
29 34
30#define get_ds() (KERNEL_DS) 35#define get_ds() (KERNEL_DS)
31#define get_fs() (current_thread_info()->addr_limit) 36#define get_fs() (current_thread_info()->addr_limit)
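
On 64-bit, USER_DS now ends at __VIRTUAL_MASK rather than reusing the 32-bit PAGE_OFFSET definition, and access_ok()-style checks compare the whole range [addr, addr + size) against that limit. A hedged sketch of such a range check, written so the addition cannot wrap (the limit value is invented):

#include <stdio.h>

/* invented stand-in for the segment limit (e.g. a 47-bit __VIRTUAL_MASK) */
#define ADDR_LIMIT 0x00007fffffffffffUL

/* [addr, addr + size) must fit below the limit; written wrap-proof */
static int range_ok(unsigned long addr, unsigned long size)
{
	return size <= ADDR_LIMIT && addr <= ADDR_LIMIT - size;
}

int main(void)
{
	printf("small read: %d\n", range_ok(0x1000, 64));
	printf("wrapping read: %d\n", range_ok(ADDR_LIMIT, 16));
	return 0;
}

Comparing addr against ADDR_LIMIT - size instead of testing addr + size avoids the overflow a hostile size could otherwise exploit.
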
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 275bc142cd5d..76dfef23f789 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -19,6 +19,7 @@
19#include <linux/kdebug.h> 19#include <linux/kdebug.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/uaccess.h> 21#include <linux/uaccess.h>
22#include <linux/highmem.h>
22 23
23#include <asm/apic.h> 24#include <asm/apic.h>
24#include <asm/stacktrace.h> 25#include <asm/stacktrace.h>
@@ -389,23 +390,23 @@ static u64 intel_pmu_raw_event(u64 event)
389 return event & CORE_EVNTSEL_MASK; 390 return event & CORE_EVNTSEL_MASK;
390} 391}
391 392
392static const u64 amd_0f_hw_cache_event_ids 393static const u64 amd_hw_cache_event_ids
393 [PERF_COUNT_HW_CACHE_MAX] 394 [PERF_COUNT_HW_CACHE_MAX]
394 [PERF_COUNT_HW_CACHE_OP_MAX] 395 [PERF_COUNT_HW_CACHE_OP_MAX]
395 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 396 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
396{ 397{
397 [ C(L1D) ] = { 398 [ C(L1D) ] = {
398 [ C(OP_READ) ] = { 399 [ C(OP_READ) ] = {
399 [ C(RESULT_ACCESS) ] = 0, 400 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
400 [ C(RESULT_MISS) ] = 0, 401 [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
401 }, 402 },
402 [ C(OP_WRITE) ] = { 403 [ C(OP_WRITE) ] = {
403 [ C(RESULT_ACCESS) ] = 0, 404 [ C(RESULT_ACCESS) ] = 0x0042, /* Data Cache Refills from L2 */
404 [ C(RESULT_MISS) ] = 0, 405 [ C(RESULT_MISS) ] = 0,
405 }, 406 },
406 [ C(OP_PREFETCH) ] = { 407 [ C(OP_PREFETCH) ] = {
407 [ C(RESULT_ACCESS) ] = 0, 408 [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
408 [ C(RESULT_MISS) ] = 0, 409 [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
409 }, 410 },
410 }, 411 },
411 [ C(L1I ) ] = { 412 [ C(L1I ) ] = {
@@ -418,17 +419,17 @@ static const u64 amd_0f_hw_cache_event_ids
418 [ C(RESULT_MISS) ] = -1, 419 [ C(RESULT_MISS) ] = -1,
419 }, 420 },
420 [ C(OP_PREFETCH) ] = { 421 [ C(OP_PREFETCH) ] = {
421 [ C(RESULT_ACCESS) ] = 0, 422 [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
422 [ C(RESULT_MISS) ] = 0, 423 [ C(RESULT_MISS) ] = 0,
423 }, 424 },
424 }, 425 },
425 [ C(LL ) ] = { 426 [ C(LL ) ] = {
426 [ C(OP_READ) ] = { 427 [ C(OP_READ) ] = {
427 [ C(RESULT_ACCESS) ] = 0, 428 [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
428 [ C(RESULT_MISS) ] = 0, 429 [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
429 }, 430 },
430 [ C(OP_WRITE) ] = { 431 [ C(OP_WRITE) ] = {
431 [ C(RESULT_ACCESS) ] = 0, 432 [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
432 [ C(RESULT_MISS) ] = 0, 433 [ C(RESULT_MISS) ] = 0,
433 }, 434 },
434 [ C(OP_PREFETCH) ] = { 435 [ C(OP_PREFETCH) ] = {
@@ -438,8 +439,8 @@ static const u64 amd_0f_hw_cache_event_ids
438 }, 439 },
439 [ C(DTLB) ] = { 440 [ C(DTLB) ] = {
440 [ C(OP_READ) ] = { 441 [ C(OP_READ) ] = {
441 [ C(RESULT_ACCESS) ] = 0, 442 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
442 [ C(RESULT_MISS) ] = 0, 443 [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DTLB Miss */
443 }, 444 },
444 [ C(OP_WRITE) ] = { 445 [ C(OP_WRITE) ] = {
445 [ C(RESULT_ACCESS) ] = 0, 446 [ C(RESULT_ACCESS) ] = 0,
@@ -1223,6 +1224,8 @@ again:
1223 if (!intel_pmu_save_and_restart(counter)) 1224 if (!intel_pmu_save_and_restart(counter))
1224 continue; 1225 continue;
1225 1226
1227 data.period = counter->hw.last_period;
1228
1226 if (perf_counter_overflow(counter, 1, &data)) 1229 if (perf_counter_overflow(counter, 1, &data))
1227 intel_pmu_disable_counter(&counter->hw, bit); 1230 intel_pmu_disable_counter(&counter->hw, bit);
1228 } 1231 }
@@ -1459,18 +1462,16 @@ static int intel_pmu_init(void)
1459 1462
1460static int amd_pmu_init(void) 1463static int amd_pmu_init(void)
1461{ 1464{
1465 /* Performance-monitoring supported from K7 and later: */
1466 if (boot_cpu_data.x86 < 6)
1467 return -ENODEV;
1468
1462 x86_pmu = amd_pmu; 1469 x86_pmu = amd_pmu;
1463 1470
1464 switch (boot_cpu_data.x86) { 1471 /* Events are common for all AMDs */
1465 case 0x0f: 1472 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
1466 case 0x10: 1473 sizeof(hw_cache_event_ids));
1467 case 0x11:
1468 memcpy(hw_cache_event_ids, amd_0f_hw_cache_event_ids,
1469 sizeof(hw_cache_event_ids));
1470 1474
1471 pr_cont("AMD Family 0f/10/11 events, ");
1472 break;
1473 }
1474 return 0; 1475 return 0;
1475} 1476}
1476 1477
@@ -1554,9 +1555,9 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
1554 */ 1555 */
1555 1556
1556static inline 1557static inline
1557void callchain_store(struct perf_callchain_entry *entry, unsigned long ip) 1558void callchain_store(struct perf_callchain_entry *entry, u64 ip)
1558{ 1559{
1559 if (entry->nr < MAX_STACK_DEPTH) 1560 if (entry->nr < PERF_MAX_STACK_DEPTH)
1560 entry->ip[entry->nr++] = ip; 1561 entry->ip[entry->nr++] = ip;
1561} 1562}
1562 1563
@@ -1577,8 +1578,8 @@ static void backtrace_warning(void *data, char *msg)
1577 1578
1578static int backtrace_stack(void *data, char *name) 1579static int backtrace_stack(void *data, char *name)
1579{ 1580{
1580 /* Don't bother with IRQ stacks for now */ 1581 /* Process all stacks: */
1581 return -1; 1582 return 0;
1582} 1583}
1583 1584
1584static void backtrace_address(void *data, unsigned long addr, int reliable) 1585static void backtrace_address(void *data, unsigned long addr, int reliable)
@@ -1596,47 +1597,59 @@ static const struct stacktrace_ops backtrace_ops = {
1596 .address = backtrace_address, 1597 .address = backtrace_address,
1597}; 1598};
1598 1599
1600#include "../dumpstack.h"
1601
1599static void 1602static void
1600perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) 1603perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
1601{ 1604{
1602 unsigned long bp; 1605 callchain_store(entry, PERF_CONTEXT_KERNEL);
1603 char *stack; 1606 callchain_store(entry, regs->ip);
1604 int nr = entry->nr;
1605 1607
1606 callchain_store(entry, instruction_pointer(regs)); 1608 dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
1609}
1607 1610
1608 stack = ((char *)regs + sizeof(struct pt_regs)); 1611/*
1609#ifdef CONFIG_FRAME_POINTER 1612 * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
1610 bp = frame_pointer(regs); 1613 */
1611#else 1614static unsigned long
1612 bp = 0; 1615copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
1613#endif 1616{
1617 unsigned long offset, addr = (unsigned long)from;
1618 int type = in_nmi() ? KM_NMI : KM_IRQ0;
1619 unsigned long size, len = 0;
1620 struct page *page;
1621 void *map;
1622 int ret;
1614 1623
1615 dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry); 1624 do {
1625 ret = __get_user_pages_fast(addr, 1, 0, &page);
1626 if (!ret)
1627 break;
1616 1628
1617 entry->kernel = entry->nr - nr; 1629 offset = addr & (PAGE_SIZE - 1);
1618} 1630 size = min(PAGE_SIZE - offset, n - len);
1619 1631
1632 map = kmap_atomic(page, type);
1633 memcpy(to, map+offset, size);
1634 kunmap_atomic(map, type);
1635 put_page(page);
1620 1636
1621struct stack_frame { 1637 len += size;
1622 const void __user *next_fp; 1638 to += size;
1623 unsigned long return_address; 1639 addr += size;
1624}; 1640
1641 } while (len < n);
1642
1643 return len;
1644}
1625 1645
1626static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) 1646static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
1627{ 1647{
1628 int ret; 1648 unsigned long bytes;
1629 1649
1630 if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) 1650 bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
1631 return 0;
1632 1651
1633 ret = 1; 1652 return bytes == sizeof(*frame);
1634 pagefault_disable();
1635 if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
1636 ret = 0;
1637 pagefault_enable();
1638
1639 return ret;
1640} 1653}
1641 1654
1642static void 1655static void
@@ -1644,28 +1657,28 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
1644{ 1657{
1645 struct stack_frame frame; 1658 struct stack_frame frame;
1646 const void __user *fp; 1659 const void __user *fp;
1647 int nr = entry->nr;
1648 1660
1649 regs = (struct pt_regs *)current->thread.sp0 - 1; 1661 if (!user_mode(regs))
1650 fp = (void __user *)regs->bp; 1662 regs = task_pt_regs(current);
1651 1663
1664 fp = (void __user *)regs->bp;
1665
1666 callchain_store(entry, PERF_CONTEXT_USER);
1652 callchain_store(entry, regs->ip); 1667 callchain_store(entry, regs->ip);
1653 1668
1654 while (entry->nr < MAX_STACK_DEPTH) { 1669 while (entry->nr < PERF_MAX_STACK_DEPTH) {
1655 frame.next_fp = NULL; 1670 frame.next_frame = NULL;
1656 frame.return_address = 0; 1671 frame.return_address = 0;
1657 1672
1658 if (!copy_stack_frame(fp, &frame)) 1673 if (!copy_stack_frame(fp, &frame))
1659 break; 1674 break;
1660 1675
1661 if ((unsigned long)fp < user_stack_pointer(regs)) 1676 if ((unsigned long)fp < regs->sp)
1662 break; 1677 break;
1663 1678
1664 callchain_store(entry, frame.return_address); 1679 callchain_store(entry, frame.return_address);
1665 fp = frame.next_fp; 1680 fp = frame.next_frame;
1666 } 1681 }
1667
1668 entry->user = entry->nr - nr;
1669} 1682}
1670 1683
1671static void 1684static void
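
The rewritten user-side walk is the classic frame-pointer chain: each frame begins with the saved caller frame pointer followed by the return address, and the loop stops on a failed copy, a chain that runs backwards past regs->sp, or PERF_MAX_STACK_DEPTH entries. A hedged sketch over an invented, hand-built chain:

#include <stdint.h>
#include <stdio.h>

struct frame {
	struct frame *next_frame;	/* saved caller frame pointer */
	uintptr_t return_address;
};

int main(void)
{
	/* invented chain, innermost frame first; 0 terminates it */
	struct frame f2 = { NULL, 0 };
	struct frame f1 = { &f2, 0x3000 };
	struct frame f0 = { &f1, 0x2000 };
	struct frame *fp = &f0;
	int nr = 0, max = 64;	/* PERF_MAX_STACK_DEPTH stand-in */

	while (fp && fp->return_address && nr < max) {
		/* the real walk also stops if fp drops below regs->sp */
		printf("ip[%d] = %#lx\n", nr++,
		       (unsigned long)fp->return_address);
		fp = fp->next_frame;
	}
	return 0;
}
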
@@ -1701,9 +1714,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1701 entry = &__get_cpu_var(irq_entry); 1714 entry = &__get_cpu_var(irq_entry);
1702 1715
1703 entry->nr = 0; 1716 entry->nr = 0;
1704 entry->hv = 0;
1705 entry->kernel = 0;
1706 entry->user = 0;
1707 1717
1708 perf_do_callchain(regs, entry); 1718 perf_do_callchain(regs, entry);
1709 1719
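
Dropping the entry->hv/kernel/user counters works because callchains now carry in-band markers: PERF_CONTEXT_KERNEL and PERF_CONTEXT_USER are sentinel values far above any real address, pushed into the same ip[] stream, so a single flat array describes every region. A hedged sketch of decoding such a stream (the sentinel values match what perf_counter.h defines at this point):

#include <stdint.h>
#include <stdio.h>

/* sentinels well above any real address, like PERF_CONTEXT_* */
#define CTX_KERNEL ((uint64_t)-128)
#define CTX_USER   ((uint64_t)-512)

int main(void)
{
	/* one flat stream; markers announce the region that follows */
	uint64_t chain[] = { CTX_KERNEL, 0xc0001000, 0xc0002000,
			     CTX_USER,   0x00401000 };
	const char *region = "?";
	unsigned int i;

	for (i = 0; i < sizeof(chain) / sizeof(chain[0]); i++) {
		if (chain[i] == CTX_KERNEL) { region = "kernel"; continue; }
		if (chain[i] == CTX_USER)   { region = "user";   continue; }
		printf("%-6s ip %#llx\n", region,
		       (unsigned long long)chain[i]);
	}
	return 0;
}
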
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index f97480941269..71da1bca13cb 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -14,7 +14,7 @@
14static inline pte_t gup_get_pte(pte_t *ptep) 14static inline pte_t gup_get_pte(pte_t *ptep)
15{ 15{
16#ifndef CONFIG_X86_PAE 16#ifndef CONFIG_X86_PAE
17 return *ptep; 17 return ACCESS_ONCE(*ptep);
18#else 18#else
19 /* 19 /*
20 * With get_user_pages_fast, we walk down the pagetables without taking 20 * With get_user_pages_fast, we walk down the pagetables without taking
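
The ACCESS_ONCE() in gup_get_pte() forces exactly one load of the PTE, so the compiler cannot re-read a value that a concurrent unmap may be rewriting between the check and the use. The idiom is a volatile-qualified cast; a minimal sketch:

#include <stdio.h>

/* force a single compiler-visible load through a volatile lvalue */
#define ACCESS_ONCE_SK(x) (*(volatile __typeof__(x) *)&(x))

int main(void)
{
	unsigned long pte = 0x1234;
	unsigned long snap = ACCESS_ONCE_SK(pte);

	/* all later tests use 'snap'; 'pte' itself may change under us */
	printf("snapshot: %#lx\n", snap);
	return 0;
}
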
@@ -219,6 +219,62 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
219 return 1; 219 return 1;
220} 220}
221 221
222/*
223 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
224 * back to the regular GUP.
225 */
226int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
227 struct page **pages)
228{
229 struct mm_struct *mm = current->mm;
230 unsigned long addr, len, end;
231 unsigned long next;
232 unsigned long flags;
233 pgd_t *pgdp;
234 int nr = 0;
235
236 start &= PAGE_MASK;
237 addr = start;
238 len = (unsigned long) nr_pages << PAGE_SHIFT;
239 end = start + len;
240 if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
241 (void __user *)start, len)))
242 return 0;
243
244 /*
245 * XXX: batch / limit 'nr', to avoid large irq off latency
246 * needs some instrumenting to determine the common sizes used by
247 * important workloads (eg. DB2), and whether limiting the batch size
248 * will decrease performance.
249 *
250 * It seems like we're in the clear for the moment. Direct-IO is
251 * the main guy that batches up lots of get_user_pages, and even
252 * they are limited to 64-at-a-time, which is not so many.
253 */
254 /*
255 * This doesn't prevent pagetable teardown, but does prevent
256 * the pagetables and pages from being freed on x86.
257 *
258 * So long as we atomically load page table pointers versus teardown
259 * (which we do on x86, with the above PAE exception), we can follow the
260 * address down to the page and take a ref on it.
261 */
262 local_irq_save(flags);
263 pgdp = pgd_offset(mm, addr);
264 do {
265 pgd_t pgd = *pgdp;
266
267 next = pgd_addr_end(addr, end);
268 if (pgd_none(pgd))
269 break;
270 if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
271 break;
272 } while (pgdp++, addr = next, addr != end);
273 local_irq_restore(flags);
274
275 return nr;
276}
277
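
The do/while above advances with pgd_addr_end(), which rounds addr up to the next top-level-entry boundary but never past end, so each iteration covers exactly the slice one pgd entry maps. A hedged sketch of that boundary arithmetic with an invented region size (the wrap-around guard the real macro needs at the very top of the address space is omitted):

#include <stdio.h>

#define REGION_SIZE (1UL << 12)	/* invented stand-in for PGDIR_SIZE */

/* next boundary after addr, clamped to end */
static unsigned long region_end(unsigned long addr, unsigned long end)
{
	unsigned long next = (addr + REGION_SIZE) & ~(REGION_SIZE - 1);

	return next < end ? next : end;
}

int main(void)
{
	unsigned long addr = 0x1f00, end = 0x5100, next;

	do {
		next = region_end(addr, end);
		printf("slice [%#lx, %#lx)\n", addr, next);
	} while (addr = next, addr != end);

	return 0;
}
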
222/** 278/**
223 * get_user_pages_fast() - pin user pages in memory 279 * get_user_pages_fast() - pin user pages in memory
224 * @start: starting user address 280 * @start: starting user address