aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/powerpc/include/asm/hw_irq.h39
-rw-r--r--arch/powerpc/include/asm/paca.h1
-rw-r--r--arch/powerpc/include/asm/perf_counter.h95
-rw-r--r--arch/powerpc/include/asm/reg.h2
-rw-r--r--arch/powerpc/include/asm/systbl.h2
-rw-r--r--arch/powerpc/include/asm/unistd.h1
-rw-r--r--arch/powerpc/kernel/Makefile2
-rw-r--r--arch/powerpc/kernel/asm-offsets.c1
-rw-r--r--arch/powerpc/kernel/entry_64.S9
-rw-r--r--arch/powerpc/kernel/irq.c5
-rw-r--r--arch/powerpc/kernel/perf_counter.c1214
-rw-r--r--arch/powerpc/kernel/power4-pmu.c557
-rw-r--r--arch/powerpc/kernel/power5+-pmu.c630
-rw-r--r--arch/powerpc/kernel/power5-pmu.c570
-rw-r--r--arch/powerpc/kernel/power6-pmu.c490
-rw-r--r--arch/powerpc/kernel/ppc970-pmu.c441
-rw-r--r--arch/powerpc/mm/fault.c10
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype1
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/ia32/ia32entry.S4
-rw-r--r--arch/x86/include/asm/atomic_32.h236
-rw-r--r--arch/x86/include/asm/entry_arch.h2
-rw-r--r--arch/x86/include/asm/hardirq.h2
-rw-r--r--arch/x86/include/asm/hw_irq.h2
-rw-r--r--arch/x86/include/asm/intel_arch_perfmon.h31
-rw-r--r--arch/x86/include/asm/irq_vectors.h8
-rw-r--r--arch/x86/include/asm/perf_counter.h100
-rw-r--r--arch/x86/include/asm/unistd_32.h2
-rw-r--r--arch/x86/include/asm/unistd_64.h5
-rw-r--r--arch/x86/kernel/apic/apic.c3
-rw-r--r--arch/x86/kernel/cpu/Makefile12
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/cpu/perf_counter.c1417
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c4
-rw-r--r--arch/x86/kernel/entry_64.S5
-rw-r--r--arch/x86/kernel/irq.c10
-rw-r--r--arch/x86/kernel/irqinit_32.c59
-rw-r--r--arch/x86/kernel/irqinit_64.c12
-rw-r--r--arch/x86/kernel/signal.c1
-rw-r--r--arch/x86/kernel/syscall_table_32.S2
-rw-r--r--arch/x86/kernel/traps.c15
-rw-r--r--arch/x86/mm/fault.c12
-rw-r--r--arch/x86/oprofile/nmi_int.c7
-rw-r--r--arch/x86/oprofile/op_model_ppro.c10
-rw-r--r--arch/x86/vdso/vdso32-setup.c6
-rw-r--r--arch/x86/vdso/vma.c7
46 files changed, 5955 insertions, 92 deletions
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index b7e034b0a6dd..20a44d0c9fdd 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -131,5 +131,44 @@ static inline int irqs_disabled_flags(unsigned long flags)
131 */ 131 */
132struct irq_chip; 132struct irq_chip;
133 133
134#ifdef CONFIG_PERF_COUNTERS
/*
 * Return the perf_counter_pending byte of struct paca_struct, loaded with
 * a single lbz at that field's offset from register 13.  Non-zero means
 * the flag is set.
 */
135static inline unsigned long test_perf_counter_pending(void)
136{
137 unsigned long x;
138
139 asm volatile("lbz %0,%1(13)"
140 : "=r" (x)
141 : "i" (offsetof(struct paca_struct, perf_counter_pending)));
142 return x;
143}
144
/*
 * Store 1 into the perf_counter_pending byte (single stb relative to
 * register 13).
 */
145static inline void set_perf_counter_pending(void)
146{
147 asm volatile("stb %0,%1(13)" : :
148 "r" (1),
149 "i" (offsetof(struct paca_struct, perf_counter_pending)));
150}
151
/*
 * Store 0 into the perf_counter_pending byte (single stb relative to
 * register 13).
 */
152static inline void clear_perf_counter_pending(void)
153{
154 asm volatile("stb %0,%1(13)" : :
155 "r" (0),
156 "i" (offsetof(struct paca_struct, perf_counter_pending)));
157}
158
/* Defined outside this header; called when the pending flag was found set. */
159extern void perf_counter_do_pending(void);
160
161#else
162
/*
 * Stubs for kernels built without CONFIG_PERF_COUNTERS: the test always
 * reports "not pending" and the other helpers do nothing.
 */
163static inline unsigned long test_perf_counter_pending(void)
164{
165 return 0;
166}
167
168static inline void set_perf_counter_pending(void) {}
169static inline void clear_perf_counter_pending(void) {}
170static inline void perf_counter_do_pending(void) {}
171#endif /* CONFIG_PERF_COUNTERS */
172
134#endif /* __KERNEL__ */ 173#endif /* __KERNEL__ */
135#endif /* _ASM_POWERPC_HW_IRQ_H */ 174#endif /* _ASM_POWERPC_HW_IRQ_H */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 082b3aedf145..6ef055723019 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -99,6 +99,7 @@ struct paca_struct {
99 u8 soft_enabled; /* irq soft-enable flag */ 99 u8 soft_enabled; /* irq soft-enable flag */
100 u8 hard_enabled; /* set if irqs are enabled in MSR */ 100 u8 hard_enabled; /* set if irqs are enabled in MSR */
101 u8 io_sync; /* writel() needs spin_unlock sync */ 101 u8 io_sync; /* writel() needs spin_unlock sync */
102 u8 perf_counter_pending; /* PM interrupt while soft-disabled */
102 103
103 /* Stuff for accurate time accounting */ 104 /* Stuff for accurate time accounting */
104 u64 user_time; /* accumulated usermode TB ticks */ 105 u64 user_time; /* accumulated usermode TB ticks */
diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h
new file mode 100644
index 000000000000..1c60f0ca7920
--- /dev/null
+++ b/arch/powerpc/include/asm/perf_counter.h
@@ -0,0 +1,95 @@
1/*
2 * Performance counter support - PowerPC-specific definitions.
3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/types.h>
12
13#define MAX_HWCOUNTERS 8
14#define MAX_EVENT_ALTERNATIVES 8
15#define MAX_LIMITED_HWCOUNTERS 2
16
17/*
18 * This struct provides the constants and functions needed to
19 * describe the PMU on a particular POWER-family CPU.
 *
 * The constraint-related members (add_fields, test_adder, get_constraint)
 * are explained in the long comment at the end of this header.
20 */
21struct power_pmu {
22 int n_counter; /* most events that may be on the PMU at once */
23 int max_alternatives; /* presumably the most codes get_alternatives() returns - confirm */
24 u64 add_fields; /* 1 in the LSB of every add field */
25 u64 test_adder; /* per-field bias added when testing constraints */
 /*
  * Compute MMCR values for a set of events.  Fills hwc[] with a 0-based
  * PMC index per event and mmcr[] with the register values; a non-zero
  * return means failure.
  */
26 int (*compute_mmcr)(u64 events[], int n_ev,
27 unsigned int hwc[], u64 mmcr[]);
 /* Store the constraint mask/value for `event'; non-zero return rejects it. */
28 int (*get_constraint)(u64 event, u64 *mskp, u64 *valp);
 /*
  * Write alternative codes for `event' into alt[] and return how many
  * were written.  `flags' takes the PPMU_LIMITED_PMC_* and
  * PPMU_ONLY_COUNT_RUN values defined below.
  */
29 int (*get_alternatives)(u64 event, unsigned int flags,
30 u64 alt[]);
 /* NOTE(review): no caller visible here; presumably removes `pmc' from mmcr[]. */
31 void (*disable_pmc)(unsigned int pmc, u64 mmcr[]);
 /* Presumably non-zero when `event' can be counted on a limited PMC - confirm. */
32 int (*limited_pmc_event)(u64 event);
33 u32 flags; /* PPMU_LIMITED_PMC5_6 / PPMU_ALT_SIPR, see below */
34 int n_generic; /* presumably the length of generic_events[] - confirm */
35 int *generic_events; /* NOTE(review): not used in the code visible here */
36};
37
38extern struct power_pmu *ppmu;
39
40/*
41 * Values for power_pmu.flags
42 */
43#define PPMU_LIMITED_PMC5_6 1 /* PMC5/6 have limited function */
44#define PPMU_ALT_SIPR 2 /* uses alternate posn for SIPR/HV */
45
46/*
47 * Values for flags to get_alternatives()
48 */
49#define PPMU_LIMITED_PMC_OK 1 /* can put this on a limited PMC */
50#define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */
51#define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */
52
53struct pt_regs;
54extern unsigned long perf_misc_flags(struct pt_regs *regs);
55#define perf_misc_flags(regs) perf_misc_flags(regs)
56
57extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
58
59/*
60 * The power_pmu.get_constraint function returns a 64-bit value and
61 * a 64-bit mask that express the constraints between this event and
62 * other events.
63 *
64 * The value and mask are divided up into (non-overlapping) bitfields
65 * of three different types:
66 *
67 * Select field: this expresses the constraint that some set of bits
68 * in MMCR* needs to be set to a specific value for this event. For a
69 * select field, the mask contains 1s in every bit of the field, and
70 * the value contains a unique value for each possible setting of the
71 * MMCR* bits. The constraint checking code will ensure that two events
72 * that set the same field in their masks have the same value in their
73 * value dwords.
74 *
75 * Add field: this expresses the constraint that there can be at most
76 * N events in a particular class. A field of k bits can be used for
77 * N <= 2^(k-1) - 1. The mask has the most significant bit of the field
78 * set (and the other bits 0), and the value has only the least significant
79 * bit of the field set. In addition, the 'add_fields' and 'test_adder'
80 * in the struct power_pmu for this processor come into play. The
81 * add_fields value contains 1 in the LSB of the field, and the
82 * test_adder contains 2^(k-1) - 1 - N in the field.
83 *
84 * NAND field: this expresses the constraint that you may not have events
85 * in all of a set of classes. (For example, on PPC970, you can't select
86 * events from the FPU, ISU and IDU simultaneously, although any two are
87 * possible.) For N classes, the field is N+1 bits wide, and each class
88 * is assigned one bit from the least-significant N bits. The mask has
89 * only the most-significant bit set, and the value has only the bit
90 * for the event's class set. The test_adder has the least significant
91 * bit set in the field.
92 *
93 * If an event is not subject to the constraint expressed by a particular
94 * field, then it will have 0 in both the mask and value for that field.
95 */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index e8018d540e87..fb359b0a6937 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -492,11 +492,13 @@
492#define MMCR0_FCHV 0x00000001UL /* freeze conditions in hypervisor mode */ 492#define MMCR0_FCHV 0x00000001UL /* freeze conditions in hypervisor mode */
493#define SPRN_MMCR1 798 493#define SPRN_MMCR1 798
494#define SPRN_MMCRA 0x312 494#define SPRN_MMCRA 0x312
495#define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */
495#define MMCRA_SIHV 0x10000000UL /* state of MSR HV when SIAR set */ 496#define MMCRA_SIHV 0x10000000UL /* state of MSR HV when SIAR set */
496#define MMCRA_SIPR 0x08000000UL /* state of MSR PR when SIAR set */ 497#define MMCRA_SIPR 0x08000000UL /* state of MSR PR when SIAR set */
497#define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */ 498#define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */
498#define MMCRA_SLOT_SHIFT 24 499#define MMCRA_SLOT_SHIFT 24
499#define MMCRA_SAMPLE_ENABLE 0x00000001UL /* enable sampling */ 500#define MMCRA_SAMPLE_ENABLE 0x00000001UL /* enable sampling */
501#define POWER6_MMCRA_SDSYNC 0x0000080000000000ULL /* SDAR/SIAR synced */
500#define POWER6_MMCRA_SIHV 0x0000040000000000ULL 502#define POWER6_MMCRA_SIHV 0x0000040000000000ULL
501#define POWER6_MMCRA_SIPR 0x0000020000000000ULL 503#define POWER6_MMCRA_SIPR 0x0000020000000000ULL
502#define POWER6_MMCRA_THRM 0x00000020UL 504#define POWER6_MMCRA_THRM 0x00000020UL
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index d98a30dfd41c..a0b92de51c7e 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -322,6 +322,6 @@ SYSCALL_SPU(epoll_create1)
322SYSCALL_SPU(dup3) 322SYSCALL_SPU(dup3)
323SYSCALL_SPU(pipe2) 323SYSCALL_SPU(pipe2)
324SYSCALL(inotify_init1) 324SYSCALL(inotify_init1)
325SYSCALL(ni_syscall) 325SYSCALL_SPU(perf_counter_open)
326COMPAT_SYS_SPU(preadv) 326COMPAT_SYS_SPU(preadv)
327COMPAT_SYS_SPU(pwritev) 327COMPAT_SYS_SPU(pwritev)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 3f06f8ec81c5..4badac2d11d1 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -341,6 +341,7 @@
341#define __NR_dup3 316 341#define __NR_dup3 316
342#define __NR_pipe2 317 342#define __NR_pipe2 317
343#define __NR_inotify_init1 318 343#define __NR_inotify_init1 318
344#define __NR_perf_counter_open 319
344#define __NR_preadv 320 345#define __NR_preadv 320
345#define __NR_pwritev 321 346#define __NR_pwritev 321
346 347
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 71901fbda4a5..9ba1bb731fcc 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -94,6 +94,8 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o
94 94
95obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o 95obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
96obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o 96obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
97obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o power4-pmu.o ppc970-pmu.o \
98 power5-pmu.o power5+-pmu.o power6-pmu.o
97 99
98obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o 100obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
99 101
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 1e40bc053946..e981d1ce1914 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -131,6 +131,7 @@ int main(void)
131 DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); 131 DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
132 DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); 132 DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
133 DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); 133 DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));
134 DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_counter_pending));
134 DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); 135 DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
135 DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); 136 DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
136 DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); 137 DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index abfc32330479..43e073477c34 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -526,6 +526,15 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
5262: 5262:
527 TRACE_AND_RESTORE_IRQ(r5); 527 TRACE_AND_RESTORE_IRQ(r5);
528 528
529#ifdef CONFIG_PERF_COUNTERS
530 /* check paca->perf_counter_pending if we're enabling ints */
531 lbz r3,PACAPERFPEND(r13)
532 and. r3,r3,r5
533 beq 27f
534 bl .perf_counter_do_pending
53527:
536#endif /* CONFIG_PERF_COUNTERS */
537
529 /* extract EE bit and use it to restore paca->hard_enabled */ 538 /* extract EE bit and use it to restore paca->hard_enabled */
530 ld r3,_MSR(r1) 539 ld r3,_MSR(r1)
531 rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */ 540 rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 8c1a4966867e..feff792ed0f9 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -135,6 +135,11 @@ notrace void raw_local_irq_restore(unsigned long en)
135 iseries_handle_interrupts(); 135 iseries_handle_interrupts();
136 } 136 }
137 137
138 if (test_perf_counter_pending()) {
139 clear_perf_counter_pending();
140 perf_counter_do_pending();
141 }
142
138 /* 143 /*
139 * if (get_paca()->hard_enabled) return; 144 * if (get_paca()->hard_enabled) return;
140 * But again we need to take care that gcc gets hard_enabled directly 145 * But again we need to take care that gcc gets hard_enabled directly
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
new file mode 100644
index 000000000000..4786ad9a2887
--- /dev/null
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -0,0 +1,1214 @@
1/*
2 * Performance counter support - powerpc architecture code
3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/sched.h>
13#include <linux/perf_counter.h>
14#include <linux/percpu.h>
15#include <linux/hardirq.h>
16#include <asm/reg.h>
17#include <asm/pmc.h>
18#include <asm/machdep.h>
19#include <asm/firmware.h>
20#include <asm/ptrace.h>
21
/*
 * Per-cpu bookkeeping for the counters currently assigned to this cpu's PMU.
 * counter[], events[] and flags[] are parallel arrays with n_counters
 * entries in use.
 */
22struct cpu_hw_counters {
23 int n_counters; /* entries in use in counter[]/events[]/flags[] */
24 int n_percpu; /* NOTE(review): not referenced in the code visible here */
25 int disabled; /* set by hw_perf_disable(), cleared by hw_perf_enable() */
26 int n_added; /* counters added since the last hw_perf_disable() */
27 int n_limited; /* entries in use in limited_counter[]/limited_hwidx[] */
28 u8 pmcs_enabled; /* set once ppc_md.enable_pmcs has been tried on this cpu */
29 struct perf_counter *counter[MAX_HWCOUNTERS];
30 u64 events[MAX_HWCOUNTERS]; /* event code per counter (from hw.config) */
31 unsigned int flags[MAX_HWCOUNTERS]; /* PPMU_* flags per counter (from hw.counter_base) */
32 u64 mmcr[3]; /* values for MMCR0, MMCR1 and MMCRA, in that order */
33 struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS];
34 u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; /* PMC number for each limited counter */
35};
36DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
37
38struct power_pmu *ppmu;
39
40/*
41 * Normally, to ignore kernel events we set the FCS (freeze counters
42 * in supervisor mode) bit in MMCR0, but if the kernel runs with the
43 * hypervisor bit set in the MSR, or if we are running on a processor
44 * where the hypervisor bit is forced to 1 (as on Apple G5 processors),
45 * then we need to use the FCHV bit to ignore kernel events.
46 */
47static unsigned int freeze_counters_kernel = MMCR0_FCS;
48
49static void perf_counter_interrupt(struct pt_regs *regs);
50
/* Debug hook; this implementation is intentionally empty. */
51void perf_counter_print_debug(void)
52{
53}
54
55/*
56 * Read one performance monitor counter (PMC).
57 */
58static unsigned long read_pmc(int idx)
59{
60 unsigned long val;
61
62 switch (idx) {
63 case 1:
64 val = mfspr(SPRN_PMC1);
65 break;
66 case 2:
67 val = mfspr(SPRN_PMC2);
68 break;
69 case 3:
70 val = mfspr(SPRN_PMC3);
71 break;
72 case 4:
73 val = mfspr(SPRN_PMC4);
74 break;
75 case 5:
76 val = mfspr(SPRN_PMC5);
77 break;
78 case 6:
79 val = mfspr(SPRN_PMC6);
80 break;
81 case 7:
82 val = mfspr(SPRN_PMC7);
83 break;
84 case 8:
85 val = mfspr(SPRN_PMC8);
86 break;
87 default:
88 printk(KERN_ERR "oops trying to read PMC%d\n", idx);
89 val = 0;
90 }
91 return val;
92}
93
94/*
95 * Write one PMC.
96 */
97static void write_pmc(int idx, unsigned long val)
98{
99 switch (idx) {
100 case 1:
101 mtspr(SPRN_PMC1, val);
102 break;
103 case 2:
104 mtspr(SPRN_PMC2, val);
105 break;
106 case 3:
107 mtspr(SPRN_PMC3, val);
108 break;
109 case 4:
110 mtspr(SPRN_PMC4, val);
111 break;
112 case 5:
113 mtspr(SPRN_PMC5, val);
114 break;
115 case 6:
116 mtspr(SPRN_PMC6, val);
117 break;
118 case 7:
119 mtspr(SPRN_PMC7, val);
120 break;
121 case 8:
122 mtspr(SPRN_PMC8, val);
123 break;
124 default:
125 printk(KERN_ERR "oops trying to write PMC%d\n", idx);
126 }
127}
128
129/*
130 * Check if a set of events can all go on the PMU at once.
131 * If they can't, this will look at alternative codes for the events
132 * and see if any combination of alternative codes is feasible.
133 * The feasible set is returned in event[].
 *
 * event[] and cflags[] are parallel arrays of n_ev entries; cflags[]
 * carries the PPMU_LIMITED_PMC_* flags for each event.
 * Returns 0 when a feasible assignment exists (event[] may have been
 * rewritten with alternative codes) and -1 otherwise.
134 */
135static int power_check_constraints(u64 event[], unsigned int cflags[],
136 int n_ev)
137{
138 u64 mask, value, nv;
139 u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
140 u64 amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
141 u64 avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
142 u64 smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS];
143 int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS];
144 int i, j;
145 u64 addf = ppmu->add_fields;
146 u64 tadd = ppmu->test_adder;
147
148 if (n_ev > ppmu->n_counter)
149 return -1;
150
151 /* First see if the events will go on as-is */
152 for (i = 0; i < n_ev; ++i) {
 /*
  * An event that must go on a limited PMC but is not itself a
  * limited-PMC event is replaced by its first alternative.
  * NOTE(review): get_alternatives is called here without the NULL
  * check made further down; presumably REQD is only ever set when
  * the PMU provides both callbacks - confirm.
  */
153 if ((cflags[i] & PPMU_LIMITED_PMC_REQD)
154 && !ppmu->limited_pmc_event(event[i])) {
155 ppmu->get_alternatives(event[i], cflags[i],
156 alternatives[i]);
157 event[i] = alternatives[i][0];
158 }
159 if (ppmu->get_constraint(event[i], &amasks[i][0],
160 &avalues[i][0]))
161 return -1;
162 }
 /*
  * Fold the events' constraint values together one at a time; stop at
  * the first event whose addition violates the accumulated mask or
  * its own mask.
  */
163 value = mask = 0;
164 for (i = 0; i < n_ev; ++i) {
165 nv = (value | avalues[i][0]) + (value & avalues[i][0] & addf);
166 if ((((nv + tadd) ^ value) & mask) != 0 ||
167 (((nv + tadd) ^ avalues[i][0]) & amasks[i][0]) != 0)
168 break;
169 value = nv;
170 mask |= amasks[i][0];
171 }
172 if (i == n_ev)
173 return 0; /* all OK */
174
175 /* doesn't work, gather alternatives... */
176 if (!ppmu->get_alternatives)
177 return -1;
178 for (i = 0; i < n_ev; ++i) {
179 choice[i] = 0;
180 n_alt[i] = ppmu->get_alternatives(event[i], cflags[i],
181 alternatives[i]);
 /* Slot 0 is not recomputed: amasks/avalues[i][0] were filled above. */
182 for (j = 1; j < n_alt[i]; ++j)
183 ppmu->get_constraint(alternatives[i][j],
184 &amasks[i][j], &avalues[i][j]);
185 }
186
187 /* enumerate all possibilities and see if any will work */
 /*
  * i is the event being placed; j is the last alternative tried for it
  * (-1 means "start from the first").  svalues/smasks hold the
  * accumulated value/mask from before event i was placed, so they can be
  * restored when backtracking.
  */
188 i = 0;
189 j = -1;
190 value = mask = nv = 0;
191 while (i < n_ev) {
192 if (j >= 0) {
193 /* we're backtracking, restore context */
194 value = svalues[i];
195 mask = smasks[i];
196 j = choice[i];
197 }
198 /*
199 * See if any alternative k for event i,
200 * where k > j, will satisfy the constraints.
201 */
202 while (++j < n_alt[i]) {
203 nv = (value | avalues[i][j]) +
204 (value & avalues[i][j] & addf);
205 if ((((nv + tadd) ^ value) & mask) == 0 &&
206 (((nv + tadd) ^ avalues[i][j])
207 & amasks[i][j]) == 0)
208 break;
209 }
210 if (j >= n_alt[i]) {
211 /*
212 * No feasible alternative, backtrack
213 * to event i-1 and continue enumerating its
214 * alternatives from where we got up to.
215 */
216 if (--i < 0)
217 return -1;
218 } else {
219 /*
220 * Found a feasible alternative for event i,
221 * remember where we got up to with this event,
222 * go on to the next event, and start with
223 * the first alternative for it.
224 */
225 choice[i] = j;
226 svalues[i] = value;
227 smasks[i] = mask;
228 value = nv;
229 mask |= amasks[i][j];
230 ++i;
231 j = -1;
232 }
233 }
234
235 /* OK, we have a feasible combination, tell the caller the solution */
236 for (i = 0; i < n_ev; ++i)
237 event[i] = alternatives[i][choice[i]];
238 return 0;
239}
240
241/*
242 * Check if newly-added counters have consistent settings for
243 * exclude_{user,kernel,hv} with each other and any previously
244 * added counters.
245 */
246static int check_excludes(struct perf_counter **ctrs, unsigned int cflags[],
247 int n_prev, int n_new)
248{
249 int eu = 0, ek = 0, eh = 0;
250 int i, n, first;
251 struct perf_counter *counter;
252
253 n = n_prev + n_new;
254 if (n <= 1)
255 return 0;
256
257 first = 1;
258 for (i = 0; i < n; ++i) {
259 if (cflags[i] & PPMU_LIMITED_PMC_OK) {
260 cflags[i] &= ~PPMU_LIMITED_PMC_REQD;
261 continue;
262 }
263 counter = ctrs[i];
264 if (first) {
265 eu = counter->attr.exclude_user;
266 ek = counter->attr.exclude_kernel;
267 eh = counter->attr.exclude_hv;
268 first = 0;
269 } else if (counter->attr.exclude_user != eu ||
270 counter->attr.exclude_kernel != ek ||
271 counter->attr.exclude_hv != eh) {
272 return -EAGAIN;
273 }
274 }
275
276 if (eu || ek || eh)
277 for (i = 0; i < n; ++i)
278 if (cflags[i] & PPMU_LIMITED_PMC_OK)
279 cflags[i] |= PPMU_LIMITED_PMC_REQD;
280
281 return 0;
282}
283
/*
 * Bring counter->count up to date with the hardware PMC.
 * Does nothing when counter->hw.idx is 0.  Otherwise the PMC is read, the
 * (32-bit) difference from hw.prev_count is added to counter->count and
 * subtracted from hw.period_left.
 */
284static void power_pmu_read(struct perf_counter *counter)
285{
286 long val, delta, prev;
287
288 if (!counter->hw.idx)
289 return;
290 /*
291 * Performance monitor interrupts come even when interrupts
292 * are soft-disabled, as long as interrupts are hard-enabled.
293 * Therefore we treat them like NMIs.
 *
 * The loop retries until prev_count is replaced by the freshly read
 * value without having changed in between.
294 */
295 do {
296 prev = atomic64_read(&counter->hw.prev_count);
297 barrier();
298 val = read_pmc(counter->hw.idx);
299 } while (atomic64_cmpxchg(&counter->hw.prev_count, prev, val) != prev);
300
301 /* The counters are only 32 bits wide */
302 delta = (val - prev) & 0xfffffffful;
303 atomic64_add(delta, &counter->count);
304 atomic64_sub(delta, &counter->hw.period_left);
305}
306
307/*
308 * On some machines, PMC5 and PMC6 can't be written, don't respect
309 * the freeze conditions, and don't generate interrupts. This tells
310 * us if `counter' is using such a PMC.
311 */
312static int is_limited_pmc(int pmcnum)
313{
314 return (ppmu->flags & PPMU_LIMITED_PMC5_6)
315 && (pmcnum == 5 || pmcnum == 6);
316}
317
318static void freeze_limited_counters(struct cpu_hw_counters *cpuhw,
319 unsigned long pmc5, unsigned long pmc6)
320{
321 struct perf_counter *counter;
322 u64 val, prev, delta;
323 int i;
324
325 for (i = 0; i < cpuhw->n_limited; ++i) {
326 counter = cpuhw->limited_counter[i];
327 if (!counter->hw.idx)
328 continue;
329 val = (counter->hw.idx == 5) ? pmc5 : pmc6;
330 prev = atomic64_read(&counter->hw.prev_count);
331 counter->hw.idx = 0;
332 delta = (val - prev) & 0xfffffffful;
333 atomic64_add(delta, &counter->count);
334 }
335}
336
337static void thaw_limited_counters(struct cpu_hw_counters *cpuhw,
338 unsigned long pmc5, unsigned long pmc6)
339{
340 struct perf_counter *counter;
341 u64 val;
342 int i;
343
344 for (i = 0; i < cpuhw->n_limited; ++i) {
345 counter = cpuhw->limited_counter[i];
346 counter->hw.idx = cpuhw->limited_hwidx[i];
347 val = (counter->hw.idx == 5) ? pmc5 : pmc6;
348 atomic64_set(&counter->hw.prev_count, val);
349 perf_counter_update_userpage(counter);
350 }
351}
352
353/*
354 * Since limited counters don't respect the freeze conditions, we
355 * have to read them immediately after freezing or unfreezing the
356 * other counters. We try to keep the values from the limited
357 * counters as consistent as possible by keeping the delay (in
358 * cycles and instructions) between freezing/unfreezing and reading
359 * the limited counters as small and consistent as possible.
360 * Therefore, if any limited counters are in use, we read them
361 * both, and always in the same order, to minimize variability,
362 * and do it inside the same asm that writes MMCR0.
 *
 * cpuhw is this cpu's counter state; mmcr0 is the value to place in
 * MMCR0.  With no limited counters in use this is a plain mtspr.
363 */
364static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0)
365{
366 unsigned long pmc5, pmc6;
367
368 if (!cpuhw->n_limited) {
369 mtspr(SPRN_MMCR0, mmcr0);
370 return;
371 }
372
373 /*
374 * Write MMCR0, then read PMC5 and PMC6 immediately.
375 * To ensure we don't get a performance monitor interrupt
376 * between writing MMCR0 and freezing/thawing the limited
377 * counters, we first write MMCR0 with the counter overflow
378 * interrupt enable bits turned off.
379 */
380 asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5"
381 : "=&r" (pmc5), "=&r" (pmc6)
382 : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)),
383 "i" (SPRN_MMCR0),
384 "i" (SPRN_PMC5), "i" (SPRN_PMC6));
385
 /* MMCR0_FC set means we are freezing; otherwise we are unfreezing. */
386 if (mmcr0 & MMCR0_FC)
387 freeze_limited_counters(cpuhw, pmc5, pmc6);
388 else
389 thaw_limited_counters(cpuhw, pmc5, pmc6);
390
391 /*
392 * Write the full MMCR0 including the counter overflow interrupt
393 * enable bits, if necessary.
394 */
395 if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE))
396 mtspr(SPRN_MMCR0, mmcr0);
397}
398
399/*
400 * Disable all counters to prevent PMU interrupts and to allow
401 * counters to be added or removed.
 *
 * Runs with local interrupts saved/disabled.  If this cpu's PMU is
 * already marked disabled, nothing is done.
402 */
403void hw_perf_disable(void)
404{
405 struct cpu_hw_counters *cpuhw;
406 unsigned long ret;
407 unsigned long flags;
408
409 local_irq_save(flags);
410 cpuhw = &__get_cpu_var(cpu_hw_counters);
411
412 ret = cpuhw->disabled;
413 if (!ret) {
414 cpuhw->disabled = 1;
 /* Start counting additions afresh; hw_perf_enable() checks this. */
415 cpuhw->n_added = 0;
416
417 /*
418 * Check if we ever enabled the PMU on this cpu.
419 */
420 if (!cpuhw->pmcs_enabled) {
421 if (ppc_md.enable_pmcs)
422 ppc_md.enable_pmcs();
423 cpuhw->pmcs_enabled = 1;
424 }
425
426 /*
427 * Disable instruction sampling if it was enabled
428 */
429 if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
430 mtspr(SPRN_MMCRA,
431 cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
432 mb();
433 }
434
435 /*
436 * Set the 'freeze counters' bit.
437 * The barrier is to make sure the mtspr has been
438 * executed and the PMU has frozen the counters
439 * before we return.
440 */
441 write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC);
442 mb();
443 }
444 local_irq_restore(flags);
445}
446
447/*
448 * Re-enable all counters if the PMU is currently marked disabled
 * (cpuhw->disabled set); otherwise return without touching anything.
449 * If we were previously disabled and counters were added, then
450 * put the new config on the PMU.
451 */
452void hw_perf_enable(void)
453{
454 struct perf_counter *counter;
455 struct cpu_hw_counters *cpuhw;
456 unsigned long flags;
457 long i;
458 unsigned long val;
459 s64 left;
460 unsigned int hwc_index[MAX_HWCOUNTERS];
461 int n_lim;
462 int idx;
463
464 local_irq_save(flags);
465 cpuhw = &__get_cpu_var(cpu_hw_counters);
466 if (!cpuhw->disabled) {
467 local_irq_restore(flags);
468 return;
469 }
470 cpuhw->disabled = 0;
471
472 /*
473 * If we didn't change anything, or only removed counters,
474 * no need to recalculate MMCR* settings and reset the PMCs.
475 * Just reenable the PMU with the current MMCR* settings
476 * (possibly updated for removal of counters).
477 */
478 if (!cpuhw->n_added) {
479 mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
480 mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
481 if (cpuhw->n_counters == 0)
482 get_lppaca()->pmcregs_in_use = 0;
483 goto out_enable;
484 }
485
486 /*
487 * Compute MMCR* values for the new set of counters
488 */
489 if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_counters, hwc_index,
490 cpuhw->mmcr)) {
491 /* shouldn't ever get here */
492 printk(KERN_ERR "oops compute_mmcr failed\n");
 /*
  * NOTE(review): this path leaves cpuhw->disabled cleared without
  * writing MMCR0 - confirm that is intended.
  */
493 goto out;
494 }
495
496 /*
497 * Add in MMCR0 freeze bits corresponding to the
498 * attr.exclude_* bits for the first counter.
499 * We have already checked that all counters have the
500 * same values for these bits as the first counter.
501 */
502 counter = cpuhw->counter[0];
503 if (counter->attr.exclude_user)
504 cpuhw->mmcr[0] |= MMCR0_FCP;
505 if (counter->attr.exclude_kernel)
506 cpuhw->mmcr[0] |= freeze_counters_kernel;
507 if (counter->attr.exclude_hv)
508 cpuhw->mmcr[0] |= MMCR0_FCHV;
509
510 /*
511 * Write the new configuration to MMCR* with the freeze
512 * bit set and set the hardware counters to their initial values.
513 * Then unfreeze the counters.
514 */
515 get_lppaca()->pmcregs_in_use = 1;
516 mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
517 mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
518 mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
519 | MMCR0_FC);
520
521 /*
522 * Read off any pre-existing counters that need to move
523 * to another PMC.
 *
 * hwc_index[] is 0-based while hw.idx is 1-based (0 = not on a PMC),
 * hence the "+ 1" in the comparison.
524 */
525 for (i = 0; i < cpuhw->n_counters; ++i) {
526 counter = cpuhw->counter[i];
527 if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) {
528 power_pmu_read(counter);
529 write_pmc(counter->hw.idx, 0);
530 counter->hw.idx = 0;
531 }
532 }
533
534 /*
535 * Initialize the PMCs for all the new and moved counters.
536 */
537 cpuhw->n_limited = n_lim = 0;
538 for (i = 0; i < cpuhw->n_counters; ++i) {
539 counter = cpuhw->counter[i];
540 if (counter->hw.idx)
541 continue;
542 idx = hwc_index[i] + 1;
 /*
  * Limited PMCs are not written here; they are only recorded so that
  * write_mmcr0() can thaw them.
  */
543 if (is_limited_pmc(idx)) {
544 cpuhw->limited_counter[n_lim] = counter;
545 cpuhw->limited_hwidx[n_lim] = idx;
546 ++n_lim;
547 continue;
548 }
549 val = 0;
 /*
  * For a sampling counter, start the PMC at 0x80000000 - left when
  * `left' is below 0x80000000; otherwise it starts from 0.
  */
550 if (counter->hw.sample_period) {
551 left = atomic64_read(&counter->hw.period_left);
552 if (left < 0x80000000L)
553 val = 0x80000000L - left;
554 }
555 atomic64_set(&counter->hw.prev_count, val);
556 counter->hw.idx = idx;
557 write_pmc(idx, val);
558 perf_counter_update_userpage(counter);
559 }
560 cpuhw->n_limited = n_lim;
561 cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
562
563 out_enable:
564 mb();
565 write_mmcr0(cpuhw, cpuhw->mmcr[0]);
566
567 /*
568 * Enable instruction sampling if necessary
569 */
570 if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
571 mb();
572 mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
573 }
574
575 out:
576 local_irq_restore(flags);
577}
578
579static int collect_events(struct perf_counter *group, int max_count,
580 struct perf_counter *ctrs[], u64 *events,
581 unsigned int *flags)
582{
583 int n = 0;
584 struct perf_counter *counter;
585
586 if (!is_software_counter(group)) {
587 if (n >= max_count)
588 return -1;
589 ctrs[n] = group;
590 flags[n] = group->hw.counter_base;
591 events[n++] = group->hw.config;
592 }
593 list_for_each_entry(counter, &group->sibling_list, list_entry) {
594 if (!is_software_counter(counter) &&
595 counter->state != PERF_COUNTER_STATE_OFF) {
596 if (n >= max_count)
597 return -1;
598 ctrs[n] = counter;
599 flags[n] = counter->hw.counter_base;
600 events[n++] = counter->hw.config;
601 }
602 }
603 return n;
604}
605
606static void counter_sched_in(struct perf_counter *counter, int cpu)
607{
608 counter->state = PERF_COUNTER_STATE_ACTIVE;
609 counter->oncpu = cpu;
610 counter->tstamp_running += counter->ctx->time - counter->tstamp_stopped;
611 if (is_software_counter(counter))
612 counter->pmu->enable(counter);
613}
614
615/*
616 * Called to enable a whole group of counters.
617 * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
 * -EAGAIN is returned when the group's hardware counters do not fit in
 * the remaining PMCs, when their exclude_* settings conflict, or when no
 * feasible constraint assignment exists.
618 * Assumes the caller has disabled interrupts and has
619 * frozen the PMU (see hw_perf_disable above).
620 */
621int hw_perf_group_sched_in(struct perf_counter *group_leader,
622 struct perf_cpu_context *cpuctx,
623 struct perf_counter_context *ctx, int cpu)
624{
625 struct cpu_hw_counters *cpuhw;
626 long i, n, n0;
627 struct perf_counter *sub;
628
629 cpuhw = &__get_cpu_var(cpu_hw_counters);
630 n0 = cpuhw->n_counters;
 /*
  * Collect the group's hardware counters directly into the free tail of
  * the per-cpu arrays; n_counters is only advanced once all checks pass.
  */
631 n = collect_events(group_leader, ppmu->n_counter - n0,
632 &cpuhw->counter[n0], &cpuhw->events[n0],
633 &cpuhw->flags[n0]);
634 if (n < 0)
635 return -EAGAIN;
636 if (check_excludes(cpuhw->counter, cpuhw->flags, n0, n))
637 return -EAGAIN;
638 i = power_check_constraints(cpuhw->events, cpuhw->flags, n + n0);
639 if (i < 0)
640 return -EAGAIN;
641 cpuhw->n_counters = n0 + n;
642 cpuhw->n_added += n;
643
644 /*
645 * OK, this group can go on; update counter states etc.,
646 * and enable any software counters
647 */
 /* power_check_constraints() may have substituted alternative event codes. */
648 for (i = n0; i < n0 + n; ++i)
649 cpuhw->counter[i]->hw.config = cpuhw->events[i];
650 cpuctx->active_oncpu += n;
 /* From here n counts scheduled counters: the leader plus each non-OFF sibling. */
651 n = 1;
652 counter_sched_in(group_leader, cpu);
653 list_for_each_entry(sub, &group_leader->sibling_list, list_entry) {
654 if (sub->state != PERF_COUNTER_STATE_OFF) {
655 counter_sched_in(sub, cpu);
656 ++n;
657 }
658 }
659 ctx->nr_active += n;
660
661 return 1;
662}
663
664/*
665 * Add a counter to the PMU.
666 * If all counters are not already frozen, then we disable and
667 * re-enable the PMU in order to get hw_perf_enable to do the
668 * actual work of reconfiguring the PMU.
669 */
670static int power_pmu_enable(struct perf_counter *counter)
671{
672 struct cpu_hw_counters *cpuhw;
673 unsigned long flags;
674 int n0;
675 int ret = -EAGAIN;
676
677 local_irq_save(flags);
678 perf_disable();
679
680 /*
681 * Add the counter to the list (if there is room)
682 * and check whether the total set is still feasible.
683 */
684 cpuhw = &__get_cpu_var(cpu_hw_counters);
685 n0 = cpuhw->n_counters;
686 if (n0 >= ppmu->n_counter)
687 goto out;
688 cpuhw->counter[n0] = counter;
689 cpuhw->events[n0] = counter->hw.config;
690 cpuhw->flags[n0] = counter->hw.counter_base;
691 if (check_excludes(cpuhw->counter, cpuhw->flags, n0, 1))
692 goto out;
693 if (power_check_constraints(cpuhw->events, cpuhw->flags, n0 + 1))
694 goto out;
695
696 counter->hw.config = cpuhw->events[n0];
697 ++cpuhw->n_counters;
698 ++cpuhw->n_added;
699
700 ret = 0;
701 out:
702 perf_enable();
703 local_irq_restore(flags);
704 return ret;
705}
706
707/*
708 * Remove a counter from the PMU.
709 */
710static void power_pmu_disable(struct perf_counter *counter)
711{
712 struct cpu_hw_counters *cpuhw;
713 long i;
714 unsigned long flags;
715
716 local_irq_save(flags);
717 perf_disable();
718
719 power_pmu_read(counter);
720
721 cpuhw = &__get_cpu_var(cpu_hw_counters);
722 for (i = 0; i < cpuhw->n_counters; ++i) {
723 if (counter == cpuhw->counter[i]) {
724 while (++i < cpuhw->n_counters)
725 cpuhw->counter[i-1] = cpuhw->counter[i];
726 --cpuhw->n_counters;
727 ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
728 if (counter->hw.idx) {
729 write_pmc(counter->hw.idx, 0);
730 counter->hw.idx = 0;
731 }
732 perf_counter_update_userpage(counter);
733 break;
734 }
735 }
736 for (i = 0; i < cpuhw->n_limited; ++i)
737 if (counter == cpuhw->limited_counter[i])
738 break;
739 if (i < cpuhw->n_limited) {
740 while (++i < cpuhw->n_limited) {
741 cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i];
742 cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i];
743 }
744 --cpuhw->n_limited;
745 }
746 if (cpuhw->n_counters == 0) {
747 /* disable exceptions if no counters are running */
748 cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
749 }
750
751 perf_enable();
752 local_irq_restore(flags);
753}
754
755/*
756 * Re-enable interrupts on a counter after they were throttled
757 * because they were coming too fast.
758 */
759static void power_pmu_unthrottle(struct perf_counter *counter)
760{
761 s64 val, left;
762 unsigned long flags;
763
764 if (!counter->hw.idx || !counter->hw.sample_period)
765 return;
766 local_irq_save(flags);
767 perf_disable();
768 power_pmu_read(counter);
769 left = counter->hw.sample_period;
770 val = 0;
771 if (left < 0x80000000L)
772 val = 0x80000000L - left;
773 write_pmc(counter->hw.idx, val);
774 atomic64_set(&counter->hw.prev_count, val);
775 atomic64_set(&counter->hw.period_left, left);
776 perf_counter_update_userpage(counter);
777 perf_enable();
778 local_irq_restore(flags);
779}
780
781struct pmu power_pmu = {
782 .enable = power_pmu_enable,
783 .disable = power_pmu_disable,
784 .read = power_pmu_read,
785 .unthrottle = power_pmu_unthrottle,
786};
787
788/*
789 * Return 1 if we might be able to put counter on a limited PMC,
790 * or 0 if not.
791 * A counter can only go on a limited PMC if it counts something
792 * that a limited PMC can count, doesn't require interrupts, and
793 * doesn't exclude any processor mode.
794 */
795static int can_go_on_limited_pmc(struct perf_counter *counter, u64 ev,
796 unsigned int flags)
797{
798 int n;
799 u64 alt[MAX_EVENT_ALTERNATIVES];
800
801 if (counter->attr.exclude_user
802 || counter->attr.exclude_kernel
803 || counter->attr.exclude_hv
804 || counter->attr.sample_period)
805 return 0;
806
807 if (ppmu->limited_pmc_event(ev))
808 return 1;
809
810 /*
811 * The requested event isn't on a limited PMC already;
812 * see if any alternative code goes on a limited PMC.
813 */
814 if (!ppmu->get_alternatives)
815 return 0;
816
817 flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD;
818 n = ppmu->get_alternatives(ev, flags, alt);
819
820 return n > 0;
821}
822
823/*
824 * Find an alternative event that goes on a normal PMC, if possible,
825 * and return the event code, or 0 if there is no such alternative.
826 * (Note: event code 0 is "don't count" on all machines.)
827 */
828static u64 normal_pmc_alternative(u64 ev, unsigned long flags)
829{
830 u64 alt[MAX_EVENT_ALTERNATIVES];
831 int n;
832
833 flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD);
834 n = ppmu->get_alternatives(ev, flags, alt);
835 if (!n)
836 return 0;
837 return alt[0];
838}
839
840/* Number of perf_counters counting hardware events */
841static atomic_t num_counters;
842/* Used to avoid races in calling reserve/release_pmc_hardware */
843static DEFINE_MUTEX(pmc_reserve_mutex);
844
845/*
846 * Release the PMU if this is the last perf_counter.
847 */
848static void hw_perf_counter_destroy(struct perf_counter *counter)
849{
850 if (!atomic_add_unless(&num_counters, -1, 1)) {
851 mutex_lock(&pmc_reserve_mutex);
852 if (atomic_dec_return(&num_counters) == 0)
853 release_pmc_hardware();
854 mutex_unlock(&pmc_reserve_mutex);
855 }
856}
857
858const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
859{
860 u64 ev;
861 unsigned long flags;
862 struct perf_counter *ctrs[MAX_HWCOUNTERS];
863 u64 events[MAX_HWCOUNTERS];
864 unsigned int cflags[MAX_HWCOUNTERS];
865 int n;
866 int err;
867
868 if (!ppmu)
869 return ERR_PTR(-ENXIO);
870 if (counter->attr.type != PERF_TYPE_RAW) {
871 ev = counter->attr.config;
872 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
873 return ERR_PTR(-EOPNOTSUPP);
874 ev = ppmu->generic_events[ev];
875 } else {
876 ev = counter->attr.config;
877 }
878 counter->hw.config_base = ev;
879 counter->hw.idx = 0;
880
881 /*
882 * If we are not running on a hypervisor, force the
883 * exclude_hv bit to 0 so that we don't care what
884 * the user set it to.
885 */
886 if (!firmware_has_feature(FW_FEATURE_LPAR))
887 counter->attr.exclude_hv = 0;
888
889 /*
890 * If this is a per-task counter, then we can use
891 * PM_RUN_* events interchangeably with their non RUN_*
892 * equivalents, e.g. PM_RUN_CYC instead of PM_CYC.
893 * XXX we should check if the task is an idle task.
894 */
895 flags = 0;
896 if (counter->ctx->task)
897 flags |= PPMU_ONLY_COUNT_RUN;
898
899 /*
900 * If this machine has limited counters, check whether this
901 * event could go on a limited counter.
902 */
903 if (ppmu->flags & PPMU_LIMITED_PMC5_6) {
904 if (can_go_on_limited_pmc(counter, ev, flags)) {
905 flags |= PPMU_LIMITED_PMC_OK;
906 } else if (ppmu->limited_pmc_event(ev)) {
907 /*
908 * The requested event is on a limited PMC,
909 * but we can't use a limited PMC; see if any
910 * alternative goes on a normal PMC.
911 */
912 ev = normal_pmc_alternative(ev, flags);
913 if (!ev)
914 return ERR_PTR(-EINVAL);
915 }
916 }
917
918 /*
919 * If this is in a group, check if it can go on with all the
920 * other hardware counters in the group. We assume the counter
921 * hasn't been linked into its leader's sibling list at this point.
922 */
923 n = 0;
924 if (counter->group_leader != counter) {
925 n = collect_events(counter->group_leader, ppmu->n_counter - 1,
926 ctrs, events, cflags);
927 if (n < 0)
928 return ERR_PTR(-EINVAL);
929 }
930 events[n] = ev;
931 ctrs[n] = counter;
932 cflags[n] = flags;
933 if (check_excludes(ctrs, cflags, n, 1))
934 return ERR_PTR(-EINVAL);
935 if (power_check_constraints(events, cflags, n + 1))
936 return ERR_PTR(-EINVAL);
937
938 counter->hw.config = events[n];
939 counter->hw.counter_base = cflags[n];
940 atomic64_set(&counter->hw.period_left, counter->hw.sample_period);
941
942 /*
943 * See if we need to reserve the PMU.
944 * If no counters are currently in use, then we have to take a
945 * mutex to ensure that we don't race with another task doing
946 * reserve_pmc_hardware or release_pmc_hardware.
947 */
948 err = 0;
949 if (!atomic_inc_not_zero(&num_counters)) {
950 mutex_lock(&pmc_reserve_mutex);
951 if (atomic_read(&num_counters) == 0 &&
952 reserve_pmc_hardware(perf_counter_interrupt))
953 err = -EBUSY;
954 else
955 atomic_inc(&num_counters);
956 mutex_unlock(&pmc_reserve_mutex);
957 }
958 counter->destroy = hw_perf_counter_destroy;
959
960 if (err)
961 return ERR_PTR(err);
962 return &power_pmu;
963}
964
965/*
966 * A counter has overflowed; update its count and record
967 * things if requested. Note that interrupts are hard-disabled
968 * here so there is no possibility of being interrupted.
969 */
970static void record_and_restart(struct perf_counter *counter, long val,
971 struct pt_regs *regs, int nmi)
972{
973 u64 period = counter->hw.sample_period;
974 s64 prev, delta, left;
975 int record = 0;
976 u64 addr, mmcra, sdsync;
977
978 /* we don't have to worry about interrupts here */
979 prev = atomic64_read(&counter->hw.prev_count);
980 delta = (val - prev) & 0xfffffffful;
981 atomic64_add(delta, &counter->count);
982
983 /*
984 * See if the total period for this counter has expired,
985 * and update for the next period.
986 */
987 val = 0;
988 left = atomic64_read(&counter->hw.period_left) - delta;
989 if (period) {
990 if (left <= 0) {
991 left += period;
992 if (left <= 0)
993 left = period;
994 record = 1;
995 }
996 if (left < 0x80000000L)
997 val = 0x80000000L - left;
998 }
999
1000 /*
1001 * Finally record data if requested.
1002 */
1003 if (record) {
1004 addr = 0;
1005 if (counter->attr.sample_type & PERF_SAMPLE_ADDR) {
1006 /*
1007 * The user wants a data address recorded.
1008 * If we're not doing instruction sampling,
1009 * give them the SDAR (sampled data address).
1010 * If we are doing instruction sampling, then only
1011 * give them the SDAR if it corresponds to the
1012 * instruction pointed to by SIAR; this is indicated
1013 * by the [POWER6_]MMCRA_SDSYNC bit in MMCRA.
1014 */
1015 mmcra = regs->dsisr;
1016 sdsync = (ppmu->flags & PPMU_ALT_SIPR) ?
1017 POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC;
1018 if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync))
1019 addr = mfspr(SPRN_SDAR);
1020 }
1021 if (perf_counter_overflow(counter, nmi, regs, addr)) {
1022 /*
1023 * Interrupts are coming too fast - throttle them
1024 * by setting the counter to 0, so it will be
1025 * at least 2^30 cycles until the next interrupt
1026 * (assuming each counter counts at most 2 counts
1027 * per cycle).
1028 */
1029 val = 0;
1030 left = ~0ULL >> 1;
1031 }
1032 }
1033
1034 write_pmc(counter->hw.idx, val);
1035 atomic64_set(&counter->hw.prev_count, val);
1036 atomic64_set(&counter->hw.period_left, left);
1037 perf_counter_update_userpage(counter);
1038}
1039
1040/*
1041 * Called from generic code to get the misc flags (i.e. processor mode)
1042 * for an event.
1043 */
1044unsigned long perf_misc_flags(struct pt_regs *regs)
1045{
1046 unsigned long mmcra;
1047
1048 if (TRAP(regs) != 0xf00) {
1049 /* not a PMU interrupt */
1050 return user_mode(regs) ? PERF_EVENT_MISC_USER :
1051 PERF_EVENT_MISC_KERNEL;
1052 }
1053
1054 mmcra = regs->dsisr;
1055 if (ppmu->flags & PPMU_ALT_SIPR) {
1056 if (mmcra & POWER6_MMCRA_SIHV)
1057 return PERF_EVENT_MISC_HYPERVISOR;
1058 return (mmcra & POWER6_MMCRA_SIPR) ? PERF_EVENT_MISC_USER :
1059 PERF_EVENT_MISC_KERNEL;
1060 }
1061 if (mmcra & MMCRA_SIHV)
1062 return PERF_EVENT_MISC_HYPERVISOR;
1063 return (mmcra & MMCRA_SIPR) ? PERF_EVENT_MISC_USER :
1064 PERF_EVENT_MISC_KERNEL;
1065}
1066
1067/*
1068 * Called from generic code to get the instruction pointer
1069 * for an event.
1070 */
1071unsigned long perf_instruction_pointer(struct pt_regs *regs)
1072{
1073 unsigned long mmcra;
1074 unsigned long ip;
1075 unsigned long slot;
1076
1077 if (TRAP(regs) != 0xf00)
1078 return regs->nip; /* not a PMU interrupt */
1079
1080 ip = mfspr(SPRN_SIAR);
1081 mmcra = regs->dsisr;
1082 if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) {
1083 slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT;
1084 if (slot > 1)
1085 ip += 4 * (slot - 1);
1086 }
1087 return ip;
1088}
1089
1090/*
1091 * Performance monitor interrupt stuff
1092 */
1093static void perf_counter_interrupt(struct pt_regs *regs)
1094{
1095 int i;
1096 struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
1097 struct perf_counter *counter;
1098 long val;
1099 int found = 0;
1100 int nmi;
1101
1102 if (cpuhw->n_limited)
1103 freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
1104 mfspr(SPRN_PMC6));
1105
1106 /*
1107 * Overload regs->dsisr to store MMCRA so we only need to read it once.
1108 */
1109 regs->dsisr = mfspr(SPRN_MMCRA);
1110
1111 /*
1112 * If interrupts were soft-disabled when this PMU interrupt
1113 * occurred, treat it as an NMI.
1114 */
1115 nmi = !regs->softe;
1116 if (nmi)
1117 nmi_enter();
1118 else
1119 irq_enter();
1120
1121 for (i = 0; i < cpuhw->n_counters; ++i) {
1122 counter = cpuhw->counter[i];
1123 if (!counter->hw.idx || is_limited_pmc(counter->hw.idx))
1124 continue;
1125 val = read_pmc(counter->hw.idx);
1126 if ((int)val < 0) {
1127 /* counter has overflowed */
1128 found = 1;
1129 record_and_restart(counter, val, regs, nmi);
1130 }
1131 }
1132
1133 /*
1134 * In case we didn't find and reset the counter that caused
1135 * the interrupt, scan all counters and reset any that are
1136 * negative, to avoid getting continual interrupts.
1137 * Any that we processed in the previous loop will not be negative.
1138 */
1139 if (!found) {
1140 for (i = 0; i < ppmu->n_counter; ++i) {
1141 if (is_limited_pmc(i + 1))
1142 continue;
1143 val = read_pmc(i + 1);
1144 if ((int)val < 0)
1145 write_pmc(i + 1, 0);
1146 }
1147 }
1148
1149 /*
1150 * Reset MMCR0 to its normal value. This will set PMXE and
1151 * clear FC (freeze counters) and PMAO (perf mon alert occurred)
1152 * and thus allow interrupts to occur again.
1153 * XXX might want to use MSR.PM to keep the counters frozen until
1154 * we get back out of this interrupt.
1155 */
1156 write_mmcr0(cpuhw, cpuhw->mmcr[0]);
1157
1158 if (nmi)
1159 nmi_exit();
1160 else
1161 irq_exit();
1162}
1163
1164void hw_perf_counter_setup(int cpu)
1165{
1166 struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu);
1167
1168 memset(cpuhw, 0, sizeof(*cpuhw));
1169 cpuhw->mmcr[0] = MMCR0_FC;
1170}
1171
1172extern struct power_pmu power4_pmu;
1173extern struct power_pmu ppc970_pmu;
1174extern struct power_pmu power5_pmu;
1175extern struct power_pmu power5p_pmu;
1176extern struct power_pmu power6_pmu;
1177
1178static int init_perf_counters(void)
1179{
1180 unsigned long pvr;
1181
1182 /* XXX should get this from cputable */
1183 pvr = mfspr(SPRN_PVR);
1184 switch (PVR_VER(pvr)) {
1185 case PV_POWER4:
1186 case PV_POWER4p:
1187 ppmu = &power4_pmu;
1188 break;
1189 case PV_970:
1190 case PV_970FX:
1191 case PV_970MP:
1192 ppmu = &ppc970_pmu;
1193 break;
1194 case PV_POWER5:
1195 ppmu = &power5_pmu;
1196 break;
1197 case PV_POWER5p:
1198 ppmu = &power5p_pmu;
1199 break;
1200 case 0x3e:
1201 ppmu = &power6_pmu;
1202 break;
1203 }
1204
1205 /*
1206 * Use FCHV to ignore kernel events if MSR.HV is set.
1207 */
1208 if (mfmsr() & MSR_HV)
1209 freeze_counters_kernel = MMCR0_FCHV;
1210
1211 return 0;
1212}
1213
1214arch_initcall(init_perf_counters);
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
new file mode 100644
index 000000000000..836fa118eb1e
--- /dev/null
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -0,0 +1,557 @@
1/*
2 * Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors.
3 *
4 * Copyright 2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/perf_counter.h>
13#include <asm/reg.h>
14
/*
 * Bits in event code for POWER4
 */
#define PM_PMC_SH	12	/* PMC number (1-based) for direct events */
#define PM_PMC_MSK	0xf
#define PM_UNIT_SH	8	/* TTMMUX number and setting - unit select */
#define PM_UNIT_MSK	0xf
#define PM_LOWER_SH	6
#define PM_LOWER_MSK	1
#define PM_LOWER_MSKS	0x40
#define PM_BYTE_SH	4	/* Byte number of event bus to use */
#define PM_BYTE_MSK	3
#define PM_PMCSEL_MSK	7

/*
 * Unit code values
 */
#define PM_FPU		1
#define PM_ISU1		2
#define PM_IFU		3
#define PM_IDU0		4
#define PM_ISU1_ALT	6
#define PM_ISU2		7
#define PM_IFU_ALT	8
#define PM_LSU0		9
#define PM_LSU1		0xc
#define PM_GPS		0xf

/*
 * Bits in MMCR0 for POWER4
 */
#define MMCR0_PMC1SEL_SH	8
#define MMCR0_PMC2SEL_SH	1
#define MMCR_PMCSEL_MSK		0x1f

/*
 * Bits in MMCR1 for POWER4
 */
#define MMCR1_TTM0SEL_SH		62
#define MMCR1_TTC0SEL_SH		61
#define MMCR1_TTM1SEL_SH		59
#define MMCR1_TTC1SEL_SH		58
#define MMCR1_TTM2SEL_SH		56
#define MMCR1_TTC2SEL_SH		55
#define MMCR1_TTM3SEL_SH		53
#define MMCR1_TTC3SEL_SH		52
#define MMCR1_TTMSEL_MSK		3
#define MMCR1_TD_CP_DBG0SEL_SH		50
#define MMCR1_TD_CP_DBG1SEL_SH		48
#define MMCR1_TD_CP_DBG2SEL_SH		46
#define MMCR1_TD_CP_DBG3SEL_SH		44
#define MMCR1_DEBUG0SEL_SH		43
#define MMCR1_DEBUG1SEL_SH		42
#define MMCR1_DEBUG2SEL_SH		41
#define MMCR1_DEBUG3SEL_SH		40
#define MMCR1_PMC1_ADDER_SEL_SH		39
#define MMCR1_PMC2_ADDER_SEL_SH		38
#define MMCR1_PMC6_ADDER_SEL_SH		37
#define MMCR1_PMC5_ADDER_SEL_SH		36
#define MMCR1_PMC8_ADDER_SEL_SH		35
#define MMCR1_PMC7_ADDER_SEL_SH		34
#define MMCR1_PMC3_ADDER_SEL_SH		33
#define MMCR1_PMC4_ADDER_SEL_SH		32
#define MMCR1_PMC3SEL_SH		27
#define MMCR1_PMC4SEL_SH		22
#define MMCR1_PMC5SEL_SH		17
#define MMCR1_PMC6SEL_SH		12
#define MMCR1_PMC7SEL_SH		7
#define MMCR1_PMC8SEL_SH		2	/* note bit 0 is in MMCRA for GP */
84
85static short mmcr1_adder_bits[8] = {
86 MMCR1_PMC1_ADDER_SEL_SH,
87 MMCR1_PMC2_ADDER_SEL_SH,
88 MMCR1_PMC3_ADDER_SEL_SH,
89 MMCR1_PMC4_ADDER_SEL_SH,
90 MMCR1_PMC5_ADDER_SEL_SH,
91 MMCR1_PMC6_ADDER_SEL_SH,
92 MMCR1_PMC7_ADDER_SEL_SH,
93 MMCR1_PMC8_ADDER_SEL_SH
94};
95
96/*
97 * Bits in MMCRA
98 */
99#define MMCRA_PMC8SEL0_SH 17 /* PMC8SEL bit 0 for GP */
100
101/*
102 * Layout of constraint bits:
103 * 6666555555555544444444443333333333222222222211111111110000000000
104 * 3210987654321098765432109876543210987654321098765432109876543210
105 * |[ >[ >[ >|||[ >[ >< >< >< >< ><><><><><><><><>
106 * | UC1 UC2 UC3 ||| PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8
107 * \SMPL ||\TTC3SEL
108 * |\TTC_IFU_SEL
109 * \TTM2SEL0
110 *
111 * SMPL - SAMPLE_ENABLE constraint
112 * 56: SAMPLE_ENABLE value 0x0100_0000_0000_0000
113 *
114 * UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2
115 * 55: UC1 error 0x0080_0000_0000_0000
116 * 54: FPU events needed 0x0040_0000_0000_0000
117 * 53: ISU1 events needed 0x0020_0000_0000_0000
118 * 52: IDU0|ISU2 events needed 0x0010_0000_0000_0000
119 *
120 * UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0
121 * 51: UC2 error 0x0008_0000_0000_0000
122 * 50: FPU events needed 0x0004_0000_0000_0000
123 * 49: IFU events needed 0x0002_0000_0000_0000
124 * 48: LSU0 events needed 0x0001_0000_0000_0000
125 *
126 * UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1
127 * 47: UC3 error 0x8000_0000_0000
128 * 46: LSU0 events needed 0x4000_0000_0000
129 * 45: IFU events needed 0x2000_0000_0000
130 * 44: IDU0|ISU2 events needed 0x1000_0000_0000
131 * 43: ISU1 events needed 0x0800_0000_0000
132 *
133 * TTM2SEL0
134 * 42: 0 = IDU0 events needed
135 * 1 = ISU2 events needed 0x0400_0000_0000
136 *
137 * TTC_IFU_SEL
138 * 41: 0 = IFU.U events needed
139 * 1 = IFU.L events needed 0x0200_0000_0000
140 *
141 * TTC3SEL
142 * 40: 0 = LSU1.U events needed
143 * 1 = LSU1.L events needed 0x0100_0000_0000
144 *
145 * PS1
146 * 39: PS1 error 0x0080_0000_0000
147 * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
148 *
149 * PS2
150 * 35: PS2 error 0x0008_0000_0000
151 * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
152 *
153 * B0
154 * 28-31: Byte 0 event source 0xf000_0000
155 * 1 = FPU
156 * 2 = ISU1
157 * 3 = IFU
158 * 4 = IDU0
159 * 7 = ISU2
160 * 9 = LSU0
161 * c = LSU1
162 * f = GPS
163 *
164 * B1, B2, B3
165 * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
166 *
167 * P8
168 * 15: P8 error 0x8000
169 * 14-15: Count of events needing PMC8
170 *
171 * P1..P7
172 * 0-13: Count of events needing PMC1..PMC7
173 *
174 * Note: this doesn't allow events using IFU.U to be combined with events
175 * using IFU.L, though that is feasible (using TTM0 and TTM2). However
176 * there are no listed events for IFU.L (they are debug events not
177 * verified for performance monitoring) so this shouldn't cause a
178 * problem.
179 */
180
181static struct unitinfo {
182 u64 value, mask;
183 int unit;
184 int lowerbit;
185} p4_unitinfo[16] = {
186 [PM_FPU] = { 0x44000000000000ull, 0x88000000000000ull, PM_FPU, 0 },
187 [PM_ISU1] = { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 },
188 [PM_ISU1_ALT] =
189 { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 },
190 [PM_IFU] = { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 },
191 [PM_IFU_ALT] =
192 { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 },
193 [PM_IDU0] = { 0x10100000000000ull, 0x80840000000000ull, PM_IDU0, 1 },
194 [PM_ISU2] = { 0x10140000000000ull, 0x80840000000000ull, PM_ISU2, 0 },
195 [PM_LSU0] = { 0x01400000000000ull, 0x08800000000000ull, PM_LSU0, 0 },
196 [PM_LSU1] = { 0x00000000000000ull, 0x00010000000000ull, PM_LSU1, 40 },
197 [PM_GPS] = { 0x00000000000000ull, 0x00000000000000ull, PM_GPS, 0 }
198};
199
/*
 * For each PMC (0-based index), a bitmap over PMCSEL values: bit n is
 * set if direct event n on that PMC is a marked-instruction event.
 */
static unsigned char direct_marked_event[8] = {
	/* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
	[0] = (1<<2) | (1<<3),
	/* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
	[1] = (1<<3) | (1<<5),
	/* PMC3: PM_MRK_ST_CMPL_INT */
	[2] = (1<<3),
	/* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
	[3] = (1<<4) | (1<<5),
	/* PMC5: PM_MRK_GRP_TIMEO */
	[4] = (1<<4) | (1<<5),
	/* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
	[5] = (1<<3) | (1<<4) | (1<<5),
	/* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
	[6] = (1<<4) | (1<<5),
	/* PMC8: PM_MRK_LSU_FIN */
	[7] = (1<<4),
};
211
212/*
213 * Returns 1 if event counts things relating to marked instructions
214 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
215 */
216static int p4_marked_instr_event(u64 event)
217{
218 int pmc, psel, unit, byte, bit;
219 unsigned int mask;
220
221 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
222 psel = event & PM_PMCSEL_MSK;
223 if (pmc) {
224 if (direct_marked_event[pmc - 1] & (1 << psel))
225 return 1;
226 if (psel == 0) /* add events */
227 bit = (pmc <= 4)? pmc - 1: 8 - pmc;
228 else if (psel == 6) /* decode events */
229 bit = 4;
230 else
231 return 0;
232 } else
233 bit = psel;
234
235 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
236 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
237 mask = 0;
238 switch (unit) {
239 case PM_LSU1:
240 if (event & PM_LOWER_MSKS)
241 mask = 1 << 28; /* byte 7 bit 4 */
242 else
243 mask = 6 << 24; /* byte 3 bits 1 and 2 */
244 break;
245 case PM_LSU0:
246 /* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */
247 mask = 0x083dff00;
248 }
249 return (mask >> (byte * 8 + bit)) & 1;
250}
251
252static int p4_get_constraint(u64 event, u64 *maskp, u64 *valp)
253{
254 int pmc, byte, unit, lower, sh;
255 u64 mask = 0, value = 0;
256 int grp = -1;
257
258 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
259 if (pmc) {
260 if (pmc > 8)
261 return -1;
262 sh = (pmc - 1) * 2;
263 mask |= 2 << sh;
264 value |= 1 << sh;
265 grp = ((pmc - 1) >> 1) & 1;
266 }
267 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
268 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
269 if (unit) {
270 lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK;
271
272 /*
273 * Bus events on bytes 0 and 2 can be counted
274 * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
275 */
276 if (!pmc)
277 grp = byte & 1;
278
279 if (!p4_unitinfo[unit].unit)
280 return -1;
281 mask |= p4_unitinfo[unit].mask;
282 value |= p4_unitinfo[unit].value;
283 sh = p4_unitinfo[unit].lowerbit;
284 if (sh > 1)
285 value |= (u64)lower << sh;
286 else if (lower != sh)
287 return -1;
288 unit = p4_unitinfo[unit].unit;
289
290 /* Set byte lane select field */
291 mask |= 0xfULL << (28 - 4 * byte);
292 value |= (u64)unit << (28 - 4 * byte);
293 }
294 if (grp == 0) {
295 /* increment PMC1/2/5/6 field */
296 mask |= 0x8000000000ull;
297 value |= 0x1000000000ull;
298 } else {
299 /* increment PMC3/4/7/8 field */
300 mask |= 0x800000000ull;
301 value |= 0x100000000ull;
302 }
303
304 /* Marked instruction events need sample_enable set */
305 if (p4_marked_instr_event(event)) {
306 mask |= 1ull << 56;
307 value |= 1ull << 56;
308 }
309
310 /* PMCSEL=6 decode events on byte 2 need sample_enable clear */
311 if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2)
312 mask |= 1ull << 56;
313
314 *maskp = mask;
315 *valp = value;
316 return 0;
317}
318
319static unsigned int ppc_inst_cmpl[] = {
320 0x1001, 0x4001, 0x6001, 0x7001, 0x8001
321};
322
323static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[])
324{
325 int i, j, na;
326
327 alt[0] = event;
328 na = 1;
329
330 /* 2 possibilities for PM_GRP_DISP_REJECT */
331 if (event == 0x8003 || event == 0x0224) {
332 alt[1] = event ^ (0x8003 ^ 0x0224);
333 return 2;
334 }
335
336 /* 2 possibilities for PM_ST_MISS_L1 */
337 if (event == 0x0c13 || event == 0x0c23) {
338 alt[1] = event ^ (0x0c13 ^ 0x0c23);
339 return 2;
340 }
341
342 /* several possibilities for PM_INST_CMPL */
343 for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) {
344 if (event == ppc_inst_cmpl[i]) {
345 for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j)
346 if (j != i)
347 alt[na++] = ppc_inst_cmpl[j];
348 break;
349 }
350 }
351
352 return na;
353}
354
355static int p4_compute_mmcr(u64 event[], int n_ev,
356 unsigned int hwc[], u64 mmcr[])
357{
358 u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0;
359 unsigned int pmc, unit, byte, psel, lower;
360 unsigned int ttm, grp;
361 unsigned int pmc_inuse = 0;
362 unsigned int pmc_grp_use[2];
363 unsigned char busbyte[4];
364 unsigned char unituse[16];
365 unsigned int unitlower = 0;
366 int i;
367
368 if (n_ev > 8)
369 return -1;
370
371 /* First pass to count resource use */
372 pmc_grp_use[0] = pmc_grp_use[1] = 0;
373 memset(busbyte, 0, sizeof(busbyte));
374 memset(unituse, 0, sizeof(unituse));
375 for (i = 0; i < n_ev; ++i) {
376 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
377 if (pmc) {
378 if (pmc_inuse & (1 << (pmc - 1)))
379 return -1;
380 pmc_inuse |= 1 << (pmc - 1);
381 /* count 1/2/5/6 vs 3/4/7/8 use */
382 ++pmc_grp_use[((pmc - 1) >> 1) & 1];
383 }
384 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
385 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
386 lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK;
387 if (unit) {
388 if (!pmc)
389 ++pmc_grp_use[byte & 1];
390 if (unit == 6 || unit == 8)
391 /* map alt ISU1/IFU codes: 6->2, 8->3 */
392 unit = (unit >> 1) - 1;
393 if (busbyte[byte] && busbyte[byte] != unit)
394 return -1;
395 busbyte[byte] = unit;
396 lower <<= unit;
397 if (unituse[unit] && lower != (unitlower & lower))
398 return -1;
399 unituse[unit] = 1;
400 unitlower |= lower;
401 }
402 }
403 if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
404 return -1;
405
406 /*
407 * Assign resources and set multiplexer selects.
408 *
409 * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,10 on TTM2.
410 * Each TTMx can only select one unit, but since
411 * units 2 and 6 are both ISU1, and 3 and 8 are both IFU,
412 * we have some choices.
413 */
414 if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) {
415 unituse[6] = 1; /* Move 2 to 6 */
416 unituse[2] = 0;
417 }
418 if (unituse[3] & (unituse[1] | unituse[2])) {
419 unituse[8] = 1; /* Move 3 to 8 */
420 unituse[3] = 0;
421 unitlower = (unitlower & ~8) | ((unitlower & 8) << 5);
422 }
423 /* Check only one unit per TTMx */
424 if (unituse[1] + unituse[2] + unituse[3] > 1 ||
425 unituse[4] + unituse[6] + unituse[7] > 1 ||
426 unituse[8] + unituse[9] > 1 ||
427 (unituse[5] | unituse[10] | unituse[11] |
428 unituse[13] | unituse[14]))
429 return -1;
430
431 /* Set TTMxSEL fields. Note, units 1-3 => TTM0SEL codes 0-2 */
432 mmcr1 |= (u64)(unituse[3] * 2 + unituse[2]) << MMCR1_TTM0SEL_SH;
433 mmcr1 |= (u64)(unituse[7] * 3 + unituse[6] * 2) << MMCR1_TTM1SEL_SH;
434 mmcr1 |= (u64)unituse[9] << MMCR1_TTM2SEL_SH;
435
436 /* Set TTCxSEL fields. */
437 if (unitlower & 0xe)
438 mmcr1 |= 1ull << MMCR1_TTC0SEL_SH;
439 if (unitlower & 0xf0)
440 mmcr1 |= 1ull << MMCR1_TTC1SEL_SH;
441 if (unitlower & 0xf00)
442 mmcr1 |= 1ull << MMCR1_TTC2SEL_SH;
443 if (unitlower & 0x7000)
444 mmcr1 |= 1ull << MMCR1_TTC3SEL_SH;
445
446 /* Set byte lane select fields. */
447 for (byte = 0; byte < 4; ++byte) {
448 unit = busbyte[byte];
449 if (!unit)
450 continue;
451 if (unit == 0xf) {
452 /* special case for GPS */
453 mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte);
454 } else {
455 if (!unituse[unit])
456 ttm = unit - 1; /* 2->1, 3->2 */
457 else
458 ttm = unit >> 2;
459 mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2*byte);
460 }
461 }
462
463 /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
464 for (i = 0; i < n_ev; ++i) {
465 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
466 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
467 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
468 psel = event[i] & PM_PMCSEL_MSK;
469 if (!pmc) {
470 /* Bus event or 00xxx direct event (off or cycles) */
471 if (unit)
472 psel |= 0x10 | ((byte & 2) << 2);
473 for (pmc = 0; pmc < 8; ++pmc) {
474 if (pmc_inuse & (1 << pmc))
475 continue;
476 grp = (pmc >> 1) & 1;
477 if (unit) {
478 if (grp == (byte & 1))
479 break;
480 } else if (pmc_grp_use[grp] < 4) {
481 ++pmc_grp_use[grp];
482 break;
483 }
484 }
485 pmc_inuse |= 1 << pmc;
486 } else {
487 /* Direct event */
488 --pmc;
489 if (psel == 0 && (byte & 2))
490 /* add events on higher-numbered bus */
491 mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
492 else if (psel == 6 && byte == 3)
493 /* seem to need to set sample_enable here */
494 mmcra |= MMCRA_SAMPLE_ENABLE;
495 psel |= 8;
496 }
497 if (pmc <= 1)
498 mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc);
499 else
500 mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
501 if (pmc == 7) /* PMC8 */
502 mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH;
503 hwc[i] = pmc;
504 if (p4_marked_instr_event(event[i]))
505 mmcra |= MMCRA_SAMPLE_ENABLE;
506 }
507
508 if (pmc_inuse & 1)
509 mmcr0 |= MMCR0_PMC1CE;
510 if (pmc_inuse & 0xfe)
511 mmcr0 |= MMCR0_PMCjCE;
512
513 mmcra |= 0x2000; /* mark only one IOP per PPC instruction */
514
515 /* Return MMCRx values */
516 mmcr[0] = mmcr0;
517 mmcr[1] = mmcr1;
518 mmcr[2] = mmcra;
519 return 0;
520}
521
522static void p4_disable_pmc(unsigned int pmc, u64 mmcr[])
523{
524 /*
525 * Setting the PMCxSEL field to 0 disables PMC x.
526 * (Note that pmc is 0-based here, not 1-based.)
527 */
528 if (pmc <= 1) {
529 mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc));
530 } else {
531 mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)));
532 if (pmc == 7)
533 mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH);
534 }
535}
536
537static int p4_generic_events[] = {
538 [PERF_COUNT_CPU_CYCLES] = 7,
539 [PERF_COUNT_INSTRUCTIONS] = 0x1001,
540 [PERF_COUNT_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */
541 [PERF_COUNT_CACHE_MISSES] = 0x3c10, /* PM_LD_MISS_L1 */
542 [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x330, /* PM_BR_ISSUED */
543 [PERF_COUNT_BRANCH_MISSES] = 0x331, /* PM_BR_MPRED_CR */
544};
545
546struct power_pmu power4_pmu = {
547 .n_counter = 8,
548 .max_alternatives = 5,
549 .add_fields = 0x0000001100005555ull,
550 .test_adder = 0x0011083300000000ull,
551 .compute_mmcr = p4_compute_mmcr,
552 .get_constraint = p4_get_constraint,
553 .get_alternatives = p4_get_alternatives,
554 .disable_pmc = p4_disable_pmc,
555 .n_generic = ARRAY_SIZE(p4_generic_events),
556 .generic_events = p4_generic_events,
557};
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
new file mode 100644
index 000000000000..8471e3c2e465
--- /dev/null
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -0,0 +1,630 @@
1/*
2 * Performance counter support for POWER5+/++ (not POWER5) processors.
3 *
4 * Copyright 2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/perf_counter.h>
13#include <asm/reg.h>
14
15/*
16 * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3)
17 */
18#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
19#define PM_PMC_MSK 0xf
20#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
21#define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */
22#define PM_UNIT_MSK 0xf
23#define PM_BYTE_SH 12 /* Byte number of event bus to use */
24#define PM_BYTE_MSK 7
25#define PM_GRS_SH 8 /* Storage subsystem mux select */
26#define PM_GRS_MSK 7
27#define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */
28#define PM_PMCSEL_MSK 0x7f
29
30/* Values in PM_UNIT field */
31#define PM_FPU 0
32#define PM_ISU0 1
33#define PM_IFU 2
34#define PM_ISU1 3
35#define PM_IDU 4
36#define PM_ISU0_ALT 6
37#define PM_GRS 7
38#define PM_LSU0 8
39#define PM_LSU1 0xc
40#define PM_LASTUNIT 0xc
41
42/*
43 * Bits in MMCR1 for POWER5+
44 */
45#define MMCR1_TTM0SEL_SH 62
46#define MMCR1_TTM1SEL_SH 60
47#define MMCR1_TTM2SEL_SH 58
48#define MMCR1_TTM3SEL_SH 56
49#define MMCR1_TTMSEL_MSK 3
50#define MMCR1_TD_CP_DBG0SEL_SH 54
51#define MMCR1_TD_CP_DBG1SEL_SH 52
52#define MMCR1_TD_CP_DBG2SEL_SH 50
53#define MMCR1_TD_CP_DBG3SEL_SH 48
54#define MMCR1_GRS_L2SEL_SH 46
55#define MMCR1_GRS_L2SEL_MSK 3
56#define MMCR1_GRS_L3SEL_SH 44
57#define MMCR1_GRS_L3SEL_MSK 3
58#define MMCR1_GRS_MCSEL_SH 41
59#define MMCR1_GRS_MCSEL_MSK 7
60#define MMCR1_GRS_FABSEL_SH 39
61#define MMCR1_GRS_FABSEL_MSK 3
62#define MMCR1_PMC1_ADDER_SEL_SH 35
63#define MMCR1_PMC2_ADDER_SEL_SH 34
64#define MMCR1_PMC3_ADDER_SEL_SH 33
65#define MMCR1_PMC4_ADDER_SEL_SH 32
66#define MMCR1_PMC1SEL_SH 25
67#define MMCR1_PMC2SEL_SH 17
68#define MMCR1_PMC3SEL_SH 9
69#define MMCR1_PMC4SEL_SH 1
70#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
71#define MMCR1_PMCSEL_MSK 0x7f
72
73/*
74 * Bits in MMCRA
75 */
76
77/*
78 * Layout of constraint bits:
79 * 6666555555555544444444443333333333222222222211111111110000000000
80 * 3210987654321098765432109876543210987654321098765432109876543210
81 * [ ><><>< ><> <><>[ > < >< >< >< ><><><><><><>
82 * NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P6P5P4P3P2P1
83 *
84 * NC - number of counters
85 * 51: NC error 0x0008_0000_0000_0000
86 * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000
87 *
88 * G0..G3 - GRS mux constraints
89 * 46-47: GRS_L2SEL value
90 * 44-45: GRS_L3SEL value
91 * 41-43: GRS_MCSEL value
92 * 39-40: GRS_FABSEL value
93 * Note that these match up with their bit positions in MMCR1
94 *
95 * T0 - TTM0 constraint
96 * 36-37: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0x30_0000_0000
97 *
98 * T1 - TTM1 constraint
99 * 34-35: TTM1SEL value (0=IDU, 3=GRS) 0x0c_0000_0000
100 *
101 * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS
102 * 33: UC3 error 0x02_0000_0000
103 * 32: FPU|IFU|ISU1 events needed 0x01_0000_0000
104 * 31: ISU0 events needed 0x01_8000_0000
105 * 30: IDU|GRS events needed 0x00_4000_0000
106 *
107 * B0
108 * 24-27: Byte 0 event source 0x0f00_0000
109 * Encoding as for the event code
110 *
111 * B1, B2, B3
112 * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources
113 *
114 * P6
115 * 11: P6 error 0x800
116 * 10-11: Count of events needing PMC6
117 *
118 * P1..P5
119 * 0-9: Count of events needing PMC1..PMC5
120 */
121
122static const int grsel_shift[8] = {
123 MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
124 MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
125 MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
126};
127
128/* Masks and values for using events from the various units */
129static u64 unit_cons[PM_LASTUNIT+1][2] = {
130 [PM_FPU] = { 0x3200000000ull, 0x0100000000ull },
131 [PM_ISU0] = { 0x0200000000ull, 0x0080000000ull },
132 [PM_ISU1] = { 0x3200000000ull, 0x3100000000ull },
133 [PM_IFU] = { 0x3200000000ull, 0x2100000000ull },
134 [PM_IDU] = { 0x0e00000000ull, 0x0040000000ull },
135 [PM_GRS] = { 0x0e00000000ull, 0x0c40000000ull },
136};
137
138static int power5p_get_constraint(u64 event, u64 *maskp, u64 *valp)
139{
140 int pmc, byte, unit, sh;
141 int bit, fmask;
142 u64 mask = 0, value = 0;
143
144 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
145 if (pmc) {
146 if (pmc > 6)
147 return -1;
148 sh = (pmc - 1) * 2;
149 mask |= 2 << sh;
150 value |= 1 << sh;
151 if (pmc >= 5 && !(event == 0x500009 || event == 0x600005))
152 return -1;
153 }
154 if (event & PM_BUSEVENT_MSK) {
155 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
156 if (unit > PM_LASTUNIT)
157 return -1;
158 if (unit == PM_ISU0_ALT)
159 unit = PM_ISU0;
160 mask |= unit_cons[unit][0];
161 value |= unit_cons[unit][1];
162 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
163 if (byte >= 4) {
164 if (unit != PM_LSU1)
165 return -1;
166 /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
167 ++unit;
168 byte &= 3;
169 }
170 if (unit == PM_GRS) {
171 bit = event & 7;
172 fmask = (bit == 6)? 7: 3;
173 sh = grsel_shift[bit];
174 mask |= (u64)fmask << sh;
175 value |= (u64)((event >> PM_GRS_SH) & fmask) << sh;
176 }
177 /* Set byte lane select field */
178 mask |= 0xfULL << (24 - 4 * byte);
179 value |= (u64)unit << (24 - 4 * byte);
180 }
181 if (pmc < 5) {
182 /* need a counter from PMC1-4 set */
183 mask |= 0x8000000000000ull;
184 value |= 0x1000000000000ull;
185 }
186 *maskp = mask;
187 *valp = value;
188 return 0;
189}
190
191static int power5p_limited_pmc_event(u64 event)
192{
193 int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
194
195 return pmc == 5 || pmc == 6;
196}
197
#define MAX_ALT	3	/* at most 3 alternatives for any event */

/*
 * Groups of equivalent event codes.  Unused trailing slots are 0.
 * Rows must stay sorted by their first code: find_alternative()
 * stops scanning once the code it wants is below a row's first entry.
 */
static const unsigned int event_alternatives[][MAX_ALT] = {
	{ 0x100c0,  0x40001f, 0 },		/* PM_GCT_FULL_CYC */
	{ 0x120e4,  0x400002, 0 },		/* PM_GRP_DISP_REJECT */
	{ 0x230e2,  0x323087, 0 },		/* PM_BR_PRED_CR */
	{ 0x230e3,  0x223087, 0x3230a0 },	/* PM_BR_PRED_TA */
	{ 0x410c7,  0x441084, 0 },		/* PM_THRD_L2MISS_BOTH_CYC */
	{ 0x800c4,  0xc20e0,  0 },		/* PM_DTLB_MISS */
	{ 0xc50c6,  0xc60e0,  0 },		/* PM_MRK_DTLB_MISS */
	{ 0x100005, 0x600005, 0 },		/* PM_RUN_CYC */
	{ 0x100009, 0x200009, 0 },		/* PM_INST_CMPL */
	{ 0x200015, 0x300015, 0 },		/* PM_LSU_LMQ_SRQ_EMPTY_CYC */
	{ 0x300009, 0x400009, 0 },		/* PM_INST_DISP */
};
213
214/*
215 * Scan the alternatives table for a match and return the
216 * index into the alternatives table if found, else -1.
217 */
218static int find_alternative(unsigned int event)
219{
220 int i, j;
221
222 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
223 if (event < event_alternatives[i][0])
224 break;
225 for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
226 if (event == event_alternatives[i][j])
227 return i;
228 }
229 return -1;
230}
231
/*
 * Byte-decode PMCSEL values, one row per PMC (row 0 is PMC1).
 * Entries in the same column of two different rows are treated as
 * alternatives of each other by find_alternative_bdecode().
 */
static const unsigned char bytedecode_alternatives[4][4] = {
	[0] = { 0x21, 0x23, 0x25, 0x27 },	/* PMC 1 */
	[1] = { 0x07, 0x17, 0x0e, 0x1e },	/* PMC 2 */
	[2] = { 0x20, 0x22, 0x24, 0x26 },	/* PMC 3 */
	[3] = { 0x07, 0x17, 0x0e, 0x1e },	/* PMC 4 */
};
238
239/*
240 * Some direct events for decodes of event bus byte 3 have alternative
241 * PMCSEL values on other counters. This returns the alternative
242 * event code for those that do, or -1 otherwise. This also handles
243 * alternative PCMSEL values for add events.
244 */
245static s64 find_alternative_bdecode(u64 event)
246{
247 int pmc, altpmc, pp, j;
248
249 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
250 if (pmc == 0 || pmc > 4)
251 return -1;
252 altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */
253 pp = event & PM_PMCSEL_MSK;
254 for (j = 0; j < 4; ++j) {
255 if (bytedecode_alternatives[pmc - 1][j] == pp) {
256 return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
257 (altpmc << PM_PMC_SH) |
258 bytedecode_alternatives[altpmc - 1][j];
259 }
260 }
261
262 /* new decode alternatives for power5+ */
263 if (pmc == 1 && (pp == 0x0d || pp == 0x0e))
264 return event + (2 << PM_PMC_SH) + (0x2e - 0x0d);
265 if (pmc == 3 && (pp == 0x2e || pp == 0x2f))
266 return event - (2 << PM_PMC_SH) - (0x2e - 0x0d);
267
268 /* alternative add event encodings */
269 if (pp == 0x10 || pp == 0x28)
270 return ((event ^ (0x10 ^ 0x28)) & ~PM_PMC_MSKS) |
271 (altpmc << PM_PMC_SH);
272
273 return -1;
274}
275
276static int power5p_get_alternatives(u64 event, unsigned int flags, u64 alt[])
277{
278 int i, j, nalt = 1;
279 int nlim;
280 s64 ae;
281
282 alt[0] = event;
283 nalt = 1;
284 nlim = power5p_limited_pmc_event(event);
285 i = find_alternative(event);
286 if (i >= 0) {
287 for (j = 0; j < MAX_ALT; ++j) {
288 ae = event_alternatives[i][j];
289 if (ae && ae != event)
290 alt[nalt++] = ae;
291 nlim += power5p_limited_pmc_event(ae);
292 }
293 } else {
294 ae = find_alternative_bdecode(event);
295 if (ae > 0)
296 alt[nalt++] = ae;
297 }
298
299 if (flags & PPMU_ONLY_COUNT_RUN) {
300 /*
301 * We're only counting in RUN state,
302 * so PM_CYC is equivalent to PM_RUN_CYC
303 * and PM_INST_CMPL === PM_RUN_INST_CMPL.
304 * This doesn't include alternatives that don't provide
305 * any extra flexibility in assigning PMCs (e.g.
306 * 0x100005 for PM_RUN_CYC vs. 0xf for PM_CYC).
307 * Note that even with these additional alternatives
308 * we never end up with more than 3 alternatives for any event.
309 */
310 j = nalt;
311 for (i = 0; i < nalt; ++i) {
312 switch (alt[i]) {
313 case 0xf: /* PM_CYC */
314 alt[j++] = 0x600005; /* PM_RUN_CYC */
315 ++nlim;
316 break;
317 case 0x600005: /* PM_RUN_CYC */
318 alt[j++] = 0xf;
319 break;
320 case 0x100009: /* PM_INST_CMPL */
321 alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */
322 ++nlim;
323 break;
324 case 0x500009: /* PM_RUN_INST_CMPL */
325 alt[j++] = 0x100009; /* PM_INST_CMPL */
326 alt[j++] = 0x200009;
327 break;
328 }
329 }
330 nalt = j;
331 }
332
333 if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
334 /* remove the limited PMC events */
335 j = 0;
336 for (i = 0; i < nalt; ++i) {
337 if (!power5p_limited_pmc_event(alt[i])) {
338 alt[j] = alt[i];
339 ++j;
340 }
341 }
342 nalt = j;
343 } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
344 /* remove all but the limited PMC events */
345 j = 0;
346 for (i = 0; i < nalt; ++i) {
347 if (power5p_limited_pmc_event(alt[i])) {
348 alt[j] = alt[i];
349 ++j;
350 }
351 }
352 nalt = j;
353 }
354
355 return nalt;
356}
357
/*
 * Map of which direct events on which PMCs are marked instruction events.
 * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
 * Bit 0 is set if it is marked for all PMCs.
 * The 0x80 bit indicates a byte decode PMCSEL value.
 * PMCSEL values not listed here have no bits set.
 */
static unsigned char direct_event_is_marked[0x28] = {
	[0x01] = 0x1f,	/* PM_IOPS_CMPL */
	[0x02] = 0x2,	/* PM_MRK_GRP_DISP */
	[0x03] = 0xe,	/* PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
	[0x05] = 0x1c,	/* PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
	[0x06] = 0x80,
	[0x07] = 0x80,
	[0x0b] = 0x18,	/* PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
	[0x0d] = 0x80,
	[0x0e] = 0x80,
	[0x11] = 0x14,	/* PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
	[0x13] = 0x10,	/* PM_MRK_GRP_CMPL */
	[0x14] = 0x1f,	/* PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
	[0x15] = 0x2,	/* PM_MRK_GRP_ISSUED */
	[0x16] = 0x80,
	[0x17] = 0x80,
	[0x1d] = 0x80,
	[0x1e] = 0x80,
	[0x20] = 0x80,
	[0x21] = 0x80,
	[0x22] = 0x80,
	[0x23] = 0x80,
	[0x24] = 0x80,
	[0x25] = 0x80,
	[0x26] = 0x80,
	[0x27] = 0x80,
};
400
401/*
402 * Returns 1 if event counts things relating to marked instructions
403 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
404 */
405static int power5p_marked_instr_event(u64 event)
406{
407 int pmc, psel;
408 int bit, byte, unit;
409 u32 mask;
410
411 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
412 psel = event & PM_PMCSEL_MSK;
413 if (pmc >= 5)
414 return 0;
415
416 bit = -1;
417 if (psel < sizeof(direct_event_is_marked)) {
418 if (direct_event_is_marked[psel] & (1 << pmc))
419 return 1;
420 if (direct_event_is_marked[psel] & 0x80)
421 bit = 4;
422 else if (psel == 0x08)
423 bit = pmc - 1;
424 else if (psel == 0x10)
425 bit = 4 - pmc;
426 else if (psel == 0x1b && (pmc == 1 || pmc == 3))
427 bit = 4;
428 } else if ((psel & 0x48) == 0x40) {
429 bit = psel & 7;
430 } else if (psel == 0x28) {
431 bit = pmc - 1;
432 } else if (pmc == 3 && (psel == 0x2e || psel == 0x2f)) {
433 bit = 4;
434 }
435
436 if (!(event & PM_BUSEVENT_MSK) || bit == -1)
437 return 0;
438
439 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
440 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
441 if (unit == PM_LSU0) {
442 /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */
443 mask = 0x5dff00;
444 } else if (unit == PM_LSU1 && byte >= 4) {
445 byte -= 4;
446 /* byte 5 bits 6-7, byte 6 bits 0,4, byte 7 bits 0-4,6 */
447 mask = 0x5f11c000;
448 } else
449 return 0;
450
451 return (mask >> (byte * 8 + bit)) & 1;
452}
453
454static int power5p_compute_mmcr(u64 event[], int n_ev,
455 unsigned int hwc[], u64 mmcr[])
456{
457 u64 mmcr1 = 0;
458 u64 mmcra = 0;
459 unsigned int pmc, unit, byte, psel;
460 unsigned int ttm;
461 int i, isbus, bit, grsel;
462 unsigned int pmc_inuse = 0;
463 unsigned char busbyte[4];
464 unsigned char unituse[16];
465 int ttmuse;
466
467 if (n_ev > 6)
468 return -1;
469
470 /* First pass to count resource use */
471 memset(busbyte, 0, sizeof(busbyte));
472 memset(unituse, 0, sizeof(unituse));
473 for (i = 0; i < n_ev; ++i) {
474 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
475 if (pmc) {
476 if (pmc > 6)
477 return -1;
478 if (pmc_inuse & (1 << (pmc - 1)))
479 return -1;
480 pmc_inuse |= 1 << (pmc - 1);
481 }
482 if (event[i] & PM_BUSEVENT_MSK) {
483 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
484 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
485 if (unit > PM_LASTUNIT)
486 return -1;
487 if (unit == PM_ISU0_ALT)
488 unit = PM_ISU0;
489 if (byte >= 4) {
490 if (unit != PM_LSU1)
491 return -1;
492 ++unit;
493 byte &= 3;
494 }
495 if (busbyte[byte] && busbyte[byte] != unit)
496 return -1;
497 busbyte[byte] = unit;
498 unituse[unit] = 1;
499 }
500 }
501
502 /*
503 * Assign resources and set multiplexer selects.
504 *
505 * PM_ISU0 can go either on TTM0 or TTM1, but that's the only
506 * choice we have to deal with.
507 */
508 if (unituse[PM_ISU0] &
509 (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
510 unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */
511 unituse[PM_ISU0] = 0;
512 }
513 /* Set TTM[01]SEL fields. */
514 ttmuse = 0;
515 for (i = PM_FPU; i <= PM_ISU1; ++i) {
516 if (!unituse[i])
517 continue;
518 if (ttmuse++)
519 return -1;
520 mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH;
521 }
522 ttmuse = 0;
523 for (; i <= PM_GRS; ++i) {
524 if (!unituse[i])
525 continue;
526 if (ttmuse++)
527 return -1;
528 mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH;
529 }
530 if (ttmuse > 1)
531 return -1;
532
533 /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
534 for (byte = 0; byte < 4; ++byte) {
535 unit = busbyte[byte];
536 if (!unit)
537 continue;
538 if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
539 /* get ISU0 through TTM1 rather than TTM0 */
540 unit = PM_ISU0_ALT;
541 } else if (unit == PM_LSU1 + 1) {
542 /* select lower word of LSU1 for this byte */
543 mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
544 }
545 ttm = unit >> 2;
546 mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
547 }
548
549 /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
550 for (i = 0; i < n_ev; ++i) {
551 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
552 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
553 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
554 psel = event[i] & PM_PMCSEL_MSK;
555 isbus = event[i] & PM_BUSEVENT_MSK;
556 if (!pmc) {
557 /* Bus event or any-PMC direct event */
558 for (pmc = 0; pmc < 4; ++pmc) {
559 if (!(pmc_inuse & (1 << pmc)))
560 break;
561 }
562 if (pmc >= 4)
563 return -1;
564 pmc_inuse |= 1 << pmc;
565 } else if (pmc <= 4) {
566 /* Direct event */
567 --pmc;
568 if (isbus && (byte & 2) &&
569 (psel == 8 || psel == 0x10 || psel == 0x28))
570 /* add events on higher-numbered bus */
571 mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
572 } else {
573 /* Instructions or run cycles on PMC5/6 */
574 --pmc;
575 }
576 if (isbus && unit == PM_GRS) {
577 bit = psel & 7;
578 grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
579 mmcr1 |= (u64)grsel << grsel_shift[bit];
580 }
581 if (power5p_marked_instr_event(event[i]))
582 mmcra |= MMCRA_SAMPLE_ENABLE;
583 if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1))
584 /* select alternate byte lane */
585 psel |= 0x10;
586 if (pmc <= 3)
587 mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
588 hwc[i] = pmc;
589 }
590
591 /* Return MMCRx values */
592 mmcr[0] = 0;
593 if (pmc_inuse & 1)
594 mmcr[0] = MMCR0_PMC1CE;
595 if (pmc_inuse & 0x3e)
596 mmcr[0] |= MMCR0_PMCjCE;
597 mmcr[1] = mmcr1;
598 mmcr[2] = mmcra;
599 return 0;
600}
601
602static void power5p_disable_pmc(unsigned int pmc, u64 mmcr[])
603{
604 if (pmc <= 3)
605 mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
606}
607
608static int power5p_generic_events[] = {
609 [PERF_COUNT_CPU_CYCLES] = 0xf,
610 [PERF_COUNT_INSTRUCTIONS] = 0x100009,
611 [PERF_COUNT_CACHE_REFERENCES] = 0x1c10a8, /* LD_REF_L1 */
612 [PERF_COUNT_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */
613 [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */
614 [PERF_COUNT_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */
615};
616
617struct power_pmu power5p_pmu = {
618 .n_counter = 6,
619 .max_alternatives = MAX_ALT,
620 .add_fields = 0x7000000000055ull,
621 .test_adder = 0x3000040000000ull,
622 .compute_mmcr = power5p_compute_mmcr,
623 .get_constraint = power5p_get_constraint,
624 .get_alternatives = power5p_get_alternatives,
625 .disable_pmc = power5p_disable_pmc,
626 .n_generic = ARRAY_SIZE(power5p_generic_events),
627 .generic_events = power5p_generic_events,
628 .flags = PPMU_LIMITED_PMC5_6,
629 .limited_pmc_event = power5p_limited_pmc_event,
630};
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
new file mode 100644
index 000000000000..1b44c5fca189
--- /dev/null
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -0,0 +1,570 @@
1/*
2 * Performance counter support for POWER5 (not POWER5++) processors.
3 *
4 * Copyright 2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/perf_counter.h>
13#include <asm/reg.h>
14
15/*
16 * Bits in event code for POWER5 (not POWER5++)
17 */
18#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
19#define PM_PMC_MSK 0xf
20#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
21#define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */
22#define PM_UNIT_MSK 0xf
23#define PM_BYTE_SH 12 /* Byte number of event bus to use */
24#define PM_BYTE_MSK 7
25#define PM_GRS_SH 8 /* Storage subsystem mux select */
26#define PM_GRS_MSK 7
27#define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */
28#define PM_PMCSEL_MSK 0x7f
29
30/* Values in PM_UNIT field */
31#define PM_FPU 0
32#define PM_ISU0 1
33#define PM_IFU 2
34#define PM_ISU1 3
35#define PM_IDU 4
36#define PM_ISU0_ALT 6
37#define PM_GRS 7
38#define PM_LSU0 8
39#define PM_LSU1 0xc
40#define PM_LASTUNIT 0xc
41
42/*
43 * Bits in MMCR1 for POWER5
44 */
45#define MMCR1_TTM0SEL_SH 62
46#define MMCR1_TTM1SEL_SH 60
47#define MMCR1_TTM2SEL_SH 58
48#define MMCR1_TTM3SEL_SH 56
49#define MMCR1_TTMSEL_MSK 3
50#define MMCR1_TD_CP_DBG0SEL_SH 54
51#define MMCR1_TD_CP_DBG1SEL_SH 52
52#define MMCR1_TD_CP_DBG2SEL_SH 50
53#define MMCR1_TD_CP_DBG3SEL_SH 48
54#define MMCR1_GRS_L2SEL_SH 46
55#define MMCR1_GRS_L2SEL_MSK 3
56#define MMCR1_GRS_L3SEL_SH 44
57#define MMCR1_GRS_L3SEL_MSK 3
58#define MMCR1_GRS_MCSEL_SH 41
59#define MMCR1_GRS_MCSEL_MSK 7
60#define MMCR1_GRS_FABSEL_SH 39
61#define MMCR1_GRS_FABSEL_MSK 3
62#define MMCR1_PMC1_ADDER_SEL_SH 35
63#define MMCR1_PMC2_ADDER_SEL_SH 34
64#define MMCR1_PMC3_ADDER_SEL_SH 33
65#define MMCR1_PMC4_ADDER_SEL_SH 32
66#define MMCR1_PMC1SEL_SH 25
67#define MMCR1_PMC2SEL_SH 17
68#define MMCR1_PMC3SEL_SH 9
69#define MMCR1_PMC4SEL_SH 1
70#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
71#define MMCR1_PMCSEL_MSK 0x7f
72
73/*
74 * Bits in MMCRA
75 */
76
77/*
78 * Layout of constraint bits:
79 * 6666555555555544444444443333333333222222222211111111110000000000
80 * 3210987654321098765432109876543210987654321098765432109876543210
81 * <><>[ ><><>< ><> [ >[ >[ >< >< >< >< ><><><><><><>
82 * T0T1 NC G0G1G2 G3 UC PS1PS2 B0 B1 B2 B3 P6P5P4P3P2P1
83 *
84 * T0 - TTM0 constraint
85 * 54-55: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0xc0_0000_0000_0000
86 *
87 * T1 - TTM1 constraint
88 * 52-53: TTM1SEL value (0=IDU, 3=GRS) 0x30_0000_0000_0000
89 *
90 * NC - number of counters
91 * 51: NC error 0x0008_0000_0000_0000
92 * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000
93 *
94 * G0..G3 - GRS mux constraints
95 * 46-47: GRS_L2SEL value
96 * 44-45: GRS_L3SEL value
97 * 41-43: GRS_MCSEL value
98 * 39-40: GRS_FABSEL value
99 * Note that these match up with their bit positions in MMCR1
100 *
101 * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS
102 * 37: UC3 error 0x20_0000_0000
103 * 36: FPU|IFU|ISU1 events needed 0x10_0000_0000
104 * 35: ISU0 events needed 0x08_0000_0000
105 * 34: IDU|GRS events needed 0x04_0000_0000
106 *
107 * PS1
108 * 33: PS1 error 0x2_0000_0000
109 * 31-32: count of events needing PMC1/2 0x1_8000_0000
110 *
111 * PS2
112 * 30: PS2 error 0x4000_0000
113 * 28-29: count of events needing PMC3/4 0x3000_0000
114 *
115 * B0
116 * 24-27: Byte 0 event source 0x0f00_0000
117 * Encoding as for the event code
118 *
119 * B1, B2, B3
120 * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources
121 *
122 * P1..P6
123 * 0-11: Count of events needing PMC1..PMC6
124 */
125
126static const int grsel_shift[8] = {
127 MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
128 MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
129 MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
130};
131
132/* Masks and values for using events from the various units */
133static u64 unit_cons[PM_LASTUNIT+1][2] = {
134 [PM_FPU] = { 0xc0002000000000ull, 0x00001000000000ull },
135 [PM_ISU0] = { 0x00002000000000ull, 0x00000800000000ull },
136 [PM_ISU1] = { 0xc0002000000000ull, 0xc0001000000000ull },
137 [PM_IFU] = { 0xc0002000000000ull, 0x80001000000000ull },
138 [PM_IDU] = { 0x30002000000000ull, 0x00000400000000ull },
139 [PM_GRS] = { 0x30002000000000ull, 0x30000400000000ull },
140};
141
142static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp)
143{
144 int pmc, byte, unit, sh;
145 int bit, fmask;
146 u64 mask = 0, value = 0;
147 int grp = -1;
148
149 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
150 if (pmc) {
151 if (pmc > 6)
152 return -1;
153 sh = (pmc - 1) * 2;
154 mask |= 2 << sh;
155 value |= 1 << sh;
156 if (pmc <= 4)
157 grp = (pmc - 1) >> 1;
158 else if (event != 0x500009 && event != 0x600005)
159 return -1;
160 }
161 if (event & PM_BUSEVENT_MSK) {
162 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
163 if (unit > PM_LASTUNIT)
164 return -1;
165 if (unit == PM_ISU0_ALT)
166 unit = PM_ISU0;
167 mask |= unit_cons[unit][0];
168 value |= unit_cons[unit][1];
169 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
170 if (byte >= 4) {
171 if (unit != PM_LSU1)
172 return -1;
173 /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
174 ++unit;
175 byte &= 3;
176 }
177 if (unit == PM_GRS) {
178 bit = event & 7;
179 fmask = (bit == 6)? 7: 3;
180 sh = grsel_shift[bit];
181 mask |= (u64)fmask << sh;
182 value |= (u64)((event >> PM_GRS_SH) & fmask) << sh;
183 }
184 /*
185 * Bus events on bytes 0 and 2 can be counted
186 * on PMC1/2; bytes 1 and 3 on PMC3/4.
187 */
188 if (!pmc)
189 grp = byte & 1;
190 /* Set byte lane select field */
191 mask |= 0xfULL << (24 - 4 * byte);
192 value |= (u64)unit << (24 - 4 * byte);
193 }
194 if (grp == 0) {
195 /* increment PMC1/2 field */
196 mask |= 0x200000000ull;
197 value |= 0x080000000ull;
198 } else if (grp == 1) {
199 /* increment PMC3/4 field */
200 mask |= 0x40000000ull;
201 value |= 0x10000000ull;
202 }
203 if (pmc < 5) {
204 /* need a counter from PMC1-4 set */
205 mask |= 0x8000000000000ull;
206 value |= 0x1000000000000ull;
207 }
208 *maskp = mask;
209 *valp = value;
210 return 0;
211}
212
#define MAX_ALT	3	/* at most 3 alternatives for any event */

/*
 * Groups of equivalent event codes.  Unused trailing slots are 0.
 * Rows must stay sorted by their first code: find_alternative()
 * stops scanning once the code it wants is below a row's first entry.
 */
static const unsigned int event_alternatives[][MAX_ALT] = {
	{ 0x120e4,  0x400002, 0 },		/* PM_GRP_DISP_REJECT */
	{ 0x410c7,  0x441084, 0 },		/* PM_THRD_L2MISS_BOTH_CYC */
	{ 0x100005, 0x600005, 0 },		/* PM_RUN_CYC */
	{ 0x100009, 0x200009, 0x500009 },	/* PM_INST_CMPL */
	{ 0x300009, 0x400009, 0 },		/* PM_INST_DISP */
};
222
223/*
224 * Scan the alternatives table for a match and return the
225 * index into the alternatives table if found, else -1.
226 */
227static int find_alternative(u64 event)
228{
229 int i, j;
230
231 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
232 if (event < event_alternatives[i][0])
233 break;
234 for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
235 if (event == event_alternatives[i][j])
236 return i;
237 }
238 return -1;
239}
240
/*
 * Byte-decode PMCSEL values, one row per PMC (row 0 is PMC1).
 * Entries in the same column of two different rows are treated as
 * alternatives of each other by find_alternative_bdecode().
 */
static const unsigned char bytedecode_alternatives[4][4] = {
	[0] = { 0x21, 0x23, 0x25, 0x27 },	/* PMC 1 */
	[1] = { 0x07, 0x17, 0x0e, 0x1e },	/* PMC 2 */
	[2] = { 0x20, 0x22, 0x24, 0x26 },	/* PMC 3 */
	[3] = { 0x07, 0x17, 0x0e, 0x1e },	/* PMC 4 */
};
247
248/*
249 * Some direct events for decodes of event bus byte 3 have alternative
250 * PMCSEL values on other counters. This returns the alternative
251 * event code for those that do, or -1 otherwise.
252 */
253static s64 find_alternative_bdecode(u64 event)
254{
255 int pmc, altpmc, pp, j;
256
257 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
258 if (pmc == 0 || pmc > 4)
259 return -1;
260 altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */
261 pp = event & PM_PMCSEL_MSK;
262 for (j = 0; j < 4; ++j) {
263 if (bytedecode_alternatives[pmc - 1][j] == pp) {
264 return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
265 (altpmc << PM_PMC_SH) |
266 bytedecode_alternatives[altpmc - 1][j];
267 }
268 }
269 return -1;
270}
271
272static int power5_get_alternatives(u64 event, unsigned int flags, u64 alt[])
273{
274 int i, j, nalt = 1;
275 s64 ae;
276
277 alt[0] = event;
278 nalt = 1;
279 i = find_alternative(event);
280 if (i >= 0) {
281 for (j = 0; j < MAX_ALT; ++j) {
282 ae = event_alternatives[i][j];
283 if (ae && ae != event)
284 alt[nalt++] = ae;
285 }
286 } else {
287 ae = find_alternative_bdecode(event);
288 if (ae > 0)
289 alt[nalt++] = ae;
290 }
291 return nalt;
292}
293
/*
 * Map of which direct events on which PMCs are marked instruction events.
 * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
 * Bit 0 is set if it is marked for all PMCs.
 * The 0x80 bit indicates a byte decode PMCSEL value.
 * PMCSEL values not listed here have no bits set.
 */
static unsigned char direct_event_is_marked[0x28] = {
	[0x01] = 0x1f,	/* PM_IOPS_CMPL */
	[0x02] = 0x2,	/* PM_MRK_GRP_DISP */
	[0x03] = 0xe,	/* PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
	[0x05] = 0x1c,	/* PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
	[0x06] = 0x80,
	[0x07] = 0x80,
	[0x0b] = 0x18,	/* PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
	[0x0d] = 0x80,
	[0x0e] = 0x80,
	[0x11] = 0x14,	/* PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
	[0x13] = 0x10,	/* PM_MRK_GRP_CMPL */
	[0x14] = 0x1f,	/* PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
	[0x15] = 0x2,	/* PM_MRK_GRP_ISSUED */
	[0x16] = 0x80,
	[0x17] = 0x80,
	[0x1d] = 0x80,
	[0x1e] = 0x80,
	[0x20] = 0x80,
	[0x21] = 0x80,
	[0x22] = 0x80,
	[0x23] = 0x80,
	[0x24] = 0x80,
	[0x25] = 0x80,
	[0x26] = 0x80,
	[0x27] = 0x80,
};
336
337/*
338 * Returns 1 if event counts things relating to marked instructions
339 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
340 */
341static int power5_marked_instr_event(u64 event)
342{
343 int pmc, psel;
344 int bit, byte, unit;
345 u32 mask;
346
347 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
348 psel = event & PM_PMCSEL_MSK;
349 if (pmc >= 5)
350 return 0;
351
352 bit = -1;
353 if (psel < sizeof(direct_event_is_marked)) {
354 if (direct_event_is_marked[psel] & (1 << pmc))
355 return 1;
356 if (direct_event_is_marked[psel] & 0x80)
357 bit = 4;
358 else if (psel == 0x08)
359 bit = pmc - 1;
360 else if (psel == 0x10)
361 bit = 4 - pmc;
362 else if (psel == 0x1b && (pmc == 1 || pmc == 3))
363 bit = 4;
364 } else if ((psel & 0x58) == 0x40)
365 bit = psel & 7;
366
367 if (!(event & PM_BUSEVENT_MSK))
368 return 0;
369
370 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
371 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
372 if (unit == PM_LSU0) {
373 /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */
374 mask = 0x5dff00;
375 } else if (unit == PM_LSU1 && byte >= 4) {
376 byte -= 4;
377 /* byte 4 bits 1,3,5,7, byte 5 bits 6-7, byte 7 bits 0-4,6 */
378 mask = 0x5f00c0aa;
379 } else
380 return 0;
381
382 return (mask >> (byte * 8 + bit)) & 1;
383}
384
385static int power5_compute_mmcr(u64 event[], int n_ev,
386 unsigned int hwc[], u64 mmcr[])
387{
388 u64 mmcr1 = 0;
389 u64 mmcra = 0;
390 unsigned int pmc, unit, byte, psel;
391 unsigned int ttm, grp;
392 int i, isbus, bit, grsel;
393 unsigned int pmc_inuse = 0;
394 unsigned int pmc_grp_use[2];
395 unsigned char busbyte[4];
396 unsigned char unituse[16];
397 int ttmuse;
398
399 if (n_ev > 6)
400 return -1;
401
402 /* First pass to count resource use */
403 pmc_grp_use[0] = pmc_grp_use[1] = 0;
404 memset(busbyte, 0, sizeof(busbyte));
405 memset(unituse, 0, sizeof(unituse));
406 for (i = 0; i < n_ev; ++i) {
407 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
408 if (pmc) {
409 if (pmc > 6)
410 return -1;
411 if (pmc_inuse & (1 << (pmc - 1)))
412 return -1;
413 pmc_inuse |= 1 << (pmc - 1);
414 /* count 1/2 vs 3/4 use */
415 if (pmc <= 4)
416 ++pmc_grp_use[(pmc - 1) >> 1];
417 }
418 if (event[i] & PM_BUSEVENT_MSK) {
419 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
420 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
421 if (unit > PM_LASTUNIT)
422 return -1;
423 if (unit == PM_ISU0_ALT)
424 unit = PM_ISU0;
425 if (byte >= 4) {
426 if (unit != PM_LSU1)
427 return -1;
428 ++unit;
429 byte &= 3;
430 }
431 if (!pmc)
432 ++pmc_grp_use[byte & 1];
433 if (busbyte[byte] && busbyte[byte] != unit)
434 return -1;
435 busbyte[byte] = unit;
436 unituse[unit] = 1;
437 }
438 }
439 if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2)
440 return -1;
441
442 /*
443 * Assign resources and set multiplexer selects.
444 *
445 * PM_ISU0 can go either on TTM0 or TTM1, but that's the only
446 * choice we have to deal with.
447 */
448 if (unituse[PM_ISU0] &
449 (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
450 unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */
451 unituse[PM_ISU0] = 0;
452 }
453 /* Set TTM[01]SEL fields. */
454 ttmuse = 0;
455 for (i = PM_FPU; i <= PM_ISU1; ++i) {
456 if (!unituse[i])
457 continue;
458 if (ttmuse++)
459 return -1;
460 mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH;
461 }
462 ttmuse = 0;
463 for (; i <= PM_GRS; ++i) {
464 if (!unituse[i])
465 continue;
466 if (ttmuse++)
467 return -1;
468 mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH;
469 }
470 if (ttmuse > 1)
471 return -1;
472
473 /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
474 for (byte = 0; byte < 4; ++byte) {
475 unit = busbyte[byte];
476 if (!unit)
477 continue;
478 if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
479 /* get ISU0 through TTM1 rather than TTM0 */
480 unit = PM_ISU0_ALT;
481 } else if (unit == PM_LSU1 + 1) {
482 /* select lower word of LSU1 for this byte */
483 mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
484 }
485 ttm = unit >> 2;
486 mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
487 }
488
489 /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
490 for (i = 0; i < n_ev; ++i) {
491 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
492 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
493 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
494 psel = event[i] & PM_PMCSEL_MSK;
495 isbus = event[i] & PM_BUSEVENT_MSK;
496 if (!pmc) {
497 /* Bus event or any-PMC direct event */
498 for (pmc = 0; pmc < 4; ++pmc) {
499 if (pmc_inuse & (1 << pmc))
500 continue;
501 grp = (pmc >> 1) & 1;
502 if (isbus) {
503 if (grp == (byte & 1))
504 break;
505 } else if (pmc_grp_use[grp] < 2) {
506 ++pmc_grp_use[grp];
507 break;
508 }
509 }
510 pmc_inuse |= 1 << pmc;
511 } else if (pmc <= 4) {
512 /* Direct event */
513 --pmc;
514 if ((psel == 8 || psel == 0x10) && isbus && (byte & 2))
515 /* add events on higher-numbered bus */
516 mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
517 } else {
518 /* Instructions or run cycles on PMC5/6 */
519 --pmc;
520 }
521 if (isbus && unit == PM_GRS) {
522 bit = psel & 7;
523 grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
524 mmcr1 |= (u64)grsel << grsel_shift[bit];
525 }
526 if (power5_marked_instr_event(event[i]))
527 mmcra |= MMCRA_SAMPLE_ENABLE;
528 if (pmc <= 3)
529 mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
530 hwc[i] = pmc;
531 }
532
533 /* Return MMCRx values */
534 mmcr[0] = 0;
535 if (pmc_inuse & 1)
536 mmcr[0] = MMCR0_PMC1CE;
537 if (pmc_inuse & 0x3e)
538 mmcr[0] |= MMCR0_PMCjCE;
539 mmcr[1] = mmcr1;
540 mmcr[2] = mmcra;
541 return 0;
542}
543
544static void power5_disable_pmc(unsigned int pmc, u64 mmcr[])
545{
546 if (pmc <= 3)
547 mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
548}
549
550static int power5_generic_events[] = {
551 [PERF_COUNT_CPU_CYCLES] = 0xf,
552 [PERF_COUNT_INSTRUCTIONS] = 0x100009,
553 [PERF_COUNT_CACHE_REFERENCES] = 0x4c1090, /* LD_REF_L1 */
554 [PERF_COUNT_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */
555 [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */
556 [PERF_COUNT_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */
557};
558
559struct power_pmu power5_pmu = {
560 .n_counter = 6,
561 .max_alternatives = MAX_ALT,
562 .add_fields = 0x7000090000555ull,
563 .test_adder = 0x3000490000000ull,
564 .compute_mmcr = power5_compute_mmcr,
565 .get_constraint = power5_get_constraint,
566 .get_alternatives = power5_get_alternatives,
567 .disable_pmc = power5_disable_pmc,
568 .n_generic = ARRAY_SIZE(power5_generic_events),
569 .generic_events = power5_generic_events,
570};
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
new file mode 100644
index 000000000000..cd4fbe06c35d
--- /dev/null
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -0,0 +1,490 @@
1/*
2 * Performance counter support for POWER6 processors.
3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/perf_counter.h>
13#include <asm/reg.h>
14
15/*
16 * Bits in event code for POWER6
17 */
18#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
19#define PM_PMC_MSK 0x7
20#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
21#define PM_UNIT_SH 16 /* Unit event comes from (TTMxSEL encoding) */
22#define PM_UNIT_MSK 0xf
23#define PM_UNIT_MSKS (PM_UNIT_MSK << PM_UNIT_SH)
24#define PM_LLAV 0x8000 /* Load lookahead match value */
25#define PM_LLA 0x4000 /* Load lookahead match enable */
26#define PM_BYTE_SH 12 /* Byte of event bus to use */
27#define PM_BYTE_MSK 3
28#define PM_SUBUNIT_SH 8 /* Subunit event comes from (NEST_SEL enc.) */
29#define PM_SUBUNIT_MSK 7
30#define PM_SUBUNIT_MSKS (PM_SUBUNIT_MSK << PM_SUBUNIT_SH)
31#define PM_PMCSEL_MSK 0xff /* PMCxSEL value */
32#define PM_BUSEVENT_MSK 0xf3700
33
34/*
35 * Bits in MMCR1 for POWER6
36 */
37#define MMCR1_TTM0SEL_SH 60
38#define MMCR1_TTMSEL_SH(n) (MMCR1_TTM0SEL_SH - (n) * 4)
39#define MMCR1_TTMSEL_MSK 0xf
40#define MMCR1_TTMSEL(m, n) (((m) >> MMCR1_TTMSEL_SH(n)) & MMCR1_TTMSEL_MSK)
41#define MMCR1_NESTSEL_SH 45
42#define MMCR1_NESTSEL_MSK 0x7
43#define MMCR1_NESTSEL(m) (((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK)
44#define MMCR1_PMC1_LLA ((u64)1 << 44)
45#define MMCR1_PMC1_LLA_VALUE ((u64)1 << 39)
46#define MMCR1_PMC1_ADDR_SEL ((u64)1 << 35)
47#define MMCR1_PMC1SEL_SH 24
48#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
49#define MMCR1_PMCSEL_MSK 0xff
50
51/*
52 * Map of which direct events on which PMCs are marked instruction events.
53 * Indexed by PMCSEL value >> 1.
54 * Bottom 4 bits are a map of which PMCs are interesting,
55 * top 4 bits say what sort of event:
56 * 0 = direct marked event,
57 * 1 = byte decode event,
58 * 4 = add/and event (PMC1 -> bits 0 & 4),
59 * 5 = add/and event (PMC1 -> bits 1 & 5),
60 * 6 = add/and event (PMC1 -> bits 2 & 6),
61 * 7 = add/and event (PMC1 -> bits 3 & 7).
62 */
/*
 * One entry per even PMCSEL value from 0x00 to 0x5e (the trailing
 * comment on each line gives that PMCSEL value).
 * power6_marked_instr_event() tests bit (pmc - 1) of the low nibble and
 * then uses the high nibble to decide whether the event is marked
 * outright (0) or which event-bus bit has to be looked up in
 * marked_bus_events[] (1, 4-7).
 */
static unsigned char direct_event_is_marked[0x60 >> 1] = {
	0,	/* 00 */
	0,	/* 02 */
	0,	/* 04 */
	0x07,	/* 06 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
	0x04,	/* 08 PM_MRK_DFU_FIN */
	0x06,	/* 0a PM_MRK_IFU_FIN, PM_MRK_INST_FIN */
	0,	/* 0c */
	0,	/* 0e */
	0x02,	/* 10 PM_MRK_INST_DISP */
	0x08,	/* 12 PM_MRK_LSU_DERAT_MISS */
	0,	/* 14 */
	0,	/* 16 */
	0x0c,	/* 18 PM_THRESH_TIMEO, PM_MRK_INST_FIN */
	0x0f,	/* 1a PM_MRK_INST_DISP, PM_MRK_{FXU,FPU,LSU}_FIN */
	0x01,	/* 1c PM_MRK_INST_ISSUED */
	0,	/* 1e */
	0,	/* 20 */
	0,	/* 22 */
	0,	/* 24 */
	0,	/* 26 */
	0x15,	/* 28 PM_MRK_DATA_FROM_L2MISS, PM_MRK_DATA_FROM_L3MISS */
	0,	/* 2a */
	0,	/* 2c */
	0,	/* 2e */
	/* 30-3a: add/and events (types 4-7), flagged for all four PMCs */
	0x4f,	/* 30 */
	0x7f,	/* 32 */
	0x4f,	/* 34 */
	0x5f,	/* 36 */
	0x6f,	/* 38 */
	0x4f,	/* 3a */
	0,	/* 3c */
	0x08,	/* 3e PM_MRK_INST_TIMEO */
	/* 40-4e: byte decode events (type 1), flagged for all four PMCs */
	0x1f,	/* 40 */
	0x1f,	/* 42 */
	0x1f,	/* 44 */
	0x1f,	/* 46 */
	0x1f,	/* 48 */
	0x1f,	/* 4a */
	0x1f,	/* 4c */
	0x1f,	/* 4e */
	0,	/* 50 */
	0x05,	/* 52 PM_MRK_BR_TAKEN, PM_MRK_BR_MPRED */
	0x1c,	/* 54 PM_MRK_PTEG_FROM_L3MISS, PM_MRK_PTEG_FROM_L2MISS */
	0x02,	/* 56 PM_MRK_LD_MISS_L1 */
	0,	/* 58 */
	0,	/* 5a */
	0,	/* 5c */
	0,	/* 5e */
};
113
114/*
115 * Masks showing for each unit which bits are marked events.
116 * These masks are in LE order, i.e. 0x00000001 is byte 0, bit 0.
117 */
/*
 * Indexed by the PM_UNIT field of the event code; bit (byte * 8 + bit)
 * of an entry is set when that event-bus bit carries a marked event.
 */
static u32 marked_bus_events[16] = {
	0x01000000,	/* direct events set 1: byte 3 bit 0 */
	0x00010000,	/* direct events set 2: byte 2 bit 0 */
	0, 0, 0, 0,	/* IDU, IFU, nest: nothing */
	0x00000088,	/* VMX set 1: byte 0 bits 3, 7 */
	/*
	 * NOTE(review): 0xc0 only sets byte 0 bits 6-7, while the comment
	 * below says bits 4-7 (which would be 0xf0) -- confirm against the
	 * POWER6 PMU documentation which of the two is intended.
	 */
	0x000000c0,	/* VMX set 2: byte 0 bits 4-7 */
	0x04010000,	/* LSU set 1: byte 2 bit 0, byte 3 bit 2 */
	0xff010000u,	/* LSU set 2: byte 2 bit 0, all of byte 3 */
	0,		/* LSU set 3 */
	0x00000010,	/* VMX set 3: byte 0 bit 4 */
	0,		/* BFP set 1 */
	0x00000022,	/* BFP set 2: byte 0 bits 1, 5 */
	0, 0
};
132
133/*
134 * Returns 1 if event counts things relating to marked instructions
135 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
136 */
137static int power6_marked_instr_event(u64 event)
138{
139 int pmc, psel, ptype;
140 int bit, byte, unit;
141 u32 mask;
142
143 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
144 psel = (event & PM_PMCSEL_MSK) >> 1; /* drop edge/level bit */
145 if (pmc >= 5)
146 return 0;
147
148 bit = -1;
149 if (psel < sizeof(direct_event_is_marked)) {
150 ptype = direct_event_is_marked[psel];
151 if (pmc == 0 || !(ptype & (1 << (pmc - 1))))
152 return 0;
153 ptype >>= 4;
154 if (ptype == 0)
155 return 1;
156 if (ptype == 1)
157 bit = 0;
158 else
159 bit = ptype ^ (pmc - 1);
160 } else if ((psel & 0x48) == 0x40)
161 bit = psel & 7;
162
163 if (!(event & PM_BUSEVENT_MSK) || bit == -1)
164 return 0;
165
166 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
167 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
168 mask = marked_bus_events[unit];
169 return (mask >> (byte * 8 + bit)) & 1;
170}
171
172/*
173 * Assign PMC numbers and compute MMCR1 value for a set of events
174 */
175static int p6_compute_mmcr(u64 event[], int n_ev,
176 unsigned int hwc[], u64 mmcr[])
177{
178 u64 mmcr1 = 0;
179 u64 mmcra = 0;
180 int i;
181 unsigned int pmc, ev, b, u, s, psel;
182 unsigned int ttmset = 0;
183 unsigned int pmc_inuse = 0;
184
185 if (n_ev > 6)
186 return -1;
187 for (i = 0; i < n_ev; ++i) {
188 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
189 if (pmc) {
190 if (pmc_inuse & (1 << (pmc - 1)))
191 return -1; /* collision! */
192 pmc_inuse |= 1 << (pmc - 1);
193 }
194 }
195 for (i = 0; i < n_ev; ++i) {
196 ev = event[i];
197 pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK;
198 if (pmc) {
199 --pmc;
200 } else {
201 /* can go on any PMC; find a free one */
202 for (pmc = 0; pmc < 4; ++pmc)
203 if (!(pmc_inuse & (1 << pmc)))
204 break;
205 if (pmc >= 4)
206 return -1;
207 pmc_inuse |= 1 << pmc;
208 }
209 hwc[i] = pmc;
210 psel = ev & PM_PMCSEL_MSK;
211 if (ev & PM_BUSEVENT_MSK) {
212 /* this event uses the event bus */
213 b = (ev >> PM_BYTE_SH) & PM_BYTE_MSK;
214 u = (ev >> PM_UNIT_SH) & PM_UNIT_MSK;
215 /* check for conflict on this byte of event bus */
216 if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u)
217 return -1;
218 mmcr1 |= (u64)u << MMCR1_TTMSEL_SH(b);
219 ttmset |= 1 << b;
220 if (u == 5) {
221 /* Nest events have a further mux */
222 s = (ev >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
223 if ((ttmset & 0x10) &&
224 MMCR1_NESTSEL(mmcr1) != s)
225 return -1;
226 ttmset |= 0x10;
227 mmcr1 |= (u64)s << MMCR1_NESTSEL_SH;
228 }
229 if (0x30 <= psel && psel <= 0x3d) {
230 /* these need the PMCx_ADDR_SEL bits */
231 if (b >= 2)
232 mmcr1 |= MMCR1_PMC1_ADDR_SEL >> pmc;
233 }
234 /* bus select values are different for PMC3/4 */
235 if (pmc >= 2 && (psel & 0x90) == 0x80)
236 psel ^= 0x20;
237 }
238 if (ev & PM_LLA) {
239 mmcr1 |= MMCR1_PMC1_LLA >> pmc;
240 if (ev & PM_LLAV)
241 mmcr1 |= MMCR1_PMC1_LLA_VALUE >> pmc;
242 }
243 if (power6_marked_instr_event(event[i]))
244 mmcra |= MMCRA_SAMPLE_ENABLE;
245 if (pmc < 4)
246 mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc);
247 }
248 mmcr[0] = 0;
249 if (pmc_inuse & 1)
250 mmcr[0] = MMCR0_PMC1CE;
251 if (pmc_inuse & 0xe)
252 mmcr[0] |= MMCR0_PMCjCE;
253 mmcr[1] = mmcr1;
254 mmcr[2] = mmcra;
255 return 0;
256}
257
258/*
259 * Layout of constraint bits:
260 *
261 * 0-1 add field: number of uses of PMC1 (max 1)
262 * 2-3, 4-5, 6-7, 8-9, 10-11: ditto for PMC2, 3, 4, 5, 6
263 * 12-15 add field: number of uses of PMC1-4 (max 4)
264 * 16-19 select field: unit on byte 0 of event bus
265 * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3
266 * 32-34 select field: nest (subunit) event selector
267 */
268static int p6_get_constraint(u64 event, u64 *maskp, u64 *valp)
269{
270 int pmc, byte, sh, subunit;
271 u64 mask = 0, value = 0;
272
273 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
274 if (pmc) {
275 if (pmc > 4 && !(event == 0x500009 || event == 0x600005))
276 return -1;
277 sh = (pmc - 1) * 2;
278 mask |= 2 << sh;
279 value |= 1 << sh;
280 }
281 if (event & PM_BUSEVENT_MSK) {
282 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
283 sh = byte * 4 + (16 - PM_UNIT_SH);
284 mask |= PM_UNIT_MSKS << sh;
285 value |= (u64)(event & PM_UNIT_MSKS) << sh;
286 if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) {
287 subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
288 mask |= (u64)PM_SUBUNIT_MSK << 32;
289 value |= (u64)subunit << 32;
290 }
291 }
292 if (pmc <= 4) {
293 mask |= 0x8000; /* add field for count of PMC1-4 uses */
294 value |= 0x1000;
295 }
296 *maskp = mask;
297 *valp = value;
298 return 0;
299}
300
301static int p6_limited_pmc_event(u64 event)
302{
303 int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
304
305 return pmc == 5 || pmc == 6;
306}
307
#define MAX_ALT	4	/* at most 4 alternatives for any event */

/*
 * Each row lists event codes that count the same thing; rows with
 * fewer than MAX_ALT codes are padded with zeros.
 *
 * find_alternatives_list() depends on the ordering used here: rows are
 * sorted by ascending first entry, and the codes within a row are in
 * ascending order.  Keep both properties when adding entries.
 */
static const unsigned int event_alternatives[][MAX_ALT] = {
	{ 0x0130e8, 0x2000f6, 0x3000fc },	/* PM_PTEG_RELOAD_VALID */
	{ 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */
	{ 0x080088, 0x200054, 0x3000f0 },	/* PM_ST_MISS_L1 */
	{ 0x10000a, 0x2000f4, 0x600005 },	/* PM_RUN_CYC */
	{ 0x10000b, 0x2000f5 },			/* PM_RUN_COUNT */
	{ 0x10000e, 0x400010 },			/* PM_PURR */
	{ 0x100010, 0x4000f8 },			/* PM_FLUSH */
	{ 0x10001a, 0x200010 },			/* PM_MRK_INST_DISP */
	{ 0x100026, 0x3000f8 },			/* PM_TB_BIT_TRANS */
	{ 0x100054, 0x2000f0 },			/* PM_ST_FIN */
	{ 0x100056, 0x2000fc },			/* PM_L1_ICACHE_MISS */
	{ 0x1000f0, 0x40000a },			/* PM_INST_IMC_MATCH_CMPL */
	{ 0x1000f8, 0x200008 },			/* PM_GCT_EMPTY_CYC */
	{ 0x1000fc, 0x400006 },			/* PM_LSU_DERAT_MISS_CYC */
	{ 0x20000e, 0x400007 },			/* PM_LSU_DERAT_MISS */
	/* NOTE(review): the next two rows carry the same name -- confirm */
	{ 0x200012, 0x300012 },			/* PM_INST_DISP */
	{ 0x2000f2, 0x3000f2 },			/* PM_INST_DISP */
	{ 0x2000f8, 0x300010 },			/* PM_EXT_INT */
	{ 0x2000fe, 0x300056 },			/* PM_DATA_FROM_L2MISS */
	{ 0x2d0030, 0x30001a },			/* PM_MRK_FPU_FIN */
	{ 0x30000a, 0x400018 },			/* PM_MRK_INST_FIN */
	{ 0x3000f6, 0x40000e },			/* PM_L1_DCACHE_RELOAD_VALID */
	{ 0x3000fe, 0x400056 },			/* PM_DATA_FROM_L3MISS */
};
335
336/*
337 * This could be made more efficient with a binary search on
338 * a presorted list, if necessary
339 */
340static int find_alternatives_list(u64 event)
341{
342 int i, j;
343 unsigned int alt;
344
345 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
346 if (event < event_alternatives[i][0])
347 return -1;
348 for (j = 0; j < MAX_ALT; ++j) {
349 alt = event_alternatives[i][j];
350 if (!alt || event < alt)
351 break;
352 if (event == alt)
353 return i;
354 }
355 }
356 return -1;
357}
358
359static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[])
360{
361 int i, j, nlim;
362 unsigned int psel, pmc;
363 unsigned int nalt = 1;
364 u64 aevent;
365
366 alt[0] = event;
367 nlim = p6_limited_pmc_event(event);
368
369 /* check the alternatives table */
370 i = find_alternatives_list(event);
371 if (i >= 0) {
372 /* copy out alternatives from list */
373 for (j = 0; j < MAX_ALT; ++j) {
374 aevent = event_alternatives[i][j];
375 if (!aevent)
376 break;
377 if (aevent != event)
378 alt[nalt++] = aevent;
379 nlim += p6_limited_pmc_event(aevent);
380 }
381
382 } else {
383 /* Check for alternative ways of computing sum events */
384 /* PMCSEL 0x32 counter N == PMCSEL 0x34 counter 5-N */
385 psel = event & (PM_PMCSEL_MSK & ~1); /* ignore edge bit */
386 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
387 if (pmc && (psel == 0x32 || psel == 0x34))
388 alt[nalt++] = ((event ^ 0x6) & ~PM_PMC_MSKS) |
389 ((5 - pmc) << PM_PMC_SH);
390
391 /* PMCSEL 0x38 counter N == PMCSEL 0x3a counter N+/-2 */
392 if (pmc && (psel == 0x38 || psel == 0x3a))
393 alt[nalt++] = ((event ^ 0x2) & ~PM_PMC_MSKS) |
394 ((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH);
395 }
396
397 if (flags & PPMU_ONLY_COUNT_RUN) {
398 /*
399 * We're only counting in RUN state,
400 * so PM_CYC is equivalent to PM_RUN_CYC,
401 * PM_INST_CMPL === PM_RUN_INST_CMPL, PM_PURR === PM_RUN_PURR.
402 * This doesn't include alternatives that don't provide
403 * any extra flexibility in assigning PMCs (e.g.
404 * 0x10000a for PM_RUN_CYC vs. 0x1e for PM_CYC).
405 * Note that even with these additional alternatives
406 * we never end up with more than 4 alternatives for any event.
407 */
408 j = nalt;
409 for (i = 0; i < nalt; ++i) {
410 switch (alt[i]) {
411 case 0x1e: /* PM_CYC */
412 alt[j++] = 0x600005; /* PM_RUN_CYC */
413 ++nlim;
414 break;
415 case 0x10000a: /* PM_RUN_CYC */
416 alt[j++] = 0x1e; /* PM_CYC */
417 break;
418 case 2: /* PM_INST_CMPL */
419 alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */
420 ++nlim;
421 break;
422 case 0x500009: /* PM_RUN_INST_CMPL */
423 alt[j++] = 2; /* PM_INST_CMPL */
424 break;
425 case 0x10000e: /* PM_PURR */
426 alt[j++] = 0x4000f4; /* PM_RUN_PURR */
427 break;
428 case 0x4000f4: /* PM_RUN_PURR */
429 alt[j++] = 0x10000e; /* PM_PURR */
430 break;
431 }
432 }
433 nalt = j;
434 }
435
436 if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
437 /* remove the limited PMC events */
438 j = 0;
439 for (i = 0; i < nalt; ++i) {
440 if (!p6_limited_pmc_event(alt[i])) {
441 alt[j] = alt[i];
442 ++j;
443 }
444 }
445 nalt = j;
446 } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
447 /* remove all but the limited PMC events */
448 j = 0;
449 for (i = 0; i < nalt; ++i) {
450 if (p6_limited_pmc_event(alt[i])) {
451 alt[j] = alt[i];
452 ++j;
453 }
454 }
455 nalt = j;
456 }
457
458 return nalt;
459}
460
461static void p6_disable_pmc(unsigned int pmc, u64 mmcr[])
462{
463 /* Set PMCxSEL to 0 to disable PMCx */
464 if (pmc <= 3)
465 mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
466}
467
/*
 * Event codes used for the generic counter types, indexed by the
 * PERF_COUNT_* id.  Published through power6_pmu.generic_events.
 */
static int power6_generic_events[] = {
	[PERF_COUNT_CPU_CYCLES] = 0x1e,
	[PERF_COUNT_INSTRUCTIONS] = 2,
	[PERF_COUNT_CACHE_REFERENCES] = 0x280030, /* LD_REF_L1 */
	[PERF_COUNT_CACHE_MISSES] = 0x30000c, /* LD_MISS_L1 */
	[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x410a0, /* BR_PRED */
	[PERF_COUNT_BRANCH_MISSES] = 0x400052, /* BR_MPRED */
};
476
/*
 * PMU description for POWER6: number of counters, the constraint
 * add/test fields (see the layout comment above p6_get_constraint),
 * and the callbacks and generic event table defined in this file.
 */
struct power_pmu power6_pmu = {
	.n_counter = 6,
	.max_alternatives = MAX_ALT,
	.add_fields = 0x1555,
	.test_adder = 0x3000,
	.compute_mmcr = p6_compute_mmcr,
	.get_constraint = p6_get_constraint,
	.get_alternatives = p6_get_alternatives,
	.disable_pmc = p6_disable_pmc,
	.n_generic = ARRAY_SIZE(power6_generic_events),
	.generic_events = power6_generic_events,
	.flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR,
	.limited_pmc_event = p6_limited_pmc_event,
};
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
new file mode 100644
index 000000000000..eed47c4523f1
--- /dev/null
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -0,0 +1,441 @@
1/*
2 * Performance counter support for PPC970-family processors.
3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/string.h>
12#include <linux/perf_counter.h>
13#include <asm/reg.h>
14
15/*
16 * Bits in event code for PPC970
17 */
18#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */
19#define PM_PMC_MSK 0xf
20#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */
21#define PM_UNIT_MSK 0xf
22#define PM_SPCSEL_SH 6
23#define PM_SPCSEL_MSK 3
24#define PM_BYTE_SH 4 /* Byte number of event bus to use */
25#define PM_BYTE_MSK 3
26#define PM_PMCSEL_MSK 0xf
27
28/* Values in PM_UNIT field */
29#define PM_NONE 0
30#define PM_FPU 1
31#define PM_VPU 2
32#define PM_ISU 3
33#define PM_IFU 4
34#define PM_IDU 5
35#define PM_STS 6
36#define PM_LSU0 7
37#define PM_LSU1U 8
38#define PM_LSU1L 9
39#define PM_LASTUNIT 9
40
41/*
42 * Bits in MMCR0 for PPC970
43 */
44#define MMCR0_PMC1SEL_SH 8
45#define MMCR0_PMC2SEL_SH 1
46#define MMCR_PMCSEL_MSK 0x1f
47
48/*
49 * Bits in MMCR1 for PPC970
50 */
51#define MMCR1_TTM0SEL_SH 62
52#define MMCR1_TTM1SEL_SH 59
53#define MMCR1_TTM3SEL_SH 53
54#define MMCR1_TTMSEL_MSK 3
55#define MMCR1_TD_CP_DBG0SEL_SH 50
56#define MMCR1_TD_CP_DBG1SEL_SH 48
57#define MMCR1_TD_CP_DBG2SEL_SH 46
58#define MMCR1_TD_CP_DBG3SEL_SH 44
59#define MMCR1_PMC1_ADDER_SEL_SH 39
60#define MMCR1_PMC2_ADDER_SEL_SH 38
61#define MMCR1_PMC6_ADDER_SEL_SH 37
62#define MMCR1_PMC5_ADDER_SEL_SH 36
63#define MMCR1_PMC8_ADDER_SEL_SH 35
64#define MMCR1_PMC7_ADDER_SEL_SH 34
65#define MMCR1_PMC3_ADDER_SEL_SH 33
66#define MMCR1_PMC4_ADDER_SEL_SH 32
67#define MMCR1_PMC3SEL_SH 27
68#define MMCR1_PMC4SEL_SH 22
69#define MMCR1_PMC5SEL_SH 17
70#define MMCR1_PMC6SEL_SH 12
71#define MMCR1_PMC7SEL_SH 7
72#define MMCR1_PMC8SEL_SH 2
73
/*
 * Bit position in MMCR1 of each PMC's ADDER_SEL bit, indexed by the
 * 0-based PMC number (entry 0 is PMC1).  Note the positions are not in
 * PMC order; see the MMCR1_PMCn_ADDER_SEL_SH definitions above.
 */
static short mmcr1_adder_bits[8] = {
	MMCR1_PMC1_ADDER_SEL_SH,
	MMCR1_PMC2_ADDER_SEL_SH,
	MMCR1_PMC3_ADDER_SEL_SH,
	MMCR1_PMC4_ADDER_SEL_SH,
	MMCR1_PMC5_ADDER_SEL_SH,
	MMCR1_PMC6_ADDER_SEL_SH,
	MMCR1_PMC7_ADDER_SEL_SH,
	MMCR1_PMC8_ADDER_SEL_SH
};
84
85/*
86 * Bits in MMCRA
87 */
88
89/*
90 * Layout of constraint bits:
91 * 6666555555555544444444443333333333222222222211111111110000000000
92 * 3210987654321098765432109876543210987654321098765432109876543210
93 * <><><>[ >[ >[ >< >< >< >< ><><><><><><><><>
94 * SPT0T1 UC PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8
95 *
96 * SP - SPCSEL constraint
97 * 48-49: SPCSEL value 0x3_0000_0000_0000
98 *
99 * T0 - TTM0 constraint
100 * 46-47: TTM0SEL value (0=FPU, 2=IFU, 3=VPU) 0xC000_0000_0000
101 *
102 * T1 - TTM1 constraint
103 * 44-45: TTM1SEL value (0=IDU, 3=STS) 0x3000_0000_0000
104 *
105 * UC - unit constraint: can't have all three of FPU|IFU|VPU, ISU, IDU|STS
106 * 43: UC3 error 0x0800_0000_0000
107 * 42: FPU|IFU|VPU events needed 0x0400_0000_0000
108 * 41: ISU events needed 0x0200_0000_0000
109 * 40: IDU|STS events needed 0x0100_0000_0000
110 *
111 * PS1
112 * 39: PS1 error 0x0080_0000_0000
113 * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
114 *
115 * PS2
116 * 35: PS2 error 0x0008_0000_0000
117 * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
118 *
119 * B0
120 * 28-31: Byte 0 event source 0xf000_0000
121 * Encoding as for the event code
122 *
123 * B1, B2, B3
124 * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
125 *
126 * P1
127 * 15: P1 error 0x8000
128 * 14-15: Count of events needing PMC1
129 *
130 * P2..P8
131 * 0-13: Count of events needing PMC2..PMC8
132 */
133
/*
 * Direct events that are marked-instruction events, one entry per PMC
 * (entry 0 is PMC1).  Bit n of an entry is set when PMCSEL value n on
 * that PMC is such an event; p970_marked_instr_event() tests it with
 * (1 << psel).
 */
static unsigned char direct_marked_event[8] = {
	(1<<2) | (1<<3),	/* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
	(1<<3) | (1<<5),	/* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
	(1<<3) | (1<<5),	/* PMC3: PM_MRK_ST_CMPL_INT, PM_MRK_VMX_FIN */
	(1<<4) | (1<<5),	/* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
	(1<<4) | (1<<5),	/* PMC5: PM_GRP_MRK, PM_MRK_GRP_TIMEO */
	(1<<3) | (1<<4) | (1<<5),
		/* PMC6: PM_MRK_ST_STS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
	(1<<4) | (1<<5),	/* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
	(1<<4)			/* PMC8: PM_MRK_LSU_FIN */
};
145
146/*
147 * Returns 1 if event counts things relating to marked instructions
148 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
149 */
150static int p970_marked_instr_event(u64 event)
151{
152 int pmc, psel, unit, byte, bit;
153 unsigned int mask;
154
155 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
156 psel = event & PM_PMCSEL_MSK;
157 if (pmc) {
158 if (direct_marked_event[pmc - 1] & (1 << psel))
159 return 1;
160 if (psel == 0) /* add events */
161 bit = (pmc <= 4)? pmc - 1: 8 - pmc;
162 else if (psel == 7 || psel == 13) /* decode events */
163 bit = 4;
164 else
165 return 0;
166 } else
167 bit = psel;
168
169 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
170 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
171 mask = 0;
172 switch (unit) {
173 case PM_VPU:
174 mask = 0x4c; /* byte 0 bits 2,3,6 */
175 case PM_LSU0:
176 /* byte 2 bits 0,2,3,4,6; all of byte 1 */
177 mask = 0x085dff00;
178 case PM_LSU1L:
179 mask = 0x50 << 24; /* byte 3 bits 4,6 */
180 break;
181 }
182 return (mask >> (byte * 8 + bit)) & 1;
183}
184
/*
 * Masks and values for using events from the various units.
 * Indexed by the PM_UNIT value of the event; element [0] is OR-ed into
 * the constraint mask and element [1] into the constraint value by
 * p970_get_constraint().  Units without an initializer here (PM_NONE
 * and the LSU units) contribute zeros.
 */
static u64 unit_cons[PM_LASTUNIT+1][2] = {
	[PM_FPU] = { 0xc80000000000ull, 0x040000000000ull },
	[PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull },
	[PM_ISU] = { 0x080000000000ull, 0x020000000000ull },
	[PM_IFU] = { 0xc80000000000ull, 0x840000000000ull },
	[PM_IDU] = { 0x380000000000ull, 0x010000000000ull },
	[PM_STS] = { 0x380000000000ull, 0x310000000000ull },
};
194
195static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp)
196{
197 int pmc, byte, unit, sh, spcsel;
198 u64 mask = 0, value = 0;
199 int grp = -1;
200
201 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
202 if (pmc) {
203 if (pmc > 8)
204 return -1;
205 sh = (pmc - 1) * 2;
206 mask |= 2 << sh;
207 value |= 1 << sh;
208 grp = ((pmc - 1) >> 1) & 1;
209 }
210 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
211 if (unit) {
212 if (unit > PM_LASTUNIT)
213 return -1;
214 mask |= unit_cons[unit][0];
215 value |= unit_cons[unit][1];
216 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
217 /*
218 * Bus events on bytes 0 and 2 can be counted
219 * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
220 */
221 if (!pmc)
222 grp = byte & 1;
223 /* Set byte lane select field */
224 mask |= 0xfULL << (28 - 4 * byte);
225 value |= (u64)unit << (28 - 4 * byte);
226 }
227 if (grp == 0) {
228 /* increment PMC1/2/5/6 field */
229 mask |= 0x8000000000ull;
230 value |= 0x1000000000ull;
231 } else if (grp == 1) {
232 /* increment PMC3/4/7/8 field */
233 mask |= 0x800000000ull;
234 value |= 0x100000000ull;
235 }
236 spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
237 if (spcsel) {
238 mask |= 3ull << 48;
239 value |= (u64)spcsel << 48;
240 }
241 *maskp = mask;
242 *valp = value;
243 return 0;
244}
245
246static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[])
247{
248 alt[0] = event;
249
250 /* 2 alternatives for LSU empty */
251 if (event == 0x2002 || event == 0x3002) {
252 alt[1] = event ^ 0x1000;
253 return 2;
254 }
255
256 return 1;
257}
258
/*
 * Assign a PMC to each event in the set and build the MMCR0, MMCR1 and
 * MMCRA images for that assignment.
 *
 * @event: event codes, using the PM_* bit fields defined above
 * @n_ev:  number of entries in @event; more than 8 is rejected
 * @hwc:   output: hwc[i] receives the 0-based PMC index for event[i]
 * @mmcr:  output: mmcr[0..2] receive the MMCR0, MMCR1 and MMCRA images
 *
 * Returns 0 on success, or -1 if the events cannot be counted together.
 * All failure returns happen before the second pass, so neither hwc[]
 * nor mmcr[] is written when -1 is returned.
 */
static int p970_compute_mmcr(u64 event[], int n_ev,
			     unsigned int hwc[], u64 mmcr[])
{
	u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0;
	unsigned int pmc, unit, byte, psel;
	unsigned int ttm, grp;
	unsigned int pmc_inuse = 0;
	unsigned int pmc_grp_use[2];
	unsigned char busbyte[4];
	unsigned char unituse[16];
	/*
	 * Indexed by unit (PM_NONE..PM_STS).  Bit 2 (the "| 4") says the
	 * unit sits on TTM1 rather than TTM0.  The select value is kept
	 * in bits 0-1 for TTM1 units and in bits 3-4 for TTM0 units, so
	 * that a single shift by MMCR1_TTM1SEL_SH drops it into the right
	 * field (MMCR1_TTM0SEL_SH is MMCR1_TTM1SEL_SH + 3).
	 */
	unsigned char unitmap[] = { 0, 0<<3, 3<<3, 1<<3, 2<<3, 0|4, 3|4 };
	unsigned char ttmuse[2];
	unsigned char pmcsel[8];
	int i;
	int spcsel;

	if (n_ev > 8)
		return -1;

	/* First pass to count resource use */
	pmc_grp_use[0] = pmc_grp_use[1] = 0;
	memset(busbyte, 0, sizeof(busbyte));
	memset(unituse, 0, sizeof(unituse));
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		if (pmc) {
			/* two events asking for the same PMC cannot coexist */
			if (pmc_inuse & (1 << (pmc - 1)))
				return -1;
			pmc_inuse |= 1 << (pmc - 1);
			/* count 1/2/5/6 vs 3/4/7/8 use */
			++pmc_grp_use[((pmc - 1) >> 1) & 1];
		}
		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
		if (unit) {
			if (unit > PM_LASTUNIT)
				return -1;
			/* bus events without a fixed PMC use the group of their byte */
			if (!pmc)
				++pmc_grp_use[byte & 1];
			/* each bus byte can carry events from one unit only */
			if (busbyte[byte] && busbyte[byte] != unit)
				return -1;
			busbyte[byte] = unit;
			unituse[unit] = 1;
		}
	}
	/* each group has four PMCs */
	if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
		return -1;

	/*
	 * Assign resources and set multiplexer selects.
	 *
	 * PM_ISU can go either on TTM0 or TTM1, but that's the only
	 * choice we have to deal with.
	 */
	if (unituse[PM_ISU] &
	    (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_VPU]))
		unitmap[PM_ISU] = 2 | 4;	/* move ISU to TTM1 */
	/* Set TTM[01]SEL fields. */
	ttmuse[0] = ttmuse[1] = 0;
	for (i = PM_FPU; i <= PM_STS; ++i) {
		if (!unituse[i])
			continue;
		ttm = unitmap[i];
		++ttmuse[(ttm >> 2) & 1];
		/* see the unitmap[] comment for why one shift serves both fields */
		mmcr1 |= (u64)(ttm & ~4) << MMCR1_TTM1SEL_SH;
	}
	/* Check only one unit per TTMx */
	if (ttmuse[0] > 1 || ttmuse[1] > 1)
		return -1;

	/* Set byte lane select fields and TTM3SEL. */
	for (byte = 0; byte < 4; ++byte) {
		unit = busbyte[byte];
		if (!unit)
			continue;
		if (unit <= PM_STS)
			ttm = (unitmap[unit] >> 2) & 1;
		else if (unit == PM_LSU0)
			ttm = 2;
		else {
			ttm = 3;
			if (unit == PM_LSU1L && byte >= 2)
				mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
		}
		mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
	}

	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
	memset(pmcsel, 0x8, sizeof(pmcsel));	/* 8 means don't count */
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
		psel = event[i] & PM_PMCSEL_MSK;
		if (!pmc) {
			/* Bus event or any-PMC direct event */
			if (unit)
				psel |= 0x10 | ((byte & 2) << 2);
			else
				psel |= 8;
			/*
			 * Take the first free PMC: for a bus event one in
			 * the group matching its byte, otherwise one in a
			 * group that still has room.
			 */
			for (pmc = 0; pmc < 8; ++pmc) {
				if (pmc_inuse & (1 << pmc))
					continue;
				grp = (pmc >> 1) & 1;
				if (unit) {
					if (grp == (byte & 1))
						break;
				} else if (pmc_grp_use[grp] < 4) {
					++pmc_grp_use[grp];
					break;
				}
			}
			pmc_inuse |= 1 << pmc;
		} else {
			/* Direct event */
			--pmc;
			if (psel == 0 && (byte & 2))
				/* add events on higher-numbered bus */
				mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
		}
		pmcsel[pmc] = psel;
		hwc[i] = pmc;
		spcsel = (event[i] >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
		mmcr1 |= spcsel;
		if (p970_marked_instr_event(event[i]))
			mmcra |= MMCRA_SAMPLE_ENABLE;
	}
	/* PMC1/2 selectors live in MMCR0, PMC3-8 selectors in MMCR1 */
	for (pmc = 0; pmc < 2; ++pmc)
		mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc);
	for (; pmc < 8; ++pmc)
		mmcr1 |= (u64)pmcsel[pmc] << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
	if (pmc_inuse & 1)
		mmcr0 |= MMCR0_PMC1CE;
	if (pmc_inuse & 0xfe)
		mmcr0 |= MMCR0_PMCjCE;

	mmcra |= 0x2000;	/* mark only one IOP per PPC instruction */

	/* Return MMCRx values */
	mmcr[0] = mmcr0;
	mmcr[1] = mmcr1;
	mmcr[2] = mmcra;
	return 0;
}
403
404static void p970_disable_pmc(unsigned int pmc, u64 mmcr[])
405{
406 int shift, i;
407
408 if (pmc <= 1) {
409 shift = MMCR0_PMC1SEL_SH - 7 * pmc;
410 i = 0;
411 } else {
412 shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2);
413 i = 1;
414 }
415 /*
416 * Setting the PMCxSEL field to 0x08 disables PMC x.
417 */
418 mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift);
419}
420
/*
 * Event codes used for the generic counter types, indexed by the
 * PERF_COUNT_* id.  Published through ppc970_pmu.generic_events.
 */
static int ppc970_generic_events[] = {
	[PERF_COUNT_CPU_CYCLES] = 7,
	[PERF_COUNT_INSTRUCTIONS] = 1,
	[PERF_COUNT_CACHE_REFERENCES] = 0x8810, /* PM_LD_REF_L1 */
	[PERF_COUNT_CACHE_MISSES] = 0x3810, /* PM_LD_MISS_L1 */
	[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x431, /* PM_BR_ISSUED */
	[PERF_COUNT_BRANCH_MISSES] = 0x327, /* PM_GRP_BR_MPRED */
};
429
/*
 * PMU description for the PPC970 family: number of counters, the
 * constraint add/test fields (see the "Layout of constraint bits"
 * comment above), and the callbacks and generic event table defined in
 * this file.  max_alternatives is 2 because p970_get_alternatives()
 * never returns more than two codes.
 */
struct power_pmu ppc970_pmu = {
	.n_counter = 8,
	.max_alternatives = 2,
	.add_fields = 0x001100005555ull,
	.test_adder = 0x013300000000ull,
	.compute_mmcr = p970_compute_mmcr,
	.get_constraint = p970_get_constraint,
	.get_alternatives = p970_get_alternatives,
	.disable_pmc = p970_disable_pmc,
	.n_generic = ARRAY_SIZE(ppc970_generic_events),
	.generic_events = ppc970_generic_events,
};
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 76993941cac9..ac0e112031b2 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -29,6 +29,7 @@
29#include <linux/module.h> 29#include <linux/module.h>
30#include <linux/kprobes.h> 30#include <linux/kprobes.h>
31#include <linux/kdebug.h> 31#include <linux/kdebug.h>
32#include <linux/perf_counter.h>
32 33
33#include <asm/firmware.h> 34#include <asm/firmware.h>
34#include <asm/page.h> 35#include <asm/page.h>
@@ -170,6 +171,8 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
170 die("Weird page fault", regs, SIGSEGV); 171 die("Weird page fault", regs, SIGSEGV);
171 } 172 }
172 173
174 perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address);
175
173 /* When running in the kernel we expect faults to occur only to 176 /* When running in the kernel we expect faults to occur only to
174 * addresses in user space. All other faults represent errors in the 177 * addresses in user space. All other faults represent errors in the
175 * kernel and should generate an OOPS. Unfortunately, in the case of an 178 * kernel and should generate an OOPS. Unfortunately, in the case of an
@@ -309,6 +312,8 @@ good_area:
309 } 312 }
310 if (ret & VM_FAULT_MAJOR) { 313 if (ret & VM_FAULT_MAJOR) {
311 current->maj_flt++; 314 current->maj_flt++;
315 perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0,
316 regs, address);
312#ifdef CONFIG_PPC_SMLPAR 317#ifdef CONFIG_PPC_SMLPAR
313 if (firmware_has_feature(FW_FEATURE_CMO)) { 318 if (firmware_has_feature(FW_FEATURE_CMO)) {
314 preempt_disable(); 319 preempt_disable();
@@ -316,8 +321,11 @@ good_area:
316 preempt_enable(); 321 preempt_enable();
317 } 322 }
318#endif 323#endif
319 } else 324 } else {
320 current->min_flt++; 325 current->min_flt++;
326 perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0,
327 regs, address);
328 }
321 up_read(&mm->mmap_sem); 329 up_read(&mm->mmap_sem);
322 return 0; 330 return 0;
323 331
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 9da795e49337..732ee93a8e98 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -1,6 +1,7 @@
1config PPC64 1config PPC64
2 bool "64-bit kernel" 2 bool "64-bit kernel"
3 default n 3 default n
4 select HAVE_PERF_COUNTERS
4 help 5 help
5 This option selects whether a 32-bit or a 64-bit kernel 6 This option selects whether a 32-bit or a 64-bit kernel
6 will be built. 7 will be built.
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a6efe0a2e9ae..cfd78c87a573 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -740,6 +740,7 @@ config X86_UP_IOAPIC
740config X86_LOCAL_APIC 740config X86_LOCAL_APIC
741 def_bool y 741 def_bool y
742 depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC 742 depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
743 select HAVE_PERF_COUNTERS if (!M386 && !M486)
743 744
744config X86_IO_APIC 745config X86_IO_APIC
745 def_bool y 746 def_bool y
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index a505202086e8..e590261ba059 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -825,9 +825,11 @@ ia32_sys_call_table:
825 .quad compat_sys_signalfd4 825 .quad compat_sys_signalfd4
826 .quad sys_eventfd2 826 .quad sys_eventfd2
827 .quad sys_epoll_create1 827 .quad sys_epoll_create1
828 .quad sys_dup3 /* 330 */ 828 .quad sys_dup3 /* 330 */
829 .quad sys_pipe2 829 .quad sys_pipe2
830 .quad sys_inotify_init1 830 .quad sys_inotify_init1
831 .quad compat_sys_preadv 831 .quad compat_sys_preadv
832 .quad compat_sys_pwritev 832 .quad compat_sys_pwritev
833 .quad compat_sys_rt_tgsigqueueinfo /* 335 */
834 .quad sys_perf_counter_open
833ia32_syscall_end: 835ia32_syscall_end:
diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
index 85b46fba4229..aff9f1fcdcd7 100644
--- a/arch/x86/include/asm/atomic_32.h
+++ b/arch/x86/include/asm/atomic_32.h
@@ -247,5 +247,241 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
247#define smp_mb__before_atomic_inc() barrier() 247#define smp_mb__before_atomic_inc() barrier()
248#define smp_mb__after_atomic_inc() barrier() 248#define smp_mb__after_atomic_inc() barrier()
249 249
250/* An 64bit atomic type */
251
252typedef struct {
253 unsigned long long counter;
254} atomic64_t;
255
256#define ATOMIC64_INIT(val) { (val) }
257
/**
 * __atomic64_read - raw read of an atomic64 variable
 * @ptr: pointer of type atomic64_t
 *
 * Plain access to the counter field of @ptr. Nothing in this macro
 * makes the 64-bit read atomic or implies a memory barrier; callers
 * that need a consistent value must validate it themselves (for
 * example with atomic64_cmpxchg()).
 */
#define __atomic64_read(ptr)		((ptr)->counter)
266
267static inline unsigned long long
268cmpxchg8b(unsigned long long *ptr, unsigned long long old, unsigned long long new)
269{
270 asm volatile(
271
272 LOCK_PREFIX "cmpxchg8b (%[ptr])\n"
273
274 : "=A" (old)
275
276 : [ptr] "D" (ptr),
277 "A" (old),
278 "b" (ll_low(new)),
279 "c" (ll_high(new))
280
281 : "memory");
282
283 return old;
284}
285
286static inline unsigned long long
287atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val,
288 unsigned long long new_val)
289{
290 return cmpxchg8b(&ptr->counter, old_val, new_val);
291}
292
293/**
294 * atomic64_xchg - xchg atomic64 variable
295 * @ptr: pointer to type atomic64_t
296 * @new_val: value to assign
297 * @old_val: old value that was there
298 *
299 * Atomically xchgs the value of @ptr to @new_val and returns
300 * the old value.
301 */
302
303static inline unsigned long long
304atomic64_xchg(atomic64_t *ptr, unsigned long long new_val)
305{
306 unsigned long long old_val;
307
308 do {
309 old_val = atomic_read(ptr);
310 } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val);
311
312 return old_val;
313}
314
315/**
316 * atomic64_set - set atomic64 variable
317 * @ptr: pointer to type atomic64_t
318 * @new_val: value to assign
319 *
320 * Atomically sets the value of @ptr to @new_val.
321 */
322static inline void atomic64_set(atomic64_t *ptr, unsigned long long new_val)
323{
324 atomic64_xchg(ptr, new_val);
325}
326
327/**
328 * atomic64_read - read atomic64 variable
329 * @ptr: pointer to type atomic64_t
330 *
331 * Atomically reads the value of @ptr and returns it.
332 */
333static inline unsigned long long atomic64_read(atomic64_t *ptr)
334{
335 unsigned long long curr_val;
336
337 do {
338 curr_val = __atomic64_read(ptr);
339 } while (atomic64_cmpxchg(ptr, curr_val, curr_val) != curr_val);
340
341 return curr_val;
342}
343
344/**
345 * atomic64_add_return - add and return
346 * @delta: integer value to add
347 * @ptr: pointer to type atomic64_t
348 *
349 * Atomically adds @delta to @ptr and returns @delta + *@ptr
350 */
351static inline unsigned long long
352atomic64_add_return(unsigned long long delta, atomic64_t *ptr)
353{
354 unsigned long long old_val, new_val;
355
356 do {
357 old_val = atomic_read(ptr);
358 new_val = old_val + delta;
359
360 } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val);
361
362 return new_val;
363}
364
365static inline long atomic64_sub_return(unsigned long long delta, atomic64_t *ptr)
366{
367 return atomic64_add_return(-delta, ptr);
368}
369
370static inline long atomic64_inc_return(atomic64_t *ptr)
371{
372 return atomic64_add_return(1, ptr);
373}
374
375static inline long atomic64_dec_return(atomic64_t *ptr)
376{
377 return atomic64_sub_return(1, ptr);
378}
379
380/**
381 * atomic64_add - add integer to atomic64 variable
382 * @delta: integer value to add
383 * @ptr: pointer to type atomic64_t
384 *
385 * Atomically adds @delta to @ptr.
386 */
387static inline void atomic64_add(unsigned long long delta, atomic64_t *ptr)
388{
389 atomic64_add_return(delta, ptr);
390}
391
392/**
393 * atomic64_sub - subtract the atomic64 variable
394 * @delta: integer value to subtract
395 * @ptr: pointer to type atomic64_t
396 *
397 * Atomically subtracts @delta from @ptr.
398 */
399static inline void atomic64_sub(unsigned long long delta, atomic64_t *ptr)
400{
401 atomic64_add(-delta, ptr);
402}
403
404/**
405 * atomic64_sub_and_test - subtract value from variable and test result
406 * @delta: integer value to subtract
407 * @ptr: pointer to type atomic64_t
408 *
409 * Atomically subtracts @delta from @ptr and returns
410 * true if the result is zero, or false for all
411 * other cases.
412 */
413static inline int
414atomic64_sub_and_test(unsigned long long delta, atomic64_t *ptr)
415{
416 unsigned long long old_val = atomic64_sub_return(delta, ptr);
417
418 return old_val == 0;
419}
420
421/**
422 * atomic64_inc - increment atomic64 variable
423 * @ptr: pointer to type atomic64_t
424 *
425 * Atomically increments @ptr by 1.
426 */
427static inline void atomic64_inc(atomic64_t *ptr)
428{
429 atomic64_add(1, ptr);
430}
431
432/**
433 * atomic64_dec - decrement atomic64 variable
434 * @ptr: pointer to type atomic64_t
435 *
436 * Atomically decrements @ptr by 1.
437 */
438static inline void atomic64_dec(atomic64_t *ptr)
439{
440 atomic64_sub(1, ptr);
441}
442
443/**
444 * atomic64_dec_and_test - decrement and test
445 * @ptr: pointer to type atomic64_t
446 *
447 * Atomically decrements @ptr by 1 and
448 * returns true if the result is 0, or false for all other
449 * cases.
450 */
451static inline int atomic64_dec_and_test(atomic64_t *ptr)
452{
453 return atomic64_sub_and_test(1, ptr);
454}
455
456/**
457 * atomic64_inc_and_test - increment and test
458 * @ptr: pointer to type atomic64_t
459 *
460 * Atomically increments @ptr by 1
461 * and returns true if the result is zero, or false for all
462 * other cases.
463 */
464static inline int atomic64_inc_and_test(atomic64_t *ptr)
465{
466 return atomic64_sub_and_test(-1, ptr);
467}
468
469/**
470 * atomic64_add_negative - add and test if negative
471 * @delta: integer value to add
472 * @ptr: pointer to type atomic64_t
473 *
474 * Atomically adds @delta to @ptr and returns true
475 * if the result is negative, or false when
476 * result is greater than or equal to zero.
477 */
478static inline int
479atomic64_add_negative(unsigned long long delta, atomic64_t *ptr)
480{
481 long long old_val = atomic64_add_return(delta, ptr);
482
483 return old_val < 0;
484}
485
250#include <asm-generic/atomic.h> 486#include <asm-generic/atomic.h>
251#endif /* _ASM_X86_ATOMIC_32_H */ 487#endif /* _ASM_X86_ATOMIC_32_H */
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index c2e6bedaf258..d750a10ccad6 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -49,7 +49,7 @@ BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
49BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) 49BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
50 50
51#ifdef CONFIG_PERF_COUNTERS 51#ifdef CONFIG_PERF_COUNTERS
52BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR) 52BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR)
53#endif 53#endif
54 54
55#ifdef CONFIG_X86_MCE_P4THERMAL 55#ifdef CONFIG_X86_MCE_P4THERMAL
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 37555e52f980..9ebc5c255032 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -13,6 +13,8 @@ typedef struct {
13 unsigned int irq_spurious_count; 13 unsigned int irq_spurious_count;
14#endif 14#endif
15 unsigned int generic_irqs; /* arch dependent */ 15 unsigned int generic_irqs; /* arch dependent */
16 unsigned int apic_perf_irqs;
17 unsigned int apic_pending_irqs;
16#ifdef CONFIG_SMP 18#ifdef CONFIG_SMP
17 unsigned int irq_resched_count; 19 unsigned int irq_resched_count;
18 unsigned int irq_call_count; 20 unsigned int irq_call_count;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index b762ea49bd70..4b4921d7a28e 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -29,6 +29,8 @@
29extern void apic_timer_interrupt(void); 29extern void apic_timer_interrupt(void);
30extern void generic_interrupt(void); 30extern void generic_interrupt(void);
31extern void error_interrupt(void); 31extern void error_interrupt(void);
32extern void perf_pending_interrupt(void);
33
32extern void spurious_interrupt(void); 34extern void spurious_interrupt(void);
33extern void thermal_interrupt(void); 35extern void thermal_interrupt(void);
34extern void reschedule_interrupt(void); 36extern void reschedule_interrupt(void);
diff --git a/arch/x86/include/asm/intel_arch_perfmon.h b/arch/x86/include/asm/intel_arch_perfmon.h
deleted file mode 100644
index fa0fd068bc2e..000000000000
--- a/arch/x86/include/asm/intel_arch_perfmon.h
+++ /dev/null
@@ -1,31 +0,0 @@
1#ifndef _ASM_X86_INTEL_ARCH_PERFMON_H
2#define _ASM_X86_INTEL_ARCH_PERFMON_H
3
4#define MSR_ARCH_PERFMON_PERFCTR0 0xc1
5#define MSR_ARCH_PERFMON_PERFCTR1 0xc2
6
7#define MSR_ARCH_PERFMON_EVENTSEL0 0x186
8#define MSR_ARCH_PERFMON_EVENTSEL1 0x187
9
10#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22)
11#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
12#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
13#define ARCH_PERFMON_EVENTSEL_USR (1 << 16)
14
15#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c)
16#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
17#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX (0)
18#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
19 (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
20
21union cpuid10_eax {
22 struct {
23 unsigned int version_id:8;
24 unsigned int num_counters:8;
25 unsigned int bit_width:8;
26 unsigned int mask_length:8;
27 } split;
28 unsigned int full;
29};
30
31#endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 3cbd79bbb47c..4492e19f8391 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -107,14 +107,14 @@
107#define LOCAL_TIMER_VECTOR 0xef 107#define LOCAL_TIMER_VECTOR 0xef
108 108
109/* 109/*
110 * Performance monitoring interrupt vector: 110 * Generic system vector for platform specific use
111 */ 111 */
112#define LOCAL_PERF_VECTOR 0xee 112#define GENERIC_INTERRUPT_VECTOR 0xed
113 113
114/* 114/*
115 * Generic system vector for platform specific use 115 * Performance monitoring pending work vector:
116 */ 116 */
117#define GENERIC_INTERRUPT_VECTOR 0xed 117#define LOCAL_PENDING_VECTOR 0xec
118 118
119/* 119/*
120 * First APIC vector available to drivers: (vectors 0x30-0xee) we 120 * First APIC vector available to drivers: (vectors 0x30-0xee) we
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
new file mode 100644
index 000000000000..876ed97147b3
--- /dev/null
+++ b/arch/x86/include/asm/perf_counter.h
@@ -0,0 +1,100 @@
1#ifndef _ASM_X86_PERF_COUNTER_H
2#define _ASM_X86_PERF_COUNTER_H
3
4/*
5 * Performance counter hw details:
6 */
7
8#define X86_PMC_MAX_GENERIC 8
9#define X86_PMC_MAX_FIXED 3
10
11#define X86_PMC_IDX_GENERIC 0
12#define X86_PMC_IDX_FIXED 32
13#define X86_PMC_IDX_MAX 64
14
15#define MSR_ARCH_PERFMON_PERFCTR0 0xc1
16#define MSR_ARCH_PERFMON_PERFCTR1 0xc2
17
18#define MSR_ARCH_PERFMON_EVENTSEL0 0x186
19#define MSR_ARCH_PERFMON_EVENTSEL1 0x187
20
21#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22)
22#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
23#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
24#define ARCH_PERFMON_EVENTSEL_USR (1 << 16)
25
26/*
27 * Includes eventsel and unit mask as well:
28 */
29#define ARCH_PERFMON_EVENT_MASK 0xffff
30
31#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
32#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
33#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0
34#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
35 (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
36
37#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6
38
39/*
40 * Intel "Architectural Performance Monitoring" CPUID
41 * detection/enumeration details:
42 */
43union cpuid10_eax {
44 struct {
45 unsigned int version_id:8;
46 unsigned int num_counters:8;
47 unsigned int bit_width:8;
48 unsigned int mask_length:8;
49 } split;
50 unsigned int full;
51};
52
53union cpuid10_edx {
54 struct {
55 unsigned int num_counters_fixed:4;
56 unsigned int reserved:28;
57 } split;
58 unsigned int full;
59};
60
61
62/*
63 * Fixed-purpose performance counters:
64 */
65
66/*
67 * All 3 fixed-mode PMCs are configured via this single MSR:
68 */
69#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d
70
71/*
72 * The counts are available in three separate MSRs:
73 */
74
75/* Instr_Retired.Any: */
76#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309
77#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0)
78
79/* CPU_CLK_Unhalted.Core: */
80#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a
81#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1)
82
83/* CPU_CLK_Unhalted.Ref: */
84#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
85#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2)
86
87extern void set_perf_counter_pending(void);
88
89#define clear_perf_counter_pending() do { } while (0)
90#define test_perf_counter_pending() (0)
91
92#ifdef CONFIG_PERF_COUNTERS
93extern void init_hw_perf_counters(void);
94extern void perf_counters_lapic_init(void);
95#else
96static inline void init_hw_perf_counters(void) { }
97static inline void perf_counters_lapic_init(void) { }
98#endif
99
100#endif /* _ASM_X86_PERF_COUNTER_H */
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 6e72d74cf8dc..732a30706153 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -340,6 +340,8 @@
340#define __NR_inotify_init1 332 340#define __NR_inotify_init1 332
341#define __NR_preadv 333 341#define __NR_preadv 333
342#define __NR_pwritev 334 342#define __NR_pwritev 334
343#define __NR_rt_tgsigqueueinfo 335
344#define __NR_perf_counter_open 336
343 345
344#ifdef __KERNEL__ 346#ifdef __KERNEL__
345 347
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index f81829462325..900e1617e672 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -657,7 +657,10 @@ __SYSCALL(__NR_inotify_init1, sys_inotify_init1)
657__SYSCALL(__NR_preadv, sys_preadv) 657__SYSCALL(__NR_preadv, sys_preadv)
658#define __NR_pwritev 296 658#define __NR_pwritev 296
659__SYSCALL(__NR_pwritev, sys_pwritev) 659__SYSCALL(__NR_pwritev, sys_pwritev)
660 660#define __NR_rt_tgsigqueueinfo 297
661__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
662#define __NR_perf_counter_open 298
663__SYSCALL(__NR_perf_counter_open, sys_perf_counter_open)
661 664
662#ifndef __NO_STUBS 665#ifndef __NO_STUBS
663#define __ARCH_WANT_OLD_READDIR 666#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index f2870920f246..60df2efd7c80 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -14,6 +14,7 @@
14 * Mikael Pettersson : PM converted to driver model. 14 * Mikael Pettersson : PM converted to driver model.
15 */ 15 */
16 16
17#include <linux/perf_counter.h>
17#include <linux/kernel_stat.h> 18#include <linux/kernel_stat.h>
18#include <linux/mc146818rtc.h> 19#include <linux/mc146818rtc.h>
19#include <linux/acpi_pmtmr.h> 20#include <linux/acpi_pmtmr.h>
@@ -34,6 +35,7 @@
34#include <linux/smp.h> 35#include <linux/smp.h>
35#include <linux/mm.h> 36#include <linux/mm.h>
36 37
38#include <asm/perf_counter.h>
37#include <asm/pgalloc.h> 39#include <asm/pgalloc.h>
38#include <asm/atomic.h> 40#include <asm/atomic.h>
39#include <asm/mpspec.h> 41#include <asm/mpspec.h>
@@ -1133,6 +1135,7 @@ void __cpuinit setup_local_APIC(void)
1133 apic_write(APIC_ESR, 0); 1135 apic_write(APIC_ESR, 0);
1134 } 1136 }
1135#endif 1137#endif
1138 perf_counters_lapic_init();
1136 1139
1137 preempt_disable(); 1140 preempt_disable();
1138 1141
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 4e242f9a06e4..3efcb2b96a15 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -1,5 +1,5 @@
1# 1#
2# Makefile for x86-compatible CPU details and quirks 2# Makefile for x86-compatible CPU details, features and quirks
3# 3#
4 4
5# Don't trace early stages of a secondary CPU boot 5# Don't trace early stages of a secondary CPU boot
@@ -23,11 +23,13 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
23obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o 23obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
24obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o 24obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
25 25
26obj-$(CONFIG_X86_MCE) += mcheck/ 26obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
27obj-$(CONFIG_MTRR) += mtrr/
28obj-$(CONFIG_CPU_FREQ) += cpufreq/
29 27
30obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o 28obj-$(CONFIG_X86_MCE) += mcheck/
29obj-$(CONFIG_MTRR) += mtrr/
30obj-$(CONFIG_CPU_FREQ) += cpufreq/
31
32obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
31 33
32quiet_cmd_mkcapflags = MKCAP $@ 34quiet_cmd_mkcapflags = MKCAP $@
33 cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ 35 cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 77848d9fca68..f60409081cb0 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -13,6 +13,7 @@
13#include <linux/io.h> 13#include <linux/io.h>
14 14
15#include <asm/stackprotector.h> 15#include <asm/stackprotector.h>
16#include <asm/perf_counter.h>
16#include <asm/mmu_context.h> 17#include <asm/mmu_context.h>
17#include <asm/hypervisor.h> 18#include <asm/hypervisor.h>
18#include <asm/processor.h> 19#include <asm/processor.h>
@@ -861,6 +862,7 @@ void __init identify_boot_cpu(void)
861#else 862#else
862 vgetcpu_set_mode(); 863 vgetcpu_set_mode();
863#endif 864#endif
865 init_hw_perf_counters();
864} 866}
865 867
866void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) 868void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
new file mode 100644
index 000000000000..e86679fa5215
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -0,0 +1,1417 @@
1/*
2 * Performance counter x86 architecture code
3 *
4 * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
5 * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
6 * Copyright (C) 2009 Jaswinder Singh Rajput
7 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
8 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
9 *
10 * For licencing details see kernel-base/COPYING
11 */
12
13#include <linux/perf_counter.h>
14#include <linux/capability.h>
15#include <linux/notifier.h>
16#include <linux/hardirq.h>
17#include <linux/kprobes.h>
18#include <linux/module.h>
19#include <linux/kdebug.h>
20#include <linux/sched.h>
21#include <linux/uaccess.h>
22
23#include <asm/apic.h>
24#include <asm/stacktrace.h>
25#include <asm/nmi.h>
26
27static u64 perf_counter_mask __read_mostly;
28
29struct cpu_hw_counters {
30 struct perf_counter *counters[X86_PMC_IDX_MAX];
31 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
32 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
33 unsigned long interrupts;
34 int enabled;
35};
36
37/*
38 * struct x86_pmu - generic x86 pmu
39 */
40struct x86_pmu {
41 const char *name;
42 int version;
43 int (*handle_irq)(struct pt_regs *);
44 void (*disable_all)(void);
45 void (*enable_all)(void);
46 void (*enable)(struct hw_perf_counter *, int);
47 void (*disable)(struct hw_perf_counter *, int);
48 unsigned eventsel;
49 unsigned perfctr;
50 u64 (*event_map)(int);
51 u64 (*raw_event)(u64);
52 int max_events;
53 int num_counters;
54 int num_counters_fixed;
55 int counter_bits;
56 u64 counter_mask;
57 u64 max_period;
58 u64 intel_ctrl;
59};
60
61static struct x86_pmu x86_pmu __read_mostly;
62
63static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
64 .enabled = 1,
65};
66
67/*
68 * Intel PerfMon v3. Used on Core2 and later.
69 */
70static const u64 intel_perfmon_event_map[] =
71{
72 [PERF_COUNT_CPU_CYCLES] = 0x003c,
73 [PERF_COUNT_INSTRUCTIONS] = 0x00c0,
74 [PERF_COUNT_CACHE_REFERENCES] = 0x4f2e,
75 [PERF_COUNT_CACHE_MISSES] = 0x412e,
76 [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x00c4,
77 [PERF_COUNT_BRANCH_MISSES] = 0x00c5,
78 [PERF_COUNT_BUS_CYCLES] = 0x013c,
79};
80
81static u64 intel_pmu_event_map(int event)
82{
83 return intel_perfmon_event_map[event];
84}
85
86/*
87 * Generalized hw caching related event table, filled
88 * in on a per model basis. A value of 0 means
89 * 'not supported', -1 means 'event makes no sense on
90 * this CPU', any other value means the raw event
91 * ID.
92 */
93
94#define C(x) PERF_COUNT_HW_CACHE_##x
95
96static u64 __read_mostly hw_cache_event_ids
97 [PERF_COUNT_HW_CACHE_MAX]
98 [PERF_COUNT_HW_CACHE_OP_MAX]
99 [PERF_COUNT_HW_CACHE_RESULT_MAX];
100
101static const u64 nehalem_hw_cache_event_ids
102 [PERF_COUNT_HW_CACHE_MAX]
103 [PERF_COUNT_HW_CACHE_OP_MAX]
104 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
105{
106 [ C(L1D) ] = {
107 [ C(OP_READ) ] = {
108 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
109 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
110 },
111 [ C(OP_WRITE) ] = {
112 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
113 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
114 },
115 [ C(OP_PREFETCH) ] = {
116 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
117 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
118 },
119 },
120 [ C(L1I ) ] = {
121 [ C(OP_READ) ] = {
122 [ C(RESULT_ACCESS) ] = 0x0480, /* L1I.READS */
123 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
124 },
125 [ C(OP_WRITE) ] = {
126 [ C(RESULT_ACCESS) ] = -1,
127 [ C(RESULT_MISS) ] = -1,
128 },
129 [ C(OP_PREFETCH) ] = {
130 [ C(RESULT_ACCESS) ] = 0x0,
131 [ C(RESULT_MISS) ] = 0x0,
132 },
133 },
134 [ C(L2 ) ] = {
135 [ C(OP_READ) ] = {
136 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
137 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
138 },
139 [ C(OP_WRITE) ] = {
140 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
141 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
142 },
143 [ C(OP_PREFETCH) ] = {
144 [ C(RESULT_ACCESS) ] = 0xc024, /* L2_RQSTS.PREFETCHES */
145 [ C(RESULT_MISS) ] = 0x8024, /* L2_RQSTS.PREFETCH_MISS */
146 },
147 },
148 [ C(DTLB) ] = {
149 [ C(OP_READ) ] = {
150 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
151 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
152 },
153 [ C(OP_WRITE) ] = {
154 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
155 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
156 },
157 [ C(OP_PREFETCH) ] = {
158 [ C(RESULT_ACCESS) ] = 0x0,
159 [ C(RESULT_MISS) ] = 0x0,
160 },
161 },
162 [ C(ITLB) ] = {
163 [ C(OP_READ) ] = {
164 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
165 [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISS_RETIRED */
166 },
167 [ C(OP_WRITE) ] = {
168 [ C(RESULT_ACCESS) ] = -1,
169 [ C(RESULT_MISS) ] = -1,
170 },
171 [ C(OP_PREFETCH) ] = {
172 [ C(RESULT_ACCESS) ] = -1,
173 [ C(RESULT_MISS) ] = -1,
174 },
175 },
176 [ C(BPU ) ] = {
177 [ C(OP_READ) ] = {
178 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
179 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
180 },
181 [ C(OP_WRITE) ] = {
182 [ C(RESULT_ACCESS) ] = -1,
183 [ C(RESULT_MISS) ] = -1,
184 },
185 [ C(OP_PREFETCH) ] = {
186 [ C(RESULT_ACCESS) ] = -1,
187 [ C(RESULT_MISS) ] = -1,
188 },
189 },
190};
191
192static const u64 core2_hw_cache_event_ids
193 [PERF_COUNT_HW_CACHE_MAX]
194 [PERF_COUNT_HW_CACHE_OP_MAX]
195 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
196{
197 /* To be filled in */
198};
199
200static const u64 atom_hw_cache_event_ids
201 [PERF_COUNT_HW_CACHE_MAX]
202 [PERF_COUNT_HW_CACHE_OP_MAX]
203 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
204{
205 /* To be filled in */
206};
207
208static u64 intel_pmu_raw_event(u64 event)
209{
210#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
211#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
212#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
213#define CORE_EVNTSEL_INV_MASK 0x00800000ULL
214#define CORE_EVNTSEL_COUNTER_MASK 0xFF000000ULL
215
216#define CORE_EVNTSEL_MASK \
217 (CORE_EVNTSEL_EVENT_MASK | \
218 CORE_EVNTSEL_UNIT_MASK | \
219 CORE_EVNTSEL_EDGE_MASK | \
220 CORE_EVNTSEL_INV_MASK | \
221 CORE_EVNTSEL_COUNTER_MASK)
222
223 return event & CORE_EVNTSEL_MASK;
224}
225
226/*
227 * AMD Performance Monitor K7 and later.
228 */
229static const u64 amd_perfmon_event_map[] =
230{
231 [PERF_COUNT_CPU_CYCLES] = 0x0076,
232 [PERF_COUNT_INSTRUCTIONS] = 0x00c0,
233 [PERF_COUNT_CACHE_REFERENCES] = 0x0080,
234 [PERF_COUNT_CACHE_MISSES] = 0x0081,
235 [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x00c4,
236 [PERF_COUNT_BRANCH_MISSES] = 0x00c5,
237};
238
239static u64 amd_pmu_event_map(int event)
240{
241 return amd_perfmon_event_map[event];
242}
243
244static u64 amd_pmu_raw_event(u64 event)
245{
246#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL
247#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL
248#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL
249#define K7_EVNTSEL_INV_MASK 0x000800000ULL
250#define K7_EVNTSEL_COUNTER_MASK 0x0FF000000ULL
251
252#define K7_EVNTSEL_MASK \
253 (K7_EVNTSEL_EVENT_MASK | \
254 K7_EVNTSEL_UNIT_MASK | \
255 K7_EVNTSEL_EDGE_MASK | \
256 K7_EVNTSEL_INV_MASK | \
257 K7_EVNTSEL_COUNTER_MASK)
258
259 return event & K7_EVNTSEL_MASK;
260}
261
262/*
263 * Propagate counter elapsed time into the generic counter.
264 * Can only be executed on the CPU where the counter is active.
265 * Returns the delta events processed.
266 */
267static u64
268x86_perf_counter_update(struct perf_counter *counter,
269 struct hw_perf_counter *hwc, int idx)
270{
271 int shift = 64 - x86_pmu.counter_bits;
272 u64 prev_raw_count, new_raw_count;
273 s64 delta;
274
275 /*
276 * Careful: an NMI might modify the previous counter value.
277 *
278 * Our tactic to handle this is to first atomically read and
279 * exchange a new raw count - then add that new-prev delta
280 * count to the generic counter atomically:
281 */
282again:
283 prev_raw_count = atomic64_read(&hwc->prev_count);
284 rdmsrl(hwc->counter_base + idx, new_raw_count);
285
286 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
287 new_raw_count) != prev_raw_count)
288 goto again;
289
290 /*
291 * Now we have the new raw value and have updated the prev
292 * timestamp already. We can now calculate the elapsed delta
293 * (counter-)time and add that to the generic counter.
294 *
295 * Careful, not all hw sign-extends above the physical width
296 * of the count.
297 */
298 delta = (new_raw_count << shift) - (prev_raw_count << shift);
299 delta >>= shift;
300
301 atomic64_add(delta, &counter->count);
302 atomic64_sub(delta, &hwc->period_left);
303
304 return new_raw_count;
305}
306
307static atomic_t active_counters;
308static DEFINE_MUTEX(pmc_reserve_mutex);
309
310static bool reserve_pmc_hardware(void)
311{
312 int i;
313
314 if (nmi_watchdog == NMI_LOCAL_APIC)
315 disable_lapic_nmi_watchdog();
316
317 for (i = 0; i < x86_pmu.num_counters; i++) {
318 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
319 goto perfctr_fail;
320 }
321
322 for (i = 0; i < x86_pmu.num_counters; i++) {
323 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
324 goto eventsel_fail;
325 }
326
327 return true;
328
329eventsel_fail:
330 for (i--; i >= 0; i--)
331 release_evntsel_nmi(x86_pmu.eventsel + i);
332
333 i = x86_pmu.num_counters;
334
335perfctr_fail:
336 for (i--; i >= 0; i--)
337 release_perfctr_nmi(x86_pmu.perfctr + i);
338
339 if (nmi_watchdog == NMI_LOCAL_APIC)
340 enable_lapic_nmi_watchdog();
341
342 return false;
343}
344
345static void release_pmc_hardware(void)
346{
347 int i;
348
349 for (i = 0; i < x86_pmu.num_counters; i++) {
350 release_perfctr_nmi(x86_pmu.perfctr + i);
351 release_evntsel_nmi(x86_pmu.eventsel + i);
352 }
353
354 if (nmi_watchdog == NMI_LOCAL_APIC)
355 enable_lapic_nmi_watchdog();
356}
357
358static void hw_perf_counter_destroy(struct perf_counter *counter)
359{
360 if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) {
361 release_pmc_hardware();
362 mutex_unlock(&pmc_reserve_mutex);
363 }
364}
365
366static inline int x86_pmu_initialized(void)
367{
368 return x86_pmu.handle_irq != NULL;
369}
370
371static inline int
372set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
373{
374 unsigned int cache_type, cache_op, cache_result;
375 u64 config, val;
376
377 config = attr->config;
378
379 cache_type = (config >> 0) & 0xff;
380 if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
381 return -EINVAL;
382
383 cache_op = (config >> 8) & 0xff;
384 if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
385 return -EINVAL;
386
387 cache_result = (config >> 16) & 0xff;
388 if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
389 return -EINVAL;
390
391 val = hw_cache_event_ids[cache_type][cache_op][cache_result];
392
393 if (val == 0)
394 return -ENOENT;
395
396 if (val == -1)
397 return -EINVAL;
398
399 hwc->config |= val;
400
401 return 0;
402}
403
404/*
405 * Setup the hardware configuration for a given attr_type
406 */
407static int __hw_perf_counter_init(struct perf_counter *counter)
408{
409 struct perf_counter_attr *attr = &counter->attr;
410 struct hw_perf_counter *hwc = &counter->hw;
411 int err;
412
413 if (!x86_pmu_initialized())
414 return -ENODEV;
415
416 err = 0;
417 if (!atomic_inc_not_zero(&active_counters)) {
418 mutex_lock(&pmc_reserve_mutex);
419 if (atomic_read(&active_counters) == 0 && !reserve_pmc_hardware())
420 err = -EBUSY;
421 else
422 atomic_inc(&active_counters);
423 mutex_unlock(&pmc_reserve_mutex);
424 }
425 if (err)
426 return err;
427
428 /*
429 * Generate PMC IRQs:
430 * (keep 'enabled' bit clear for now)
431 */
432 hwc->config = ARCH_PERFMON_EVENTSEL_INT;
433
434 /*
435 * Count user and OS events unless requested not to.
436 */
437 if (!attr->exclude_user)
438 hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
439 if (!attr->exclude_kernel)
440 hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
441
442 if (!hwc->sample_period)
443 hwc->sample_period = x86_pmu.max_period;
444
445 atomic64_set(&hwc->period_left, hwc->sample_period);
446 counter->destroy = hw_perf_counter_destroy;
447
448 /*
449 * Raw event type provide the config in the event structure
450 */
451 if (attr->type == PERF_TYPE_RAW) {
452 hwc->config |= x86_pmu.raw_event(attr->config);
453 return 0;
454 }
455
456 if (attr->type == PERF_TYPE_HW_CACHE)
457 return set_ext_hw_attr(hwc, attr);
458
459 if (attr->config >= x86_pmu.max_events)
460 return -EINVAL;
461 /*
462 * The generic map:
463 */
464 hwc->config |= x86_pmu.event_map(attr->config);
465
466 return 0;
467}
468
469static void intel_pmu_disable_all(void)
470{
471 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
472}
473
474static void amd_pmu_disable_all(void)
475{
476 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
477 int idx;
478
479 if (!cpuc->enabled)
480 return;
481
482 cpuc->enabled = 0;
483 /*
484 * ensure we write the disable before we start disabling the
485 * counters proper, so that amd_pmu_enable_counter() does the
486 * right thing.
487 */
488 barrier();
489
490 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
491 u64 val;
492
493 if (!test_bit(idx, cpuc->active_mask))
494 continue;
495 rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
496 if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
497 continue;
498 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
499 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
500 }
501}
502
503void hw_perf_disable(void)
504{
505 if (!x86_pmu_initialized())
506 return;
507 return x86_pmu.disable_all();
508}
509
510static void intel_pmu_enable_all(void)
511{
512 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
513}
514
515static void amd_pmu_enable_all(void)
516{
517 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
518 int idx;
519
520 if (cpuc->enabled)
521 return;
522
523 cpuc->enabled = 1;
524 barrier();
525
526 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
527 u64 val;
528
529 if (!test_bit(idx, cpuc->active_mask))
530 continue;
531 rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
532 if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
533 continue;
534 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
535 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
536 }
537}
538
539void hw_perf_enable(void)
540{
541 if (!x86_pmu_initialized())
542 return;
543 x86_pmu.enable_all();
544}
545
546static inline u64 intel_pmu_get_status(void)
547{
548 u64 status;
549
550 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
551
552 return status;
553}
554
555static inline void intel_pmu_ack_status(u64 ack)
556{
557 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
558}
559
560static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
561{
562 int err;
563 err = checking_wrmsrl(hwc->config_base + idx,
564 hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
565}
566
567static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
568{
569 int err;
570 err = checking_wrmsrl(hwc->config_base + idx,
571 hwc->config);
572}
573
574static inline void
575intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
576{
577 int idx = __idx - X86_PMC_IDX_FIXED;
578 u64 ctrl_val, mask;
579 int err;
580
581 mask = 0xfULL << (idx * 4);
582
583 rdmsrl(hwc->config_base, ctrl_val);
584 ctrl_val &= ~mask;
585 err = checking_wrmsrl(hwc->config_base, ctrl_val);
586}
587
588static inline void
589intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
590{
591 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
592 intel_pmu_disable_fixed(hwc, idx);
593 return;
594 }
595
596 x86_pmu_disable_counter(hwc, idx);
597}
598
/* AMD ->disable hook: thin wrapper around the common x86 helper. */
static inline void
amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
{
	x86_pmu_disable_counter(hwc, idx);
}
604
605static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
606
607/*
608 * Set the next IRQ period, based on the hwc->period_left value.
609 * To be called with the counter disabled in hw:
610 */
611static int
612x86_perf_counter_set_period(struct perf_counter *counter,
613 struct hw_perf_counter *hwc, int idx)
614{
615 s64 left = atomic64_read(&hwc->period_left);
616 s64 period = hwc->sample_period;
617 int err, ret = 0;
618
619 /*
620 * If we are way outside a reasoable range then just skip forward:
621 */
622 if (unlikely(left <= -period)) {
623 left = period;
624 atomic64_set(&hwc->period_left, left);
625 ret = 1;
626 }
627
628 if (unlikely(left <= 0)) {
629 left += period;
630 atomic64_set(&hwc->period_left, left);
631 ret = 1;
632 }
633 /*
634 * Quirk: certain CPUs dont like it if just 1 event is left:
635 */
636 if (unlikely(left < 2))
637 left = 2;
638
639 if (left > x86_pmu.max_period)
640 left = x86_pmu.max_period;
641
642 per_cpu(prev_left[idx], smp_processor_id()) = left;
643
644 /*
645 * The hw counter starts counting from this counter offset,
646 * mark it to be able to extra future deltas:
647 */
648 atomic64_set(&hwc->prev_count, (u64)-left);
649
650 err = checking_wrmsrl(hwc->counter_base + idx,
651 (u64)(-left) & x86_pmu.counter_mask);
652
653 return ret;
654}
655
656static inline void
657intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
658{
659 int idx = __idx - X86_PMC_IDX_FIXED;
660 u64 ctrl_val, bits, mask;
661 int err;
662
663 /*
664 * Enable IRQ generation (0x8),
665 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
666 * if requested:
667 */
668 bits = 0x8ULL;
669 if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
670 bits |= 0x2;
671 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
672 bits |= 0x1;
673 bits <<= (idx * 4);
674 mask = 0xfULL << (idx * 4);
675
676 rdmsrl(hwc->config_base, ctrl_val);
677 ctrl_val &= ~mask;
678 ctrl_val |= bits;
679 err = checking_wrmsrl(hwc->config_base, ctrl_val);
680}
681
682static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
683{
684 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
685 intel_pmu_enable_fixed(hwc, idx);
686 return;
687 }
688
689 x86_pmu_enable_counter(hwc, idx);
690}
691
692static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
693{
694 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
695
696 if (cpuc->enabled)
697 x86_pmu_enable_counter(hwc, idx);
698 else
699 x86_pmu_disable_counter(hwc, idx);
700}
701
702static int
703fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
704{
705 unsigned int event;
706
707 if (!x86_pmu.num_counters_fixed)
708 return -1;
709
710 event = hwc->config & ARCH_PERFMON_EVENT_MASK;
711
712 if (unlikely(event == x86_pmu.event_map(PERF_COUNT_INSTRUCTIONS)))
713 return X86_PMC_IDX_FIXED_INSTRUCTIONS;
714 if (unlikely(event == x86_pmu.event_map(PERF_COUNT_CPU_CYCLES)))
715 return X86_PMC_IDX_FIXED_CPU_CYCLES;
716 if (unlikely(event == x86_pmu.event_map(PERF_COUNT_BUS_CYCLES)))
717 return X86_PMC_IDX_FIXED_BUS_CYCLES;
718
719 return -1;
720}
721
722/*
723 * Find a PMC slot for the freshly enabled / scheduled in counter:
724 */
725static int x86_pmu_enable(struct perf_counter *counter)
726{
727 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
728 struct hw_perf_counter *hwc = &counter->hw;
729 int idx;
730
731 idx = fixed_mode_idx(counter, hwc);
732 if (idx >= 0) {
733 /*
734 * Try to get the fixed counter, if that is already taken
735 * then try to get a generic counter:
736 */
737 if (test_and_set_bit(idx, cpuc->used_mask))
738 goto try_generic;
739
740 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
741 /*
742 * We set it so that counter_base + idx in wrmsr/rdmsr maps to
743 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
744 */
745 hwc->counter_base =
746 MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
747 hwc->idx = idx;
748 } else {
749 idx = hwc->idx;
750 /* Try to get the previous generic counter again */
751 if (test_and_set_bit(idx, cpuc->used_mask)) {
752try_generic:
753 idx = find_first_zero_bit(cpuc->used_mask,
754 x86_pmu.num_counters);
755 if (idx == x86_pmu.num_counters)
756 return -EAGAIN;
757
758 set_bit(idx, cpuc->used_mask);
759 hwc->idx = idx;
760 }
761 hwc->config_base = x86_pmu.eventsel;
762 hwc->counter_base = x86_pmu.perfctr;
763 }
764
765 perf_counters_lapic_init();
766
767 x86_pmu.disable(hwc, idx);
768
769 cpuc->counters[idx] = counter;
770 set_bit(idx, cpuc->active_mask);
771
772 x86_perf_counter_set_period(counter, hwc, idx);
773 x86_pmu.enable(hwc, idx);
774
775 return 0;
776}
777
778static void x86_pmu_unthrottle(struct perf_counter *counter)
779{
780 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
781 struct hw_perf_counter *hwc = &counter->hw;
782
783 if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
784 cpuc->counters[hwc->idx] != counter))
785 return;
786
787 x86_pmu.enable(hwc, hwc->idx);
788}
789
790void perf_counter_print_debug(void)
791{
792 u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
793 struct cpu_hw_counters *cpuc;
794 unsigned long flags;
795 int cpu, idx;
796
797 if (!x86_pmu.num_counters)
798 return;
799
800 local_irq_save(flags);
801
802 cpu = smp_processor_id();
803 cpuc = &per_cpu(cpu_hw_counters, cpu);
804
805 if (x86_pmu.version >= 2) {
806 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
807 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
808 rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
809 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
810
811 pr_info("\n");
812 pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
813 pr_info("CPU#%d: status: %016llx\n", cpu, status);
814 pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
815 pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
816 }
817 pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask);
818
819 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
820 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
821 rdmsrl(x86_pmu.perfctr + idx, pmc_count);
822
823 prev_left = per_cpu(prev_left[idx], cpu);
824
825 pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
826 cpu, idx, pmc_ctrl);
827 pr_info("CPU#%d: gen-PMC%d count: %016llx\n",
828 cpu, idx, pmc_count);
829 pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
830 cpu, idx, prev_left);
831 }
832 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
833 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
834
835 pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
836 cpu, idx, pmc_count);
837 }
838 local_irq_restore(flags);
839}
840
841static void x86_pmu_disable(struct perf_counter *counter)
842{
843 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
844 struct hw_perf_counter *hwc = &counter->hw;
845 int idx = hwc->idx;
846
847 /*
848 * Must be done before we disable, otherwise the nmi handler
849 * could reenable again:
850 */
851 clear_bit(idx, cpuc->active_mask);
852 x86_pmu.disable(hwc, idx);
853
854 /*
855 * Make sure the cleared pointer becomes visible before we
856 * (potentially) free the counter:
857 */
858 barrier();
859
860 /*
861 * Drain the remaining delta count out of a counter
862 * that we are disabling:
863 */
864 x86_perf_counter_update(counter, hwc, idx);
865 cpuc->counters[idx] = NULL;
866 clear_bit(idx, cpuc->used_mask);
867}
868
869/*
870 * Save and restart an expired counter. Called by NMI contexts,
871 * so it has to be careful about preempting normal counter ops:
872 */
873static int intel_pmu_save_and_restart(struct perf_counter *counter)
874{
875 struct hw_perf_counter *hwc = &counter->hw;
876 int idx = hwc->idx;
877 int ret;
878
879 x86_perf_counter_update(counter, hwc, idx);
880 ret = x86_perf_counter_set_period(counter, hwc, idx);
881
882 if (counter->state == PERF_COUNTER_STATE_ACTIVE)
883 intel_pmu_enable_counter(hwc, idx);
884
885 return ret;
886}
887
888static void intel_pmu_reset(void)
889{
890 unsigned long flags;
891 int idx;
892
893 if (!x86_pmu.num_counters)
894 return;
895
896 local_irq_save(flags);
897
898 printk("clearing PMU state on CPU#%d\n", smp_processor_id());
899
900 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
901 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
902 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
903 }
904 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
905 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
906 }
907
908 local_irq_restore(flags);
909}
910
911
912/*
913 * This handler is triggered by the local APIC, so the APIC IRQ handling
914 * rules apply:
915 */
916static int intel_pmu_handle_irq(struct pt_regs *regs)
917{
918 struct cpu_hw_counters *cpuc;
919 struct cpu_hw_counters;
920 int bit, cpu, loops;
921 u64 ack, status;
922
923 cpu = smp_processor_id();
924 cpuc = &per_cpu(cpu_hw_counters, cpu);
925
926 perf_disable();
927 status = intel_pmu_get_status();
928 if (!status) {
929 perf_enable();
930 return 0;
931 }
932
933 loops = 0;
934again:
935 if (++loops > 100) {
936 WARN_ONCE(1, "perfcounters: irq loop stuck!\n");
937 perf_counter_print_debug();
938 intel_pmu_reset();
939 perf_enable();
940 return 1;
941 }
942
943 inc_irq_stat(apic_perf_irqs);
944 ack = status;
945 for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
946 struct perf_counter *counter = cpuc->counters[bit];
947
948 clear_bit(bit, (unsigned long *) &status);
949 if (!test_bit(bit, cpuc->active_mask))
950 continue;
951
952 if (!intel_pmu_save_and_restart(counter))
953 continue;
954
955 if (perf_counter_overflow(counter, 1, regs, 0))
956 intel_pmu_disable_counter(&counter->hw, bit);
957 }
958
959 intel_pmu_ack_status(ack);
960
961 /*
962 * Repeat if there is more work to be done:
963 */
964 status = intel_pmu_get_status();
965 if (status)
966 goto again;
967
968 perf_enable();
969
970 return 1;
971}
972
973static int amd_pmu_handle_irq(struct pt_regs *regs)
974{
975 int cpu, idx, handled = 0;
976 struct cpu_hw_counters *cpuc;
977 struct perf_counter *counter;
978 struct hw_perf_counter *hwc;
979 u64 val;
980
981 cpu = smp_processor_id();
982 cpuc = &per_cpu(cpu_hw_counters, cpu);
983
984 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
985 if (!test_bit(idx, cpuc->active_mask))
986 continue;
987
988 counter = cpuc->counters[idx];
989 hwc = &counter->hw;
990
991 val = x86_perf_counter_update(counter, hwc, idx);
992 if (val & (1ULL << (x86_pmu.counter_bits - 1)))
993 continue;
994
995 /* counter overflow */
996 handled = 1;
997 inc_irq_stat(apic_perf_irqs);
998 if (!x86_perf_counter_set_period(counter, hwc, idx))
999 continue;
1000
1001 if (perf_counter_overflow(counter, 1, regs, 0))
1002 amd_pmu_disable_counter(hwc, idx);
1003 }
1004
1005 return handled;
1006}
1007
1008void smp_perf_pending_interrupt(struct pt_regs *regs)
1009{
1010 irq_enter();
1011 ack_APIC_irq();
1012 inc_irq_stat(apic_pending_irqs);
1013 perf_counter_do_pending();
1014 irq_exit();
1015}
1016
1017void set_perf_counter_pending(void)
1018{
1019 apic->send_IPI_self(LOCAL_PENDING_VECTOR);
1020}
1021
1022void perf_counters_lapic_init(void)
1023{
1024 if (!x86_pmu_initialized())
1025 return;
1026
1027 /*
1028 * Always use NMI for PMU
1029 */
1030 apic_write(APIC_LVTPC, APIC_DM_NMI);
1031}
1032
1033static int __kprobes
1034perf_counter_nmi_handler(struct notifier_block *self,
1035 unsigned long cmd, void *__args)
1036{
1037 struct die_args *args = __args;
1038 struct pt_regs *regs;
1039
1040 if (!atomic_read(&active_counters))
1041 return NOTIFY_DONE;
1042
1043 switch (cmd) {
1044 case DIE_NMI:
1045 case DIE_NMI_IPI:
1046 break;
1047
1048 default:
1049 return NOTIFY_DONE;
1050 }
1051
1052 regs = args->regs;
1053
1054 apic_write(APIC_LVTPC, APIC_DM_NMI);
1055 /*
1056 * Can't rely on the handled return value to say it was our NMI, two
1057 * counters could trigger 'simultaneously' raising two back-to-back NMIs.
1058 *
1059 * If the first NMI handles both, the latter will be empty and daze
1060 * the CPU.
1061 */
1062 x86_pmu.handle_irq(regs);
1063
1064 return NOTIFY_STOP;
1065}
1066
1067static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
1068 .notifier_call = perf_counter_nmi_handler,
1069 .next = NULL,
1070 .priority = 1
1071};
1072
1073static struct x86_pmu intel_pmu = {
1074 .name = "Intel",
1075 .handle_irq = intel_pmu_handle_irq,
1076 .disable_all = intel_pmu_disable_all,
1077 .enable_all = intel_pmu_enable_all,
1078 .enable = intel_pmu_enable_counter,
1079 .disable = intel_pmu_disable_counter,
1080 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
1081 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
1082 .event_map = intel_pmu_event_map,
1083 .raw_event = intel_pmu_raw_event,
1084 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
1085 /*
1086 * Intel PMCs cannot be accessed sanely above 32 bit width,
1087 * so we install an artificial 1<<31 period regardless of
1088 * the generic counter period:
1089 */
1090 .max_period = (1ULL << 31) - 1,
1091};
1092
1093static struct x86_pmu amd_pmu = {
1094 .name = "AMD",
1095 .handle_irq = amd_pmu_handle_irq,
1096 .disable_all = amd_pmu_disable_all,
1097 .enable_all = amd_pmu_enable_all,
1098 .enable = amd_pmu_enable_counter,
1099 .disable = amd_pmu_disable_counter,
1100 .eventsel = MSR_K7_EVNTSEL0,
1101 .perfctr = MSR_K7_PERFCTR0,
1102 .event_map = amd_pmu_event_map,
1103 .raw_event = amd_pmu_raw_event,
1104 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
1105 .num_counters = 4,
1106 .counter_bits = 48,
1107 .counter_mask = (1ULL << 48) - 1,
1108 /* use highest bit to detect overflow */
1109 .max_period = (1ULL << 47) - 1,
1110};
1111
1112static int intel_pmu_init(void)
1113{
1114 union cpuid10_edx edx;
1115 union cpuid10_eax eax;
1116 unsigned int unused;
1117 unsigned int ebx;
1118 int version;
1119
1120 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
1121 return -ENODEV;
1122
1123 /*
1124 * Check whether the Architectural PerfMon supports
1125 * Branch Misses Retired Event or not.
1126 */
1127 cpuid(10, &eax.full, &ebx, &unused, &edx.full);
1128 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
1129 return -ENODEV;
1130
1131 version = eax.split.version_id;
1132 if (version < 2)
1133 return -ENODEV;
1134
1135 x86_pmu = intel_pmu;
1136 x86_pmu.version = version;
1137 x86_pmu.num_counters = eax.split.num_counters;
1138
1139 /*
1140 * Quirk: v2 perfmon does not report fixed-purpose counters, so
1141 * assume at least 3 counters:
1142 */
1143 x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
1144
1145 x86_pmu.counter_bits = eax.split.bit_width;
1146 x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1;
1147
1148 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
1149
1150 /*
1151 * Nehalem:
1152 */
1153 switch (boot_cpu_data.x86_model) {
1154 case 17:
1155 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
1156 sizeof(u64)*PERF_COUNT_HW_CACHE_MAX*
1157 PERF_COUNT_HW_CACHE_OP_MAX*PERF_COUNT_HW_CACHE_RESULT_MAX);
1158
1159 pr_info("... installed Core2 event tables\n");
1160 break;
1161 default:
1162 case 26:
1163 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
1164 sizeof(u64)*PERF_COUNT_HW_CACHE_MAX*
1165 PERF_COUNT_HW_CACHE_OP_MAX*PERF_COUNT_HW_CACHE_RESULT_MAX);
1166
1167 pr_info("... installed Nehalem/Corei7 event tables\n");
1168 break;
1169 case 28:
1170 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
1171 sizeof(u64)*PERF_COUNT_HW_CACHE_MAX*
1172 PERF_COUNT_HW_CACHE_OP_MAX*PERF_COUNT_HW_CACHE_RESULT_MAX);
1173
1174 pr_info("... installed Atom event tables\n");
1175 break;
1176 }
1177 return 0;
1178}
1179
1180static int amd_pmu_init(void)
1181{
1182 x86_pmu = amd_pmu;
1183 return 0;
1184}
1185
1186void __init init_hw_perf_counters(void)
1187{
1188 int err;
1189
1190 switch (boot_cpu_data.x86_vendor) {
1191 case X86_VENDOR_INTEL:
1192 err = intel_pmu_init();
1193 break;
1194 case X86_VENDOR_AMD:
1195 err = amd_pmu_init();
1196 break;
1197 default:
1198 return;
1199 }
1200 if (err != 0)
1201 return;
1202
1203 pr_info("%s Performance Monitoring support detected.\n", x86_pmu.name);
1204 pr_info("... version: %d\n", x86_pmu.version);
1205 pr_info("... bit width: %d\n", x86_pmu.counter_bits);
1206
1207 pr_info("... num counters: %d\n", x86_pmu.num_counters);
1208 if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
1209 x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
1210 WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
1211 x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
1212 }
1213 perf_counter_mask = (1 << x86_pmu.num_counters) - 1;
1214 perf_max_counters = x86_pmu.num_counters;
1215
1216 pr_info("... value mask: %016Lx\n", x86_pmu.counter_mask);
1217 pr_info("... max period: %016Lx\n", x86_pmu.max_period);
1218
1219 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
1220 x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
1221 WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
1222 x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
1223 }
1224 pr_info("... fixed counters: %d\n", x86_pmu.num_counters_fixed);
1225
1226 perf_counter_mask |=
1227 ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
1228
1229 pr_info("... counter mask: %016Lx\n", perf_counter_mask);
1230
1231 perf_counters_lapic_init();
1232 register_die_notifier(&perf_counter_nmi_notifier);
1233}
1234
1235static inline void x86_pmu_read(struct perf_counter *counter)
1236{
1237 x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
1238}
1239
1240static const struct pmu pmu = {
1241 .enable = x86_pmu_enable,
1242 .disable = x86_pmu_disable,
1243 .read = x86_pmu_read,
1244 .unthrottle = x86_pmu_unthrottle,
1245};
1246
1247const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
1248{
1249 int err;
1250
1251 err = __hw_perf_counter_init(counter);
1252 if (err)
1253 return ERR_PTR(err);
1254
1255 return &pmu;
1256}
1257
1258/*
1259 * callchain support
1260 */
1261
1262static inline
1263void callchain_store(struct perf_callchain_entry *entry, unsigned long ip)
1264{
1265 if (entry->nr < MAX_STACK_DEPTH)
1266 entry->ip[entry->nr++] = ip;
1267}
1268
1269static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry);
1270static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry);
1271
1272
/* stacktrace_ops ->warning_symbol callback: intentionally does nothing. */
static void
backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
	/* Ignore warnings */
}
1278
/* stacktrace_ops ->warning callback: intentionally does nothing. */
static void backtrace_warning(void *data, char *msg)
{
	/* Ignore warnings */
}
1283
/* stacktrace_ops ->stack callback: always returns -1. */
static int backtrace_stack(void *data, char *name)
{
	/* Don't bother with IRQ stacks for now */
	return -1;
}
1289
/*
 * stacktrace_ops ->address callback: record addr in the callchain
 * entry passed as data, but only when it is flagged reliable.
 */
static void backtrace_address(void *data, unsigned long addr, int reliable)
{
	struct perf_callchain_entry *entry = data;

	if (!reliable)
		return;

	callchain_store(entry, addr);
}
1297
1298static const struct stacktrace_ops backtrace_ops = {
1299 .warning = backtrace_warning,
1300 .warning_symbol = backtrace_warning_symbol,
1301 .stack = backtrace_stack,
1302 .address = backtrace_address,
1303};
1304
1305static void
1306perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
1307{
1308 unsigned long bp;
1309 char *stack;
1310 int nr = entry->nr;
1311
1312 callchain_store(entry, instruction_pointer(regs));
1313
1314 stack = ((char *)regs + sizeof(struct pt_regs));
1315#ifdef CONFIG_FRAME_POINTER
1316 bp = frame_pointer(regs);
1317#else
1318 bp = 0;
1319#endif
1320
1321 dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry);
1322
1323 entry->kernel = entry->nr - nr;
1324}
1325
1326
1327struct stack_frame {
1328 const void __user *next_fp;
1329 unsigned long return_address;
1330};
1331
1332static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
1333{
1334 int ret;
1335
1336 if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
1337 return 0;
1338
1339 ret = 1;
1340 pagefault_disable();
1341 if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
1342 ret = 0;
1343 pagefault_enable();
1344
1345 return ret;
1346}
1347
1348static void
1349perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
1350{
1351 struct stack_frame frame;
1352 const void __user *fp;
1353 int nr = entry->nr;
1354
1355 regs = (struct pt_regs *)current->thread.sp0 - 1;
1356 fp = (void __user *)regs->bp;
1357
1358 callchain_store(entry, regs->ip);
1359
1360 while (entry->nr < MAX_STACK_DEPTH) {
1361 frame.next_fp = NULL;
1362 frame.return_address = 0;
1363
1364 if (!copy_stack_frame(fp, &frame))
1365 break;
1366
1367 if ((unsigned long)fp < user_stack_pointer(regs))
1368 break;
1369
1370 callchain_store(entry, frame.return_address);
1371 fp = frame.next_fp;
1372 }
1373
1374 entry->user = entry->nr - nr;
1375}
1376
1377static void
1378perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
1379{
1380 int is_user;
1381
1382 if (!regs)
1383 return;
1384
1385 is_user = user_mode(regs);
1386
1387 if (!current || current->pid == 0)
1388 return;
1389
1390 if (is_user && current->state != TASK_RUNNING)
1391 return;
1392
1393 if (!is_user)
1394 perf_callchain_kernel(regs, entry);
1395
1396 if (current->mm)
1397 perf_callchain_user(regs, entry);
1398}
1399
1400struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1401{
1402 struct perf_callchain_entry *entry;
1403
1404 if (in_nmi())
1405 entry = &__get_cpu_var(nmi_entry);
1406 else
1407 entry = &__get_cpu_var(irq_entry);
1408
1409 entry->nr = 0;
1410 entry->hv = 0;
1411 entry->kernel = 0;
1412 entry->user = 0;
1413
1414 perf_do_callchain(regs, entry);
1415
1416 return entry;
1417}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index f6c70a164e32..d6f5b9fbde32 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -19,8 +19,8 @@
19#include <linux/nmi.h> 19#include <linux/nmi.h>
20#include <linux/kprobes.h> 20#include <linux/kprobes.h>
21 21
22#include <asm/genapic.h> 22#include <asm/apic.h>
23#include <asm/intel_arch_perfmon.h> 23#include <asm/perf_counter.h>
24 24
25struct nmi_watchdog_ctlblk { 25struct nmi_watchdog_ctlblk {
26 unsigned int cccr_msr; 26 unsigned int cccr_msr;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 38946c6e8433..7985c010f8ac 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1025,6 +1025,11 @@ apicinterrupt ERROR_APIC_VECTOR \
1025apicinterrupt SPURIOUS_APIC_VECTOR \ 1025apicinterrupt SPURIOUS_APIC_VECTOR \
1026 spurious_interrupt smp_spurious_interrupt 1026 spurious_interrupt smp_spurious_interrupt
1027 1027
1028#ifdef CONFIG_PERF_COUNTERS
1029apicinterrupt LOCAL_PENDING_VECTOR \
1030 perf_pending_interrupt smp_perf_pending_interrupt
1031#endif
1032
1028/* 1033/*
1029 * Exception entry points. 1034 * Exception entry points.
1030 */ 1035 */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index c3fe010d74c8..8279fb8df17f 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -63,6 +63,14 @@ static int show_other_interrupts(struct seq_file *p, int prec)
63 for_each_online_cpu(j) 63 for_each_online_cpu(j)
64 seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); 64 seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
65 seq_printf(p, " Spurious interrupts\n"); 65 seq_printf(p, " Spurious interrupts\n");
66 seq_printf(p, "%*s: ", prec, "CNT");
67 for_each_online_cpu(j)
68 seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
69 seq_printf(p, " Performance counter interrupts\n");
70 seq_printf(p, "%*s: ", prec, "PND");
71 for_each_online_cpu(j)
72 seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
73 seq_printf(p, " Performance pending work\n");
66#endif 74#endif
67 if (generic_interrupt_extension) { 75 if (generic_interrupt_extension) {
68 seq_printf(p, "%*s: ", prec, "PLT"); 76 seq_printf(p, "%*s: ", prec, "PLT");
@@ -166,6 +174,8 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
166#ifdef CONFIG_X86_LOCAL_APIC 174#ifdef CONFIG_X86_LOCAL_APIC
167 sum += irq_stats(cpu)->apic_timer_irqs; 175 sum += irq_stats(cpu)->apic_timer_irqs;
168 sum += irq_stats(cpu)->irq_spurious_count; 176 sum += irq_stats(cpu)->irq_spurious_count;
177 sum += irq_stats(cpu)->apic_perf_irqs;
178 sum += irq_stats(cpu)->apic_pending_irqs;
169#endif 179#endif
170 if (generic_interrupt_extension) 180 if (generic_interrupt_extension)
171 sum += irq_stats(cpu)->generic_irqs; 181 sum += irq_stats(cpu)->generic_irqs;
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 368b0a8836f9..205bdd880d31 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -118,28 +118,8 @@ int vector_used_by_percpu_irq(unsigned int vector)
118 return 0; 118 return 0;
119} 119}
120 120
121/* Overridden in paravirt.c */ 121static void __init smp_intr_init(void)
122void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
123
124void __init native_init_IRQ(void)
125{ 122{
126 int i;
127
128 /* Execute any quirks before the call gates are initialised: */
129 x86_quirk_pre_intr_init();
130
131 /*
132 * Cover the whole vector space, no vector can escape
133 * us. (some of these will be overridden and become
134 * 'special' SMP interrupts)
135 */
136 for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
137 /* SYSCALL_VECTOR was reserved in trap_init. */
138 if (i != SYSCALL_VECTOR)
139 set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
140 }
141
142
143#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) 123#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
144 /* 124 /*
145 * The reschedule interrupt is a CPU-to-CPU reschedule-helper 125 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
@@ -168,6 +148,11 @@ void __init native_init_IRQ(void)
168 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); 148 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
169 set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); 149 set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
170#endif 150#endif
151}
152
153static void __init apic_intr_init(void)
154{
155 smp_intr_init();
171 156
172#ifdef CONFIG_X86_LOCAL_APIC 157#ifdef CONFIG_X86_LOCAL_APIC
173 /* self generated IPI for local APIC timer */ 158 /* self generated IPI for local APIC timer */
@@ -179,12 +164,40 @@ void __init native_init_IRQ(void)
179 /* IPI vectors for APIC spurious and error interrupts */ 164 /* IPI vectors for APIC spurious and error interrupts */
180 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); 165 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
181 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); 166 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
182#endif 167# ifdef CONFIG_PERF_COUNTERS
168 alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
169# endif
183 170
184#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL) 171# ifdef CONFIG_X86_MCE_P4THERMAL
185 /* thermal monitor LVT interrupt */ 172 /* thermal monitor LVT interrupt */
186 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); 173 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
174# endif
187#endif 175#endif
176}
177
178/* Overridden in paravirt.c */
179void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
180
181void __init native_init_IRQ(void)
182{
183 int i;
184
185 /* Execute any quirks before the call gates are initialised: */
186 x86_quirk_pre_intr_init();
187
188 apic_intr_init();
189
190 /*
191 * Cover the whole vector space, no vector can escape
192 * us. (some of these will be overridden and become
193 * 'special' SMP interrupts)
194 */
195 for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
196 int vector = FIRST_EXTERNAL_VECTOR + i;
197 /* SYSCALL_VECTOR was reserved in trap_init. */
198 if (!test_bit(vector, used_vectors))
199 set_intr_gate(vector, interrupt[i]);
200 }
188 201
189 if (!acpi_ioapic) 202 if (!acpi_ioapic)
190 setup_irq(2, &irq2); 203 setup_irq(2, &irq2);
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 8cd10537fd46..fa6ef692000f 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -152,6 +152,11 @@ static void __init apic_intr_init(void)
152 /* IPI vectors for APIC spurious and error interrupts */ 152 /* IPI vectors for APIC spurious and error interrupts */
153 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); 153 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
154 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); 154 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
155
156 /* Performance monitoring interrupt: */
157#ifdef CONFIG_PERF_COUNTERS
158 alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
159#endif
155} 160}
156 161
157void __init native_init_IRQ(void) 162void __init native_init_IRQ(void)
@@ -159,6 +164,9 @@ void __init native_init_IRQ(void)
159 int i; 164 int i;
160 165
161 init_ISA_irqs(); 166 init_ISA_irqs();
167
168 apic_intr_init();
169
162 /* 170 /*
163 * Cover the whole vector space, no vector can escape 171 * Cover the whole vector space, no vector can escape
164 * us. (some of these will be overridden and become 172 * us. (some of these will be overridden and become
@@ -166,12 +174,10 @@ void __init native_init_IRQ(void)
166 */ 174 */
167 for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { 175 for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
168 int vector = FIRST_EXTERNAL_VECTOR + i; 176 int vector = FIRST_EXTERNAL_VECTOR + i;
169 if (vector != IA32_SYSCALL_VECTOR) 177 if (!test_bit(vector, used_vectors))
170 set_intr_gate(vector, interrupt[i]); 178 set_intr_gate(vector, interrupt[i]);
171 } 179 }
172 180
173 apic_intr_init();
174
175 if (!acpi_ioapic) 181 if (!acpi_ioapic)
176 setup_irq(2, &irq2); 182 setup_irq(2, &irq2);
177} 183}
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 14425166b8e3..0a813b17b172 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -6,7 +6,6 @@
6 * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes 6 * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
7 * 2000-2002 x86-64 support by Andi Kleen 7 * 2000-2002 x86-64 support by Andi Kleen
8 */ 8 */
9
10#include <linux/sched.h> 9#include <linux/sched.h>
11#include <linux/mm.h> 10#include <linux/mm.h>
12#include <linux/smp.h> 11#include <linux/smp.h>
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index ff5c8736b491..d51321ddafda 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -334,3 +334,5 @@ ENTRY(sys_call_table)
334 .long sys_inotify_init1 334 .long sys_inotify_init1
335 .long sys_preadv 335 .long sys_preadv
336 .long sys_pwritev 336 .long sys_pwritev
337 .long sys_rt_tgsigqueueinfo /* 335 */
338 .long sys_perf_counter_open
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a1d288327ff0..2cc162e09c4b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -945,8 +945,13 @@ void __init trap_init(void)
945#endif 945#endif
946 set_intr_gate(19, &simd_coprocessor_error); 946 set_intr_gate(19, &simd_coprocessor_error);
947 947
948 /* Reserve all the builtin and the syscall vector: */
949 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
950 set_bit(i, used_vectors);
951
948#ifdef CONFIG_IA32_EMULATION 952#ifdef CONFIG_IA32_EMULATION
949 set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); 953 set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
954 set_bit(IA32_SYSCALL_VECTOR, used_vectors);
950#endif 955#endif
951 956
952#ifdef CONFIG_X86_32 957#ifdef CONFIG_X86_32
@@ -963,17 +968,9 @@ void __init trap_init(void)
963 } 968 }
964 969
965 set_system_trap_gate(SYSCALL_VECTOR, &system_call); 970 set_system_trap_gate(SYSCALL_VECTOR, &system_call);
966#endif
967
968 /* Reserve all the builtin and the syscall vector: */
969 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
970 set_bit(i, used_vectors);
971
972#ifdef CONFIG_X86_64
973 set_bit(IA32_SYSCALL_VECTOR, used_vectors);
974#else
975 set_bit(SYSCALL_VECTOR, used_vectors); 971 set_bit(SYSCALL_VECTOR, used_vectors);
976#endif 972#endif
973
977 /* 974 /*
978 * Should be a barrier for any external CPU state: 975 * Should be a barrier for any external CPU state:
979 */ 976 */
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a03b7279efa0..6f9df2babe48 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -27,6 +27,7 @@
27#include <linux/tty.h> 27#include <linux/tty.h>
28#include <linux/smp.h> 28#include <linux/smp.h>
29#include <linux/mm.h> 29#include <linux/mm.h>
30#include <linux/perf_counter.h>
30 31
31#include <asm-generic/sections.h> 32#include <asm-generic/sections.h>
32 33
@@ -1044,6 +1045,8 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
1044 if (unlikely(error_code & PF_RSVD)) 1045 if (unlikely(error_code & PF_RSVD))
1045 pgtable_bad(regs, error_code, address); 1046 pgtable_bad(regs, error_code, address);
1046 1047
1048 perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address);
1049
1047 /* 1050 /*
1048 * If we're in an interrupt, have no user context or are running 1051 * If we're in an interrupt, have no user context or are running
1049 * in an atomic region then we must not take the fault: 1052 * in an atomic region then we must not take the fault:
@@ -1137,10 +1140,15 @@ good_area:
1137 return; 1140 return;
1138 } 1141 }
1139 1142
1140 if (fault & VM_FAULT_MAJOR) 1143 if (fault & VM_FAULT_MAJOR) {
1141 tsk->maj_flt++; 1144 tsk->maj_flt++;
1142 else 1145 perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0,
1146 regs, address);
1147 } else {
1143 tsk->min_flt++; 1148 tsk->min_flt++;
1149 perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0,
1150 regs, address);
1151 }
1144 1152
1145 check_v8086_mode(regs, address, tsk); 1153 check_v8086_mode(regs, address, tsk);
1146 1154
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 202864ad49a7..c638685136e1 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -40,8 +40,9 @@ static int profile_exceptions_notify(struct notifier_block *self,
40 40
41 switch (val) { 41 switch (val) {
42 case DIE_NMI: 42 case DIE_NMI:
43 if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu))) 43 case DIE_NMI_IPI:
44 ret = NOTIFY_STOP; 44 model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu));
45 ret = NOTIFY_STOP;
45 break; 46 break;
46 default: 47 default:
47 break; 48 break;
@@ -134,7 +135,7 @@ static void nmi_cpu_setup(void *dummy)
134static struct notifier_block profile_exceptions_nb = { 135static struct notifier_block profile_exceptions_nb = {
135 .notifier_call = profile_exceptions_notify, 136 .notifier_call = profile_exceptions_notify,
136 .next = NULL, 137 .next = NULL,
137 .priority = 0 138 .priority = 2
138}; 139};
139 140
140static int nmi_setup(void) 141static int nmi_setup(void)
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 10131fbdaada..4da7230b3d17 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -18,7 +18,7 @@
18#include <asm/msr.h> 18#include <asm/msr.h>
19#include <asm/apic.h> 19#include <asm/apic.h>
20#include <asm/nmi.h> 20#include <asm/nmi.h>
21#include <asm/intel_arch_perfmon.h> 21#include <asm/perf_counter.h>
22 22
23#include "op_x86_model.h" 23#include "op_x86_model.h"
24#include "op_counter.h" 24#include "op_counter.h"
@@ -136,6 +136,13 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
136 u64 val; 136 u64 val;
137 int i; 137 int i;
138 138
139 /*
140 * This can happen if perf counters are in use when
141 * we steal the die notifier NMI.
142 */
143 if (unlikely(!reset_value))
144 goto out;
145
139 for (i = 0 ; i < num_counters; ++i) { 146 for (i = 0 ; i < num_counters; ++i) {
140 if (!reset_value[i]) 147 if (!reset_value[i])
141 continue; 148 continue;
@@ -146,6 +153,7 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
146 } 153 }
147 } 154 }
148 155
156out:
149 /* Only P6 based Pentium M need to re-unmask the apic vector but it 157 /* Only P6 based Pentium M need to re-unmask the apic vector but it
150 * doesn't hurt other P6 variant */ 158 * doesn't hurt other P6 variant */
151 apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); 159 apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 1241f118ab56..58bc00f68b12 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -338,6 +338,8 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
338 } 338 }
339 } 339 }
340 340
341 current->mm->context.vdso = (void *)addr;
342
341 if (compat_uses_vma || !compat) { 343 if (compat_uses_vma || !compat) {
342 /* 344 /*
343 * MAYWRITE to allow gdb to COW and set breakpoints 345 * MAYWRITE to allow gdb to COW and set breakpoints
@@ -358,11 +360,13 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
358 goto up_fail; 360 goto up_fail;
359 } 361 }
360 362
361 current->mm->context.vdso = (void *)addr;
362 current_thread_info()->sysenter_return = 363 current_thread_info()->sysenter_return =
363 VDSO32_SYMBOL(addr, SYSENTER_RETURN); 364 VDSO32_SYMBOL(addr, SYSENTER_RETURN);
364 365
365 up_fail: 366 up_fail:
367 if (ret)
368 current->mm->context.vdso = NULL;
369
366 up_write(&mm->mmap_sem); 370 up_write(&mm->mmap_sem);
367 371
368 return ret; 372 return ret;
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 7133cdf9098b..93b7a2938b2f 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -115,15 +115,18 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
115 goto up_fail; 115 goto up_fail;
116 } 116 }
117 117
118 current->mm->context.vdso = (void *)addr;
119
118 ret = install_special_mapping(mm, addr, vdso_size, 120 ret = install_special_mapping(mm, addr, vdso_size,
119 VM_READ|VM_EXEC| 121 VM_READ|VM_EXEC|
120 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| 122 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
121 VM_ALWAYSDUMP, 123 VM_ALWAYSDUMP,
122 vdso_pages); 124 vdso_pages);
123 if (ret) 125 if (ret) {
126 current->mm->context.vdso = NULL;
124 goto up_fail; 127 goto up_fail;
128 }
125 129
126 current->mm->context.vdso = (void *)addr;
127up_fail: 130up_fail:
128 up_write(&mm->mmap_sem); 131 up_write(&mm->mmap_sem);
129 return ret; 132 return ret;