aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorVince Weaver <vincent.weaver@maine.edu>2012-09-26 14:12:52 -0400
committerIngo Molnar <mingo@kernel.org>2012-10-04 07:32:37 -0400
commite717bf4e4fe8adc519f25c4ff93ee50ed0a36710 (patch)
treee356d43d216db2e6c615d658016a0c71d8b78ec7 /arch
parentb3eda8d05c1afe722dc19be3fee7eeadc75e25e2 (diff)
perf/x86: Add support for Intel Xeon-Phi Knights Corner PMU
The following patch adds perf_event support for the Xeon-Phi PMU, as documented in the "Intel Xeon Phi Coprocessor (codename: Knights Corner) Performance Monitoring Units" manual. Even though it is a co-processor, a Phi runs a full Linux environment and can support performance counters. This is just barebones support, it does not add support for interesting new features such as the SPFLT intruction that allows starting/stopping events without entering the kernel. The PMU internally is just like that of an original Pentium, but a "P6-like" MSR interface is provided. The interface is different enough from a real P6 that it's not easy (or practical) to re-use the code in perf_event_p6.c Acked-by: Lawrence F Meadows <lawrence.f.meadows@intel.com> Acked-by: Cyrill Gorcunov <gorcunov@openvz.org> Signed-off-by: Vince Weaver <vincent.weaver@maine.edu> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net> Cc: eranian@gmail.com Cc: Lawrence F <lawrence.f.meadows@intel.com> Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1209261405320.8398@vincent-weaver-1.um.maine.edu Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/include/asm/msr-index.h5
-rw-r--r--arch/x86/kernel/cpu/Makefile2
-rw-r--r--arch/x86/kernel/cpu/perf_event.h2
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event_knc.c248
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c4
6 files changed, 262 insertions, 1 deletions
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 957ec87385af..07f96cb5cdb9 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -121,6 +121,11 @@
121#define MSR_P6_EVNTSEL0 0x00000186 121#define MSR_P6_EVNTSEL0 0x00000186
122#define MSR_P6_EVNTSEL1 0x00000187 122#define MSR_P6_EVNTSEL1 0x00000187
123 123
124#define MSR_KNC_PERFCTR0 0x00000020
125#define MSR_KNC_PERFCTR1 0x00000021
126#define MSR_KNC_EVNTSEL0 0x00000028
127#define MSR_KNC_EVNTSEL1 0x00000029
128
124/* AMD64 MSRs. Not complete. See the architecture manual for a more 129/* AMD64 MSRs. Not complete. See the architecture manual for a more
125 complete list. */ 130 complete list. */
126 131
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index d30a6a9a0121..a0e067d3d96c 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -32,7 +32,7 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
32 32
33ifdef CONFIG_PERF_EVENTS 33ifdef CONFIG_PERF_EVENTS
34obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o 34obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o
35obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o 35obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
36obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o 36obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
37obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o 37obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o
38endif 38endif
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 8b6defe7eefc..271d25700297 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -626,6 +626,8 @@ int p4_pmu_init(void);
626 626
627int p6_pmu_init(void); 627int p6_pmu_init(void);
628 628
629int knc_pmu_init(void);
630
629#else /* CONFIG_CPU_SUP_INTEL */ 631#else /* CONFIG_CPU_SUP_INTEL */
630 632
631static inline void reserve_ds_buffers(void) 633static inline void reserve_ds_buffers(void)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 6bca492b8547..324bb523d9d9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1906,6 +1906,8 @@ __init int intel_pmu_init(void)
1906 switch (boot_cpu_data.x86) { 1906 switch (boot_cpu_data.x86) {
1907 case 0x6: 1907 case 0x6:
1908 return p6_pmu_init(); 1908 return p6_pmu_init();
1909 case 0xb:
1910 return knc_pmu_init();
1909 case 0xf: 1911 case 0xf:
1910 return p4_pmu_init(); 1912 return p4_pmu_init();
1911 } 1913 }
diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/kernel/cpu/perf_event_knc.c
new file mode 100644
index 000000000000..7c46bfdbc373
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_knc.c
@@ -0,0 +1,248 @@
1/* Driver for Intel Xeon Phi "Knights Corner" PMU */
2
3#include <linux/perf_event.h>
4#include <linux/types.h>
5
6#include "perf_event.h"
7
8static const u64 knc_perfmon_event_map[] =
9{
10 [PERF_COUNT_HW_CPU_CYCLES] = 0x002a,
11 [PERF_COUNT_HW_INSTRUCTIONS] = 0x0016,
12 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0028,
13 [PERF_COUNT_HW_CACHE_MISSES] = 0x0029,
14 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0012,
15 [PERF_COUNT_HW_BRANCH_MISSES] = 0x002b,
16};
17
18static __initconst u64 knc_hw_cache_event_ids
19 [PERF_COUNT_HW_CACHE_MAX]
20 [PERF_COUNT_HW_CACHE_OP_MAX]
21 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
22{
23 [ C(L1D) ] = {
24 [ C(OP_READ) ] = {
25 /* On Xeon Phi event "0" is a valid DATA_READ */
26 /* (L1 Data Cache Reads) Instruction. */
27 /* We code this as ARCH_PERFMON_EVENTSEL_INT as this */
28 /* bit will always be set in x86_pmu_hw_config(). */
29 [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
30 /* DATA_READ */
31 [ C(RESULT_MISS) ] = 0x0003, /* DATA_READ_MISS */
32 },
33 [ C(OP_WRITE) ] = {
34 [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */
35 [ C(RESULT_MISS) ] = 0x0004, /* DATA_WRITE_MISS */
36 },
37 [ C(OP_PREFETCH) ] = {
38 [ C(RESULT_ACCESS) ] = 0x0011, /* L1_DATA_PF1 */
39 [ C(RESULT_MISS) ] = 0x001c, /* L1_DATA_PF1_MISS */
40 },
41 },
42 [ C(L1I ) ] = {
43 [ C(OP_READ) ] = {
44 [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */
45 [ C(RESULT_MISS) ] = 0x000e, /* CODE_CACHE_MISS */
46 },
47 [ C(OP_WRITE) ] = {
48 [ C(RESULT_ACCESS) ] = -1,
49 [ C(RESULT_MISS) ] = -1,
50 },
51 [ C(OP_PREFETCH) ] = {
52 [ C(RESULT_ACCESS) ] = 0x0,
53 [ C(RESULT_MISS) ] = 0x0,
54 },
55 },
56 [ C(LL ) ] = {
57 [ C(OP_READ) ] = {
58 [ C(RESULT_ACCESS) ] = 0,
59 [ C(RESULT_MISS) ] = 0x10cb, /* L2_READ_MISS */
60 },
61 [ C(OP_WRITE) ] = {
62 [ C(RESULT_ACCESS) ] = 0x10cc, /* L2_WRITE_HIT */
63 [ C(RESULT_MISS) ] = 0,
64 },
65 [ C(OP_PREFETCH) ] = {
66 [ C(RESULT_ACCESS) ] = 0x10fc, /* L2_DATA_PF2 */
67 [ C(RESULT_MISS) ] = 0x10fe, /* L2_DATA_PF2_MISS */
68 },
69 },
70 [ C(DTLB) ] = {
71 [ C(OP_READ) ] = {
72 [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
73 /* DATA_READ */
74 /* see note on L1 OP_READ */
75 [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */
76 },
77 [ C(OP_WRITE) ] = {
78 [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */
79 [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */
80 },
81 [ C(OP_PREFETCH) ] = {
82 [ C(RESULT_ACCESS) ] = 0x0,
83 [ C(RESULT_MISS) ] = 0x0,
84 },
85 },
86 [ C(ITLB) ] = {
87 [ C(OP_READ) ] = {
88 [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */
89 [ C(RESULT_MISS) ] = 0x000d, /* CODE_PAGE_WALK */
90 },
91 [ C(OP_WRITE) ] = {
92 [ C(RESULT_ACCESS) ] = -1,
93 [ C(RESULT_MISS) ] = -1,
94 },
95 [ C(OP_PREFETCH) ] = {
96 [ C(RESULT_ACCESS) ] = -1,
97 [ C(RESULT_MISS) ] = -1,
98 },
99 },
100 [ C(BPU ) ] = {
101 [ C(OP_READ) ] = {
102 [ C(RESULT_ACCESS) ] = 0x0012, /* BRANCHES */
103 [ C(RESULT_MISS) ] = 0x002b, /* BRANCHES_MISPREDICTED */
104 },
105 [ C(OP_WRITE) ] = {
106 [ C(RESULT_ACCESS) ] = -1,
107 [ C(RESULT_MISS) ] = -1,
108 },
109 [ C(OP_PREFETCH) ] = {
110 [ C(RESULT_ACCESS) ] = -1,
111 [ C(RESULT_MISS) ] = -1,
112 },
113 },
114};
115
116
117static u64 knc_pmu_event_map(int hw_event)
118{
119 return knc_perfmon_event_map[hw_event];
120}
121
122static struct event_constraint knc_event_constraints[] =
123{
124 INTEL_EVENT_CONSTRAINT(0xc3, 0x1), /* HWP_L2HIT */
125 INTEL_EVENT_CONSTRAINT(0xc4, 0x1), /* HWP_L2MISS */
126 INTEL_EVENT_CONSTRAINT(0xc8, 0x1), /* L2_READ_HIT_E */
127 INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* L2_READ_HIT_M */
128 INTEL_EVENT_CONSTRAINT(0xca, 0x1), /* L2_READ_HIT_S */
129 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* L2_READ_MISS */
130 INTEL_EVENT_CONSTRAINT(0xcc, 0x1), /* L2_WRITE_HIT */
131 INTEL_EVENT_CONSTRAINT(0xce, 0x1), /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */
132 INTEL_EVENT_CONSTRAINT(0xcf, 0x1), /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */
133 INTEL_EVENT_CONSTRAINT(0xd7, 0x1), /* L2_VICTIM_REQ_WITH_DATA */
134 INTEL_EVENT_CONSTRAINT(0xe3, 0x1), /* SNP_HITM_BUNIT */
135 INTEL_EVENT_CONSTRAINT(0xe6, 0x1), /* SNP_HIT_L2 */
136 INTEL_EVENT_CONSTRAINT(0xe7, 0x1), /* SNP_HITM_L2 */
137 INTEL_EVENT_CONSTRAINT(0xf1, 0x1), /* L2_DATA_READ_MISS_CACHE_FILL */
138 INTEL_EVENT_CONSTRAINT(0xf2, 0x1), /* L2_DATA_WRITE_MISS_CACHE_FILL */
139 INTEL_EVENT_CONSTRAINT(0xf6, 0x1), /* L2_DATA_READ_MISS_MEM_FILL */
140 INTEL_EVENT_CONSTRAINT(0xf7, 0x1), /* L2_DATA_WRITE_MISS_MEM_FILL */
141 INTEL_EVENT_CONSTRAINT(0xfc, 0x1), /* L2_DATA_PF2 */
142 INTEL_EVENT_CONSTRAINT(0xfd, 0x1), /* L2_DATA_PF2_DROP */
143 INTEL_EVENT_CONSTRAINT(0xfe, 0x1), /* L2_DATA_PF2_MISS */
144 INTEL_EVENT_CONSTRAINT(0xff, 0x1), /* L2_DATA_HIT_INFLIGHT_PF2 */
145 EVENT_CONSTRAINT_END
146};
147
148#define MSR_KNC_IA32_PERF_GLOBAL_STATUS 0x0000002d
149#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL 0x0000002e
150#define MSR_KNC_IA32_PERF_GLOBAL_CTRL 0x0000002f
151
152#define KNC_ENABLE_COUNTER0 0x00000001
153#define KNC_ENABLE_COUNTER1 0x00000002
154
155static void knc_pmu_disable_all(void)
156{
157 u64 val;
158
159 rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
160 val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
161 wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
162}
163
164static void knc_pmu_enable_all(int added)
165{
166 u64 val;
167
168 rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
169 val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
170 wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
171}
172
173static inline void
174knc_pmu_disable_event(struct perf_event *event)
175{
176 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
177 struct hw_perf_event *hwc = &event->hw;
178 u64 val;
179
180 val = hwc->config;
181 if (cpuc->enabled)
182 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
183
184 (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
185}
186
187static void knc_pmu_enable_event(struct perf_event *event)
188{
189 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
190 struct hw_perf_event *hwc = &event->hw;
191 u64 val;
192
193 val = hwc->config;
194 if (cpuc->enabled)
195 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
196
197 (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
198}
199
200PMU_FORMAT_ATTR(event, "config:0-7" );
201PMU_FORMAT_ATTR(umask, "config:8-15" );
202PMU_FORMAT_ATTR(edge, "config:18" );
203PMU_FORMAT_ATTR(inv, "config:23" );
204PMU_FORMAT_ATTR(cmask, "config:24-31" );
205
206static struct attribute *intel_knc_formats_attr[] = {
207 &format_attr_event.attr,
208 &format_attr_umask.attr,
209 &format_attr_edge.attr,
210 &format_attr_inv.attr,
211 &format_attr_cmask.attr,
212 NULL,
213};
214
215static __initconst struct x86_pmu knc_pmu = {
216 .name = "knc",
217 .handle_irq = x86_pmu_handle_irq,
218 .disable_all = knc_pmu_disable_all,
219 .enable_all = knc_pmu_enable_all,
220 .enable = knc_pmu_enable_event,
221 .disable = knc_pmu_disable_event,
222 .hw_config = x86_pmu_hw_config,
223 .schedule_events = x86_schedule_events,
224 .eventsel = MSR_KNC_EVNTSEL0,
225 .perfctr = MSR_KNC_PERFCTR0,
226 .event_map = knc_pmu_event_map,
227 .max_events = ARRAY_SIZE(knc_perfmon_event_map),
228 .apic = 1,
229 .max_period = (1ULL << 31) - 1,
230 .version = 0,
231 .num_counters = 2,
232 /* in theory 40 bits, early silicon is buggy though */
233 .cntval_bits = 32,
234 .cntval_mask = (1ULL << 32) - 1,
235 .get_event_constraints = x86_get_event_constraints,
236 .event_constraints = knc_event_constraints,
237 .format_attrs = intel_knc_formats_attr,
238};
239
240__init int knc_pmu_init(void)
241{
242 x86_pmu = knc_pmu;
243
244 memcpy(hw_cache_event_ids, knc_hw_cache_event_ids,
245 sizeof(hw_cache_event_ids));
246
247 return 0;
248}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 966512b2cacf..2e8caf03f593 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -56,6 +56,8 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
56 switch (boot_cpu_data.x86) { 56 switch (boot_cpu_data.x86) {
57 case 6: 57 case 6:
58 return msr - MSR_P6_PERFCTR0; 58 return msr - MSR_P6_PERFCTR0;
59 case 11:
60 return msr - MSR_KNC_PERFCTR0;
59 case 15: 61 case 15:
60 return msr - MSR_P4_BPU_PERFCTR0; 62 return msr - MSR_P4_BPU_PERFCTR0;
61 } 63 }
@@ -82,6 +84,8 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
82 switch (boot_cpu_data.x86) { 84 switch (boot_cpu_data.x86) {
83 case 6: 85 case 6:
84 return msr - MSR_P6_EVNTSEL0; 86 return msr - MSR_P6_EVNTSEL0;
87 case 11:
88 return msr - MSR_KNC_EVNTSEL0;
85 case 15: 89 case 15:
86 return msr - MSR_P4_BSU_ESCR0; 90 return msr - MSR_P4_BSU_ESCR0;
87 } 91 }