author		Robert Richter <robert.richter@amd.com>	2010-03-01 05:13:21 -0500
committer	Robert Richter <robert.richter@amd.com>	2010-03-01 05:22:45 -0500
commit		86d62b6fa2845725ad88b95387c8c52731fb2460 (patch)
tree		e8b4446e1716037fdf9ccdb52e23efed58a3223c /arch
parent		3d083407a16698de86b42aee0da2ffb280b5cb7e (diff)
parent		cfc9c0b450176a077205ef39092f0dc1a04e020a (diff)
Merge remote branch 'tip/oprofile' into tip/perf/core
Signed-off-by: Robert Richter <robert.richter@amd.com>
Diffstat (limited to 'arch')
-rw-r--r--  arch/Kconfig                        |  18
-rw-r--r--  arch/x86/oprofile/nmi_int.c         |  17
-rw-r--r--  arch/x86/oprofile/op_model_amd.c    | 244
-rw-r--r--  arch/x86/oprofile/op_model_p4.c     |   6
-rw-r--r--  arch/x86/oprofile/op_model_ppro.c   |  17
-rw-r--r--  arch/x86/oprofile/op_x86_model.h    |  20
6 files changed, 199 insertions(+), 123 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 9d055b4f0585..06a13729c8df 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -3,11 +3,9 @@
 #
 
 config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
+	tristate "OProfile system profiling"
 	depends on PROFILING
 	depends on HAVE_OPROFILE
-	depends on TRACING_SUPPORT
-	select TRACING
 	select RING_BUFFER
 	select RING_BUFFER_ALLOW_SWAP
 	help
@@ -17,20 +15,6 @@ config OPROFILE
 
 	  If unsure, say N.
 
-config OPROFILE_IBS
-	bool "OProfile AMD IBS support (EXPERIMENTAL)"
-	default n
-	depends on OPROFILE && SMP && X86
-	help
-	  Instruction-Based Sampling (IBS) is a new profiling
-	  technique that provides rich, precise program performance
-	  information. IBS is introduced by AMD Family10h processors
-	  (AMD Opteron Quad-Core processor "Barcelona") to overcome
-	  the limitations of conventional performance counter
-	  sampling.
-
-	  If unsure, say N.
-
 config OPROFILE_EVENT_MULTIPLEX
 	bool "OProfile multiplexing support (EXPERIMENTAL)"
 	default n
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 3347f696edc7..2c505ee71014 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -159,7 +159,7 @@ static int nmi_setup_mux(void)
 
 	for_each_possible_cpu(i) {
 		per_cpu(cpu_msrs, i).multiplex =
-			kmalloc(multiplex_size, GFP_KERNEL);
+			kzalloc(multiplex_size, GFP_KERNEL);
 		if (!per_cpu(cpu_msrs, i).multiplex)
 			return 0;
 	}
@@ -179,7 +179,6 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs)
 		if (counter_config[i].enabled) {
 			multiplex[i].saved = -(u64)counter_config[i].count;
 		} else {
-			multiplex[i].addr = 0;
 			multiplex[i].saved = 0;
 		}
 	}
@@ -189,25 +188,27 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs)
 
 static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs)
 {
+	struct op_msr *counters = msrs->counters;
 	struct op_msr *multiplex = msrs->multiplex;
 	int i;
 
 	for (i = 0; i < model->num_counters; ++i) {
 		int virt = op_x86_phys_to_virt(i);
-		if (multiplex[virt].addr)
-			rdmsrl(multiplex[virt].addr, multiplex[virt].saved);
+		if (counters[i].addr)
+			rdmsrl(counters[i].addr, multiplex[virt].saved);
 	}
 }
 
 static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs)
 {
+	struct op_msr *counters = msrs->counters;
 	struct op_msr *multiplex = msrs->multiplex;
 	int i;
 
 	for (i = 0; i < model->num_counters; ++i) {
 		int virt = op_x86_phys_to_virt(i);
-		if (multiplex[virt].addr)
-			wrmsrl(multiplex[virt].addr, multiplex[virt].saved);
+		if (counters[i].addr)
+			wrmsrl(counters[i].addr, multiplex[virt].saved);
 	}
 }
 
@@ -303,11 +304,11 @@ static int allocate_msrs(void)
 
 	int i;
 	for_each_possible_cpu(i) {
-		per_cpu(cpu_msrs, i).counters = kmalloc(counters_size,
+		per_cpu(cpu_msrs, i).counters = kzalloc(counters_size,
 							GFP_KERNEL);
 		if (!per_cpu(cpu_msrs, i).counters)
 			return 0;
-		per_cpu(cpu_msrs, i).controls = kmalloc(controls_size,
+		per_cpu(cpu_msrs, i).controls = kzalloc(controls_size,
 							GFP_KERNEL);
 		if (!per_cpu(cpu_msrs, i).controls)
 			return 0;
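
The kmalloc()-to-kzalloc() conversions above work together with the removal of the explicit "addr = 0" assignments elsewhere in this series: zero-filled allocations guarantee that any MSR slot whose counter could not be reserved keeps addr == 0, which is exactly what the new counters[i].addr guards test. A minimal user-space sketch of that convention (calloc() standing in for kzalloc(); this reserve_perfctr_nmi() stub, NUM_COUNTERS and the loop are stand-ins for illustration only, not kernel code):

#include <stdio.h>
#include <stdlib.h>

struct op_msr {
	unsigned long addr;
	unsigned long long saved;
};

#define NUM_COUNTERS	4
#define MSR_K7_PERFCTR0	0xc0010004UL

/* stand-in: pretend only every second counter can be reserved */
static int reserve_perfctr_nmi(unsigned long msr)
{
	return ((msr - MSR_K7_PERFCTR0) % 2) == 0;
}

int main(void)
{
	/* calloc() plays the role of kzalloc(): every slot starts at 0 */
	struct op_msr *counters = calloc(NUM_COUNTERS, sizeof(*counters));
	int i;

	if (!counters)
		return 1;

	for (i = 0; i < NUM_COUNTERS; i++)
		if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
			counters[i].addr = MSR_K7_PERFCTR0 + i;
		/* no "else counters[i].addr = 0;" needed any more */

	for (i = 0; i < NUM_COUNTERS; i++)
		if (counters[i].addr)	/* unreserved slots are skipped */
			printf("counter %d -> MSR 0x%lx\n", i, counters[i].addr);

	free(counters);
	return 0;
}
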
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 39686c29f03a..6a58256dce9f 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -22,6 +22,9 @@
 #include <asm/ptrace.h>
 #include <asm/msr.h>
 #include <asm/nmi.h>
+#include <asm/apic.h>
+#include <asm/processor.h>
+#include <asm/cpufeature.h>
 
 #include "op_x86_model.h"
 #include "op_counter.h"
@@ -43,15 +46,13 @@
 
 static unsigned long reset_value[NUM_VIRT_COUNTERS];
 
-#ifdef CONFIG_OPROFILE_IBS
-
 /* IbsFetchCtl bits/masks */
 #define IBS_FETCH_RAND_EN	(1ULL<<57)
 #define IBS_FETCH_VAL		(1ULL<<49)
 #define IBS_FETCH_ENABLE	(1ULL<<48)
 #define IBS_FETCH_CNT_MASK	0xFFFF0000ULL
 
-/*IbsOpCtl bits */
+/* IbsOpCtl bits */
 #define IBS_OP_CNT_CTL		(1ULL<<19)
 #define IBS_OP_VAL		(1ULL<<18)
 #define IBS_OP_ENABLE		(1ULL<<17)
@@ -59,7 +60,7 @@ static unsigned long reset_value[NUM_VIRT_COUNTERS];
 #define IBS_FETCH_SIZE	6
 #define IBS_OP_SIZE	12
 
-static int has_ibs;	/* AMD Family10h and later */
+static u32 ibs_caps;
 
 struct op_ibs_config {
 	unsigned long op_enabled;
@@ -71,24 +72,52 @@ struct op_ibs_config {
 };
 
 static struct op_ibs_config ibs_config;
+static u64 ibs_op_ctl;
 
-#endif
+/*
+ * IBS cpuid feature detection
+ */
 
-#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+#define IBS_CPUID_FEATURES	0x8000001b
+
+/*
+ * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but
+ * bit 0 is used to indicate the existence of IBS.
+ */
+#define IBS_CAPS_AVAIL		(1LL<<0)
+#define IBS_CAPS_RDWROPCNT	(1LL<<3)
+#define IBS_CAPS_OPCNT		(1LL<<4)
+
+/*
+ * IBS randomization macros
+ */
+#define IBS_RANDOM_BITS		12
+#define IBS_RANDOM_MASK		((1ULL << IBS_RANDOM_BITS) - 1)
+#define IBS_RANDOM_MAXCNT_OFFSET	(1ULL << (IBS_RANDOM_BITS - 5))
 
-static void op_mux_fill_in_addresses(struct op_msrs * const msrs)
+static u32 get_ibs_caps(void)
 {
-	int i;
+	u32 ibs_caps;
+	unsigned int max_level;
 
-	for (i = 0; i < NUM_VIRT_COUNTERS; i++) {
-		int hw_counter = op_x86_virt_to_phys(i);
-		if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
-			msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter;
-		else
-			msrs->multiplex[i].addr = 0;
-	}
+	if (!boot_cpu_has(X86_FEATURE_IBS))
+		return 0;
+
+	/* check IBS cpuid feature flags */
+	max_level = cpuid_eax(0x80000000);
+	if (max_level < IBS_CPUID_FEATURES)
+		return IBS_CAPS_AVAIL;
+
+	ibs_caps = cpuid_eax(IBS_CPUID_FEATURES);
+	if (!(ibs_caps & IBS_CAPS_AVAIL))
+		/* cpuid flags not valid */
+		return IBS_CAPS_AVAIL;
+
+	return ibs_caps;
 }
 
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+
 static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
 			       struct op_msrs const * const msrs)
 {
@@ -98,7 +127,7 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
 	/* enable active counters */
 	for (i = 0; i < NUM_COUNTERS; ++i) {
 		int virt = op_x86_phys_to_virt(i);
-		if (!counter_config[virt].enabled)
+		if (!reset_value[virt])
 			continue;
 		rdmsrl(msrs->controls[i].addr, val);
 		val &= model->reserved;
@@ -107,10 +136,6 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
 	}
 }
 
-#else
-
-static inline void op_mux_fill_in_addresses(struct op_msrs * const msrs) { }
-
 #endif
 
 /* functions for op_amd_spec */
116/* functions for op_amd_spec */ 141/* functions for op_amd_spec */
@@ -122,18 +147,12 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs)
 	for (i = 0; i < NUM_COUNTERS; i++) {
 		if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
 			msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
-		else
-			msrs->counters[i].addr = 0;
 	}
 
 	for (i = 0; i < NUM_CONTROLS; i++) {
 		if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i))
 			msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
-		else
-			msrs->controls[i].addr = 0;
 	}
-
-	op_mux_fill_in_addresses(msrs);
 }
 
 static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
@@ -144,7 +163,8 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
 
 	/* setup reset_value */
 	for (i = 0; i < NUM_VIRT_COUNTERS; ++i) {
-		if (counter_config[i].enabled)
+		if (counter_config[i].enabled
+		    && msrs->counters[op_x86_virt_to_phys(i)].addr)
 			reset_value[i] = counter_config[i].count;
 		else
 			reset_value[i] = 0;
@@ -152,9 +172,18 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
 
 	/* clear all counters */
 	for (i = 0; i < NUM_CONTROLS; ++i) {
-		if (unlikely(!msrs->controls[i].addr))
+		if (unlikely(!msrs->controls[i].addr)) {
+			if (counter_config[i].enabled && !smp_processor_id())
+				/*
+				 * counter is reserved, this is on all
+				 * cpus, so report only for cpu #0
+				 */
+				op_x86_warn_reserved(i);
 			continue;
+		}
 		rdmsrl(msrs->controls[i].addr, val);
+		if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
+			op_x86_warn_in_use(i);
 		val &= model->reserved;
 		wrmsrl(msrs->controls[i].addr, val);
 	}
@@ -169,9 +198,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
 	/* enable active counters */
 	for (i = 0; i < NUM_COUNTERS; ++i) {
 		int virt = op_x86_phys_to_virt(i);
-		if (!counter_config[virt].enabled)
-			continue;
-		if (!msrs->counters[i].addr)
+		if (!reset_value[virt])
 			continue;
 
 		/* setup counter registers */
@@ -185,7 +212,60 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
 	}
 }
 
-#ifdef CONFIG_OPROFILE_IBS
+/*
+ * 16-bit Linear Feedback Shift Register (LFSR)
+ *
+ *                       16   14   13    11
+ * Feedback polynomial = X  + X  + X  + X  + 1
+ */
+static unsigned int lfsr_random(void)
+{
+	static unsigned int lfsr_value = 0xF00D;
+	unsigned int bit;
+
+	/* Compute next bit to shift in */
+	bit = ((lfsr_value >> 0) ^
+	       (lfsr_value >> 2) ^
+	       (lfsr_value >> 3) ^
+	       (lfsr_value >> 5)) & 0x0001;
+
+	/* Advance to next register value */
+	lfsr_value = (lfsr_value >> 1) | (bit << 15);
+
+	return lfsr_value;
+}
+
+/*
+ * IBS software randomization
+ *
+ * The IBS periodic op counter is randomized in software. The lower 12
+ * bits of the 20 bit counter are randomized. IbsOpCurCnt is
+ * initialized with a 12 bit random value.
+ */
+static inline u64 op_amd_randomize_ibs_op(u64 val)
+{
+	unsigned int random = lfsr_random();
+
+	if (!(ibs_caps & IBS_CAPS_RDWROPCNT))
+		/*
+		 * Work around if the hw can not write to IbsOpCurCnt
+		 *
+		 * Randomize the lower 8 bits of the 16 bit
+		 * IbsOpMaxCnt [15:0] value in the range of -128 to
+		 * +127 by adding/subtracting an offset to the
+		 * maximum count (IbsOpMaxCnt).
+		 *
+		 * To avoid over or underflows and protect upper bits
+		 * starting at bit 16, the initial value for
+		 * IbsOpMaxCnt must fit in the range from 0x0081 to
+		 * 0xff80.
+		 */
+		val += (s8)(random >> 4);
+	else
+		val |= (u64)(random & IBS_RANDOM_MASK) << 32;
+
+	return val;
+}
 
 static inline void
 op_amd_handle_ibs(struct pt_regs * const regs,
@@ -194,7 +274,7 @@ op_amd_handle_ibs(struct pt_regs * const regs,
 	u64 val, ctl;
 	struct op_entry entry;
 
-	if (!has_ibs)
+	if (!ibs_caps)
 		return;
 
 	if (ibs_config.fetch_enabled) {
@@ -236,8 +316,7 @@ op_amd_handle_ibs(struct pt_regs * const regs,
 			oprofile_write_commit(&entry);
 
 			/* reenable the IRQ */
-			ctl &= ~IBS_OP_VAL & 0xFFFFFFFF;
-			ctl |= IBS_OP_ENABLE;
+			ctl = op_amd_randomize_ibs_op(ibs_op_ctl);
 			wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
 		}
 	}
@@ -246,41 +325,57 @@ op_amd_handle_ibs(struct pt_regs * const regs,
 static inline void op_amd_start_ibs(void)
 {
 	u64 val;
-	if (has_ibs && ibs_config.fetch_enabled) {
+
+	if (!ibs_caps)
+		return;
+
+	if (ibs_config.fetch_enabled) {
 		val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF;
 		val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
 		val |= IBS_FETCH_ENABLE;
 		wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
 	}
 
-	if (has_ibs && ibs_config.op_enabled) {
-		val = (ibs_config.max_cnt_op >> 4) & 0xFFFF;
-		val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0;
-		val |= IBS_OP_ENABLE;
+	if (ibs_config.op_enabled) {
+		ibs_op_ctl = ibs_config.max_cnt_op >> 4;
+		if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) {
+			/*
+			 * IbsOpCurCnt not supported.  See
+			 * op_amd_randomize_ibs_op() for details.
+			 */
+			ibs_op_ctl = clamp(ibs_op_ctl, 0x0081ULL, 0xFF80ULL);
+		} else {
+			/*
+			 * The start value is randomized with a
+			 * positive offset, we need to compensate it
+			 * with the half of the randomized range. Also
+			 * avoid underflows.
+			 */
+			ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET,
+					 0xFFFFULL);
+		}
+		if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops)
+			ibs_op_ctl |= IBS_OP_CNT_CTL;
+		ibs_op_ctl |= IBS_OP_ENABLE;
+		val = op_amd_randomize_ibs_op(ibs_op_ctl);
 		wrmsrl(MSR_AMD64_IBSOPCTL, val);
 	}
 }
 
 static void op_amd_stop_ibs(void)
 {
-	if (has_ibs && ibs_config.fetch_enabled)
+	if (!ibs_caps)
+		return;
+
+	if (ibs_config.fetch_enabled)
 		/* clear max count and enable */
 		wrmsrl(MSR_AMD64_IBSFETCHCTL, 0);
 
-	if (has_ibs && ibs_config.op_enabled)
+	if (ibs_config.op_enabled)
 		/* clear max count and enable */
 		wrmsrl(MSR_AMD64_IBSOPCTL, 0);
 }
 
-#else
-
-static inline void op_amd_handle_ibs(struct pt_regs * const regs,
-			struct op_msrs const * const msrs) { }
-static inline void op_amd_start_ibs(void) { }
-static inline void op_amd_stop_ibs(void) { }
-
-#endif
-
 static int op_amd_check_ctrs(struct pt_regs * const regs,
 			     struct op_msrs const * const msrs)
 {
@@ -355,8 +450,6 @@ static void op_amd_shutdown(struct op_msrs const * const msrs)
 	}
 }
 
-#ifdef CONFIG_OPROFILE_IBS
-
 static u8 ibs_eilvt_off;
 
 static inline void apic_init_ibs_nmi_per_cpu(void *arg)
@@ -405,45 +498,36 @@ static int init_ibs_nmi(void)
 		return 1;
 	}
 
-#ifdef CONFIG_NUMA
-	/* Sanity check */
-	/* Works only for 64bit with proper numa implementation. */
-	if (nodes != num_possible_nodes()) {
-		printk(KERN_DEBUG "Failed to setup CPU node(s) for IBS, "
-			"found: %d, expected %d",
-			nodes, num_possible_nodes());
-		return 1;
-	}
-#endif
 	return 0;
 }
 
 /* uninitialize the APIC for the IBS interrupts if needed */
 static void clear_ibs_nmi(void)
 {
-	if (has_ibs)
+	if (ibs_caps)
 		on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1);
 }
 
 /* initialize the APIC for the IBS interrupts if available */
 static void ibs_init(void)
 {
-	has_ibs = boot_cpu_has(X86_FEATURE_IBS);
+	ibs_caps = get_ibs_caps();
 
-	if (!has_ibs)
+	if (!ibs_caps)
 		return;
 
 	if (init_ibs_nmi()) {
-		has_ibs = 0;
+		ibs_caps = 0;
 		return;
 	}
 
-	printk(KERN_INFO "oprofile: AMD IBS detected\n");
+	printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n",
+	       (unsigned)ibs_caps);
 }
 
 static void ibs_exit(void)
 {
-	if (!has_ibs)
+	if (!ibs_caps)
 		return;
 
 	clear_ibs_nmi();
@@ -463,7 +547,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
 	if (ret)
 		return ret;
 
-	if (!has_ibs)
+	if (!ibs_caps)
 		return ret;
 
 	/* model specific files */
@@ -473,7 +557,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
 	ibs_config.fetch_enabled = 0;
 	ibs_config.max_cnt_op = 250000;
 	ibs_config.op_enabled = 0;
-	ibs_config.dispatched_ops = 1;
+	ibs_config.dispatched_ops = 0;
 
 	dir = oprofilefs_mkdir(sb, root, "ibs_fetch");
 	oprofilefs_create_ulong(sb, dir, "enable",
@@ -488,8 +572,9 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
 				&ibs_config.op_enabled);
 	oprofilefs_create_ulong(sb, dir, "max_count",
 				&ibs_config.max_cnt_op);
-	oprofilefs_create_ulong(sb, dir, "dispatched_ops",
-				&ibs_config.dispatched_ops);
+	if (ibs_caps & IBS_CAPS_OPCNT)
+		oprofilefs_create_ulong(sb, dir, "dispatched_ops",
+					&ibs_config.dispatched_ops);
 
 	return 0;
 }
@@ -507,19 +592,6 @@ static void op_amd_exit(void)
 	ibs_exit();
 }
 
-#else
-
-/* no IBS support */
-
-static int op_amd_init(struct oprofile_operations *ops)
-{
-	return 0;
-}
-
-static void op_amd_exit(void) {}
-
-#endif /* CONFIG_OPROFILE_IBS */
-
 struct op_x86_model_spec op_amd_spec = {
 	.num_counters		= NUM_COUNTERS,
 	.num_controls		= NUM_CONTROLS,
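
The lfsr_random()/op_amd_randomize_ibs_op() pair introduced in op_model_amd.c above can be exercised on its own. A self-contained user-space sketch, assuming <stdint.h> types in place of u64/s8, constants copied from the patch, and an ibs_caps value that advertises IBS_CAPS_RDWROPCNT (all of which are assumptions for the demo, not kernel code):

#include <stdio.h>
#include <stdint.h>

#define IBS_OP_ENABLE		(1ULL << 17)
#define IBS_CAPS_RDWROPCNT	(1U << 3)
#define IBS_RANDOM_BITS		12
#define IBS_RANDOM_MASK		((1ULL << IBS_RANDOM_BITS) - 1)

/* assumption for the demo: the CPU can write IbsOpCurCnt */
static unsigned int ibs_caps = IBS_CAPS_RDWROPCNT;

/* 16-bit LFSR as in the patch: x^16 + x^14 + x^13 + x^11 + 1 */
static unsigned int lfsr_random(void)
{
	static unsigned int lfsr_value = 0xF00D;
	unsigned int bit;

	bit = ((lfsr_value >> 0) ^ (lfsr_value >> 2) ^
	       (lfsr_value >> 3) ^ (lfsr_value >> 5)) & 0x0001;
	lfsr_value = (lfsr_value >> 1) | (bit << 15);

	return lfsr_value;
}

/* same two branches as op_amd_randomize_ibs_op() in the patch */
static uint64_t randomize_ibs_op(uint64_t val)
{
	unsigned int random = lfsr_random();

	if (!(ibs_caps & IBS_CAPS_RDWROPCNT))
		/* no writable IbsOpCurCnt: jitter IbsOpMaxCnt by -128..+127 */
		val += (int8_t)(random >> 4);
	else
		/* seed IbsOpCurCnt with a 12-bit random start value */
		val |= (uint64_t)(random & IBS_RANDOM_MASK) << 32;

	return val;
}

int main(void)
{
	uint64_t ibs_op_ctl = (250000 >> 4) | IBS_OP_ENABLE;
	int i;

	for (i = 0; i < 4; i++)
		printf("IBSOPCTL = 0x%016llx\n",
		       (unsigned long long)randomize_ibs_op(ibs_op_ctl));
	return 0;
}

Each call ORs a fresh 12-bit LFSR value into bits 32-43 (IbsOpCurCnt), so successive IBS op samples do not all start from the same count.
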
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index ac6b354becdf..e6a160a4684a 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -394,12 +394,6 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs)
 	setup_num_counters();
 	stag = get_stagger();
 
-	/* initialize some registers */
-	for (i = 0; i < num_counters; ++i)
-		msrs->counters[i].addr = 0;
-	for (i = 0; i < num_controls; ++i)
-		msrs->controls[i].addr = 0;
-
 	/* the counter & cccr registers we pay attention to */
 	for (i = 0; i < num_counters; ++i) {
 		addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 8eb05878554c..5d1727ba409e 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -37,15 +37,11 @@ static void ppro_fill_in_addresses(struct op_msrs * const msrs)
 	for (i = 0; i < num_counters; i++) {
 		if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
 			msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
-		else
-			msrs->counters[i].addr = 0;
 	}
 
 	for (i = 0; i < num_counters; i++) {
 		if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
 			msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
-		else
-			msrs->controls[i].addr = 0;
 	}
 }
 
@@ -57,7 +53,7 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
 	int i;
 
 	if (!reset_value) {
-		reset_value = kmalloc(sizeof(reset_value[0]) * num_counters,
+		reset_value = kzalloc(sizeof(reset_value[0]) * num_counters,
 					GFP_ATOMIC);
 		if (!reset_value)
 			return;
@@ -82,9 +78,18 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
 
 	/* clear all counters */
 	for (i = 0; i < num_counters; ++i) {
-		if (unlikely(!msrs->controls[i].addr))
+		if (unlikely(!msrs->controls[i].addr)) {
+			if (counter_config[i].enabled && !smp_processor_id())
+				/*
+				 * counter is reserved, this is on all
+				 * cpus, so report only for cpu #0
+				 */
+				op_x86_warn_reserved(i);
 			continue;
+		}
 		rdmsrl(msrs->controls[i].addr, val);
+		if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
+			op_x86_warn_in_use(i);
 		val &= model->reserved;
 		wrmsrl(msrs->controls[i].addr, val);
 	}
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 7b8e75d16081..ff82a755edd4 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -57,6 +57,26 @@ struct op_x86_model_spec {
 
 struct op_counter_config;
 
+static inline void op_x86_warn_in_use(int counter)
+{
+	/*
+	 * The warning indicates an already running counter. If
+	 * oprofile doesn't collect data, then try using a different
+	 * performance counter on your platform to monitor the desired
+	 * event. Delete counter #%d from the desired event by editing
+	 * the /usr/share/oprofile/%s/<cpu>/events file. If the event
+	 * cannot be monitored by any other counter, contact your
+	 * hardware or BIOS vendor.
+	 */
+	pr_warning("oprofile: counter #%d on cpu #%d may already be used\n",
+		   counter, smp_processor_id());
+}
+
+static inline void op_x86_warn_reserved(int counter)
+{
+	pr_warning("oprofile: counter #%d is already reserved\n", counter);
+}
+
 extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
 			   struct op_counter_config *counter_config);
 extern int op_x86_phys_to_virt(int phys);
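
For reference, the Fn8000_001B probe that the new get_ibs_caps() in op_model_amd.c performs can be reproduced from user space with the compiler's <cpuid.h> helper. This sketch is simplified relative to the kernel code (it returns 0 instead of falling back to IBS_CAPS_AVAIL when the feature leaf is missing) and only reports the capability bits defined in the patch:

#include <stdio.h>
#include <cpuid.h>

#define IBS_CPUID_FEATURES	0x8000001b
#define IBS_CAPS_AVAIL		(1U << 0)
#define IBS_CAPS_RDWROPCNT	(1U << 3)
#define IBS_CAPS_OPCNT		(1U << 4)

static unsigned int get_ibs_caps(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* highest supported extended CPUID leaf */
	if (!__get_cpuid(0x80000000, &eax, &ebx, &ecx, &edx))
		return 0;
	if (eax < IBS_CPUID_FEATURES)
		return 0;

	/* Fn8000_001B_EAX carries the IBS feature flags */
	if (!__get_cpuid(IBS_CPUID_FEATURES, &eax, &ebx, &ecx, &edx))
		return 0;
	if (!(eax & IBS_CAPS_AVAIL))
		return 0;

	return eax;
}

int main(void)
{
	unsigned int caps = get_ibs_caps();

	printf("ibs_caps = 0x%08x (RDWROPCNT=%u OPCNT=%u)\n", caps,
	       !!(caps & IBS_CAPS_RDWROPCNT), !!(caps & IBS_CAPS_OPCNT));
	return 0;
}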