diff options
-rw-r--r-- | arch/Kconfig | 18 | ||||
-rw-r--r-- | arch/x86/oprofile/nmi_int.c | 17 | ||||
-rw-r--r-- | arch/x86/oprofile/op_model_amd.c | 244 | ||||
-rw-r--r-- | arch/x86/oprofile/op_model_p4.c | 6 | ||||
-rw-r--r-- | arch/x86/oprofile/op_model_ppro.c | 17 | ||||
-rw-r--r-- | arch/x86/oprofile/op_x86_model.h | 20 | ||||
-rw-r--r-- | init/Kconfig | 2 |
7 files changed, 200 insertions, 124 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 9d055b4f0585..06a13729c8df 100644 --- a/arch/Kconfig +++ b/arch/Kconfig | |||
@@ -3,11 +3,9 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | config OPROFILE | 5 | config OPROFILE |
6 | tristate "OProfile system profiling (EXPERIMENTAL)" | 6 | tristate "OProfile system profiling" |
7 | depends on PROFILING | 7 | depends on PROFILING |
8 | depends on HAVE_OPROFILE | 8 | depends on HAVE_OPROFILE |
9 | depends on TRACING_SUPPORT | ||
10 | select TRACING | ||
11 | select RING_BUFFER | 9 | select RING_BUFFER |
12 | select RING_BUFFER_ALLOW_SWAP | 10 | select RING_BUFFER_ALLOW_SWAP |
13 | help | 11 | help |
@@ -17,20 +15,6 @@ config OPROFILE | |||
17 | 15 | ||
18 | If unsure, say N. | 16 | If unsure, say N. |
19 | 17 | ||
20 | config OPROFILE_IBS | ||
21 | bool "OProfile AMD IBS support (EXPERIMENTAL)" | ||
22 | default n | ||
23 | depends on OPROFILE && SMP && X86 | ||
24 | help | ||
25 | Instruction-Based Sampling (IBS) is a new profiling | ||
26 | technique that provides rich, precise program performance | ||
27 | information. IBS is introduced by AMD Family10h processors | ||
28 | (AMD Opteron Quad-Core processor "Barcelona") to overcome | ||
29 | the limitations of conventional performance counter | ||
30 | sampling. | ||
31 | |||
32 | If unsure, say N. | ||
33 | |||
34 | config OPROFILE_EVENT_MULTIPLEX | 18 | config OPROFILE_EVENT_MULTIPLEX |
35 | bool "OProfile multiplexing support (EXPERIMENTAL)" | 19 | bool "OProfile multiplexing support (EXPERIMENTAL)" |
36 | default n | 20 | default n |
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 3347f696edc7..2c505ee71014 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
@@ -159,7 +159,7 @@ static int nmi_setup_mux(void) | |||
159 | 159 | ||
160 | for_each_possible_cpu(i) { | 160 | for_each_possible_cpu(i) { |
161 | per_cpu(cpu_msrs, i).multiplex = | 161 | per_cpu(cpu_msrs, i).multiplex = |
162 | kmalloc(multiplex_size, GFP_KERNEL); | 162 | kzalloc(multiplex_size, GFP_KERNEL); |
163 | if (!per_cpu(cpu_msrs, i).multiplex) | 163 | if (!per_cpu(cpu_msrs, i).multiplex) |
164 | return 0; | 164 | return 0; |
165 | } | 165 | } |
@@ -179,7 +179,6 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) | |||
179 | if (counter_config[i].enabled) { | 179 | if (counter_config[i].enabled) { |
180 | multiplex[i].saved = -(u64)counter_config[i].count; | 180 | multiplex[i].saved = -(u64)counter_config[i].count; |
181 | } else { | 181 | } else { |
182 | multiplex[i].addr = 0; | ||
183 | multiplex[i].saved = 0; | 182 | multiplex[i].saved = 0; |
184 | } | 183 | } |
185 | } | 184 | } |
@@ -189,25 +188,27 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) | |||
189 | 188 | ||
190 | static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) | 189 | static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) |
191 | { | 190 | { |
191 | struct op_msr *counters = msrs->counters; | ||
192 | struct op_msr *multiplex = msrs->multiplex; | 192 | struct op_msr *multiplex = msrs->multiplex; |
193 | int i; | 193 | int i; |
194 | 194 | ||
195 | for (i = 0; i < model->num_counters; ++i) { | 195 | for (i = 0; i < model->num_counters; ++i) { |
196 | int virt = op_x86_phys_to_virt(i); | 196 | int virt = op_x86_phys_to_virt(i); |
197 | if (multiplex[virt].addr) | 197 | if (counters[i].addr) |
198 | rdmsrl(multiplex[virt].addr, multiplex[virt].saved); | 198 | rdmsrl(counters[i].addr, multiplex[virt].saved); |
199 | } | 199 | } |
200 | } | 200 | } |
201 | 201 | ||
202 | static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) | 202 | static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) |
203 | { | 203 | { |
204 | struct op_msr *counters = msrs->counters; | ||
204 | struct op_msr *multiplex = msrs->multiplex; | 205 | struct op_msr *multiplex = msrs->multiplex; |
205 | int i; | 206 | int i; |
206 | 207 | ||
207 | for (i = 0; i < model->num_counters; ++i) { | 208 | for (i = 0; i < model->num_counters; ++i) { |
208 | int virt = op_x86_phys_to_virt(i); | 209 | int virt = op_x86_phys_to_virt(i); |
209 | if (multiplex[virt].addr) | 210 | if (counters[i].addr) |
210 | wrmsrl(multiplex[virt].addr, multiplex[virt].saved); | 211 | wrmsrl(counters[i].addr, multiplex[virt].saved); |
211 | } | 212 | } |
212 | } | 213 | } |
213 | 214 | ||
@@ -303,11 +304,11 @@ static int allocate_msrs(void) | |||
303 | 304 | ||
304 | int i; | 305 | int i; |
305 | for_each_possible_cpu(i) { | 306 | for_each_possible_cpu(i) { |
306 | per_cpu(cpu_msrs, i).counters = kmalloc(counters_size, | 307 | per_cpu(cpu_msrs, i).counters = kzalloc(counters_size, |
307 | GFP_KERNEL); | 308 | GFP_KERNEL); |
308 | if (!per_cpu(cpu_msrs, i).counters) | 309 | if (!per_cpu(cpu_msrs, i).counters) |
309 | return 0; | 310 | return 0; |
310 | per_cpu(cpu_msrs, i).controls = kmalloc(controls_size, | 311 | per_cpu(cpu_msrs, i).controls = kzalloc(controls_size, |
311 | GFP_KERNEL); | 312 | GFP_KERNEL); |
312 | if (!per_cpu(cpu_msrs, i).controls) | 313 | if (!per_cpu(cpu_msrs, i).controls) |
313 | return 0; | 314 | return 0; |
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 39686c29f03a..6a58256dce9f 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c | |||
@@ -22,6 +22,9 @@ | |||
22 | #include <asm/ptrace.h> | 22 | #include <asm/ptrace.h> |
23 | #include <asm/msr.h> | 23 | #include <asm/msr.h> |
24 | #include <asm/nmi.h> | 24 | #include <asm/nmi.h> |
25 | #include <asm/apic.h> | ||
26 | #include <asm/processor.h> | ||
27 | #include <asm/cpufeature.h> | ||
25 | 28 | ||
26 | #include "op_x86_model.h" | 29 | #include "op_x86_model.h" |
27 | #include "op_counter.h" | 30 | #include "op_counter.h" |
@@ -43,15 +46,13 @@ | |||
43 | 46 | ||
44 | static unsigned long reset_value[NUM_VIRT_COUNTERS]; | 47 | static unsigned long reset_value[NUM_VIRT_COUNTERS]; |
45 | 48 | ||
46 | #ifdef CONFIG_OPROFILE_IBS | ||
47 | |||
48 | /* IbsFetchCtl bits/masks */ | 49 | /* IbsFetchCtl bits/masks */ |
49 | #define IBS_FETCH_RAND_EN (1ULL<<57) | 50 | #define IBS_FETCH_RAND_EN (1ULL<<57) |
50 | #define IBS_FETCH_VAL (1ULL<<49) | 51 | #define IBS_FETCH_VAL (1ULL<<49) |
51 | #define IBS_FETCH_ENABLE (1ULL<<48) | 52 | #define IBS_FETCH_ENABLE (1ULL<<48) |
52 | #define IBS_FETCH_CNT_MASK 0xFFFF0000ULL | 53 | #define IBS_FETCH_CNT_MASK 0xFFFF0000ULL |
53 | 54 | ||
54 | /*IbsOpCtl bits */ | 55 | /* IbsOpCtl bits */ |
55 | #define IBS_OP_CNT_CTL (1ULL<<19) | 56 | #define IBS_OP_CNT_CTL (1ULL<<19) |
56 | #define IBS_OP_VAL (1ULL<<18) | 57 | #define IBS_OP_VAL (1ULL<<18) |
57 | #define IBS_OP_ENABLE (1ULL<<17) | 58 | #define IBS_OP_ENABLE (1ULL<<17) |
@@ -59,7 +60,7 @@ static unsigned long reset_value[NUM_VIRT_COUNTERS]; | |||
59 | #define IBS_FETCH_SIZE 6 | 60 | #define IBS_FETCH_SIZE 6 |
60 | #define IBS_OP_SIZE 12 | 61 | #define IBS_OP_SIZE 12 |
61 | 62 | ||
62 | static int has_ibs; /* AMD Family10h and later */ | 63 | static u32 ibs_caps; |
63 | 64 | ||
64 | struct op_ibs_config { | 65 | struct op_ibs_config { |
65 | unsigned long op_enabled; | 66 | unsigned long op_enabled; |
@@ -71,24 +72,52 @@ struct op_ibs_config { | |||
71 | }; | 72 | }; |
72 | 73 | ||
73 | static struct op_ibs_config ibs_config; | 74 | static struct op_ibs_config ibs_config; |
75 | static u64 ibs_op_ctl; | ||
74 | 76 | ||
75 | #endif | 77 | /* |
78 | * IBS cpuid feature detection | ||
79 | */ | ||
76 | 80 | ||
77 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | 81 | #define IBS_CPUID_FEATURES 0x8000001b |
82 | |||
83 | /* | ||
84 | * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but | ||
85 | * bit 0 is used to indicate the existence of IBS. | ||
86 | */ | ||
87 | #define IBS_CAPS_AVAIL (1LL<<0) | ||
88 | #define IBS_CAPS_RDWROPCNT (1LL<<3) | ||
89 | #define IBS_CAPS_OPCNT (1LL<<4) | ||
90 | |||
91 | /* | ||
92 | * IBS randomization macros | ||
93 | */ | ||
94 | #define IBS_RANDOM_BITS 12 | ||
95 | #define IBS_RANDOM_MASK ((1ULL << IBS_RANDOM_BITS) - 1) | ||
96 | #define IBS_RANDOM_MAXCNT_OFFSET (1ULL << (IBS_RANDOM_BITS - 5)) | ||
78 | 97 | ||
79 | static void op_mux_fill_in_addresses(struct op_msrs * const msrs) | 98 | static u32 get_ibs_caps(void) |
80 | { | 99 | { |
81 | int i; | 100 | u32 ibs_caps; |
101 | unsigned int max_level; | ||
82 | 102 | ||
83 | for (i = 0; i < NUM_VIRT_COUNTERS; i++) { | 103 | if (!boot_cpu_has(X86_FEATURE_IBS)) |
84 | int hw_counter = op_x86_virt_to_phys(i); | 104 | return 0; |
85 | if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) | 105 | |
86 | msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter; | 106 | /* check IBS cpuid feature flags */ |
87 | else | 107 | max_level = cpuid_eax(0x80000000); |
88 | msrs->multiplex[i].addr = 0; | 108 | if (max_level < IBS_CPUID_FEATURES) |
89 | } | 109 | return IBS_CAPS_AVAIL; |
110 | |||
111 | ibs_caps = cpuid_eax(IBS_CPUID_FEATURES); | ||
112 | if (!(ibs_caps & IBS_CAPS_AVAIL)) | ||
113 | /* cpuid flags not valid */ | ||
114 | return IBS_CAPS_AVAIL; | ||
115 | |||
116 | return ibs_caps; | ||
90 | } | 117 | } |
91 | 118 | ||
119 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
120 | |||
92 | static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, | 121 | static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, |
93 | struct op_msrs const * const msrs) | 122 | struct op_msrs const * const msrs) |
94 | { | 123 | { |
@@ -98,7 +127,7 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, | |||
98 | /* enable active counters */ | 127 | /* enable active counters */ |
99 | for (i = 0; i < NUM_COUNTERS; ++i) { | 128 | for (i = 0; i < NUM_COUNTERS; ++i) { |
100 | int virt = op_x86_phys_to_virt(i); | 129 | int virt = op_x86_phys_to_virt(i); |
101 | if (!counter_config[virt].enabled) | 130 | if (!reset_value[virt]) |
102 | continue; | 131 | continue; |
103 | rdmsrl(msrs->controls[i].addr, val); | 132 | rdmsrl(msrs->controls[i].addr, val); |
104 | val &= model->reserved; | 133 | val &= model->reserved; |
@@ -107,10 +136,6 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, | |||
107 | } | 136 | } |
108 | } | 137 | } |
109 | 138 | ||
110 | #else | ||
111 | |||
112 | static inline void op_mux_fill_in_addresses(struct op_msrs * const msrs) { } | ||
113 | |||
114 | #endif | 139 | #endif |
115 | 140 | ||
116 | /* functions for op_amd_spec */ | 141 | /* functions for op_amd_spec */ |
@@ -122,18 +147,12 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) | |||
122 | for (i = 0; i < NUM_COUNTERS; i++) { | 147 | for (i = 0; i < NUM_COUNTERS; i++) { |
123 | if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) | 148 | if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) |
124 | msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; | 149 | msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; |
125 | else | ||
126 | msrs->counters[i].addr = 0; | ||
127 | } | 150 | } |
128 | 151 | ||
129 | for (i = 0; i < NUM_CONTROLS; i++) { | 152 | for (i = 0; i < NUM_CONTROLS; i++) { |
130 | if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) | 153 | if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) |
131 | msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; | 154 | msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; |
132 | else | ||
133 | msrs->controls[i].addr = 0; | ||
134 | } | 155 | } |
135 | |||
136 | op_mux_fill_in_addresses(msrs); | ||
137 | } | 156 | } |
138 | 157 | ||
139 | static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | 158 | static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, |
@@ -144,7 +163,8 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | |||
144 | 163 | ||
145 | /* setup reset_value */ | 164 | /* setup reset_value */ |
146 | for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { | 165 | for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { |
147 | if (counter_config[i].enabled) | 166 | if (counter_config[i].enabled |
167 | && msrs->counters[op_x86_virt_to_phys(i)].addr) | ||
148 | reset_value[i] = counter_config[i].count; | 168 | reset_value[i] = counter_config[i].count; |
149 | else | 169 | else |
150 | reset_value[i] = 0; | 170 | reset_value[i] = 0; |
@@ -152,9 +172,18 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | |||
152 | 172 | ||
153 | /* clear all counters */ | 173 | /* clear all counters */ |
154 | for (i = 0; i < NUM_CONTROLS; ++i) { | 174 | for (i = 0; i < NUM_CONTROLS; ++i) { |
155 | if (unlikely(!msrs->controls[i].addr)) | 175 | if (unlikely(!msrs->controls[i].addr)) { |
176 | if (counter_config[i].enabled && !smp_processor_id()) | ||
177 | /* | ||
178 | * counter is reserved, this is on all | ||
179 | * cpus, so report only for cpu #0 | ||
180 | */ | ||
181 | op_x86_warn_reserved(i); | ||
156 | continue; | 182 | continue; |
183 | } | ||
157 | rdmsrl(msrs->controls[i].addr, val); | 184 | rdmsrl(msrs->controls[i].addr, val); |
185 | if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) | ||
186 | op_x86_warn_in_use(i); | ||
158 | val &= model->reserved; | 187 | val &= model->reserved; |
159 | wrmsrl(msrs->controls[i].addr, val); | 188 | wrmsrl(msrs->controls[i].addr, val); |
160 | } | 189 | } |
@@ -169,9 +198,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | |||
169 | /* enable active counters */ | 198 | /* enable active counters */ |
170 | for (i = 0; i < NUM_COUNTERS; ++i) { | 199 | for (i = 0; i < NUM_COUNTERS; ++i) { |
171 | int virt = op_x86_phys_to_virt(i); | 200 | int virt = op_x86_phys_to_virt(i); |
172 | if (!counter_config[virt].enabled) | 201 | if (!reset_value[virt]) |
173 | continue; | ||
174 | if (!msrs->counters[i].addr) | ||
175 | continue; | 202 | continue; |
176 | 203 | ||
177 | /* setup counter registers */ | 204 | /* setup counter registers */ |
@@ -185,7 +212,60 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | |||
185 | } | 212 | } |
186 | } | 213 | } |
187 | 214 | ||
188 | #ifdef CONFIG_OPROFILE_IBS | 215 | /* |
216 | * 16-bit Linear Feedback Shift Register (LFSR) | ||
217 | * | ||
218 | * Feedback polynomial (exponents 16, 14, 13, 11): | ||
219 | * X^16 + X^14 + X^13 + X^11 + 1 | ||
220 | */ | ||
221 | static unsigned int lfsr_random(void) | ||
222 | { | ||
223 | static unsigned int lfsr_value = 0xF00D; | ||
224 | unsigned int bit; | ||
225 | |||
226 | /* Compute next bit to shift in */ | ||
227 | bit = ((lfsr_value >> 0) ^ | ||
228 | (lfsr_value >> 2) ^ | ||
229 | (lfsr_value >> 3) ^ | ||
230 | (lfsr_value >> 5)) & 0x0001; | ||
231 | |||
232 | /* Advance to next register value */ | ||
233 | lfsr_value = (lfsr_value >> 1) | (bit << 15); | ||
234 | |||
235 | return lfsr_value; | ||
236 | } | ||
237 | |||
238 | /* | ||
239 | * IBS software randomization | ||
240 | * | ||
241 | * The IBS periodic op counter is randomized in software. The lower 12 | ||
242 | * bits of the 20 bit counter are randomized. IbsOpCurCnt is | ||
243 | * initialized with a 12 bit random value. | ||
244 | */ | ||
245 | static inline u64 op_amd_randomize_ibs_op(u64 val) | ||
246 | { | ||
247 | unsigned int random = lfsr_random(); | ||
248 | |||
249 | if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) | ||
250 | /* | ||
251 | * Work around if the hw can not write to IbsOpCurCnt | ||
252 | * | ||
253 | * Randomize the lower 8 bits of the 16 bit | ||
254 | * IbsOpMaxCnt [15:0] value in the range of -128 to | ||
255 | * +127 by adding/subtracting an offset to the | ||
256 | * maximum count (IbsOpMaxCnt). | ||
257 | * | ||
258 | * To avoid over or underflows and protect upper bits | ||
259 | * starting at bit 16, the initial value for | ||
260 | * IbsOpMaxCnt must fit in the range from 0x0081 to | ||
261 | * 0xff80. | ||
262 | */ | ||
263 | val += (s8)(random >> 4); | ||
264 | else | ||
265 | val |= (u64)(random & IBS_RANDOM_MASK) << 32; | ||
266 | |||
267 | return val; | ||
268 | } | ||
189 | 269 | ||
190 | static inline void | 270 | static inline void |
191 | op_amd_handle_ibs(struct pt_regs * const regs, | 271 | op_amd_handle_ibs(struct pt_regs * const regs, |
@@ -194,7 +274,7 @@ op_amd_handle_ibs(struct pt_regs * const regs, | |||
194 | u64 val, ctl; | 274 | u64 val, ctl; |
195 | struct op_entry entry; | 275 | struct op_entry entry; |
196 | 276 | ||
197 | if (!has_ibs) | 277 | if (!ibs_caps) |
198 | return; | 278 | return; |
199 | 279 | ||
200 | if (ibs_config.fetch_enabled) { | 280 | if (ibs_config.fetch_enabled) { |
@@ -236,8 +316,7 @@ op_amd_handle_ibs(struct pt_regs * const regs, | |||
236 | oprofile_write_commit(&entry); | 316 | oprofile_write_commit(&entry); |
237 | 317 | ||
238 | /* reenable the IRQ */ | 318 | /* reenable the IRQ */ |
239 | ctl &= ~IBS_OP_VAL & 0xFFFFFFFF; | 319 | ctl = op_amd_randomize_ibs_op(ibs_op_ctl); |
240 | ctl |= IBS_OP_ENABLE; | ||
241 | wrmsrl(MSR_AMD64_IBSOPCTL, ctl); | 320 | wrmsrl(MSR_AMD64_IBSOPCTL, ctl); |
242 | } | 321 | } |
243 | } | 322 | } |
@@ -246,41 +325,57 @@ op_amd_handle_ibs(struct pt_regs * const regs, | |||
246 | static inline void op_amd_start_ibs(void) | 325 | static inline void op_amd_start_ibs(void) |
247 | { | 326 | { |
248 | u64 val; | 327 | u64 val; |
249 | if (has_ibs && ibs_config.fetch_enabled) { | 328 | |
329 | if (!ibs_caps) | ||
330 | return; | ||
331 | |||
332 | if (ibs_config.fetch_enabled) { | ||
250 | val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; | 333 | val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; |
251 | val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; | 334 | val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; |
252 | val |= IBS_FETCH_ENABLE; | 335 | val |= IBS_FETCH_ENABLE; |
253 | wrmsrl(MSR_AMD64_IBSFETCHCTL, val); | 336 | wrmsrl(MSR_AMD64_IBSFETCHCTL, val); |
254 | } | 337 | } |
255 | 338 | ||
256 | if (has_ibs && ibs_config.op_enabled) { | 339 | if (ibs_config.op_enabled) { |
257 | val = (ibs_config.max_cnt_op >> 4) & 0xFFFF; | 340 | ibs_op_ctl = ibs_config.max_cnt_op >> 4; |
258 | val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0; | 341 | if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) { |
259 | val |= IBS_OP_ENABLE; | 342 | /* |
343 | * IbsOpCurCnt not supported. See | ||
344 | * op_amd_randomize_ibs_op() for details. | ||
345 | */ | ||
346 | ibs_op_ctl = clamp(ibs_op_ctl, 0x0081ULL, 0xFF80ULL); | ||
347 | } else { | ||
348 | /* | ||
349 | * The start value is randomized with a | ||
350 | * positive offset, we need to compensate it | ||
351 | * with the half of the randomized range. Also | ||
352 | * avoid underflows. | ||
353 | */ | ||
354 | ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET, | ||
355 | 0xFFFFULL); | ||
356 | } | ||
357 | if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops) | ||
358 | ibs_op_ctl |= IBS_OP_CNT_CTL; | ||
359 | ibs_op_ctl |= IBS_OP_ENABLE; | ||
360 | val = op_amd_randomize_ibs_op(ibs_op_ctl); | ||
260 | wrmsrl(MSR_AMD64_IBSOPCTL, val); | 361 | wrmsrl(MSR_AMD64_IBSOPCTL, val); |
261 | } | 362 | } |
262 | } | 363 | } |
263 | 364 | ||
264 | static void op_amd_stop_ibs(void) | 365 | static void op_amd_stop_ibs(void) |
265 | { | 366 | { |
266 | if (has_ibs && ibs_config.fetch_enabled) | 367 | if (!ibs_caps) |
368 | return; | ||
369 | |||
370 | if (ibs_config.fetch_enabled) | ||
267 | /* clear max count and enable */ | 371 | /* clear max count and enable */ |
268 | wrmsrl(MSR_AMD64_IBSFETCHCTL, 0); | 372 | wrmsrl(MSR_AMD64_IBSFETCHCTL, 0); |
269 | 373 | ||
270 | if (has_ibs && ibs_config.op_enabled) | 374 | if (ibs_config.op_enabled) |
271 | /* clear max count and enable */ | 375 | /* clear max count and enable */ |
272 | wrmsrl(MSR_AMD64_IBSOPCTL, 0); | 376 | wrmsrl(MSR_AMD64_IBSOPCTL, 0); |
273 | } | 377 | } |
274 | 378 | ||
275 | #else | ||
276 | |||
277 | static inline void op_amd_handle_ibs(struct pt_regs * const regs, | ||
278 | struct op_msrs const * const msrs) { } | ||
279 | static inline void op_amd_start_ibs(void) { } | ||
280 | static inline void op_amd_stop_ibs(void) { } | ||
281 | |||
282 | #endif | ||
283 | |||
284 | static int op_amd_check_ctrs(struct pt_regs * const regs, | 379 | static int op_amd_check_ctrs(struct pt_regs * const regs, |
285 | struct op_msrs const * const msrs) | 380 | struct op_msrs const * const msrs) |
286 | { | 381 | { |
@@ -355,8 +450,6 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) | |||
355 | } | 450 | } |
356 | } | 451 | } |
357 | 452 | ||
358 | #ifdef CONFIG_OPROFILE_IBS | ||
359 | |||
360 | static u8 ibs_eilvt_off; | 453 | static u8 ibs_eilvt_off; |
361 | 454 | ||
362 | static inline void apic_init_ibs_nmi_per_cpu(void *arg) | 455 | static inline void apic_init_ibs_nmi_per_cpu(void *arg) |
@@ -405,45 +498,36 @@ static int init_ibs_nmi(void) | |||
405 | return 1; | 498 | return 1; |
406 | } | 499 | } |
407 | 500 | ||
408 | #ifdef CONFIG_NUMA | ||
409 | /* Sanity check */ | ||
410 | /* Works only for 64bit with proper numa implementation. */ | ||
411 | if (nodes != num_possible_nodes()) { | ||
412 | printk(KERN_DEBUG "Failed to setup CPU node(s) for IBS, " | ||
413 | "found: %d, expected %d", | ||
414 | nodes, num_possible_nodes()); | ||
415 | return 1; | ||
416 | } | ||
417 | #endif | ||
418 | return 0; | 501 | return 0; |
419 | } | 502 | } |
420 | 503 | ||
421 | /* uninitialize the APIC for the IBS interrupts if needed */ | 504 | /* uninitialize the APIC for the IBS interrupts if needed */ |
422 | static void clear_ibs_nmi(void) | 505 | static void clear_ibs_nmi(void) |
423 | { | 506 | { |
424 | if (has_ibs) | 507 | if (ibs_caps) |
425 | on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); | 508 | on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); |
426 | } | 509 | } |
427 | 510 | ||
428 | /* initialize the APIC for the IBS interrupts if available */ | 511 | /* initialize the APIC for the IBS interrupts if available */ |
429 | static void ibs_init(void) | 512 | static void ibs_init(void) |
430 | { | 513 | { |
431 | has_ibs = boot_cpu_has(X86_FEATURE_IBS); | 514 | ibs_caps = get_ibs_caps(); |
432 | 515 | ||
433 | if (!has_ibs) | 516 | if (!ibs_caps) |
434 | return; | 517 | return; |
435 | 518 | ||
436 | if (init_ibs_nmi()) { | 519 | if (init_ibs_nmi()) { |
437 | has_ibs = 0; | 520 | ibs_caps = 0; |
438 | return; | 521 | return; |
439 | } | 522 | } |
440 | 523 | ||
441 | printk(KERN_INFO "oprofile: AMD IBS detected\n"); | 524 | printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", |
525 | (unsigned)ibs_caps); | ||
442 | } | 526 | } |
443 | 527 | ||
444 | static void ibs_exit(void) | 528 | static void ibs_exit(void) |
445 | { | 529 | { |
446 | if (!has_ibs) | 530 | if (!ibs_caps) |
447 | return; | 531 | return; |
448 | 532 | ||
449 | clear_ibs_nmi(); | 533 | clear_ibs_nmi(); |
@@ -463,7 +547,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) | |||
463 | if (ret) | 547 | if (ret) |
464 | return ret; | 548 | return ret; |
465 | 549 | ||
466 | if (!has_ibs) | 550 | if (!ibs_caps) |
467 | return ret; | 551 | return ret; |
468 | 552 | ||
469 | /* model specific files */ | 553 | /* model specific files */ |
@@ -473,7 +557,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) | |||
473 | ibs_config.fetch_enabled = 0; | 557 | ibs_config.fetch_enabled = 0; |
474 | ibs_config.max_cnt_op = 250000; | 558 | ibs_config.max_cnt_op = 250000; |
475 | ibs_config.op_enabled = 0; | 559 | ibs_config.op_enabled = 0; |
476 | ibs_config.dispatched_ops = 1; | 560 | ibs_config.dispatched_ops = 0; |
477 | 561 | ||
478 | dir = oprofilefs_mkdir(sb, root, "ibs_fetch"); | 562 | dir = oprofilefs_mkdir(sb, root, "ibs_fetch"); |
479 | oprofilefs_create_ulong(sb, dir, "enable", | 563 | oprofilefs_create_ulong(sb, dir, "enable", |
@@ -488,8 +572,9 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) | |||
488 | &ibs_config.op_enabled); | 572 | &ibs_config.op_enabled); |
489 | oprofilefs_create_ulong(sb, dir, "max_count", | 573 | oprofilefs_create_ulong(sb, dir, "max_count", |
490 | &ibs_config.max_cnt_op); | 574 | &ibs_config.max_cnt_op); |
491 | oprofilefs_create_ulong(sb, dir, "dispatched_ops", | 575 | if (ibs_caps & IBS_CAPS_OPCNT) |
492 | &ibs_config.dispatched_ops); | 576 | oprofilefs_create_ulong(sb, dir, "dispatched_ops", |
577 | &ibs_config.dispatched_ops); | ||
493 | 578 | ||
494 | return 0; | 579 | return 0; |
495 | } | 580 | } |
@@ -507,19 +592,6 @@ static void op_amd_exit(void) | |||
507 | ibs_exit(); | 592 | ibs_exit(); |
508 | } | 593 | } |
509 | 594 | ||
510 | #else | ||
511 | |||
512 | /* no IBS support */ | ||
513 | |||
514 | static int op_amd_init(struct oprofile_operations *ops) | ||
515 | { | ||
516 | return 0; | ||
517 | } | ||
518 | |||
519 | static void op_amd_exit(void) {} | ||
520 | |||
521 | #endif /* CONFIG_OPROFILE_IBS */ | ||
522 | |||
523 | struct op_x86_model_spec op_amd_spec = { | 595 | struct op_x86_model_spec op_amd_spec = { |
524 | .num_counters = NUM_COUNTERS, | 596 | .num_counters = NUM_COUNTERS, |
525 | .num_controls = NUM_CONTROLS, | 597 | .num_controls = NUM_CONTROLS, |
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index ac6b354becdf..e6a160a4684a 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c | |||
@@ -394,12 +394,6 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs) | |||
394 | setup_num_counters(); | 394 | setup_num_counters(); |
395 | stag = get_stagger(); | 395 | stag = get_stagger(); |
396 | 396 | ||
397 | /* initialize some registers */ | ||
398 | for (i = 0; i < num_counters; ++i) | ||
399 | msrs->counters[i].addr = 0; | ||
400 | for (i = 0; i < num_controls; ++i) | ||
401 | msrs->controls[i].addr = 0; | ||
402 | |||
403 | /* the counter & cccr registers we pay attention to */ | 397 | /* the counter & cccr registers we pay attention to */ |
404 | for (i = 0; i < num_counters; ++i) { | 398 | for (i = 0; i < num_counters; ++i) { |
405 | addr = p4_counters[VIRT_CTR(stag, i)].counter_address; | 399 | addr = p4_counters[VIRT_CTR(stag, i)].counter_address; |
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 8eb05878554c..5d1727ba409e 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c | |||
@@ -37,15 +37,11 @@ static void ppro_fill_in_addresses(struct op_msrs * const msrs) | |||
37 | for (i = 0; i < num_counters; i++) { | 37 | for (i = 0; i < num_counters; i++) { |
38 | if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) | 38 | if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) |
39 | msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; | 39 | msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; |
40 | else | ||
41 | msrs->counters[i].addr = 0; | ||
42 | } | 40 | } |
43 | 41 | ||
44 | for (i = 0; i < num_counters; i++) { | 42 | for (i = 0; i < num_counters; i++) { |
45 | if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) | 43 | if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) |
46 | msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; | 44 | msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; |
47 | else | ||
48 | msrs->controls[i].addr = 0; | ||
49 | } | 45 | } |
50 | } | 46 | } |
51 | 47 | ||
@@ -57,7 +53,7 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, | |||
57 | int i; | 53 | int i; |
58 | 54 | ||
59 | if (!reset_value) { | 55 | if (!reset_value) { |
60 | reset_value = kmalloc(sizeof(reset_value[0]) * num_counters, | 56 | reset_value = kzalloc(sizeof(reset_value[0]) * num_counters, |
61 | GFP_ATOMIC); | 57 | GFP_ATOMIC); |
62 | if (!reset_value) | 58 | if (!reset_value) |
63 | return; | 59 | return; |
@@ -82,9 +78,18 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, | |||
82 | 78 | ||
83 | /* clear all counters */ | 79 | /* clear all counters */ |
84 | for (i = 0; i < num_counters; ++i) { | 80 | for (i = 0; i < num_counters; ++i) { |
85 | if (unlikely(!msrs->controls[i].addr)) | 81 | if (unlikely(!msrs->controls[i].addr)) { |
82 | if (counter_config[i].enabled && !smp_processor_id()) | ||
83 | /* | ||
84 | * counter is reserved, this is on all | ||
85 | * cpus, so report only for cpu #0 | ||
86 | */ | ||
87 | op_x86_warn_reserved(i); | ||
86 | continue; | 88 | continue; |
89 | } | ||
87 | rdmsrl(msrs->controls[i].addr, val); | 90 | rdmsrl(msrs->controls[i].addr, val); |
91 | if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) | ||
92 | op_x86_warn_in_use(i); | ||
88 | val &= model->reserved; | 93 | val &= model->reserved; |
89 | wrmsrl(msrs->controls[i].addr, val); | 94 | wrmsrl(msrs->controls[i].addr, val); |
90 | } | 95 | } |
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 7b8e75d16081..ff82a755edd4 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h | |||
@@ -57,6 +57,26 @@ struct op_x86_model_spec { | |||
57 | 57 | ||
58 | struct op_counter_config; | 58 | struct op_counter_config; |
59 | 59 | ||
60 | static inline void op_x86_warn_in_use(int counter) | ||
61 | { | ||
62 | /* | ||
63 | * The warning indicates an already running counter. If | ||
64 | * oprofile doesn't collect data, then try using a different | ||
65 | * performance counter on your platform to monitor the desired | ||
66 | * event. Delete counter #%d from the desired event by editing | ||
67 | * the /usr/share/oprofile/%s/<cpu>/events file. If the event | ||
68 | * cannot be monitored by any other counter, contact your | ||
69 | * hardware or BIOS vendor. | ||
70 | */ | ||
71 | pr_warning("oprofile: counter #%d on cpu #%d may already be used\n", | ||
72 | counter, smp_processor_id()); | ||
73 | } | ||
74 | |||
75 | static inline void op_x86_warn_reserved(int counter) | ||
76 | { | ||
77 | pr_warning("oprofile: counter #%d is already reserved\n", counter); | ||
78 | } | ||
79 | |||
60 | extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, | 80 | extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, |
61 | struct op_counter_config *counter_config); | 81 | struct op_counter_config *counter_config); |
62 | extern int op_x86_phys_to_virt(int phys); | 82 | extern int op_x86_phys_to_virt(int phys); |
diff --git a/init/Kconfig b/init/Kconfig index d038a57004a2..207ae29354a3 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -1128,7 +1128,7 @@ config MMAP_ALLOW_UNINITIALIZED | |||
1128 | See Documentation/nommu-mmap.txt for more information. | 1128 | See Documentation/nommu-mmap.txt for more information. |
1129 | 1129 | ||
1130 | config PROFILING | 1130 | config PROFILING |
1131 | bool "Profiling support (EXPERIMENTAL)" | 1131 | bool "Profiling support" |
1132 | help | 1132 | help |
1133 | Say Y here to enable the extended profiling support mechanisms used | 1133 | Say Y here to enable the extended profiling support mechanisms used |
1134 | by profilers such as OProfile. | 1134 | by profilers such as OProfile. |