diff options
author | Robert Richter <robert.richter@amd.com> | 2008-10-15 16:19:41 -0400 |
---|---|---|
committer | Robert Richter <robert.richter@amd.com> | 2008-10-15 16:19:41 -0400 |
commit | 5a289395bf753f8a318d3a5fa335a757c16c0183 (patch) | |
tree | 3573b8bc4bbd135616bb395d998ef1526fff596f /arch/x86/oprofile/op_model_ppro.c | |
parent | 5f87dfb79f829339508a5d989b8252eb30842587 (diff) | |
parent | 59512900baab03c5629f2ff5efad1d5d4e682ece (diff) |
Merge branch 'oprofile/x86-oprofile-for-tip' into oprofile/oprofile-for-tip
Conflicts:
arch/x86/oprofile/op_model_ppro.c
Diffstat (limited to 'arch/x86/oprofile/op_model_ppro.c')
-rw-r--r-- | arch/x86/oprofile/op_model_ppro.c | 108 |
1 files changed, 85 insertions, 23 deletions
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index c665bac4a143..0620d6d45f7d 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c | |||
@@ -1,32 +1,34 @@ | |||
1 | /* | 1 | /* |
2 | * @file op_model_ppro.h | 2 | * @file op_model_ppro.h |
3 | * pentium pro / P6 model-specific MSR operations | 3 | * Family 6 perfmon and architectural perfmon MSR operations |
4 | * | 4 | * |
5 | * @remark Copyright 2002 OProfile authors | 5 | * @remark Copyright 2002 OProfile authors |
6 | * @remark Copyright 2008 Intel Corporation | ||
6 | * @remark Read the file COPYING | 7 | * @remark Read the file COPYING |
7 | * | 8 | * |
8 | * @author John Levon | 9 | * @author John Levon |
9 | * @author Philippe Elie | 10 | * @author Philippe Elie |
10 | * @author Graydon Hoare | 11 | * @author Graydon Hoare |
12 | * @author Andi Kleen | ||
11 | */ | 13 | */ |
12 | 14 | ||
13 | #include <linux/oprofile.h> | 15 | #include <linux/oprofile.h> |
16 | #include <linux/slab.h> | ||
14 | #include <asm/ptrace.h> | 17 | #include <asm/ptrace.h> |
15 | #include <asm/msr.h> | 18 | #include <asm/msr.h> |
16 | #include <asm/apic.h> | 19 | #include <asm/apic.h> |
17 | #include <asm/nmi.h> | 20 | #include <asm/nmi.h> |
21 | #include <asm/intel_arch_perfmon.h> | ||
18 | 22 | ||
19 | #include "op_x86_model.h" | 23 | #include "op_x86_model.h" |
20 | #include "op_counter.h" | 24 | #include "op_counter.h" |
21 | 25 | ||
22 | #define NUM_COUNTERS 2 | 26 | static int num_counters = 2; |
23 | #define NUM_CONTROLS 2 | 27 | static int counter_width = 32; |
24 | 28 | ||
25 | #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) | 29 | #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) |
26 | #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) | 30 | #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) |
27 | #define CTR_32BIT_WRITE(l, msrs, c) \ | 31 | #define CTR_OVERFLOWED(n) (!((n) & (1U<<(counter_width-1)))) |
28 | do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), 0); } while (0) | ||
29 | #define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) | ||
30 | 32 | ||
31 | #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) | 33 | #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) |
32 | #define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) | 34 | #define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) |
@@ -40,20 +42,20 @@ | |||
40 | #define CTRL_SET_UM(val, m) (val |= (m << 8)) | 42 | #define CTRL_SET_UM(val, m) (val |= (m << 8)) |
41 | #define CTRL_SET_EVENT(val, e) (val |= e) | 43 | #define CTRL_SET_EVENT(val, e) (val |= e) |
42 | 44 | ||
43 | static unsigned long reset_value[NUM_COUNTERS]; | 45 | static u64 *reset_value; |
44 | 46 | ||
45 | static void ppro_fill_in_addresses(struct op_msrs * const msrs) | 47 | static void ppro_fill_in_addresses(struct op_msrs * const msrs) |
46 | { | 48 | { |
47 | int i; | 49 | int i; |
48 | 50 | ||
49 | for (i = 0; i < NUM_COUNTERS; i++) { | 51 | for (i = 0; i < num_counters; i++) { |
50 | if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) | 52 | if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) |
51 | msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; | 53 | msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; |
52 | else | 54 | else |
53 | msrs->counters[i].addr = 0; | 55 | msrs->counters[i].addr = 0; |
54 | } | 56 | } |
55 | 57 | ||
56 | for (i = 0; i < NUM_CONTROLS; i++) { | 58 | for (i = 0; i < num_counters; i++) { |
57 | if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) | 59 | if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) |
58 | msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; | 60 | msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; |
59 | else | 61 | else |
@@ -67,8 +69,22 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) | |||
67 | unsigned int low, high; | 69 | unsigned int low, high; |
68 | int i; | 70 | int i; |
69 | 71 | ||
72 | if (!reset_value) { | ||
73 | reset_value = kmalloc(sizeof(unsigned) * num_counters, | ||
74 | GFP_ATOMIC); | ||
75 | if (!reset_value) | ||
76 | return; | ||
77 | } | ||
78 | |||
79 | if (cpu_has_arch_perfmon) { | ||
80 | union cpuid10_eax eax; | ||
81 | eax.full = cpuid_eax(0xa); | ||
82 | if (counter_width < eax.split.bit_width) | ||
83 | counter_width = eax.split.bit_width; | ||
84 | } | ||
85 | |||
70 | /* clear all counters */ | 86 | /* clear all counters */ |
71 | for (i = 0 ; i < NUM_CONTROLS; ++i) { | 87 | for (i = 0 ; i < num_counters; ++i) { |
72 | if (unlikely(!CTRL_IS_RESERVED(msrs, i))) | 88 | if (unlikely(!CTRL_IS_RESERVED(msrs, i))) |
73 | continue; | 89 | continue; |
74 | CTRL_READ(low, high, msrs, i); | 90 | CTRL_READ(low, high, msrs, i); |
@@ -77,18 +93,18 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) | |||
77 | } | 93 | } |
78 | 94 | ||
79 | /* avoid a false detection of ctr overflows in NMI handler */ | 95 | /* avoid a false detection of ctr overflows in NMI handler */ |
80 | for (i = 0; i < NUM_COUNTERS; ++i) { | 96 | for (i = 0; i < num_counters; ++i) { |
81 | if (unlikely(!CTR_IS_RESERVED(msrs, i))) | 97 | if (unlikely(!CTR_IS_RESERVED(msrs, i))) |
82 | continue; | 98 | continue; |
83 | CTR_32BIT_WRITE(1, msrs, i); | 99 | wrmsrl(msrs->counters[i].addr, -1LL); |
84 | } | 100 | } |
85 | 101 | ||
86 | /* enable active counters */ | 102 | /* enable active counters */ |
87 | for (i = 0; i < NUM_COUNTERS; ++i) { | 103 | for (i = 0; i < num_counters; ++i) { |
88 | if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { | 104 | if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { |
89 | reset_value[i] = counter_config[i].count; | 105 | reset_value[i] = counter_config[i].count; |
90 | 106 | ||
91 | CTR_32BIT_WRITE(counter_config[i].count, msrs, i); | 107 | wrmsrl(msrs->counters[i].addr, -reset_value[i]); |
92 | 108 | ||
93 | CTRL_READ(low, high, msrs, i); | 109 | CTRL_READ(low, high, msrs, i); |
94 | CTRL_CLEAR(low); | 110 | CTRL_CLEAR(low); |
@@ -111,13 +127,13 @@ static int ppro_check_ctrs(struct pt_regs * const regs, | |||
111 | unsigned int low, high; | 127 | unsigned int low, high; |
112 | int i; | 128 | int i; |
113 | 129 | ||
114 | for (i = 0 ; i < NUM_COUNTERS; ++i) { | 130 | for (i = 0 ; i < num_counters; ++i) { |
115 | if (!reset_value[i]) | 131 | if (!reset_value[i]) |
116 | continue; | 132 | continue; |
117 | CTR_READ(low, high, msrs, i); | 133 | CTR_READ(low, high, msrs, i); |
118 | if (CTR_OVERFLOWED(low)) { | 134 | if (CTR_OVERFLOWED(low)) { |
119 | oprofile_add_sample(regs, i); | 135 | oprofile_add_sample(regs, i); |
120 | CTR_32BIT_WRITE(reset_value[i], msrs, i); | 136 | wrmsrl(msrs->counters[i].addr, -reset_value[i]); |
121 | } | 137 | } |
122 | } | 138 | } |
123 | 139 | ||
@@ -141,7 +157,7 @@ static void ppro_start(struct op_msrs const * const msrs) | |||
141 | unsigned int low, high; | 157 | unsigned int low, high; |
142 | int i; | 158 | int i; |
143 | 159 | ||
144 | for (i = 0; i < NUM_COUNTERS; ++i) { | 160 | for (i = 0; i < num_counters; ++i) { |
145 | if (reset_value[i]) { | 161 | if (reset_value[i]) { |
146 | CTRL_READ(low, high, msrs, i); | 162 | CTRL_READ(low, high, msrs, i); |
147 | CTRL_SET_ACTIVE(low); | 163 | CTRL_SET_ACTIVE(low); |
@@ -156,7 +172,7 @@ static void ppro_stop(struct op_msrs const * const msrs) | |||
156 | unsigned int low, high; | 172 | unsigned int low, high; |
157 | int i; | 173 | int i; |
158 | 174 | ||
159 | for (i = 0; i < NUM_COUNTERS; ++i) { | 175 | for (i = 0; i < num_counters; ++i) { |
160 | if (!reset_value[i]) | 176 | if (!reset_value[i]) |
161 | continue; | 177 | continue; |
162 | CTRL_READ(low, high, msrs, i); | 178 | CTRL_READ(low, high, msrs, i); |
@@ -169,21 +185,67 @@ static void ppro_shutdown(struct op_msrs const * const msrs) | |||
169 | { | 185 | { |
170 | int i; | 186 | int i; |
171 | 187 | ||
172 | for (i = 0 ; i < NUM_COUNTERS ; ++i) { | 188 | for (i = 0 ; i < num_counters ; ++i) { |
173 | if (CTR_IS_RESERVED(msrs, i)) | 189 | if (CTR_IS_RESERVED(msrs, i)) |
174 | release_perfctr_nmi(MSR_P6_PERFCTR0 + i); | 190 | release_perfctr_nmi(MSR_P6_PERFCTR0 + i); |
175 | } | 191 | } |
176 | for (i = 0 ; i < NUM_CONTROLS ; ++i) { | 192 | for (i = 0 ; i < num_counters ; ++i) { |
177 | if (CTRL_IS_RESERVED(msrs, i)) | 193 | if (CTRL_IS_RESERVED(msrs, i)) |
178 | release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); | 194 | release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); |
179 | } | 195 | } |
196 | if (reset_value) { | ||
197 | kfree(reset_value); | ||
198 | reset_value = NULL; | ||
199 | } | ||
180 | } | 200 | } |
181 | 201 | ||
182 | 202 | ||
183 | struct op_x86_model_spec const op_ppro_spec = { | 203 | struct op_x86_model_spec op_ppro_spec = { |
184 | .num_counters = NUM_COUNTERS, | 204 | .num_counters = 2, /* can be overriden */ |
185 | .num_controls = NUM_CONTROLS, | 205 | .num_controls = 2, /* dito */ |
206 | .fill_in_addresses = &ppro_fill_in_addresses, | ||
207 | .setup_ctrs = &ppro_setup_ctrs, | ||
208 | .check_ctrs = &ppro_check_ctrs, | ||
209 | .start = &ppro_start, | ||
210 | .stop = &ppro_stop, | ||
211 | .shutdown = &ppro_shutdown | ||
212 | }; | ||
213 | |||
214 | /* | ||
215 | * Architectural performance monitoring. | ||
216 | * | ||
217 | * Newer Intel CPUs (Core1+) have support for architectural | ||
218 | * events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details. | ||
219 | * The advantage of this is that it can be done without knowing about | ||
220 | * the specific CPU. | ||
221 | */ | ||
222 | |||
223 | void arch_perfmon_setup_counters(void) | ||
224 | { | ||
225 | union cpuid10_eax eax; | ||
226 | |||
227 | eax.full = cpuid_eax(0xa); | ||
228 | |||
229 | /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */ | ||
230 | if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && | ||
231 | current_cpu_data.x86_model == 15) { | ||
232 | eax.split.version_id = 2; | ||
233 | eax.split.num_counters = 2; | ||
234 | eax.split.bit_width = 40; | ||
235 | } | ||
236 | |||
237 | num_counters = eax.split.num_counters; | ||
238 | |||
239 | op_arch_perfmon_spec.num_counters = num_counters; | ||
240 | op_arch_perfmon_spec.num_controls = num_counters; | ||
241 | op_ppro_spec.num_counters = num_counters; | ||
242 | op_ppro_spec.num_controls = num_counters; | ||
243 | } | ||
244 | |||
245 | struct op_x86_model_spec op_arch_perfmon_spec = { | ||
246 | /* num_counters/num_controls filled in at runtime */ | ||
186 | .fill_in_addresses = &ppro_fill_in_addresses, | 247 | .fill_in_addresses = &ppro_fill_in_addresses, |
248 | /* user space does the cpuid check for available events */ | ||
187 | .setup_ctrs = &ppro_setup_ctrs, | 249 | .setup_ctrs = &ppro_setup_ctrs, |
188 | .check_ctrs = &ppro_check_ctrs, | 250 | .check_ctrs = &ppro_check_ctrs, |
189 | .start = &ppro_start, | 251 | .start = &ppro_start, |