diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/oprofile/nmi_int.c | 23 | ||||
-rw-r--r-- | arch/x86/oprofile/op_model_ppro.c | 104 | ||||
-rw-r--r-- | arch/x86/oprofile/op_x86_model.h | 3 |
3 files changed, 102 insertions, 28 deletions
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 1059f3fe6b1d..12d6f85084f1 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
@@ -429,6 +429,16 @@ static int __init ppro_init(char **cpu_type) | |||
429 | return 1; | 429 | return 1; |
430 | } | 430 | } |
431 | 431 | ||
432 | static int __init arch_perfmon_init(char **cpu_type) | ||
433 | { | ||
434 | if (!cpu_has_arch_perfmon) | ||
435 | return 0; | ||
436 | *cpu_type = "i386/arch_perfmon"; | ||
437 | model = &op_arch_perfmon_spec; | ||
438 | arch_perfmon_setup_counters(); | ||
439 | return 1; | ||
440 | } | ||
441 | |||
432 | /* in order to get sysfs right */ | 442 | /* in order to get sysfs right */ |
433 | static int using_nmi; | 443 | static int using_nmi; |
434 | 444 | ||
@@ -436,7 +446,7 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
436 | { | 446 | { |
437 | __u8 vendor = boot_cpu_data.x86_vendor; | 447 | __u8 vendor = boot_cpu_data.x86_vendor; |
438 | __u8 family = boot_cpu_data.x86; | 448 | __u8 family = boot_cpu_data.x86; |
439 | char *cpu_type; | 449 | char *cpu_type = NULL; |
440 | int ret = 0; | 450 | int ret = 0; |
441 | 451 | ||
442 | if (!cpu_has_apic) | 452 | if (!cpu_has_apic) |
@@ -474,19 +484,20 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
474 | switch (family) { | 484 | switch (family) { |
475 | /* Pentium IV */ | 485 | /* Pentium IV */ |
476 | case 0xf: | 486 | case 0xf: |
477 | if (!p4_init(&cpu_type)) | 487 | p4_init(&cpu_type); |
478 | return -ENODEV; | ||
479 | break; | 488 | break; |
480 | 489 | ||
481 | /* A P6-class processor */ | 490 | /* A P6-class processor */ |
482 | case 6: | 491 | case 6: |
483 | if (!ppro_init(&cpu_type)) | 492 | ppro_init(&cpu_type); |
484 | return -ENODEV; | ||
485 | break; | 493 | break; |
486 | 494 | ||
487 | default: | 495 | default: |
488 | return -ENODEV; | 496 | break; |
489 | } | 497 | } |
498 | |||
499 | if (!cpu_type && !arch_perfmon_init(&cpu_type)) | ||
500 | return -ENODEV; | ||
490 | break; | 501 | break; |
491 | 502 | ||
492 | default: | 503 | default: |
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index eff431f6c57b..12e207a67f1b 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c | |||
@@ -1,32 +1,34 @@ | |||
1 | /* | 1 | /* |
2 | * @file op_model_ppro.h | 2 | * @file op_model_ppro.h |
3 | * pentium pro / P6 model-specific MSR operations | 3 | * Family 6 perfmon and architectural perfmon MSR operations |
4 | * | 4 | * |
5 | * @remark Copyright 2002 OProfile authors | 5 | * @remark Copyright 2002 OProfile authors |
6 | * @remark Copyright 2008 Intel Corporation | ||
6 | * @remark Read the file COPYING | 7 | * @remark Read the file COPYING |
7 | * | 8 | * |
8 | * @author John Levon | 9 | * @author John Levon |
9 | * @author Philippe Elie | 10 | * @author Philippe Elie |
10 | * @author Graydon Hoare | 11 | * @author Graydon Hoare |
12 | * @author Andi Kleen | ||
11 | */ | 13 | */ |
12 | 14 | ||
13 | #include <linux/oprofile.h> | 15 | #include <linux/oprofile.h> |
16 | #include <linux/slab.h> | ||
14 | #include <asm/ptrace.h> | 17 | #include <asm/ptrace.h> |
15 | #include <asm/msr.h> | 18 | #include <asm/msr.h> |
16 | #include <asm/apic.h> | 19 | #include <asm/apic.h> |
17 | #include <asm/nmi.h> | 20 | #include <asm/nmi.h> |
21 | #include <asm/intel_arch_perfmon.h> | ||
18 | 22 | ||
19 | #include "op_x86_model.h" | 23 | #include "op_x86_model.h" |
20 | #include "op_counter.h" | 24 | #include "op_counter.h" |
21 | 25 | ||
22 | #define NUM_COUNTERS 2 | 26 | static int num_counters = 2; |
23 | #define NUM_CONTROLS 2 | 27 | static int counter_width = 32; |
24 | 28 | ||
25 | #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) | 29 | #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) |
26 | #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) | 30 | #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) |
27 | #define CTR_32BIT_WRITE(l, msrs, c) \ | 31 | #define CTR_OVERFLOWED(n) (!((n) & (1U<<(counter_width-1)))) |
28 | do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), 0); } while (0) | ||
29 | #define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) | ||
30 | 32 | ||
31 | #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) | 33 | #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) |
32 | #define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) | 34 | #define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) |
@@ -40,20 +42,20 @@ | |||
40 | #define CTRL_SET_UM(val, m) (val |= (m << 8)) | 42 | #define CTRL_SET_UM(val, m) (val |= (m << 8)) |
41 | #define CTRL_SET_EVENT(val, e) (val |= e) | 43 | #define CTRL_SET_EVENT(val, e) (val |= e) |
42 | 44 | ||
43 | static unsigned long reset_value[NUM_COUNTERS]; | 45 | static u64 *reset_value; |
44 | 46 | ||
45 | static void ppro_fill_in_addresses(struct op_msrs * const msrs) | 47 | static void ppro_fill_in_addresses(struct op_msrs * const msrs) |
46 | { | 48 | { |
47 | int i; | 49 | int i; |
48 | 50 | ||
49 | for (i = 0; i < NUM_COUNTERS; i++) { | 51 | for (i = 0; i < num_counters; i++) { |
50 | if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) | 52 | if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) |
51 | msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; | 53 | msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; |
52 | else | 54 | else |
53 | msrs->counters[i].addr = 0; | 55 | msrs->counters[i].addr = 0; |
54 | } | 56 | } |
55 | 57 | ||
56 | for (i = 0; i < NUM_CONTROLS; i++) { | 58 | for (i = 0; i < num_counters; i++) { |
57 | if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) | 59 | if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) |
58 | msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; | 60 | msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; |
59 | else | 61 | else |
@@ -67,8 +69,22 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) | |||
67 | unsigned int low, high; | 69 | unsigned int low, high; |
68 | int i; | 70 | int i; |
69 | 71 | ||
72 | if (!reset_value) { | ||
73 | reset_value = kmalloc(sizeof(unsigned) * num_counters, | ||
74 | GFP_ATOMIC); | ||
75 | if (!reset_value) | ||
76 | return; | ||
77 | } | ||
78 | |||
79 | if (cpu_has_arch_perfmon) { | ||
80 | union cpuid10_eax eax; | ||
81 | eax.full = cpuid_eax(0xa); | ||
82 | if (counter_width < eax.split.bit_width) | ||
83 | counter_width = eax.split.bit_width; | ||
84 | } | ||
85 | |||
70 | /* clear all counters */ | 86 | /* clear all counters */ |
71 | for (i = 0 ; i < NUM_CONTROLS; ++i) { | 87 | for (i = 0 ; i < num_counters; ++i) { |
72 | if (unlikely(!CTRL_IS_RESERVED(msrs, i))) | 88 | if (unlikely(!CTRL_IS_RESERVED(msrs, i))) |
73 | continue; | 89 | continue; |
74 | CTRL_READ(low, high, msrs, i); | 90 | CTRL_READ(low, high, msrs, i); |
@@ -77,18 +93,18 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) | |||
77 | } | 93 | } |
78 | 94 | ||
79 | /* avoid a false detection of ctr overflows in NMI handler */ | 95 | /* avoid a false detection of ctr overflows in NMI handler */ |
80 | for (i = 0; i < NUM_COUNTERS; ++i) { | 96 | for (i = 0; i < num_counters; ++i) { |
81 | if (unlikely(!CTR_IS_RESERVED(msrs, i))) | 97 | if (unlikely(!CTR_IS_RESERVED(msrs, i))) |
82 | continue; | 98 | continue; |
83 | CTR_32BIT_WRITE(1, msrs, i); | 99 | wrmsrl(msrs->counters[i].addr, -1LL); |
84 | } | 100 | } |
85 | 101 | ||
86 | /* enable active counters */ | 102 | /* enable active counters */ |
87 | for (i = 0; i < NUM_COUNTERS; ++i) { | 103 | for (i = 0; i < num_counters; ++i) { |
88 | if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { | 104 | if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { |
89 | reset_value[i] = counter_config[i].count; | 105 | reset_value[i] = counter_config[i].count; |
90 | 106 | ||
91 | CTR_32BIT_WRITE(counter_config[i].count, msrs, i); | 107 | wrmsrl(msrs->counters[i].addr, -reset_value[i]); |
92 | 108 | ||
93 | CTRL_READ(low, high, msrs, i); | 109 | CTRL_READ(low, high, msrs, i); |
94 | CTRL_CLEAR(low); | 110 | CTRL_CLEAR(low); |
@@ -111,13 +127,13 @@ static int ppro_check_ctrs(struct pt_regs * const regs, | |||
111 | unsigned int low, high; | 127 | unsigned int low, high; |
112 | int i; | 128 | int i; |
113 | 129 | ||
114 | for (i = 0 ; i < NUM_COUNTERS; ++i) { | 130 | for (i = 0 ; i < num_counters; ++i) { |
115 | if (!reset_value[i]) | 131 | if (!reset_value[i]) |
116 | continue; | 132 | continue; |
117 | CTR_READ(low, high, msrs, i); | 133 | CTR_READ(low, high, msrs, i); |
118 | if (CTR_OVERFLOWED(low)) { | 134 | if (CTR_OVERFLOWED(low)) { |
119 | oprofile_add_sample(regs, i); | 135 | oprofile_add_sample(regs, i); |
120 | CTR_32BIT_WRITE(reset_value[i], msrs, i); | 136 | wrmsrl(msrs->counters[i].addr, -reset_value[i]); |
121 | } | 137 | } |
122 | } | 138 | } |
123 | 139 | ||
@@ -141,7 +157,7 @@ static void ppro_start(struct op_msrs const * const msrs) | |||
141 | unsigned int low, high; | 157 | unsigned int low, high; |
142 | int i; | 158 | int i; |
143 | 159 | ||
144 | for (i = 0; i < NUM_COUNTERS; ++i) { | 160 | for (i = 0; i < num_counters; ++i) { |
145 | if (reset_value[i]) { | 161 | if (reset_value[i]) { |
146 | CTRL_READ(low, high, msrs, i); | 162 | CTRL_READ(low, high, msrs, i); |
147 | CTRL_SET_ACTIVE(low); | 163 | CTRL_SET_ACTIVE(low); |
@@ -156,7 +172,7 @@ static void ppro_stop(struct op_msrs const * const msrs) | |||
156 | unsigned int low, high; | 172 | unsigned int low, high; |
157 | int i; | 173 | int i; |
158 | 174 | ||
159 | for (i = 0; i < NUM_COUNTERS; ++i) { | 175 | for (i = 0; i < num_counters; ++i) { |
160 | if (!reset_value[i]) | 176 | if (!reset_value[i]) |
161 | continue; | 177 | continue; |
162 | CTRL_READ(low, high, msrs, i); | 178 | CTRL_READ(low, high, msrs, i); |
@@ -169,21 +185,65 @@ static void ppro_shutdown(struct op_msrs const * const msrs) | |||
169 | { | 185 | { |
170 | int i; | 186 | int i; |
171 | 187 | ||
172 | for (i = 0 ; i < NUM_COUNTERS ; ++i) { | 188 | for (i = 0 ; i < num_counters ; ++i) { |
173 | if (CTR_IS_RESERVED(msrs, i)) | 189 | if (CTR_IS_RESERVED(msrs, i)) |
174 | release_perfctr_nmi(MSR_P6_PERFCTR0 + i); | 190 | release_perfctr_nmi(MSR_P6_PERFCTR0 + i); |
175 | } | 191 | } |
176 | for (i = 0 ; i < NUM_CONTROLS ; ++i) { | 192 | for (i = 0 ; i < num_counters ; ++i) { |
177 | if (CTRL_IS_RESERVED(msrs, i)) | 193 | if (CTRL_IS_RESERVED(msrs, i)) |
178 | release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); | 194 | release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); |
179 | } | 195 | } |
196 | if (reset_value) { | ||
197 | kfree(reset_value); | ||
198 | reset_value = NULL; | ||
199 | } | ||
180 | } | 200 | } |
181 | 201 | ||
182 | 202 | ||
183 | struct op_x86_model_spec const op_ppro_spec = { | 203 | struct op_x86_model_spec const op_ppro_spec = { |
184 | .num_counters = NUM_COUNTERS, | 204 | .num_counters = 2, |
185 | .num_controls = NUM_CONTROLS, | 205 | .num_controls = 2, |
206 | .fill_in_addresses = &ppro_fill_in_addresses, | ||
207 | .setup_ctrs = &ppro_setup_ctrs, | ||
208 | .check_ctrs = &ppro_check_ctrs, | ||
209 | .start = &ppro_start, | ||
210 | .stop = &ppro_stop, | ||
211 | .shutdown = &ppro_shutdown | ||
212 | }; | ||
213 | |||
214 | /* | ||
215 | * Architectural performance monitoring. | ||
216 | * | ||
217 | * Newer Intel CPUs (Core1+) have support for architectural | ||
218 | * events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details. | ||
219 | * The advantage of this is that it can be done without knowing about | ||
220 | * the specific CPU. | ||
221 | */ | ||
222 | |||
223 | void arch_perfmon_setup_counters(void) | ||
224 | { | ||
225 | union cpuid10_eax eax; | ||
226 | |||
227 | eax.full = cpuid_eax(0xa); | ||
228 | |||
229 | /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */ | ||
230 | if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && | ||
231 | current_cpu_data.x86_model == 15) { | ||
232 | eax.split.version_id = 2; | ||
233 | eax.split.num_counters = 2; | ||
234 | eax.split.bit_width = 40; | ||
235 | } | ||
236 | |||
237 | num_counters = eax.split.num_counters; | ||
238 | |||
239 | op_arch_perfmon_spec.num_counters = num_counters; | ||
240 | op_arch_perfmon_spec.num_controls = num_counters; | ||
241 | } | ||
242 | |||
243 | struct op_x86_model_spec op_arch_perfmon_spec = { | ||
244 | /* num_counters/num_controls filled in at runtime */ | ||
186 | .fill_in_addresses = &ppro_fill_in_addresses, | 245 | .fill_in_addresses = &ppro_fill_in_addresses, |
246 | /* user space does the cpuid check for available events */ | ||
187 | .setup_ctrs = &ppro_setup_ctrs, | 247 | .setup_ctrs = &ppro_setup_ctrs, |
188 | .check_ctrs = &ppro_check_ctrs, | 248 | .check_ctrs = &ppro_check_ctrs, |
189 | .start = &ppro_start, | 249 | .start = &ppro_start, |
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 3d3b85d3c257..0b601893a4df 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h | |||
@@ -49,5 +49,8 @@ extern struct op_x86_model_spec const op_ppro_spec; | |||
49 | extern struct op_x86_model_spec const op_p4_spec; | 49 | extern struct op_x86_model_spec const op_p4_spec; |
50 | extern struct op_x86_model_spec const op_p4_ht2_spec; | 50 | extern struct op_x86_model_spec const op_p4_ht2_spec; |
51 | extern struct op_x86_model_spec const op_amd_spec; | 51 | extern struct op_x86_model_spec const op_amd_spec; |
52 | extern struct op_x86_model_spec op_arch_perfmon_spec; | ||
53 | |||
54 | extern void arch_perfmon_setup_counters(void); | ||
52 | 55 | ||
53 | #endif /* OP_X86_MODEL_H */ | 56 | #endif /* OP_X86_MODEL_H */ |