diff options
author | Robert Richter <robert.richter@amd.com> | 2008-10-15 16:19:41 -0400 |
---|---|---|
committer | Robert Richter <robert.richter@amd.com> | 2008-10-15 16:19:41 -0400 |
commit | 5a289395bf753f8a318d3a5fa335a757c16c0183 (patch) | |
tree | 3573b8bc4bbd135616bb395d998ef1526fff596f | |
parent | 5f87dfb79f829339508a5d989b8252eb30842587 (diff) | |
parent | 59512900baab03c5629f2ff5efad1d5d4e682ece (diff) |
Merge branch 'oprofile/x86-oprofile-for-tip' into oprofile/oprofile-for-tip
Conflicts:
arch/x86/oprofile/op_model_ppro.c
-rw-r--r-- | arch/x86/oprofile/nmi_int.c | 26 | ||||
-rw-r--r-- | arch/x86/oprofile/op_model_ppro.c | 108 | ||||
-rw-r--r-- | arch/x86/oprofile/op_x86_model.h | 9 |
3 files changed, 108 insertions, 35 deletions
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 370d832f398d..022cd41ea9b4 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
@@ -415,9 +415,6 @@ static int __init ppro_init(char **cpu_type) | |||
415 | case 15: case 23: | 415 | case 15: case 23: |
416 | *cpu_type = "i386/core_2"; | 416 | *cpu_type = "i386/core_2"; |
417 | break; | 417 | break; |
418 | case 26: | ||
419 | *cpu_type = "i386/core_2"; | ||
420 | break; | ||
421 | default: | 418 | default: |
422 | /* Unknown */ | 419 | /* Unknown */ |
423 | return 0; | 420 | return 0; |
@@ -427,6 +424,16 @@ static int __init ppro_init(char **cpu_type) | |||
427 | return 1; | 424 | return 1; |
428 | } | 425 | } |
429 | 426 | ||
427 | static int __init arch_perfmon_init(char **cpu_type) | ||
428 | { | ||
429 | if (!cpu_has_arch_perfmon) | ||
430 | return 0; | ||
431 | *cpu_type = "i386/arch_perfmon"; | ||
432 | model = &op_arch_perfmon_spec; | ||
433 | arch_perfmon_setup_counters(); | ||
434 | return 1; | ||
435 | } | ||
436 | |||
430 | /* in order to get sysfs right */ | 437 | /* in order to get sysfs right */ |
431 | static int using_nmi; | 438 | static int using_nmi; |
432 | 439 | ||
@@ -434,7 +441,7 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
434 | { | 441 | { |
435 | __u8 vendor = boot_cpu_data.x86_vendor; | 442 | __u8 vendor = boot_cpu_data.x86_vendor; |
436 | __u8 family = boot_cpu_data.x86; | 443 | __u8 family = boot_cpu_data.x86; |
437 | char *cpu_type; | 444 | char *cpu_type = NULL; |
438 | int ret = 0; | 445 | int ret = 0; |
439 | 446 | ||
440 | if (!cpu_has_apic) | 447 | if (!cpu_has_apic) |
@@ -472,19 +479,20 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
472 | switch (family) { | 479 | switch (family) { |
473 | /* Pentium IV */ | 480 | /* Pentium IV */ |
474 | case 0xf: | 481 | case 0xf: |
475 | if (!p4_init(&cpu_type)) | 482 | p4_init(&cpu_type); |
476 | return -ENODEV; | ||
477 | break; | 483 | break; |
478 | 484 | ||
479 | /* A P6-class processor */ | 485 | /* A P6-class processor */ |
480 | case 6: | 486 | case 6: |
481 | if (!ppro_init(&cpu_type)) | 487 | ppro_init(&cpu_type); |
482 | return -ENODEV; | ||
483 | break; | 488 | break; |
484 | 489 | ||
485 | default: | 490 | default: |
486 | return -ENODEV; | 491 | break; |
487 | } | 492 | } |
493 | |||
494 | if (!cpu_type && !arch_perfmon_init(&cpu_type)) | ||
495 | return -ENODEV; | ||
488 | break; | 496 | break; |
489 | 497 | ||
490 | default: | 498 | default: |
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index c665bac4a143..0620d6d45f7d 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c | |||
@@ -1,32 +1,34 @@ | |||
1 | /* | 1 | /* |
2 | * @file op_model_ppro.h | 2 | * @file op_model_ppro.h |
3 | * pentium pro / P6 model-specific MSR operations | 3 | * Family 6 perfmon and architectural perfmon MSR operations |
4 | * | 4 | * |
5 | * @remark Copyright 2002 OProfile authors | 5 | * @remark Copyright 2002 OProfile authors |
6 | * @remark Copyright 2008 Intel Corporation | ||
6 | * @remark Read the file COPYING | 7 | * @remark Read the file COPYING |
7 | * | 8 | * |
8 | * @author John Levon | 9 | * @author John Levon |
9 | * @author Philippe Elie | 10 | * @author Philippe Elie |
10 | * @author Graydon Hoare | 11 | * @author Graydon Hoare |
12 | * @author Andi Kleen | ||
11 | */ | 13 | */ |
12 | 14 | ||
13 | #include <linux/oprofile.h> | 15 | #include <linux/oprofile.h> |
16 | #include <linux/slab.h> | ||
14 | #include <asm/ptrace.h> | 17 | #include <asm/ptrace.h> |
15 | #include <asm/msr.h> | 18 | #include <asm/msr.h> |
16 | #include <asm/apic.h> | 19 | #include <asm/apic.h> |
17 | #include <asm/nmi.h> | 20 | #include <asm/nmi.h> |
21 | #include <asm/intel_arch_perfmon.h> | ||
18 | 22 | ||
19 | #include "op_x86_model.h" | 23 | #include "op_x86_model.h" |
20 | #include "op_counter.h" | 24 | #include "op_counter.h" |
21 | 25 | ||
22 | #define NUM_COUNTERS 2 | 26 | static int num_counters = 2; |
23 | #define NUM_CONTROLS 2 | 27 | static int counter_width = 32; |
24 | 28 | ||
25 | #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) | 29 | #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) |
26 | #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) | 30 | #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) |
27 | #define CTR_32BIT_WRITE(l, msrs, c) \ | 31 | #define CTR_OVERFLOWED(n) (!((n) & (1U<<(counter_width-1)))) |
28 | do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), 0); } while (0) | ||
29 | #define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) | ||
30 | 32 | ||
31 | #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) | 33 | #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) |
32 | #define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) | 34 | #define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) |
@@ -40,20 +42,20 @@ | |||
40 | #define CTRL_SET_UM(val, m) (val |= (m << 8)) | 42 | #define CTRL_SET_UM(val, m) (val |= (m << 8)) |
41 | #define CTRL_SET_EVENT(val, e) (val |= e) | 43 | #define CTRL_SET_EVENT(val, e) (val |= e) |
42 | 44 | ||
43 | static unsigned long reset_value[NUM_COUNTERS]; | 45 | static u64 *reset_value; |
44 | 46 | ||
45 | static void ppro_fill_in_addresses(struct op_msrs * const msrs) | 47 | static void ppro_fill_in_addresses(struct op_msrs * const msrs) |
46 | { | 48 | { |
47 | int i; | 49 | int i; |
48 | 50 | ||
49 | for (i = 0; i < NUM_COUNTERS; i++) { | 51 | for (i = 0; i < num_counters; i++) { |
50 | if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) | 52 | if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) |
51 | msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; | 53 | msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; |
52 | else | 54 | else |
53 | msrs->counters[i].addr = 0; | 55 | msrs->counters[i].addr = 0; |
54 | } | 56 | } |
55 | 57 | ||
56 | for (i = 0; i < NUM_CONTROLS; i++) { | 58 | for (i = 0; i < num_counters; i++) { |
57 | if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) | 59 | if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) |
58 | msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; | 60 | msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; |
59 | else | 61 | else |
@@ -67,8 +69,22 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) | |||
67 | unsigned int low, high; | 69 | unsigned int low, high; |
68 | int i; | 70 | int i; |
69 | 71 | ||
72 | if (!reset_value) { | ||
73 | reset_value = kmalloc(sizeof(unsigned) * num_counters, | ||
74 | GFP_ATOMIC); | ||
75 | if (!reset_value) | ||
76 | return; | ||
77 | } | ||
78 | |||
79 | if (cpu_has_arch_perfmon) { | ||
80 | union cpuid10_eax eax; | ||
81 | eax.full = cpuid_eax(0xa); | ||
82 | if (counter_width < eax.split.bit_width) | ||
83 | counter_width = eax.split.bit_width; | ||
84 | } | ||
85 | |||
70 | /* clear all counters */ | 86 | /* clear all counters */ |
71 | for (i = 0 ; i < NUM_CONTROLS; ++i) { | 87 | for (i = 0 ; i < num_counters; ++i) { |
72 | if (unlikely(!CTRL_IS_RESERVED(msrs, i))) | 88 | if (unlikely(!CTRL_IS_RESERVED(msrs, i))) |
73 | continue; | 89 | continue; |
74 | CTRL_READ(low, high, msrs, i); | 90 | CTRL_READ(low, high, msrs, i); |
@@ -77,18 +93,18 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) | |||
77 | } | 93 | } |
78 | 94 | ||
79 | /* avoid a false detection of ctr overflows in NMI handler */ | 95 | /* avoid a false detection of ctr overflows in NMI handler */ |
80 | for (i = 0; i < NUM_COUNTERS; ++i) { | 96 | for (i = 0; i < num_counters; ++i) { |
81 | if (unlikely(!CTR_IS_RESERVED(msrs, i))) | 97 | if (unlikely(!CTR_IS_RESERVED(msrs, i))) |
82 | continue; | 98 | continue; |
83 | CTR_32BIT_WRITE(1, msrs, i); | 99 | wrmsrl(msrs->counters[i].addr, -1LL); |
84 | } | 100 | } |
85 | 101 | ||
86 | /* enable active counters */ | 102 | /* enable active counters */ |
87 | for (i = 0; i < NUM_COUNTERS; ++i) { | 103 | for (i = 0; i < num_counters; ++i) { |
88 | if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { | 104 | if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { |
89 | reset_value[i] = counter_config[i].count; | 105 | reset_value[i] = counter_config[i].count; |
90 | 106 | ||
91 | CTR_32BIT_WRITE(counter_config[i].count, msrs, i); | 107 | wrmsrl(msrs->counters[i].addr, -reset_value[i]); |
92 | 108 | ||
93 | CTRL_READ(low, high, msrs, i); | 109 | CTRL_READ(low, high, msrs, i); |
94 | CTRL_CLEAR(low); | 110 | CTRL_CLEAR(low); |
@@ -111,13 +127,13 @@ static int ppro_check_ctrs(struct pt_regs * const regs, | |||
111 | unsigned int low, high; | 127 | unsigned int low, high; |
112 | int i; | 128 | int i; |
113 | 129 | ||
114 | for (i = 0 ; i < NUM_COUNTERS; ++i) { | 130 | for (i = 0 ; i < num_counters; ++i) { |
115 | if (!reset_value[i]) | 131 | if (!reset_value[i]) |
116 | continue; | 132 | continue; |
117 | CTR_READ(low, high, msrs, i); | 133 | CTR_READ(low, high, msrs, i); |
118 | if (CTR_OVERFLOWED(low)) { | 134 | if (CTR_OVERFLOWED(low)) { |
119 | oprofile_add_sample(regs, i); | 135 | oprofile_add_sample(regs, i); |
120 | CTR_32BIT_WRITE(reset_value[i], msrs, i); | 136 | wrmsrl(msrs->counters[i].addr, -reset_value[i]); |
121 | } | 137 | } |
122 | } | 138 | } |
123 | 139 | ||
@@ -141,7 +157,7 @@ static void ppro_start(struct op_msrs const * const msrs) | |||
141 | unsigned int low, high; | 157 | unsigned int low, high; |
142 | int i; | 158 | int i; |
143 | 159 | ||
144 | for (i = 0; i < NUM_COUNTERS; ++i) { | 160 | for (i = 0; i < num_counters; ++i) { |
145 | if (reset_value[i]) { | 161 | if (reset_value[i]) { |
146 | CTRL_READ(low, high, msrs, i); | 162 | CTRL_READ(low, high, msrs, i); |
147 | CTRL_SET_ACTIVE(low); | 163 | CTRL_SET_ACTIVE(low); |
@@ -156,7 +172,7 @@ static void ppro_stop(struct op_msrs const * const msrs) | |||
156 | unsigned int low, high; | 172 | unsigned int low, high; |
157 | int i; | 173 | int i; |
158 | 174 | ||
159 | for (i = 0; i < NUM_COUNTERS; ++i) { | 175 | for (i = 0; i < num_counters; ++i) { |
160 | if (!reset_value[i]) | 176 | if (!reset_value[i]) |
161 | continue; | 177 | continue; |
162 | CTRL_READ(low, high, msrs, i); | 178 | CTRL_READ(low, high, msrs, i); |
@@ -169,21 +185,67 @@ static void ppro_shutdown(struct op_msrs const * const msrs) | |||
169 | { | 185 | { |
170 | int i; | 186 | int i; |
171 | 187 | ||
172 | for (i = 0 ; i < NUM_COUNTERS ; ++i) { | 188 | for (i = 0 ; i < num_counters ; ++i) { |
173 | if (CTR_IS_RESERVED(msrs, i)) | 189 | if (CTR_IS_RESERVED(msrs, i)) |
174 | release_perfctr_nmi(MSR_P6_PERFCTR0 + i); | 190 | release_perfctr_nmi(MSR_P6_PERFCTR0 + i); |
175 | } | 191 | } |
176 | for (i = 0 ; i < NUM_CONTROLS ; ++i) { | 192 | for (i = 0 ; i < num_counters ; ++i) { |
177 | if (CTRL_IS_RESERVED(msrs, i)) | 193 | if (CTRL_IS_RESERVED(msrs, i)) |
178 | release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); | 194 | release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); |
179 | } | 195 | } |
196 | if (reset_value) { | ||
197 | kfree(reset_value); | ||
198 | reset_value = NULL; | ||
199 | } | ||
180 | } | 200 | } |
181 | 201 | ||
182 | 202 | ||
183 | struct op_x86_model_spec const op_ppro_spec = { | 203 | struct op_x86_model_spec op_ppro_spec = { |
184 | .num_counters = NUM_COUNTERS, | 204 | .num_counters = 2, /* can be overridden */ |
185 | .num_controls = NUM_CONTROLS, | 205 | .num_controls = 2, /* ditto */ |
206 | .fill_in_addresses = &ppro_fill_in_addresses, | ||
207 | .setup_ctrs = &ppro_setup_ctrs, | ||
208 | .check_ctrs = &ppro_check_ctrs, | ||
209 | .start = &ppro_start, | ||
210 | .stop = &ppro_stop, | ||
211 | .shutdown = &ppro_shutdown | ||
212 | }; | ||
213 | |||
214 | /* | ||
215 | * Architectural performance monitoring. | ||
216 | * | ||
217 | * Newer Intel CPUs (Core1+) have support for architectural | ||
218 | * events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details. | ||
219 | * The advantage of this is that it can be done without knowing about | ||
220 | * the specific CPU. | ||
221 | */ | ||
222 | |||
223 | void arch_perfmon_setup_counters(void) | ||
224 | { | ||
225 | union cpuid10_eax eax; | ||
226 | |||
227 | eax.full = cpuid_eax(0xa); | ||
228 | |||
229 | /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */ | ||
230 | if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && | ||
231 | current_cpu_data.x86_model == 15) { | ||
232 | eax.split.version_id = 2; | ||
233 | eax.split.num_counters = 2; | ||
234 | eax.split.bit_width = 40; | ||
235 | } | ||
236 | |||
237 | num_counters = eax.split.num_counters; | ||
238 | |||
239 | op_arch_perfmon_spec.num_counters = num_counters; | ||
240 | op_arch_perfmon_spec.num_controls = num_counters; | ||
241 | op_ppro_spec.num_counters = num_counters; | ||
242 | op_ppro_spec.num_controls = num_counters; | ||
243 | } | ||
244 | |||
245 | struct op_x86_model_spec op_arch_perfmon_spec = { | ||
246 | /* num_counters/num_controls filled in at runtime */ | ||
186 | .fill_in_addresses = &ppro_fill_in_addresses, | 247 | .fill_in_addresses = &ppro_fill_in_addresses, |
248 | /* user space does the cpuid check for available events */ | ||
187 | .setup_ctrs = &ppro_setup_ctrs, | 249 | .setup_ctrs = &ppro_setup_ctrs, |
188 | .check_ctrs = &ppro_check_ctrs, | 250 | .check_ctrs = &ppro_check_ctrs, |
189 | .start = &ppro_start, | 251 | .start = &ppro_start, |
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 24ccdebf3ac1..825e79064d64 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h | |||
@@ -34,8 +34,8 @@ struct pt_regs; | |||
34 | struct op_x86_model_spec { | 34 | struct op_x86_model_spec { |
35 | int (*init)(struct oprofile_operations *ops); | 35 | int (*init)(struct oprofile_operations *ops); |
36 | void (*exit)(void); | 36 | void (*exit)(void); |
37 | unsigned int const num_counters; | 37 | unsigned int num_counters; |
38 | unsigned int const num_controls; | 38 | unsigned int num_controls; |
39 | void (*fill_in_addresses)(struct op_msrs * const msrs); | 39 | void (*fill_in_addresses)(struct op_msrs * const msrs); |
40 | void (*setup_ctrs)(struct op_msrs const * const msrs); | 40 | void (*setup_ctrs)(struct op_msrs const * const msrs); |
41 | int (*check_ctrs)(struct pt_regs * const regs, | 41 | int (*check_ctrs)(struct pt_regs * const regs, |
@@ -45,9 +45,12 @@ struct op_x86_model_spec { | |||
45 | void (*shutdown)(struct op_msrs const * const msrs); | 45 | void (*shutdown)(struct op_msrs const * const msrs); |
46 | }; | 46 | }; |
47 | 47 | ||
48 | extern struct op_x86_model_spec const op_ppro_spec; | 48 | extern struct op_x86_model_spec op_ppro_spec; |
49 | extern struct op_x86_model_spec const op_p4_spec; | 49 | extern struct op_x86_model_spec const op_p4_spec; |
50 | extern struct op_x86_model_spec const op_p4_ht2_spec; | 50 | extern struct op_x86_model_spec const op_p4_ht2_spec; |
51 | extern struct op_x86_model_spec const op_amd_spec; | 51 | extern struct op_x86_model_spec const op_amd_spec; |
52 | extern struct op_x86_model_spec op_arch_perfmon_spec; | ||
53 | |||
54 | extern void arch_perfmon_setup_counters(void); | ||
52 | 55 | ||
53 | #endif /* OP_X86_MODEL_H */ | 56 | #endif /* OP_X86_MODEL_H */ |