aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/oprofile
diff options
context:
space:
mode:
author Robert Richter <robert.richter@amd.com> 2008-10-15 16:19:41 -0400
committer Robert Richter <robert.richter@amd.com> 2008-10-15 16:19:41 -0400
commit 5a289395bf753f8a318d3a5fa335a757c16c0183 (patch)
tree 3573b8bc4bbd135616bb395d998ef1526fff596f /arch/x86/oprofile
parent 5f87dfb79f829339508a5d989b8252eb30842587 (diff)
parent 59512900baab03c5629f2ff5efad1d5d4e682ece (diff)
Merge branch 'oprofile/x86-oprofile-for-tip' into oprofile/oprofile-for-tip
Conflicts: arch/x86/oprofile/op_model_ppro.c
Diffstat (limited to 'arch/x86/oprofile')
-rw-r--r-- arch/x86/oprofile/nmi_int.c 26
-rw-r--r-- arch/x86/oprofile/op_model_ppro.c 108
-rw-r--r-- arch/x86/oprofile/op_x86_model.h 9
3 files changed, 108 insertions, 35 deletions
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 370d832f398d..022cd41ea9b4 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -415,9 +415,6 @@ static int __init ppro_init(char **cpu_type)
415 case 15: case 23: 415 case 15: case 23:
416 *cpu_type = "i386/core_2"; 416 *cpu_type = "i386/core_2";
417 break; 417 break;
418 case 26:
419 *cpu_type = "i386/core_2";
420 break;
421 default: 418 default:
422 /* Unknown */ 419 /* Unknown */
423 return 0; 420 return 0;
@@ -427,6 +424,16 @@ static int __init ppro_init(char **cpu_type)
427 return 1; 424 return 1;
428} 425}
429 426
427static int __init arch_perfmon_init(char **cpu_type)
428{
429 if (!cpu_has_arch_perfmon)
430 return 0;
431 *cpu_type = "i386/arch_perfmon";
432 model = &op_arch_perfmon_spec;
433 arch_perfmon_setup_counters();
434 return 1;
435}
436
430/* in order to get sysfs right */ 437/* in order to get sysfs right */
431static int using_nmi; 438static int using_nmi;
432 439
@@ -434,7 +441,7 @@ int __init op_nmi_init(struct oprofile_operations *ops)
434{ 441{
435 __u8 vendor = boot_cpu_data.x86_vendor; 442 __u8 vendor = boot_cpu_data.x86_vendor;
436 __u8 family = boot_cpu_data.x86; 443 __u8 family = boot_cpu_data.x86;
437 char *cpu_type; 444 char *cpu_type = NULL;
438 int ret = 0; 445 int ret = 0;
439 446
440 if (!cpu_has_apic) 447 if (!cpu_has_apic)
@@ -472,19 +479,20 @@ int __init op_nmi_init(struct oprofile_operations *ops)
472 switch (family) { 479 switch (family) {
473 /* Pentium IV */ 480 /* Pentium IV */
474 case 0xf: 481 case 0xf:
475 if (!p4_init(&cpu_type)) 482 p4_init(&cpu_type);
476 return -ENODEV;
477 break; 483 break;
478 484
479 /* A P6-class processor */ 485 /* A P6-class processor */
480 case 6: 486 case 6:
481 if (!ppro_init(&cpu_type)) 487 ppro_init(&cpu_type);
482 return -ENODEV;
483 break; 488 break;
484 489
485 default: 490 default:
486 return -ENODEV; 491 break;
487 } 492 }
493
494 if (!cpu_type && !arch_perfmon_init(&cpu_type))
495 return -ENODEV;
488 break; 496 break;
489 497
490 default: 498 default:
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index c665bac4a143..0620d6d45f7d 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -1,32 +1,34 @@
1/* 1/*
2 * @file op_model_ppro.h 2 * @file op_model_ppro.h
3 * pentium pro / P6 model-specific MSR operations 3 * Family 6 perfmon and architectural perfmon MSR operations
4 * 4 *
5 * @remark Copyright 2002 OProfile authors 5 * @remark Copyright 2002 OProfile authors
6 * @remark Copyright 2008 Intel Corporation
6 * @remark Read the file COPYING 7 * @remark Read the file COPYING
7 * 8 *
8 * @author John Levon 9 * @author John Levon
9 * @author Philippe Elie 10 * @author Philippe Elie
10 * @author Graydon Hoare 11 * @author Graydon Hoare
12 * @author Andi Kleen
11 */ 13 */
12 14
13#include <linux/oprofile.h> 15#include <linux/oprofile.h>
16#include <linux/slab.h>
14#include <asm/ptrace.h> 17#include <asm/ptrace.h>
15#include <asm/msr.h> 18#include <asm/msr.h>
16#include <asm/apic.h> 19#include <asm/apic.h>
17#include <asm/nmi.h> 20#include <asm/nmi.h>
21#include <asm/intel_arch_perfmon.h>
18 22
19#include "op_x86_model.h" 23#include "op_x86_model.h"
20#include "op_counter.h" 24#include "op_counter.h"
21 25
22#define NUM_COUNTERS 2 26static int num_counters = 2;
23#define NUM_CONTROLS 2 27static int counter_width = 32;
24 28
25#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) 29#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
26#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) 30#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0)
27#define CTR_32BIT_WRITE(l, msrs, c) \ 31#define CTR_OVERFLOWED(n) (!((n) & (1U<<(counter_width-1))))
28 do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), 0); } while (0)
29#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
30 32
31#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) 33#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
32#define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) 34#define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
@@ -40,20 +42,20 @@
40#define CTRL_SET_UM(val, m) (val |= (m << 8)) 42#define CTRL_SET_UM(val, m) (val |= (m << 8))
41#define CTRL_SET_EVENT(val, e) (val |= e) 43#define CTRL_SET_EVENT(val, e) (val |= e)
42 44
43static unsigned long reset_value[NUM_COUNTERS]; 45static u64 *reset_value;
44 46
45static void ppro_fill_in_addresses(struct op_msrs * const msrs) 47static void ppro_fill_in_addresses(struct op_msrs * const msrs)
46{ 48{
47 int i; 49 int i;
48 50
49 for (i = 0; i < NUM_COUNTERS; i++) { 51 for (i = 0; i < num_counters; i++) {
50 if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) 52 if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
51 msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; 53 msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
52 else 54 else
53 msrs->counters[i].addr = 0; 55 msrs->counters[i].addr = 0;
54 } 56 }
55 57
56 for (i = 0; i < NUM_CONTROLS; i++) { 58 for (i = 0; i < num_counters; i++) {
57 if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) 59 if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
58 msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; 60 msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
59 else 61 else
@@ -67,8 +69,22 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
67 unsigned int low, high; 69 unsigned int low, high;
68 int i; 70 int i;
69 71
72 if (!reset_value) {
73 reset_value = kmalloc(sizeof(unsigned) * num_counters,
74 GFP_ATOMIC);
75 if (!reset_value)
76 return;
77 }
78
79 if (cpu_has_arch_perfmon) {
80 union cpuid10_eax eax;
81 eax.full = cpuid_eax(0xa);
82 if (counter_width < eax.split.bit_width)
83 counter_width = eax.split.bit_width;
84 }
85
70 /* clear all counters */ 86 /* clear all counters */
71 for (i = 0 ; i < NUM_CONTROLS; ++i) { 87 for (i = 0 ; i < num_counters; ++i) {
72 if (unlikely(!CTRL_IS_RESERVED(msrs, i))) 88 if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
73 continue; 89 continue;
74 CTRL_READ(low, high, msrs, i); 90 CTRL_READ(low, high, msrs, i);
@@ -77,18 +93,18 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
77 } 93 }
78 94
79 /* avoid a false detection of ctr overflows in NMI handler */ 95 /* avoid a false detection of ctr overflows in NMI handler */
80 for (i = 0; i < NUM_COUNTERS; ++i) { 96 for (i = 0; i < num_counters; ++i) {
81 if (unlikely(!CTR_IS_RESERVED(msrs, i))) 97 if (unlikely(!CTR_IS_RESERVED(msrs, i)))
82 continue; 98 continue;
83 CTR_32BIT_WRITE(1, msrs, i); 99 wrmsrl(msrs->counters[i].addr, -1LL);
84 } 100 }
85 101
86 /* enable active counters */ 102 /* enable active counters */
87 for (i = 0; i < NUM_COUNTERS; ++i) { 103 for (i = 0; i < num_counters; ++i) {
88 if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { 104 if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
89 reset_value[i] = counter_config[i].count; 105 reset_value[i] = counter_config[i].count;
90 106
91 CTR_32BIT_WRITE(counter_config[i].count, msrs, i); 107 wrmsrl(msrs->counters[i].addr, -reset_value[i]);
92 108
93 CTRL_READ(low, high, msrs, i); 109 CTRL_READ(low, high, msrs, i);
94 CTRL_CLEAR(low); 110 CTRL_CLEAR(low);
@@ -111,13 +127,13 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
111 unsigned int low, high; 127 unsigned int low, high;
112 int i; 128 int i;
113 129
114 for (i = 0 ; i < NUM_COUNTERS; ++i) { 130 for (i = 0 ; i < num_counters; ++i) {
115 if (!reset_value[i]) 131 if (!reset_value[i])
116 continue; 132 continue;
117 CTR_READ(low, high, msrs, i); 133 CTR_READ(low, high, msrs, i);
118 if (CTR_OVERFLOWED(low)) { 134 if (CTR_OVERFLOWED(low)) {
119 oprofile_add_sample(regs, i); 135 oprofile_add_sample(regs, i);
120 CTR_32BIT_WRITE(reset_value[i], msrs, i); 136 wrmsrl(msrs->counters[i].addr, -reset_value[i]);
121 } 137 }
122 } 138 }
123 139
@@ -141,7 +157,7 @@ static void ppro_start(struct op_msrs const * const msrs)
141 unsigned int low, high; 157 unsigned int low, high;
142 int i; 158 int i;
143 159
144 for (i = 0; i < NUM_COUNTERS; ++i) { 160 for (i = 0; i < num_counters; ++i) {
145 if (reset_value[i]) { 161 if (reset_value[i]) {
146 CTRL_READ(low, high, msrs, i); 162 CTRL_READ(low, high, msrs, i);
147 CTRL_SET_ACTIVE(low); 163 CTRL_SET_ACTIVE(low);
@@ -156,7 +172,7 @@ static void ppro_stop(struct op_msrs const * const msrs)
156 unsigned int low, high; 172 unsigned int low, high;
157 int i; 173 int i;
158 174
159 for (i = 0; i < NUM_COUNTERS; ++i) { 175 for (i = 0; i < num_counters; ++i) {
160 if (!reset_value[i]) 176 if (!reset_value[i])
161 continue; 177 continue;
162 CTRL_READ(low, high, msrs, i); 178 CTRL_READ(low, high, msrs, i);
@@ -169,21 +185,67 @@ static void ppro_shutdown(struct op_msrs const * const msrs)
169{ 185{
170 int i; 186 int i;
171 187
172 for (i = 0 ; i < NUM_COUNTERS ; ++i) { 188 for (i = 0 ; i < num_counters ; ++i) {
173 if (CTR_IS_RESERVED(msrs, i)) 189 if (CTR_IS_RESERVED(msrs, i))
174 release_perfctr_nmi(MSR_P6_PERFCTR0 + i); 190 release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
175 } 191 }
176 for (i = 0 ; i < NUM_CONTROLS ; ++i) { 192 for (i = 0 ; i < num_counters ; ++i) {
177 if (CTRL_IS_RESERVED(msrs, i)) 193 if (CTRL_IS_RESERVED(msrs, i))
178 release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); 194 release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
179 } 195 }
196 if (reset_value) {
197 kfree(reset_value);
198 reset_value = NULL;
199 }
180} 200}
181 201
182 202
183struct op_x86_model_spec const op_ppro_spec = { 203struct op_x86_model_spec op_ppro_spec = {
184 .num_counters = NUM_COUNTERS, 204 .num_counters = 2, /* can be overridden */
185 .num_controls = NUM_CONTROLS, 205 .num_controls = 2, /* ditto */
206 .fill_in_addresses = &ppro_fill_in_addresses,
207 .setup_ctrs = &ppro_setup_ctrs,
208 .check_ctrs = &ppro_check_ctrs,
209 .start = &ppro_start,
210 .stop = &ppro_stop,
211 .shutdown = &ppro_shutdown
212};
213
214/*
215 * Architectural performance monitoring.
216 *
217 * Newer Intel CPUs (Core1+) have support for architectural
218 * events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details.
219 * The advantage of this is that it can be done without knowing about
220 * the specific CPU.
221 */
222
223void arch_perfmon_setup_counters(void)
224{
225 union cpuid10_eax eax;
226
227 eax.full = cpuid_eax(0xa);
228
229 /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */
230 if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
231 current_cpu_data.x86_model == 15) {
232 eax.split.version_id = 2;
233 eax.split.num_counters = 2;
234 eax.split.bit_width = 40;
235 }
236
237 num_counters = eax.split.num_counters;
238
239 op_arch_perfmon_spec.num_counters = num_counters;
240 op_arch_perfmon_spec.num_controls = num_counters;
241 op_ppro_spec.num_counters = num_counters;
242 op_ppro_spec.num_controls = num_counters;
243}
244
245struct op_x86_model_spec op_arch_perfmon_spec = {
246 /* num_counters/num_controls filled in at runtime */
186 .fill_in_addresses = &ppro_fill_in_addresses, 247 .fill_in_addresses = &ppro_fill_in_addresses,
248 /* user space does the cpuid check for available events */
187 .setup_ctrs = &ppro_setup_ctrs, 249 .setup_ctrs = &ppro_setup_ctrs,
188 .check_ctrs = &ppro_check_ctrs, 250 .check_ctrs = &ppro_check_ctrs,
189 .start = &ppro_start, 251 .start = &ppro_start,
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 24ccdebf3ac1..825e79064d64 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -34,8 +34,8 @@ struct pt_regs;
34struct op_x86_model_spec { 34struct op_x86_model_spec {
35 int (*init)(struct oprofile_operations *ops); 35 int (*init)(struct oprofile_operations *ops);
36 void (*exit)(void); 36 void (*exit)(void);
37 unsigned int const num_counters; 37 unsigned int num_counters;
38 unsigned int const num_controls; 38 unsigned int num_controls;
39 void (*fill_in_addresses)(struct op_msrs * const msrs); 39 void (*fill_in_addresses)(struct op_msrs * const msrs);
40 void (*setup_ctrs)(struct op_msrs const * const msrs); 40 void (*setup_ctrs)(struct op_msrs const * const msrs);
41 int (*check_ctrs)(struct pt_regs * const regs, 41 int (*check_ctrs)(struct pt_regs * const regs,
@@ -45,9 +45,12 @@ struct op_x86_model_spec {
45 void (*shutdown)(struct op_msrs const * const msrs); 45 void (*shutdown)(struct op_msrs const * const msrs);
46}; 46};
47 47
48extern struct op_x86_model_spec const op_ppro_spec; 48extern struct op_x86_model_spec op_ppro_spec;
49extern struct op_x86_model_spec const op_p4_spec; 49extern struct op_x86_model_spec const op_p4_spec;
50extern struct op_x86_model_spec const op_p4_ht2_spec; 50extern struct op_x86_model_spec const op_p4_ht2_spec;
51extern struct op_x86_model_spec const op_amd_spec; 51extern struct op_x86_model_spec const op_amd_spec;
52extern struct op_x86_model_spec op_arch_perfmon_spec;
53
54extern void arch_perfmon_setup_counters(void);
52 55
53#endif /* OP_X86_MODEL_H */ 56#endif /* OP_X86_MODEL_H */