Diffstat (limited to 'arch/x86/oprofile')
-rw-r--r--	arch/x86/oprofile/nmi_int.c	 23
-rw-r--r--	arch/x86/oprofile/op_model_ppro.c	104
-rw-r--r--	arch/x86/oprofile/op_x86_model.h	  3
3 files changed, 102 insertions(+), 28 deletions(-)
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 1059f3fe6b1d..12d6f85084f1 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -429,6 +429,16 @@ static int __init ppro_init(char **cpu_type)
 	return 1;
 }
 
+static int __init arch_perfmon_init(char **cpu_type)
+{
+	if (!cpu_has_arch_perfmon)
+		return 0;
+	*cpu_type = "i386/arch_perfmon";
+	model = &op_arch_perfmon_spec;
+	arch_perfmon_setup_counters();
+	return 1;
+}
+
 /* in order to get sysfs right */
 static int using_nmi;
 
@@ -436,7 +446,7 @@ int __init op_nmi_init(struct oprofile_operations *ops)
 {
 	__u8 vendor = boot_cpu_data.x86_vendor;
 	__u8 family = boot_cpu_data.x86;
-	char *cpu_type;
+	char *cpu_type = NULL;
 	int ret = 0;
 
 	if (!cpu_has_apic)
@@ -474,19 +484,20 @@ int __init op_nmi_init(struct oprofile_operations *ops)
 		switch (family) {
 			/* Pentium IV */
 		case 0xf:
-			if (!p4_init(&cpu_type))
-				return -ENODEV;
+			p4_init(&cpu_type);
 			break;
 
 			/* A P6-class processor */
 		case 6:
-			if (!ppro_init(&cpu_type))
-				return -ENODEV;
+			ppro_init(&cpu_type);
 			break;
 
 		default:
-			return -ENODEV;
+			break;
 		}
+
+		if (!cpu_type && !arch_perfmon_init(&cpu_type))
+			return -ENODEV;
 		break;
 
 	default:
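
Note on the detection flow above: op_nmi_init() now tries the family-specific init first and, if no cpu_type was chosen, falls back to arch_perfmon_init(), which keys off CPUID leaf 0xA. The user-space sketch below is not part of the patch; it decodes the same leaf fields the kernel reads through union cpuid10_eax (field layout per the Intel SDM), so you can see what the fallback would find on a given machine. The file name and the simple version-id test are illustrative only.

/*
 * Hypothetical user-space helper (archperf.c): decode the CPUID 0xA
 * fields that arch_perfmon_init()/arch_perfmon_setup_counters() rely on.
 * Build with gcc on x86: gcc -o archperf archperf.c
 */
#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* Leaf 0xA absent or version id 0: roughly the cpu_has_arch_perfmon gate. */
	if (!__get_cpuid(0x0a, &eax, &ebx, &ecx, &edx) || (eax & 0xff) == 0) {
		puts("no architectural perfmon");
		return 1;
	}
	printf("version id:   %u\n", eax & 0xff);          /* eax.split.version_id */
	printf("num counters: %u\n", (eax >> 8) & 0xff);   /* eax.split.num_counters */
	printf("bit width:    %u\n", (eax >> 16) & 0xff);  /* eax.split.bit_width */
	return 0;
}
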
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index eff431f6c57b..12e207a67f1b 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -1,32 +1,34 @@
 /*
  * @file op_model_ppro.h
- * pentium pro / P6 model-specific MSR operations
+ * Family 6 perfmon and architectural perfmon MSR operations
  *
  * @remark Copyright 2002 OProfile authors
+ * @remark Copyright 2008 Intel Corporation
  * @remark Read the file COPYING
  *
  * @author John Levon
  * @author Philippe Elie
  * @author Graydon Hoare
+ * @author Andi Kleen
  */
 
 #include <linux/oprofile.h>
+#include <linux/slab.h>
 #include <asm/ptrace.h>
 #include <asm/msr.h>
 #include <asm/apic.h>
 #include <asm/nmi.h>
+#include <asm/intel_arch_perfmon.h>
 
 #include "op_x86_model.h"
 #include "op_counter.h"
 
-#define NUM_COUNTERS 2
-#define NUM_CONTROLS 2
+static int num_counters = 2;
+static int counter_width = 32;
 
 #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
 #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0)
-#define CTR_32BIT_WRITE(l, msrs, c)	\
-	do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), 0); } while (0)
-#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
+#define CTR_OVERFLOWED(n) (!((n) & (1U<<(counter_width-1))))
 
 #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
 #define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
@@ -40,20 +42,20 @@
 #define CTRL_SET_UM(val, m) (val |= (m << 8))
 #define CTRL_SET_EVENT(val, e) (val |= e)
 
-static unsigned long reset_value[NUM_COUNTERS];
+static u64 *reset_value;
 
 static void ppro_fill_in_addresses(struct op_msrs * const msrs)
 {
 	int i;
 
-	for (i = 0; i < NUM_COUNTERS; i++) {
+	for (i = 0; i < num_counters; i++) {
 		if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
 			msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
 		else
 			msrs->counters[i].addr = 0;
 	}
 
-	for (i = 0; i < NUM_CONTROLS; i++) {
+	for (i = 0; i < num_counters; i++) {
 		if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
 			msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
 		else
@@ -67,8 +69,22 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
 	unsigned int low, high;
 	int i;
 
+	if (!reset_value) {
+		reset_value = kmalloc(sizeof(unsigned) * num_counters,
+					GFP_ATOMIC);
+		if (!reset_value)
+			return;
+	}
+
+	if (cpu_has_arch_perfmon) {
+		union cpuid10_eax eax;
+		eax.full = cpuid_eax(0xa);
+		if (counter_width < eax.split.bit_width)
+			counter_width = eax.split.bit_width;
+	}
+
 	/* clear all counters */
-	for (i = 0 ; i < NUM_CONTROLS; ++i) {
+	for (i = 0 ; i < num_counters; ++i) {
 		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
 			continue;
 		CTRL_READ(low, high, msrs, i);
@@ -77,18 +93,18 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
 	}
 
 	/* avoid a false detection of ctr overflows in NMI handler */
-	for (i = 0; i < NUM_COUNTERS; ++i) {
+	for (i = 0; i < num_counters; ++i) {
 		if (unlikely(!CTR_IS_RESERVED(msrs, i)))
 			continue;
-		CTR_32BIT_WRITE(1, msrs, i);
+		wrmsrl(msrs->counters[i].addr, -1LL);
 	}
 
 	/* enable active counters */
-	for (i = 0; i < NUM_COUNTERS; ++i) {
+	for (i = 0; i < num_counters; ++i) {
 		if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
 			reset_value[i] = counter_config[i].count;
 
-			CTR_32BIT_WRITE(counter_config[i].count, msrs, i);
+			wrmsrl(msrs->counters[i].addr, -reset_value[i]);
 
 			CTRL_READ(low, high, msrs, i);
 			CTRL_CLEAR(low);
@@ -111,13 +127,13 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
 	unsigned int low, high;
 	int i;
 
-	for (i = 0 ; i < NUM_COUNTERS; ++i) {
+	for (i = 0 ; i < num_counters; ++i) {
 		if (!reset_value[i])
 			continue;
 		CTR_READ(low, high, msrs, i);
 		if (CTR_OVERFLOWED(low)) {
 			oprofile_add_sample(regs, i);
-			CTR_32BIT_WRITE(reset_value[i], msrs, i);
+			wrmsrl(msrs->counters[i].addr, -reset_value[i]);
 		}
 	}
 
@@ -141,7 +157,7 @@ static void ppro_start(struct op_msrs const * const msrs)
 	unsigned int low, high;
 	int i;
 
-	for (i = 0; i < NUM_COUNTERS; ++i) {
+	for (i = 0; i < num_counters; ++i) {
 		if (reset_value[i]) {
 			CTRL_READ(low, high, msrs, i);
 			CTRL_SET_ACTIVE(low);
@@ -156,7 +172,7 @@ static void ppro_stop(struct op_msrs const * const msrs)
 	unsigned int low, high;
 	int i;
 
-	for (i = 0; i < NUM_COUNTERS; ++i) {
+	for (i = 0; i < num_counters; ++i) {
 		if (!reset_value[i])
 			continue;
 		CTRL_READ(low, high, msrs, i);
@@ -169,21 +185,65 @@ static void ppro_shutdown(struct op_msrs const * const msrs)
 {
 	int i;
 
-	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+	for (i = 0 ; i < num_counters ; ++i) {
 		if (CTR_IS_RESERVED(msrs, i))
 			release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
 	}
-	for (i = 0 ; i < NUM_CONTROLS ; ++i) {
+	for (i = 0 ; i < num_counters ; ++i) {
 		if (CTRL_IS_RESERVED(msrs, i))
 			release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
 	}
+	if (reset_value) {
+		kfree(reset_value);
+		reset_value = NULL;
+	}
 }
 
 
 struct op_x86_model_spec const op_ppro_spec = {
-	.num_counters = NUM_COUNTERS,
-	.num_controls = NUM_CONTROLS,
+	.num_counters = 2,
+	.num_controls = 2,
+	.fill_in_addresses = &ppro_fill_in_addresses,
+	.setup_ctrs = &ppro_setup_ctrs,
+	.check_ctrs = &ppro_check_ctrs,
+	.start = &ppro_start,
+	.stop = &ppro_stop,
+	.shutdown = &ppro_shutdown
+};
+
+/*
+ * Architectural performance monitoring.
+ *
+ * Newer Intel CPUs (Core1+) have support for architectural
+ * events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details.
+ * The advantage of this is that it can be done without knowing about
+ * the specific CPU.
+ */
+
+void arch_perfmon_setup_counters(void)
+{
+	union cpuid10_eax eax;
+
+	eax.full = cpuid_eax(0xa);
+
+	/* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */
+	if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
+		current_cpu_data.x86_model == 15) {
+		eax.split.version_id = 2;
+		eax.split.num_counters = 2;
+		eax.split.bit_width = 40;
+	}
+
+	num_counters = eax.split.num_counters;
+
+	op_arch_perfmon_spec.num_counters = num_counters;
+	op_arch_perfmon_spec.num_controls = num_counters;
+}
+
+struct op_x86_model_spec op_arch_perfmon_spec = {
+	/* num_counters/num_controls filled in at runtime */
 	.fill_in_addresses = &ppro_fill_in_addresses,
+	/* user space does the cpuid check for available events */
 	.setup_ctrs = &ppro_setup_ctrs,
 	.check_ctrs = &ppro_check_ctrs,
 	.start = &ppro_start,
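
The counter writes above all use the same two's-complement reload trick: wrmsrl(addr, -reset_value[i]) programs the counter to 2^counter_width - count, so the bit at position counter_width - 1 stays set until exactly `count` events have been counted, which is what CTR_OVERFLOWED() tests in the NMI handler. Below is a standalone model of that arithmetic, not kernel code: counter_width = 40 is just an example value CPUID 0xA might report, and 64-bit math is used throughout to keep the shifts well-defined.

/* User-space model of the reload/overflow scheme; build with gcc. */
#include <stdio.h>
#include <stdint.h>

static int counter_width = 40;	/* example: eax.split.bit_width from CPUID 0xA */

int main(void)
{
	uint64_t mask = (1ULL << counter_width) - 1;	/* counter_width-bit register */
	uint64_t count = 100000;			/* stands in for reset_value[i] */
	uint64_t ctr = -count & mask;			/* wrmsrl(addr, -reset_value[i]) */
	uint64_t events = 0;

	/* Bit (counter_width - 1) stays set until `count` events have elapsed. */
	while (ctr & (1ULL << (counter_width - 1))) {
		ctr = (ctr + 1) & mask;			/* one hardware event */
		events++;
	}
	printf("overflow after %llu events\n", (unsigned long long)events);
	return 0;
}

Running it prints "overflow after 100000 events": the overflow fires only once the programmed count is exhausted, regardless of whether the hardware counter is 32 or 40 bits wide.
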
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 3d3b85d3c257..0b601893a4df 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -49,5 +49,8 @@ extern struct op_x86_model_spec const op_ppro_spec;
 extern struct op_x86_model_spec const op_p4_spec;
 extern struct op_x86_model_spec const op_p4_ht2_spec;
 extern struct op_x86_model_spec const op_amd_spec;
+extern struct op_x86_model_spec op_arch_perfmon_spec;
+
+extern void arch_perfmon_setup_counters(void);
 
 #endif /* OP_X86_MODEL_H */
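
One detail this header change encodes: op_arch_perfmon_spec is the only spec declared without const, because arch_perfmon_setup_counters() writes its num_counters/num_controls at init time from CPUID data, while the per-family specs are compile-time constants. The sketch below is a simplified, user-space illustration of that pattern; the struct is abridged from op_x86_model.h and all names here are hypothetical.

/* Simplified model-spec pattern; build with gcc. */
#include <stdio.h>

struct model_spec {
	unsigned int num_counters;
	unsigned int num_controls;
	void (*start)(void);
};

static void generic_start(void) { puts("counters started"); }

/* const spec: counter counts known at compile time */
static const struct model_spec ppro_like = { 2, 2, generic_start };

/* mutable spec: counts discovered from CPUID 0xA at init time */
static struct model_spec arch_perfmon_like = { 0, 0, generic_start };

int main(void)
{
	arch_perfmon_like.num_counters = 4;	/* stand-in for the CPUID result */
	arch_perfmon_like.num_controls = 4;

	const struct model_spec *model = &arch_perfmon_like;
	printf("%u counters\n", model->num_counters);
	model->start();
	(void)ppro_like;
	return 0;
}
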