aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/oprofile/op_model_ppro.c
diff options
context:
space:
mode:
authorAndi Kleen <ak@linux.intel.com>2008-08-18 08:50:31 -0400
committerRobert Richter <robert.richter@amd.com>2008-10-13 13:25:09 -0400
commitb99170288421c79f0c2efa8b33e26e65f4bb7fb8 (patch)
tree7580ce405a4dbbd2c57fbc9a129102481467e281 /arch/x86/oprofile/op_model_ppro.c
parentf645f6406463a01869c50844befc76d528971690 (diff)
oprofile: Implement Intel architectural perfmon support
Newer Intel CPUs (Core1+) have support for architectural events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details. The advantage of this is that it can be done without knowing about the specific CPU, because the CPU describes by itself what performance events are supported. This is only a fallback because only a limited set of 6 events are supported. This allows to do profiling on Nehalem and on Atom systems (later not tested) This patch implements support for that in oprofile's Intel Family 6 profiling module. It also has the advantage of supporting an arbitary number of events now as reported by the CPU. Also allow arbitary counter widths >32bit while we're at it. Requires a patched oprofile userland to support the new architecture. v2: update for latest oprofile tree remove force_arch_perfmon Signed-off-by: Andi Kleen <ak@linux.intel.com> Signed-off-by: Robert Richter <robert.richter@amd.com>
Diffstat (limited to 'arch/x86/oprofile/op_model_ppro.c')
-rw-r--r--arch/x86/oprofile/op_model_ppro.c104
1 files changed, 82 insertions, 22 deletions
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index eff431f6c57..12e207a67f1 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -1,32 +1,34 @@
1/* 1/*
2 * @file op_model_ppro.h 2 * @file op_model_ppro.h
3 * pentium pro / P6 model-specific MSR operations 3 * Family 6 perfmon and architectural perfmon MSR operations
4 * 4 *
5 * @remark Copyright 2002 OProfile authors 5 * @remark Copyright 2002 OProfile authors
6 * @remark Copyright 2008 Intel Corporation
6 * @remark Read the file COPYING 7 * @remark Read the file COPYING
7 * 8 *
8 * @author John Levon 9 * @author John Levon
9 * @author Philippe Elie 10 * @author Philippe Elie
10 * @author Graydon Hoare 11 * @author Graydon Hoare
12 * @author Andi Kleen
11 */ 13 */
12 14
13#include <linux/oprofile.h> 15#include <linux/oprofile.h>
16#include <linux/slab.h>
14#include <asm/ptrace.h> 17#include <asm/ptrace.h>
15#include <asm/msr.h> 18#include <asm/msr.h>
16#include <asm/apic.h> 19#include <asm/apic.h>
17#include <asm/nmi.h> 20#include <asm/nmi.h>
21#include <asm/intel_arch_perfmon.h>
18 22
19#include "op_x86_model.h" 23#include "op_x86_model.h"
20#include "op_counter.h" 24#include "op_counter.h"
21 25
22#define NUM_COUNTERS 2 26static int num_counters = 2;
23#define NUM_CONTROLS 2 27static int counter_width = 32;
24 28
25#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) 29#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
26#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) 30#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0)
27#define CTR_32BIT_WRITE(l, msrs, c) \ 31#define CTR_OVERFLOWED(n) (!((n) & (1U<<(counter_width-1))))
28 do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), 0); } while (0)
29#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
30 32
31#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) 33#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
32#define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) 34#define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
@@ -40,20 +42,20 @@
40#define CTRL_SET_UM(val, m) (val |= (m << 8)) 42#define CTRL_SET_UM(val, m) (val |= (m << 8))
41#define CTRL_SET_EVENT(val, e) (val |= e) 43#define CTRL_SET_EVENT(val, e) (val |= e)
42 44
43static unsigned long reset_value[NUM_COUNTERS]; 45static u64 *reset_value;
44 46
45static void ppro_fill_in_addresses(struct op_msrs * const msrs) 47static void ppro_fill_in_addresses(struct op_msrs * const msrs)
46{ 48{
47 int i; 49 int i;
48 50
49 for (i = 0; i < NUM_COUNTERS; i++) { 51 for (i = 0; i < num_counters; i++) {
50 if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) 52 if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
51 msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; 53 msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
52 else 54 else
53 msrs->counters[i].addr = 0; 55 msrs->counters[i].addr = 0;
54 } 56 }
55 57
56 for (i = 0; i < NUM_CONTROLS; i++) { 58 for (i = 0; i < num_counters; i++) {
57 if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) 59 if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
58 msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; 60 msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
59 else 61 else
@@ -67,8 +69,22 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
67 unsigned int low, high; 69 unsigned int low, high;
68 int i; 70 int i;
69 71
72 if (!reset_value) {
73 reset_value = kmalloc(sizeof(unsigned) * num_counters,
74 GFP_ATOMIC);
75 if (!reset_value)
76 return;
77 }
78
79 if (cpu_has_arch_perfmon) {
80 union cpuid10_eax eax;
81 eax.full = cpuid_eax(0xa);
82 if (counter_width < eax.split.bit_width)
83 counter_width = eax.split.bit_width;
84 }
85
70 /* clear all counters */ 86 /* clear all counters */
71 for (i = 0 ; i < NUM_CONTROLS; ++i) { 87 for (i = 0 ; i < num_counters; ++i) {
72 if (unlikely(!CTRL_IS_RESERVED(msrs, i))) 88 if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
73 continue; 89 continue;
74 CTRL_READ(low, high, msrs, i); 90 CTRL_READ(low, high, msrs, i);
@@ -77,18 +93,18 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
77 } 93 }
78 94
79 /* avoid a false detection of ctr overflows in NMI handler */ 95 /* avoid a false detection of ctr overflows in NMI handler */
80 for (i = 0; i < NUM_COUNTERS; ++i) { 96 for (i = 0; i < num_counters; ++i) {
81 if (unlikely(!CTR_IS_RESERVED(msrs, i))) 97 if (unlikely(!CTR_IS_RESERVED(msrs, i)))
82 continue; 98 continue;
83 CTR_32BIT_WRITE(1, msrs, i); 99 wrmsrl(msrs->counters[i].addr, -1LL);
84 } 100 }
85 101
86 /* enable active counters */ 102 /* enable active counters */
87 for (i = 0; i < NUM_COUNTERS; ++i) { 103 for (i = 0; i < num_counters; ++i) {
88 if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { 104 if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
89 reset_value[i] = counter_config[i].count; 105 reset_value[i] = counter_config[i].count;
90 106
91 CTR_32BIT_WRITE(counter_config[i].count, msrs, i); 107 wrmsrl(msrs->counters[i].addr, -reset_value[i]);
92 108
93 CTRL_READ(low, high, msrs, i); 109 CTRL_READ(low, high, msrs, i);
94 CTRL_CLEAR(low); 110 CTRL_CLEAR(low);
@@ -111,13 +127,13 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
111 unsigned int low, high; 127 unsigned int low, high;
112 int i; 128 int i;
113 129
114 for (i = 0 ; i < NUM_COUNTERS; ++i) { 130 for (i = 0 ; i < num_counters; ++i) {
115 if (!reset_value[i]) 131 if (!reset_value[i])
116 continue; 132 continue;
117 CTR_READ(low, high, msrs, i); 133 CTR_READ(low, high, msrs, i);
118 if (CTR_OVERFLOWED(low)) { 134 if (CTR_OVERFLOWED(low)) {
119 oprofile_add_sample(regs, i); 135 oprofile_add_sample(regs, i);
120 CTR_32BIT_WRITE(reset_value[i], msrs, i); 136 wrmsrl(msrs->counters[i].addr, -reset_value[i]);
121 } 137 }
122 } 138 }
123 139
@@ -141,7 +157,7 @@ static void ppro_start(struct op_msrs const * const msrs)
141 unsigned int low, high; 157 unsigned int low, high;
142 int i; 158 int i;
143 159
144 for (i = 0; i < NUM_COUNTERS; ++i) { 160 for (i = 0; i < num_counters; ++i) {
145 if (reset_value[i]) { 161 if (reset_value[i]) {
146 CTRL_READ(low, high, msrs, i); 162 CTRL_READ(low, high, msrs, i);
147 CTRL_SET_ACTIVE(low); 163 CTRL_SET_ACTIVE(low);
@@ -156,7 +172,7 @@ static void ppro_stop(struct op_msrs const * const msrs)
156 unsigned int low, high; 172 unsigned int low, high;
157 int i; 173 int i;
158 174
159 for (i = 0; i < NUM_COUNTERS; ++i) { 175 for (i = 0; i < num_counters; ++i) {
160 if (!reset_value[i]) 176 if (!reset_value[i])
161 continue; 177 continue;
162 CTRL_READ(low, high, msrs, i); 178 CTRL_READ(low, high, msrs, i);
@@ -169,21 +185,65 @@ static void ppro_shutdown(struct op_msrs const * const msrs)
169{ 185{
170 int i; 186 int i;
171 187
172 for (i = 0 ; i < NUM_COUNTERS ; ++i) { 188 for (i = 0 ; i < num_counters ; ++i) {
173 if (CTR_IS_RESERVED(msrs, i)) 189 if (CTR_IS_RESERVED(msrs, i))
174 release_perfctr_nmi(MSR_P6_PERFCTR0 + i); 190 release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
175 } 191 }
176 for (i = 0 ; i < NUM_CONTROLS ; ++i) { 192 for (i = 0 ; i < num_counters ; ++i) {
177 if (CTRL_IS_RESERVED(msrs, i)) 193 if (CTRL_IS_RESERVED(msrs, i))
178 release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); 194 release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
179 } 195 }
196 if (reset_value) {
197 kfree(reset_value);
198 reset_value = NULL;
199 }
180} 200}
181 201
182 202
183struct op_x86_model_spec const op_ppro_spec = { 203struct op_x86_model_spec const op_ppro_spec = {
184 .num_counters = NUM_COUNTERS, 204 .num_counters = 2,
185 .num_controls = NUM_CONTROLS, 205 .num_controls = 2,
206 .fill_in_addresses = &ppro_fill_in_addresses,
207 .setup_ctrs = &ppro_setup_ctrs,
208 .check_ctrs = &ppro_check_ctrs,
209 .start = &ppro_start,
210 .stop = &ppro_stop,
211 .shutdown = &ppro_shutdown
212};
213
214/*
215 * Architectural performance monitoring.
216 *
217 * Newer Intel CPUs (Core1+) have support for architectural
218 * events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details.
219 * The advantage of this is that it can be done without knowing about
220 * the specific CPU.
221 */
222
223void arch_perfmon_setup_counters(void)
224{
225 union cpuid10_eax eax;
226
227 eax.full = cpuid_eax(0xa);
228
229 /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */
230 if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
231 current_cpu_data.x86_model == 15) {
232 eax.split.version_id = 2;
233 eax.split.num_counters = 2;
234 eax.split.bit_width = 40;
235 }
236
237 num_counters = eax.split.num_counters;
238
239 op_arch_perfmon_spec.num_counters = num_counters;
240 op_arch_perfmon_spec.num_controls = num_counters;
241}
242
243struct op_x86_model_spec op_arch_perfmon_spec = {
244 /* num_counters/num_controls filled in at runtime */
186 .fill_in_addresses = &ppro_fill_in_addresses, 245 .fill_in_addresses = &ppro_fill_in_addresses,
246 /* user space does the cpuid check for available events */
187 .setup_ctrs = &ppro_setup_ctrs, 247 .setup_ctrs = &ppro_setup_ctrs,
188 .check_ctrs = &ppro_check_ctrs, 248 .check_ctrs = &ppro_check_ctrs,
189 .start = &ppro_start, 249 .start = &ppro_start,