author     Thomas Gleixner <tglx@linutronix.de>        2007-10-11 05:16:55 -0400
committer  Thomas Gleixner <tglx@linutronix.de>        2007-10-11 05:16:55 -0400
commit     ff4395654dc6a3a5e35611940047114d4f3d0a7a (patch)
tree       1f17f5160046496c29afeb2872153dcbb939b8a0 /arch/x86
parent     9402e12b8fef1efe9cf949fc020dcda22d9d8667 (diff)
i386: move oprofile
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/oprofile/Kconfig              17
-rw-r--r--  arch/x86/oprofile/Makefile             12
-rw-r--r--  arch/x86/oprofile/backtrace.c         127
-rw-r--r--  arch/x86/oprofile/init.c               48
-rw-r--r--  arch/x86/oprofile/nmi_int.c           477
-rw-r--r--  arch/x86/oprofile/nmi_timer_int.c      69
-rw-r--r--  arch/x86/oprofile/op_counter.h         29
-rw-r--r--  arch/x86/oprofile/op_model_athlon.c   180
-rw-r--r--  arch/x86/oprofile/op_model_p4.c       722
-rw-r--r--  arch/x86/oprofile/op_model_ppro.c     192
-rw-r--r--  arch/x86/oprofile/op_x86_model.h       51
11 files changed, 1924 insertions(+), 0 deletions(-)
diff --git a/arch/x86/oprofile/Kconfig b/arch/x86/oprofile/Kconfig
new file mode 100644
index 000000000000..d8a84088471a
--- /dev/null
+++ b/arch/x86/oprofile/Kconfig
@@ -0,0 +1,17 @@
1config PROFILING
2 bool "Profiling support (EXPERIMENTAL)"
3 help
4 Say Y here to enable the extended profiling support mechanisms used
5 by profilers such as OProfile.
6
7
8config OPROFILE
9 tristate "OProfile system profiling (EXPERIMENTAL)"
10 depends on PROFILING
11 help
12 OProfile is a profiling system capable of profiling the
13 whole system, including the kernel, kernel modules, libraries,
14 and applications.
15
16 If unsure, say N.
17
diff --git a/arch/x86/oprofile/Makefile b/arch/x86/oprofile/Makefile
new file mode 100644
index 000000000000..30f3eb366667
--- /dev/null
+++ b/arch/x86/oprofile/Makefile
@@ -0,0 +1,12 @@
1obj-$(CONFIG_OPROFILE) += oprofile.o
2
3DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
4 oprof.o cpu_buffer.o buffer_sync.o \
5 event_buffer.o oprofile_files.o \
6 oprofilefs.o oprofile_stats.o \
7 timer_int.o )
8
9oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
10oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o \
11 op_model_ppro.o op_model_p4.o
12oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
new file mode 100644
index 000000000000..c049ce414f01
--- /dev/null
+++ b/arch/x86/oprofile/backtrace.c
@@ -0,0 +1,127 @@
1/**
2 * @file backtrace.c
3 *
4 * @remark Copyright 2002 OProfile authors
5 * @remark Read the file COPYING
6 *
7 * @author John Levon
8 * @author David Smith
9 */
10
11#include <linux/oprofile.h>
12#include <linux/sched.h>
13#include <linux/mm.h>
14#include <asm/ptrace.h>
15#include <asm/uaccess.h>
16
17struct frame_head {
18 struct frame_head * ebp;
19 unsigned long ret;
20} __attribute__((packed));
21
22static struct frame_head *
23dump_kernel_backtrace(struct frame_head * head)
24{
25 oprofile_add_trace(head->ret);
26
27 /* frame pointers should strictly progress back up the stack
28 * (towards higher addresses) */
29 if (head >= head->ebp)
30 return NULL;
31
32 return head->ebp;
33}
34
35static struct frame_head *
36dump_user_backtrace(struct frame_head * head)
37{
38 struct frame_head bufhead[2];
39
40 /* Also check accessibility of one struct frame_head beyond */
41 if (!access_ok(VERIFY_READ, head, sizeof(bufhead)))
42 return NULL;
43 if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
44 return NULL;
45
46 oprofile_add_trace(bufhead[0].ret);
47
48 /* frame pointers should strictly progress back up the stack
49 * (towards higher addresses) */
50 if (head >= bufhead[0].ebp)
51 return NULL;
52
53 return bufhead[0].ebp;
54}
55
56/*
57 * | | /\ Higher addresses
58 * | |
59 * --------------- stack base (address of current_thread_info)
60 * | thread info |
61 * . .
62 * | stack |
63 * --------------- saved regs->ebp value if valid (frame_head address)
64 * . .
65 * --------------- saved regs->rsp value if x86_64
66 * | |
67 * --------------- struct pt_regs * stored on stack if 32-bit
68 * | |
69 * . .
70 * | |
71 * --------------- %esp
72 * | |
73 * | | \/ Lower addresses
74 *
75 * Thus, regs (or regs->rsp for x86_64) <-> stack base restricts the
76 * valid(ish) ebp values. Note: (1) for x86_64, NMI and several other
77 * exceptions use special stacks, maintained by the interrupt stack table
78 * (IST). These stacks are set up in trap_init() in
79 * arch/x86_64/kernel/traps.c. Thus, for x86_64, regs now does not point
80 * to the kernel stack; instead, it points to some location on the NMI
81 * stack. On the other hand, regs->rsp is the stack pointer saved when the
82 * NMI occurred. (2) For 32-bit, regs->esp is not valid because the
83 * processor does not save %esp on the kernel stack when interrupts occur
84 * in kernel mode.
85 */
86#ifdef CONFIG_FRAME_POINTER
87static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs)
88{
89 unsigned long headaddr = (unsigned long)head;
90#ifdef CONFIG_X86_64
91 unsigned long stack = (unsigned long)regs->rsp;
92#else
93 unsigned long stack = (unsigned long)regs;
94#endif
95 unsigned long stack_base = (stack & ~(THREAD_SIZE - 1)) + THREAD_SIZE;
96
97 return headaddr > stack && headaddr < stack_base;
98}
99#else
100/* without fp, it's just junk */
101static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs)
102{
103 return 0;
104}
105#endif
106
107
108void
109x86_backtrace(struct pt_regs * const regs, unsigned int depth)
110{
111 struct frame_head *head;
112
113#ifdef CONFIG_X86_64
114 head = (struct frame_head *)regs->rbp;
115#else
116 head = (struct frame_head *)regs->ebp;
117#endif
118
119 if (!user_mode_vm(regs)) {
120 while (depth-- && valid_kernel_stack(head, regs))
121 head = dump_kernel_backtrace(head);
122 return;
123 }
124
125 while (depth-- && head)
126 head = dump_user_backtrace(head);
127}
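
As an aside on the frame-pointer walk above: struct frame_head is simply a view of the {saved %ebp, return address} pair that every frame-pointer-enabled function prologue leaves on the stack, and dump_{kernel,user}_backtrace() just follows that chain towards higher addresses. A minimal user-space sketch of the same walk (not part of this patch; assumes GCC's __builtin_frame_address() and a build with -O0 -fno-omit-frame-pointer):

#include <stdio.h>

struct frame_head {
	struct frame_head *ebp;		/* caller's saved frame pointer */
	unsigned long ret;		/* return address pushed by call */
} __attribute__((packed));

static void walk(unsigned int depth)
{
	struct frame_head *head = __builtin_frame_address(0);

	while (depth-- && head) {
		printf("ret = %#lx\n", head->ret);
		/* like the driver: frame pointers must move towards higher addresses */
		if (head >= head->ebp)
			break;
		head = head->ebp;
	}
}

int main(void)
{
	walk(8);
	return 0;
}
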
diff --git a/arch/x86/oprofile/init.c b/arch/x86/oprofile/init.c
new file mode 100644
index 000000000000..5341d481d92f
--- /dev/null
+++ b/arch/x86/oprofile/init.c
@@ -0,0 +1,48 @@
1/**
2 * @file init.c
3 *
4 * @remark Copyright 2002 OProfile authors
5 * @remark Read the file COPYING
6 *
7 * @author John Levon <levon@movementarian.org>
8 */
9
10#include <linux/oprofile.h>
11#include <linux/init.h>
12#include <linux/errno.h>
13
14/* We support CPUs that have performance counters like the Pentium Pro
15 * with the NMI mode driver.
16 */
17
18extern int op_nmi_init(struct oprofile_operations * ops);
19extern int op_nmi_timer_init(struct oprofile_operations * ops);
20extern void op_nmi_exit(void);
21extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth);
22
23
24int __init oprofile_arch_init(struct oprofile_operations * ops)
25{
26 int ret;
27
28 ret = -ENODEV;
29
30#ifdef CONFIG_X86_LOCAL_APIC
31 ret = op_nmi_init(ops);
32#endif
33#ifdef CONFIG_X86_IO_APIC
34 if (ret < 0)
35 ret = op_nmi_timer_init(ops);
36#endif
37 ops->backtrace = x86_backtrace;
38
39 return ret;
40}
41
42
43void oprofile_arch_exit(void)
44{
45#ifdef CONFIG_X86_LOCAL_APIC
46 op_nmi_exit();
47#endif
48}
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
new file mode 100644
index 000000000000..11b7a51566a8
--- /dev/null
+++ b/arch/x86/oprofile/nmi_int.c
@@ -0,0 +1,477 @@
1/**
2 * @file nmi_int.c
3 *
4 * @remark Copyright 2002 OProfile authors
5 * @remark Read the file COPYING
6 *
7 * @author John Levon <levon@movementarian.org>
8 */
9
10#include <linux/init.h>
11#include <linux/notifier.h>
12#include <linux/smp.h>
13#include <linux/oprofile.h>
14#include <linux/sysdev.h>
15#include <linux/slab.h>
16#include <linux/moduleparam.h>
17#include <linux/kdebug.h>
18#include <asm/nmi.h>
19#include <asm/msr.h>
20#include <asm/apic.h>
21
22#include "op_counter.h"
23#include "op_x86_model.h"
24
25static struct op_x86_model_spec const * model;
26static struct op_msrs cpu_msrs[NR_CPUS];
27static unsigned long saved_lvtpc[NR_CPUS];
28
29static int nmi_start(void);
30static void nmi_stop(void);
31
32/* 0 == registered but off, 1 == registered and on */
33static int nmi_enabled = 0;
34
35#ifdef CONFIG_PM
36
37static int nmi_suspend(struct sys_device *dev, pm_message_t state)
38{
39 if (nmi_enabled == 1)
40 nmi_stop();
41 return 0;
42}
43
44
45static int nmi_resume(struct sys_device *dev)
46{
47 if (nmi_enabled == 1)
48 nmi_start();
49 return 0;
50}
51
52
53static struct sysdev_class oprofile_sysclass = {
54 set_kset_name("oprofile"),
55 .resume = nmi_resume,
56 .suspend = nmi_suspend,
57};
58
59
60static struct sys_device device_oprofile = {
61 .id = 0,
62 .cls = &oprofile_sysclass,
63};
64
65
66static int __init init_sysfs(void)
67{
68 int error;
69 if (!(error = sysdev_class_register(&oprofile_sysclass)))
70 error = sysdev_register(&device_oprofile);
71 return error;
72}
73
74
75static void exit_sysfs(void)
76{
77 sysdev_unregister(&device_oprofile);
78 sysdev_class_unregister(&oprofile_sysclass);
79}
80
81#else
82#define init_sysfs() do { } while (0)
83#define exit_sysfs() do { } while (0)
84#endif /* CONFIG_PM */
85
86static int profile_exceptions_notify(struct notifier_block *self,
87 unsigned long val, void *data)
88{
89 struct die_args *args = (struct die_args *)data;
90 int ret = NOTIFY_DONE;
91 int cpu = smp_processor_id();
92
93 switch(val) {
94 case DIE_NMI:
95 if (model->check_ctrs(args->regs, &cpu_msrs[cpu]))
96 ret = NOTIFY_STOP;
97 break;
98 default:
99 break;
100 }
101 return ret;
102}
103
104static void nmi_cpu_save_registers(struct op_msrs * msrs)
105{
106 unsigned int const nr_ctrs = model->num_counters;
107 unsigned int const nr_ctrls = model->num_controls;
108 struct op_msr * counters = msrs->counters;
109 struct op_msr * controls = msrs->controls;
110 unsigned int i;
111
112 for (i = 0; i < nr_ctrs; ++i) {
113 if (counters[i].addr){
114 rdmsr(counters[i].addr,
115 counters[i].saved.low,
116 counters[i].saved.high);
117 }
118 }
119
120 for (i = 0; i < nr_ctrls; ++i) {
121 if (controls[i].addr){
122 rdmsr(controls[i].addr,
123 controls[i].saved.low,
124 controls[i].saved.high);
125 }
126 }
127}
128
129
130static void nmi_save_registers(void * dummy)
131{
132 int cpu = smp_processor_id();
133 struct op_msrs * msrs = &cpu_msrs[cpu];
134 nmi_cpu_save_registers(msrs);
135}
136
137
138static void free_msrs(void)
139{
140 int i;
141 for_each_possible_cpu(i) {
142 kfree(cpu_msrs[i].counters);
143 cpu_msrs[i].counters = NULL;
144 kfree(cpu_msrs[i].controls);
145 cpu_msrs[i].controls = NULL;
146 }
147}
148
149
150static int allocate_msrs(void)
151{
152 int success = 1;
153 size_t controls_size = sizeof(struct op_msr) * model->num_controls;
154 size_t counters_size = sizeof(struct op_msr) * model->num_counters;
155
156 int i;
157 for_each_possible_cpu(i) {
158 cpu_msrs[i].counters = kmalloc(counters_size, GFP_KERNEL);
159 if (!cpu_msrs[i].counters) {
160 success = 0;
161 break;
162 }
163 cpu_msrs[i].controls = kmalloc(controls_size, GFP_KERNEL);
164 if (!cpu_msrs[i].controls) {
165 success = 0;
166 break;
167 }
168 }
169
170 if (!success)
171 free_msrs();
172
173 return success;
174}
175
176
177static void nmi_cpu_setup(void * dummy)
178{
179 int cpu = smp_processor_id();
180 struct op_msrs * msrs = &cpu_msrs[cpu];
181 spin_lock(&oprofilefs_lock);
182 model->setup_ctrs(msrs);
183 spin_unlock(&oprofilefs_lock);
184 saved_lvtpc[cpu] = apic_read(APIC_LVTPC);
185 apic_write(APIC_LVTPC, APIC_DM_NMI);
186}
187
188static struct notifier_block profile_exceptions_nb = {
189 .notifier_call = profile_exceptions_notify,
190 .next = NULL,
191 .priority = 0
192};
193
194static int nmi_setup(void)
195{
196 int err=0;
197 int cpu;
198
199 if (!allocate_msrs())
200 return -ENOMEM;
201
202 if ((err = register_die_notifier(&profile_exceptions_nb))){
203 free_msrs();
204 return err;
205 }
206
207 /* We need to serialize save and setup for HT because the subsets
208 * of MSRs are distinct for the save and setup operations
209 */
210
211 /* Assume saved/restored counters are the same on all CPUs */
212 model->fill_in_addresses(&cpu_msrs[0]);
213 for_each_possible_cpu (cpu) {
214 if (cpu != 0) {
215 memcpy(cpu_msrs[cpu].counters, cpu_msrs[0].counters,
216 sizeof(struct op_msr) * model->num_counters);
217
218 memcpy(cpu_msrs[cpu].controls, cpu_msrs[0].controls,
219 sizeof(struct op_msr) * model->num_controls);
220 }
221
222 }
223 on_each_cpu(nmi_save_registers, NULL, 0, 1);
224 on_each_cpu(nmi_cpu_setup, NULL, 0, 1);
225 nmi_enabled = 1;
226 return 0;
227}
228
229
230static void nmi_restore_registers(struct op_msrs * msrs)
231{
232 unsigned int const nr_ctrs = model->num_counters;
233 unsigned int const nr_ctrls = model->num_controls;
234 struct op_msr * counters = msrs->counters;
235 struct op_msr * controls = msrs->controls;
236 unsigned int i;
237
238 for (i = 0; i < nr_ctrls; ++i) {
239 if (controls[i].addr){
240 wrmsr(controls[i].addr,
241 controls[i].saved.low,
242 controls[i].saved.high);
243 }
244 }
245
246 for (i = 0; i < nr_ctrs; ++i) {
247 if (counters[i].addr){
248 wrmsr(counters[i].addr,
249 counters[i].saved.low,
250 counters[i].saved.high);
251 }
252 }
253}
254
255
256static void nmi_cpu_shutdown(void * dummy)
257{
258 unsigned int v;
259 int cpu = smp_processor_id();
260 struct op_msrs * msrs = &cpu_msrs[cpu];
261
262 /* restoring APIC_LVTPC can trigger an apic error because the delivery
263 * mode and vector nr combination can be illegal. That's by design: at
264 * power-on the apic lvt entries contain a zero vector nr, which is legal
265 * only for NMI delivery mode. So inhibit apic errors before restoring lvtpc.
266 */
267 v = apic_read(APIC_LVTERR);
268 apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
269 apic_write(APIC_LVTPC, saved_lvtpc[cpu]);
270 apic_write(APIC_LVTERR, v);
271 nmi_restore_registers(msrs);
272 model->shutdown(msrs);
273}
274
275
276static void nmi_shutdown(void)
277{
278 nmi_enabled = 0;
279 on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
280 unregister_die_notifier(&profile_exceptions_nb);
281 free_msrs();
282}
283
284
285static void nmi_cpu_start(void * dummy)
286{
287 struct op_msrs const * msrs = &cpu_msrs[smp_processor_id()];
288 model->start(msrs);
289}
290
291
292static int nmi_start(void)
293{
294 on_each_cpu(nmi_cpu_start, NULL, 0, 1);
295 return 0;
296}
297
298
299static void nmi_cpu_stop(void * dummy)
300{
301 struct op_msrs const * msrs = &cpu_msrs[smp_processor_id()];
302 model->stop(msrs);
303}
304
305
306static void nmi_stop(void)
307{
308 on_each_cpu(nmi_cpu_stop, NULL, 0, 1);
309}
310
311
312struct op_counter_config counter_config[OP_MAX_COUNTER];
313
314static int nmi_create_files(struct super_block * sb, struct dentry * root)
315{
316 unsigned int i;
317
318 for (i = 0; i < model->num_counters; ++i) {
319 struct dentry * dir;
320 char buf[4];
321
322 /* quick little hack to _not_ expose a counter if it is not
323 * available for use. This should protect userspace apps.
324 * NOTE: assumes 1:1 mapping here (that counters are organized
325 * sequentially in their struct assignment).
326 */
327 if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
328 continue;
329
330 snprintf(buf, sizeof(buf), "%d", i);
331 dir = oprofilefs_mkdir(sb, root, buf);
332 oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
333 oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
334 oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
335 oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
336 oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
337 oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
338 }
339
340 return 0;
341}
342
343static int p4force;
344module_param(p4force, int, 0);
345
346static int __init p4_init(char ** cpu_type)
347{
348 __u8 cpu_model = boot_cpu_data.x86_model;
349
350 if (!p4force && (cpu_model > 6 || cpu_model == 5))
351 return 0;
352
353#ifndef CONFIG_SMP
354 *cpu_type = "i386/p4";
355 model = &op_p4_spec;
356 return 1;
357#else
358 switch (smp_num_siblings) {
359 case 1:
360 *cpu_type = "i386/p4";
361 model = &op_p4_spec;
362 return 1;
363
364 case 2:
365 *cpu_type = "i386/p4-ht";
366 model = &op_p4_ht2_spec;
367 return 1;
368 }
369#endif
370
371 printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
372 printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
373 return 0;
374}
375
376
377static int __init ppro_init(char ** cpu_type)
378{
379 __u8 cpu_model = boot_cpu_data.x86_model;
380
381 if (cpu_model == 14)
382 *cpu_type = "i386/core";
383 else if (cpu_model == 15)
384 *cpu_type = "i386/core_2";
385 else if (cpu_model > 0xd)
386 return 0;
387 else if (cpu_model == 9) {
388 *cpu_type = "i386/p6_mobile";
389 } else if (cpu_model > 5) {
390 *cpu_type = "i386/piii";
391 } else if (cpu_model > 2) {
392 *cpu_type = "i386/pii";
393 } else {
394 *cpu_type = "i386/ppro";
395 }
396
397 model = &op_ppro_spec;
398 return 1;
399}
400
401/* in order to get sysfs right */
402static int using_nmi;
403
404int __init op_nmi_init(struct oprofile_operations *ops)
405{
406 __u8 vendor = boot_cpu_data.x86_vendor;
407 __u8 family = boot_cpu_data.x86;
408 char *cpu_type;
409
410 if (!cpu_has_apic)
411 return -ENODEV;
412
413 switch (vendor) {
414 case X86_VENDOR_AMD:
415 /* Needs to be at least an Athlon (or hammer in 32bit mode) */
416
417 switch (family) {
418 default:
419 return -ENODEV;
420 case 6:
421 model = &op_athlon_spec;
422 cpu_type = "i386/athlon";
423 break;
424 case 0xf:
425 model = &op_athlon_spec;
426 /* Actually it could be i386/hammer too, but give
427 user space a consistent name. */
428 cpu_type = "x86-64/hammer";
429 break;
430 case 0x10:
431 model = &op_athlon_spec;
432 cpu_type = "x86-64/family10";
433 break;
434 }
435 break;
436
437 case X86_VENDOR_INTEL:
438 switch (family) {
439 /* Pentium IV */
440 case 0xf:
441 if (!p4_init(&cpu_type))
442 return -ENODEV;
443 break;
444
445 /* A P6-class processor */
446 case 6:
447 if (!ppro_init(&cpu_type))
448 return -ENODEV;
449 break;
450
451 default:
452 return -ENODEV;
453 }
454 break;
455
456 default:
457 return -ENODEV;
458 }
459
460 init_sysfs();
461 using_nmi = 1;
462 ops->create_files = nmi_create_files;
463 ops->setup = nmi_setup;
464 ops->shutdown = nmi_shutdown;
465 ops->start = nmi_start;
466 ops->stop = nmi_stop;
467 ops->cpu_type = cpu_type;
468 printk(KERN_INFO "oprofile: using NMI interrupt.\n");
469 return 0;
470}
471
472
473void op_nmi_exit(void)
474{
475 if (using_nmi)
476 exit_sysfs();
477}
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c
new file mode 100644
index 000000000000..1418e36ae7ab
--- /dev/null
+++ b/arch/x86/oprofile/nmi_timer_int.c
@@ -0,0 +1,69 @@
1/**
2 * @file nmi_timer_int.c
3 *
4 * @remark Copyright 2003 OProfile authors
5 * @remark Read the file COPYING
6 *
7 * @author Zwane Mwaikambo <zwane@linuxpower.ca>
8 */
9
10#include <linux/init.h>
11#include <linux/smp.h>
12#include <linux/errno.h>
13#include <linux/oprofile.h>
14#include <linux/rcupdate.h>
15#include <linux/kdebug.h>
16
17#include <asm/nmi.h>
18#include <asm/apic.h>
19#include <asm/ptrace.h>
20
21static int profile_timer_exceptions_notify(struct notifier_block *self,
22 unsigned long val, void *data)
23{
24 struct die_args *args = (struct die_args *)data;
25 int ret = NOTIFY_DONE;
26
27 switch(val) {
28 case DIE_NMI:
29 oprofile_add_sample(args->regs, 0);
30 ret = NOTIFY_STOP;
31 break;
32 default:
33 break;
34 }
35 return ret;
36}
37
38static struct notifier_block profile_timer_exceptions_nb = {
39 .notifier_call = profile_timer_exceptions_notify,
40 .next = NULL,
41 .priority = 0
42};
43
44static int timer_start(void)
45{
46 if (register_die_notifier(&profile_timer_exceptions_nb))
47 return 1;
48 return 0;
49}
50
51
52static void timer_stop(void)
53{
54 unregister_die_notifier(&profile_timer_exceptions_nb);
55 synchronize_sched(); /* Allow already-started NMIs to complete. */
56}
57
58
59int __init op_nmi_timer_init(struct oprofile_operations * ops)
60{
61 if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0))
62 return -ENODEV;
63
64 ops->start = timer_start;
65 ops->stop = timer_stop;
66 ops->cpu_type = "timer";
67 printk(KERN_INFO "oprofile: using NMI timer interrupt.\n");
68 return 0;
69}
diff --git a/arch/x86/oprofile/op_counter.h b/arch/x86/oprofile/op_counter.h
new file mode 100644
index 000000000000..2880b15c4675
--- /dev/null
+++ b/arch/x86/oprofile/op_counter.h
@@ -0,0 +1,29 @@
1/**
2 * @file op_counter.h
3 *
4 * @remark Copyright 2002 OProfile authors
5 * @remark Read the file COPYING
6 *
7 * @author John Levon
8 */
9
10#ifndef OP_COUNTER_H
11#define OP_COUNTER_H
12
13#define OP_MAX_COUNTER 8
14
15/* Per-perfctr configuration as set via
16 * oprofilefs.
17 */
18struct op_counter_config {
19 unsigned long count;
20 unsigned long enabled;
21 unsigned long event;
22 unsigned long kernel;
23 unsigned long user;
24 unsigned long unit_mask;
25};
26
27extern struct op_counter_config counter_config[];
28
29#endif /* OP_COUNTER_H */
diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c
new file mode 100644
index 000000000000..3057a19e4641
--- /dev/null
+++ b/arch/x86/oprofile/op_model_athlon.c
@@ -0,0 +1,180 @@
1/**
2 * @file op_model_athlon.h
3 * athlon / K7 model-specific MSR operations
4 *
5 * @remark Copyright 2002 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author John Levon
9 * @author Philippe Elie
10 * @author Graydon Hoare
11 */
12
13#include <linux/oprofile.h>
14#include <asm/ptrace.h>
15#include <asm/msr.h>
16#include <asm/nmi.h>
17
18#include "op_x86_model.h"
19#include "op_counter.h"
20
21#define NUM_COUNTERS 4
22#define NUM_CONTROLS 4
23
24#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
25#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
26#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
27#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
28
29#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
30#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
31#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
32#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
33#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
34#define CTRL_CLEAR(x) (x &= (1<<21))
35#define CTRL_SET_ENABLE(val) (val |= 1<<20)
36#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16))
37#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
38#define CTRL_SET_UM(val, m) (val |= (m << 8))
39#define CTRL_SET_EVENT(val, e) (val |= e)
40
41static unsigned long reset_value[NUM_COUNTERS];
42
43static void athlon_fill_in_addresses(struct op_msrs * const msrs)
44{
45 int i;
46
47 for (i=0; i < NUM_COUNTERS; i++) {
48 if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
49 msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
50 else
51 msrs->counters[i].addr = 0;
52 }
53
54 for (i=0; i < NUM_CONTROLS; i++) {
55 if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i))
56 msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
57 else
58 msrs->controls[i].addr = 0;
59 }
60}
61
62
63static void athlon_setup_ctrs(struct op_msrs const * const msrs)
64{
65 unsigned int low, high;
66 int i;
67
68 /* clear all counters */
69 for (i = 0 ; i < NUM_CONTROLS; ++i) {
70 if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
71 continue;
72 CTRL_READ(low, high, msrs, i);
73 CTRL_CLEAR(low);
74 CTRL_WRITE(low, high, msrs, i);
75 }
76
77 /* avoid a false detection of ctr overflows in NMI handler */
78 for (i = 0; i < NUM_COUNTERS; ++i) {
79 if (unlikely(!CTR_IS_RESERVED(msrs,i)))
80 continue;
81 CTR_WRITE(1, msrs, i);
82 }
83
84 /* enable active counters */
85 for (i = 0; i < NUM_COUNTERS; ++i) {
86 if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) {
87 reset_value[i] = counter_config[i].count;
88
89 CTR_WRITE(counter_config[i].count, msrs, i);
90
91 CTRL_READ(low, high, msrs, i);
92 CTRL_CLEAR(low);
93 CTRL_SET_ENABLE(low);
94 CTRL_SET_USR(low, counter_config[i].user);
95 CTRL_SET_KERN(low, counter_config[i].kernel);
96 CTRL_SET_UM(low, counter_config[i].unit_mask);
97 CTRL_SET_EVENT(low, counter_config[i].event);
98 CTRL_WRITE(low, high, msrs, i);
99 } else {
100 reset_value[i] = 0;
101 }
102 }
103}
104
105
106static int athlon_check_ctrs(struct pt_regs * const regs,
107 struct op_msrs const * const msrs)
108{
109 unsigned int low, high;
110 int i;
111
112 for (i = 0 ; i < NUM_COUNTERS; ++i) {
113 if (!reset_value[i])
114 continue;
115 CTR_READ(low, high, msrs, i);
116 if (CTR_OVERFLOWED(low)) {
117 oprofile_add_sample(regs, i);
118 CTR_WRITE(reset_value[i], msrs, i);
119 }
120 }
121
122 /* See op_model_ppro.c */
123 return 1;
124}
125
126
127static void athlon_start(struct op_msrs const * const msrs)
128{
129 unsigned int low, high;
130 int i;
131 for (i = 0 ; i < NUM_COUNTERS ; ++i) {
132 if (reset_value[i]) {
133 CTRL_READ(low, high, msrs, i);
134 CTRL_SET_ACTIVE(low);
135 CTRL_WRITE(low, high, msrs, i);
136 }
137 }
138}
139
140
141static void athlon_stop(struct op_msrs const * const msrs)
142{
143 unsigned int low,high;
144 int i;
145
146 /* Subtle: stop on all counters to avoid race with
147 * setting our pm callback */
148 for (i = 0 ; i < NUM_COUNTERS ; ++i) {
149 if (!reset_value[i])
150 continue;
151 CTRL_READ(low, high, msrs, i);
152 CTRL_SET_INACTIVE(low);
153 CTRL_WRITE(low, high, msrs, i);
154 }
155}
156
157static void athlon_shutdown(struct op_msrs const * const msrs)
158{
159 int i;
160
161 for (i = 0 ; i < NUM_COUNTERS ; ++i) {
162 if (CTR_IS_RESERVED(msrs,i))
163 release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
164 }
165 for (i = 0 ; i < NUM_CONTROLS ; ++i) {
166 if (CTRL_IS_RESERVED(msrs,i))
167 release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
168 }
169}
170
171struct op_x86_model_spec const op_athlon_spec = {
172 .num_counters = NUM_COUNTERS,
173 .num_controls = NUM_CONTROLS,
174 .fill_in_addresses = &athlon_fill_in_addresses,
175 .setup_ctrs = &athlon_setup_ctrs,
176 .check_ctrs = &athlon_check_ctrs,
177 .start = &athlon_start,
178 .stop = &athlon_stop,
179 .shutdown = &athlon_shutdown
180};
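
A note on the CTR_WRITE()/CTR_OVERFLOWED() pair used by the Athlon/K7 model above (the P4 and PPro models rely on the same trick): the performance counters count upwards and raise the NMI on overflow, so the driver preloads each counter with the two's-complement of the requested count; after exactly `count` events the counter wraps past zero, its top bit is clear again, and CTR_OVERFLOWED() reports it. A small stand-alone sketch of just that arithmetic (illustrative only, plain C, no MSR access; the names mirror the macros above):

#include <stdio.h>

int main(void)
{
	unsigned long count = 100000;			/* desired sample interval */
	unsigned int preload = -(unsigned int)count;	/* what CTR_WRITE stores */
	unsigned int ctr = preload;
	unsigned long ev;

	/* simulate `count` events being counted */
	for (ev = 0; ev < count; ++ev)
		ctr++;

	/* CTR_OVERFLOWED(): top bit clear again means the counter wrapped */
	printf("preload=%#x final=%#x overflowed=%d\n",
	       preload, ctr, !(ctr & (1U << 31)));
	return 0;
}
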
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
new file mode 100644
index 000000000000..47925927b12f
--- /dev/null
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -0,0 +1,722 @@
1/**
2 * @file op_model_p4.c
3 * P4 model-specific MSR operations
4 *
5 * @remark Copyright 2002 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author Graydon Hoare
9 */
10
11#include <linux/oprofile.h>
12#include <linux/smp.h>
13#include <asm/msr.h>
14#include <asm/ptrace.h>
15#include <asm/fixmap.h>
16#include <asm/apic.h>
17#include <asm/nmi.h>
18
19#include "op_x86_model.h"
20#include "op_counter.h"
21
22#define NUM_EVENTS 39
23
24#define NUM_COUNTERS_NON_HT 8
25#define NUM_ESCRS_NON_HT 45
26#define NUM_CCCRS_NON_HT 18
27#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)
28
29#define NUM_COUNTERS_HT2 4
30#define NUM_ESCRS_HT2 23
31#define NUM_CCCRS_HT2 9
32#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
33
34static unsigned int num_counters = NUM_COUNTERS_NON_HT;
35static unsigned int num_controls = NUM_CONTROLS_NON_HT;
36
37/* this has to be checked dynamically since the
38 hyper-threadedness of a chip is discovered at
39 kernel boot-time. */
40static inline void setup_num_counters(void)
41{
42#ifdef CONFIG_SMP
43 if (smp_num_siblings == 2){
44 num_counters = NUM_COUNTERS_HT2;
45 num_controls = NUM_CONTROLS_HT2;
46 }
47#endif
48}
49
50static int inline addr_increment(void)
51{
52#ifdef CONFIG_SMP
53 return smp_num_siblings == 2 ? 2 : 1;
54#else
55 return 1;
56#endif
57}
58
59
60/* tables to simulate simplified hardware view of p4 registers */
61struct p4_counter_binding {
62 int virt_counter;
63 int counter_address;
64 int cccr_address;
65};
66
67struct p4_event_binding {
68 int escr_select; /* value to put in CCCR */
69 int event_select; /* value to put in ESCR */
70 struct {
71 int virt_counter; /* for this counter... */
72 int escr_address; /* use this ESCR */
73 } bindings[2];
74};
75
76/* nb: these CTR_* defines are a duplicate of defines in
77 event/i386.p4*events. */
78
79
80#define CTR_BPU_0 (1 << 0)
81#define CTR_MS_0 (1 << 1)
82#define CTR_FLAME_0 (1 << 2)
83#define CTR_IQ_4 (1 << 3)
84#define CTR_BPU_2 (1 << 4)
85#define CTR_MS_2 (1 << 5)
86#define CTR_FLAME_2 (1 << 6)
87#define CTR_IQ_5 (1 << 7)
88
89static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
90 { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 },
91 { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 },
92 { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
93 { CTR_IQ_4, MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_CCCR4 },
94 { CTR_BPU_2, MSR_P4_BPU_PERFCTR2, MSR_P4_BPU_CCCR2 },
95 { CTR_MS_2, MSR_P4_MS_PERFCTR2, MSR_P4_MS_CCCR2 },
96 { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
97 { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 }
98};
99
100#define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT
101
102/* p4 event codes in libop/op_event.h are indices into this table. */
103
104static struct p4_event_binding p4_events[NUM_EVENTS] = {
105
106 { /* BRANCH_RETIRED */
107 0x05, 0x06,
108 { {CTR_IQ_4, MSR_P4_CRU_ESCR2},
109 {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
110 },
111
112 { /* MISPRED_BRANCH_RETIRED */
113 0x04, 0x03,
114 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
115 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
116 },
117
118 { /* TC_DELIVER_MODE */
119 0x01, 0x01,
120 { { CTR_MS_0, MSR_P4_TC_ESCR0},
121 { CTR_MS_2, MSR_P4_TC_ESCR1} }
122 },
123
124 { /* BPU_FETCH_REQUEST */
125 0x00, 0x03,
126 { { CTR_BPU_0, MSR_P4_BPU_ESCR0},
127 { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
128 },
129
130 { /* ITLB_REFERENCE */
131 0x03, 0x18,
132 { { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
133 { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
134 },
135
136 { /* MEMORY_CANCEL */
137 0x05, 0x02,
138 { { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
139 { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
140 },
141
142 { /* MEMORY_COMPLETE */
143 0x02, 0x08,
144 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
145 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
146 },
147
148 { /* LOAD_PORT_REPLAY */
149 0x02, 0x04,
150 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
151 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
152 },
153
154 { /* STORE_PORT_REPLAY */
155 0x02, 0x05,
156 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
157 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
158 },
159
160 { /* MOB_LOAD_REPLAY */
161 0x02, 0x03,
162 { { CTR_BPU_0, MSR_P4_MOB_ESCR0},
163 { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
164 },
165
166 { /* PAGE_WALK_TYPE */
167 0x04, 0x01,
168 { { CTR_BPU_0, MSR_P4_PMH_ESCR0},
169 { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
170 },
171
172 { /* BSQ_CACHE_REFERENCE */
173 0x07, 0x0c,
174 { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
175 { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
176 },
177
178 { /* IOQ_ALLOCATION */
179 0x06, 0x03,
180 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
181 { 0, 0 } }
182 },
183
184 { /* IOQ_ACTIVE_ENTRIES */
185 0x06, 0x1a,
186 { { CTR_BPU_2, MSR_P4_FSB_ESCR1},
187 { 0, 0 } }
188 },
189
190 { /* FSB_DATA_ACTIVITY */
191 0x06, 0x17,
192 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
193 { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
194 },
195
196 { /* BSQ_ALLOCATION */
197 0x07, 0x05,
198 { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
199 { 0, 0 } }
200 },
201
202 { /* BSQ_ACTIVE_ENTRIES */
203 0x07, 0x06,
204 { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
205 { 0, 0 } }
206 },
207
208 { /* X87_ASSIST */
209 0x05, 0x03,
210 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
211 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
212 },
213
214 { /* SSE_INPUT_ASSIST */
215 0x01, 0x34,
216 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
217 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
218 },
219
220 { /* PACKED_SP_UOP */
221 0x01, 0x08,
222 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
223 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
224 },
225
226 { /* PACKED_DP_UOP */
227 0x01, 0x0c,
228 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
229 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
230 },
231
232 { /* SCALAR_SP_UOP */
233 0x01, 0x0a,
234 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
235 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
236 },
237
238 { /* SCALAR_DP_UOP */
239 0x01, 0x0e,
240 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
241 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
242 },
243
244 { /* 64BIT_MMX_UOP */
245 0x01, 0x02,
246 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
247 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
248 },
249
250 { /* 128BIT_MMX_UOP */
251 0x01, 0x1a,
252 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
253 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
254 },
255
256 { /* X87_FP_UOP */
257 0x01, 0x04,
258 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
259 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
260 },
261
262 { /* X87_SIMD_MOVES_UOP */
263 0x01, 0x2e,
264 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
265 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
266 },
267
268 { /* MACHINE_CLEAR */
269 0x05, 0x02,
270 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
271 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
272 },
273
274 { /* GLOBAL_POWER_EVENTS */
275 0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
276 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
277 { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
278 },
279
280 { /* TC_MS_XFER */
281 0x00, 0x05,
282 { { CTR_MS_0, MSR_P4_MS_ESCR0},
283 { CTR_MS_2, MSR_P4_MS_ESCR1} }
284 },
285
286 { /* UOP_QUEUE_WRITES */
287 0x00, 0x09,
288 { { CTR_MS_0, MSR_P4_MS_ESCR0},
289 { CTR_MS_2, MSR_P4_MS_ESCR1} }
290 },
291
292 { /* FRONT_END_EVENT */
293 0x05, 0x08,
294 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
295 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
296 },
297
298 { /* EXECUTION_EVENT */
299 0x05, 0x0c,
300 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
301 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
302 },
303
304 { /* REPLAY_EVENT */
305 0x05, 0x09,
306 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
307 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
308 },
309
310 { /* INSTR_RETIRED */
311 0x04, 0x02,
312 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
313 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
314 },
315
316 { /* UOPS_RETIRED */
317 0x04, 0x01,
318 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
319 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
320 },
321
322 { /* UOP_TYPE */
323 0x02, 0x02,
324 { { CTR_IQ_4, MSR_P4_RAT_ESCR0},
325 { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
326 },
327
328 { /* RETIRED_MISPRED_BRANCH_TYPE */
329 0x02, 0x05,
330 { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
331 { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
332 },
333
334 { /* RETIRED_BRANCH_TYPE */
335 0x02, 0x04,
336 { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
337 { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
338 }
339};
340
341
342#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7)
343
344#define ESCR_RESERVED_BITS 0x80000003
345#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
346#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
347#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
348#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
349#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
350#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
351#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
352#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
353#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
354
355#define CCCR_RESERVED_BITS 0x38030FFF
356#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
357#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
358#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
359#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
360#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
361#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
362#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
363#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
364#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
365#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
366#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
367
368#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
369#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
370#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
371#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
372#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
373
374
375/* this assigns a "stagger" to the current CPU, which is used throughout
376 the code in this module as an extra array offset, to select the "even"
377 or "odd" part of all the divided resources. */
378static unsigned int get_stagger(void)
379{
380#ifdef CONFIG_SMP
381 int cpu = smp_processor_id();
382 return (cpu != first_cpu(cpu_sibling_map[cpu]));
383#endif
384 return 0;
385}
386
387
388/* finally, mediate access to a real hardware counter
389 by passing a "virtual" counter number to this macro,
390 along with your stagger setting. */
391#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
392
393static unsigned long reset_value[NUM_COUNTERS_NON_HT];
394
395
396static void p4_fill_in_addresses(struct op_msrs * const msrs)
397{
398 unsigned int i;
399 unsigned int addr, cccraddr, stag;
400
401 setup_num_counters();
402 stag = get_stagger();
403
404 /* initialize some registers */
405 for (i = 0; i < num_counters; ++i) {
406 msrs->counters[i].addr = 0;
407 }
408 for (i = 0; i < num_controls; ++i) {
409 msrs->controls[i].addr = 0;
410 }
411
412 /* the counter & cccr registers we pay attention to */
413 for (i = 0; i < num_counters; ++i) {
414 addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
415 cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
416 if (reserve_perfctr_nmi(addr)){
417 msrs->counters[i].addr = addr;
418 msrs->controls[i].addr = cccraddr;
419 }
420 }
421
422 /* 43 ESCR registers in three or four discontiguous groups */
423 for (addr = MSR_P4_BSU_ESCR0 + stag;
424 addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
425 if (reserve_evntsel_nmi(addr))
426 msrs->controls[i].addr = addr;
427 }
428
429 /* no IQ_ESCR0/1 on some models, we save BSU_ESCR0/1 a second time
430 * to avoid a special case in nmi_{save|restore}_registers() */
431 if (boot_cpu_data.x86_model >= 0x3) {
432 for (addr = MSR_P4_BSU_ESCR0 + stag;
433 addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
434 if (reserve_evntsel_nmi(addr))
435 msrs->controls[i].addr = addr;
436 }
437 } else {
438 for (addr = MSR_P4_IQ_ESCR0 + stag;
439 addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
440 if (reserve_evntsel_nmi(addr))
441 msrs->controls[i].addr = addr;
442 }
443 }
444
445 for (addr = MSR_P4_RAT_ESCR0 + stag;
446 addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
447 if (reserve_evntsel_nmi(addr))
448 msrs->controls[i].addr = addr;
449 }
450
451 for (addr = MSR_P4_MS_ESCR0 + stag;
452 addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
453 if (reserve_evntsel_nmi(addr))
454 msrs->controls[i].addr = addr;
455 }
456
457 for (addr = MSR_P4_IX_ESCR0 + stag;
458 addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
459 if (reserve_evntsel_nmi(addr))
460 msrs->controls[i].addr = addr;
461 }
462
463 /* there are 2 remaining non-contiguously located ESCRs */
464
465 if (num_counters == NUM_COUNTERS_NON_HT) {
466 /* standard non-HT CPUs handle both remaining ESCRs*/
467 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
468 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
469 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
470 msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
471
472 } else if (stag == 0) {
473 /* HT CPUs give the first remainder to the even thread, as
474 the 32nd control register */
475 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
476 msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
477
478 } else {
479 /* and two copies of the second to the odd thread,
480 for the 22nd and 23rd control registers */
481 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
482 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
483 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
484 }
485 }
486}
487
488
489static void pmc_setup_one_p4_counter(unsigned int ctr)
490{
491 int i;
492 int const maxbind = 2;
493 unsigned int cccr = 0;
494 unsigned int escr = 0;
495 unsigned int high = 0;
496 unsigned int counter_bit;
497 struct p4_event_binding *ev = NULL;
498 unsigned int stag;
499
500 stag = get_stagger();
501
502 /* convert from counter *number* to counter *bit* */
503 counter_bit = 1 << VIRT_CTR(stag, ctr);
504
505 /* find our event binding structure. */
506 if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
507 printk(KERN_ERR
508 "oprofile: P4 event code 0x%lx out of range\n",
509 counter_config[ctr].event);
510 return;
511 }
512
513 ev = &(p4_events[counter_config[ctr].event - 1]);
514
515 for (i = 0; i < maxbind; i++) {
516 if (ev->bindings[i].virt_counter & counter_bit) {
517
518 /* modify ESCR */
519 ESCR_READ(escr, high, ev, i);
520 ESCR_CLEAR(escr);
521 if (stag == 0) {
522 ESCR_SET_USR_0(escr, counter_config[ctr].user);
523 ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
524 } else {
525 ESCR_SET_USR_1(escr, counter_config[ctr].user);
526 ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
527 }
528 ESCR_SET_EVENT_SELECT(escr, ev->event_select);
529 ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
530 ESCR_WRITE(escr, high, ev, i);
531
532 /* modify CCCR */
533 CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
534 CCCR_CLEAR(cccr);
535 CCCR_SET_REQUIRED_BITS(cccr);
536 CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
537 if (stag == 0) {
538 CCCR_SET_PMI_OVF_0(cccr);
539 } else {
540 CCCR_SET_PMI_OVF_1(cccr);
541 }
542 CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
543 return;
544 }
545 }
546
547 printk(KERN_ERR
548 "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
549 counter_config[ctr].event, stag, ctr);
550}
551
552
553static void p4_setup_ctrs(struct op_msrs const * const msrs)
554{
555 unsigned int i;
556 unsigned int low, high;
557 unsigned int stag;
558
559 stag = get_stagger();
560
561 rdmsr(MSR_IA32_MISC_ENABLE, low, high);
562 if (! MISC_PMC_ENABLED_P(low)) {
563 printk(KERN_ERR "oprofile: P4 PMC not available\n");
564 return;
565 }
566
567 /* clear the cccrs we will use */
568 for (i = 0 ; i < num_counters ; i++) {
569 if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
570 continue;
571 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
572 CCCR_CLEAR(low);
573 CCCR_SET_REQUIRED_BITS(low);
574 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
575 }
576
577 /* clear all escrs (including those outside our concern) */
578 for (i = num_counters; i < num_controls; i++) {
579 if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
580 continue;
581 wrmsr(msrs->controls[i].addr, 0, 0);
582 }
583
584 /* setup all counters */
585 for (i = 0 ; i < num_counters ; ++i) {
586 if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) {
587 reset_value[i] = counter_config[i].count;
588 pmc_setup_one_p4_counter(i);
589 CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
590 } else {
591 reset_value[i] = 0;
592 }
593 }
594}
595
596
597static int p4_check_ctrs(struct pt_regs * const regs,
598 struct op_msrs const * const msrs)
599{
600 unsigned long ctr, low, high, stag, real;
601 int i;
602
603 stag = get_stagger();
604
605 for (i = 0; i < num_counters; ++i) {
606
607 if (!reset_value[i])
608 continue;
609
610 /*
611 * there is some eccentricity in the hardware which
612 * requires that we perform 2 extra corrections:
613 *
614 * - check both the CCCR:OVF flag for overflow and the
615 * counter high bit for un-flagged overflows.
616 *
617 * - write the counter back twice to ensure it gets
618 * updated properly.
619 *
620 * the former seems to be related to extra NMIs happening
621 * during the current NMI; the latter is reported as errata
622 * N15 in intel doc 249199-029, pentium 4 specification
623 * update, though their suggested work-around does not
624 * appear to solve the problem.
625 */
626
627 real = VIRT_CTR(stag, i);
628
629 CCCR_READ(low, high, real);
630 CTR_READ(ctr, high, real);
631 if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
632 oprofile_add_sample(regs, i);
633 CTR_WRITE(reset_value[i], real);
634 CCCR_CLEAR_OVF(low);
635 CCCR_WRITE(low, high, real);
636 CTR_WRITE(reset_value[i], real);
637 }
638 }
639
640 /* P4 quirk: you have to re-unmask the apic vector */
641 apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
642
643 /* See op_model_ppro.c */
644 return 1;
645}
646
647
648static void p4_start(struct op_msrs const * const msrs)
649{
650 unsigned int low, high, stag;
651 int i;
652
653 stag = get_stagger();
654
655 for (i = 0; i < num_counters; ++i) {
656 if (!reset_value[i])
657 continue;
658 CCCR_READ(low, high, VIRT_CTR(stag, i));
659 CCCR_SET_ENABLE(low);
660 CCCR_WRITE(low, high, VIRT_CTR(stag, i));
661 }
662}
663
664
665static void p4_stop(struct op_msrs const * const msrs)
666{
667 unsigned int low, high, stag;
668 int i;
669
670 stag = get_stagger();
671
672 for (i = 0; i < num_counters; ++i) {
673 if (!reset_value[i])
674 continue;
675 CCCR_READ(low, high, VIRT_CTR(stag, i));
676 CCCR_SET_DISABLE(low);
677 CCCR_WRITE(low, high, VIRT_CTR(stag, i));
678 }
679}
680
681static void p4_shutdown(struct op_msrs const * const msrs)
682{
683 int i;
684
685 for (i = 0 ; i < num_counters ; ++i) {
686 if (CTR_IS_RESERVED(msrs,i))
687 release_perfctr_nmi(msrs->counters[i].addr);
688 }
689 /* some of the control registers are specially reserved in
690 * conjunction with the counter registers (hence the starting offset).
691 * This saves a few bits.
692 */
693 for (i = num_counters ; i < num_controls ; ++i) {
694 if (CTRL_IS_RESERVED(msrs,i))
695 release_evntsel_nmi(msrs->controls[i].addr);
696 }
697}
698
699
700#ifdef CONFIG_SMP
701struct op_x86_model_spec const op_p4_ht2_spec = {
702 .num_counters = NUM_COUNTERS_HT2,
703 .num_controls = NUM_CONTROLS_HT2,
704 .fill_in_addresses = &p4_fill_in_addresses,
705 .setup_ctrs = &p4_setup_ctrs,
706 .check_ctrs = &p4_check_ctrs,
707 .start = &p4_start,
708 .stop = &p4_stop,
709 .shutdown = &p4_shutdown
710};
711#endif
712
713struct op_x86_model_spec const op_p4_spec = {
714 .num_counters = NUM_COUNTERS_NON_HT,
715 .num_controls = NUM_CONTROLS_NON_HT,
716 .fill_in_addresses = &p4_fill_in_addresses,
717 .setup_ctrs = &p4_setup_ctrs,
718 .check_ctrs = &p4_check_ctrs,
719 .start = &p4_start,
720 .stop = &p4_stop,
721 .shutdown = &p4_shutdown
722};
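
One more note on the P4 model just above: on a hyper-threaded P4 the physical counters and ESCRs are split between the two logical siblings, which is what get_stagger() and VIRT_CTR() implement; the sibling with stagger 0 uses the first half of the p4_counters[] table (BPU_0, MS_0, FLAME_0, IQ_4) and the sibling with stagger 1 the second half (BPU_2, MS_2, FLAME_2, IQ_5). A tiny illustrative sketch of that index mapping (plain C, no hardware access; num_counters stands in for NUM_COUNTERS_HT2):

#include <stdio.h>

#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))

static const unsigned int num_counters = 4;	/* NUM_COUNTERS_HT2 */

/* same order as the p4_counters[] table above */
static const char *slot_names[8] = {
	"BPU_0", "MS_0", "FLAME_0", "IQ_4",	/* even sibling, stagger 0 */
	"BPU_2", "MS_2", "FLAME_2", "IQ_5",	/* odd sibling, stagger 1 */
};

int main(void)
{
	unsigned int stag, i;

	for (stag = 0; stag < 2; ++stag)
		for (i = 0; i < num_counters; ++i)
			printf("sibling %u, virtual ctr %u -> %s\n",
			       stag, i, slot_names[VIRT_CTR(stag, i)]);
	return 0;
}
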
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
new file mode 100644
index 000000000000..c554f52cb808
--- /dev/null
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -0,0 +1,192 @@
1/**
2 * @file op_model_ppro.h
3 * pentium pro / P6 model-specific MSR operations
4 *
5 * @remark Copyright 2002 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author John Levon
9 * @author Philippe Elie
10 * @author Graydon Hoare
11 */
12
13#include <linux/oprofile.h>
14#include <asm/ptrace.h>
15#include <asm/msr.h>
16#include <asm/apic.h>
17#include <asm/nmi.h>
18
19#include "op_x86_model.h"
20#include "op_counter.h"
21
22#define NUM_COUNTERS 2
23#define NUM_CONTROLS 2
24
25#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
26#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
27#define CTR_32BIT_WRITE(l,msrs,c) \
28 do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), 0);} while (0)
29#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
30
31#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
32#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
33#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
34#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
35#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
36#define CTRL_CLEAR(x) (x &= (1<<21))
37#define CTRL_SET_ENABLE(val) (val |= 1<<20)
38#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16))
39#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
40#define CTRL_SET_UM(val, m) (val |= (m << 8))
41#define CTRL_SET_EVENT(val, e) (val |= e)
42
43static unsigned long reset_value[NUM_COUNTERS];
44
45static void ppro_fill_in_addresses(struct op_msrs * const msrs)
46{
47 int i;
48
49 for (i=0; i < NUM_COUNTERS; i++) {
50 if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
51 msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
52 else
53 msrs->counters[i].addr = 0;
54 }
55
56 for (i=0; i < NUM_CONTROLS; i++) {
57 if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
58 msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
59 else
60 msrs->controls[i].addr = 0;
61 }
62}
63
64
65static void ppro_setup_ctrs(struct op_msrs const * const msrs)
66{
67 unsigned int low, high;
68 int i;
69
70 /* clear all counters */
71 for (i = 0 ; i < NUM_CONTROLS; ++i) {
72 if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
73 continue;
74 CTRL_READ(low, high, msrs, i);
75 CTRL_CLEAR(low);
76 CTRL_WRITE(low, high, msrs, i);
77 }
78
79 /* avoid a false detection of ctr overflows in NMI handler */
80 for (i = 0; i < NUM_COUNTERS; ++i) {
81 if (unlikely(!CTR_IS_RESERVED(msrs,i)))
82 continue;
83 CTR_32BIT_WRITE(1, msrs, i);
84 }
85
86 /* enable active counters */
87 for (i = 0; i < NUM_COUNTERS; ++i) {
88 if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) {
89 reset_value[i] = counter_config[i].count;
90
91 CTR_32BIT_WRITE(counter_config[i].count, msrs, i);
92
93 CTRL_READ(low, high, msrs, i);
94 CTRL_CLEAR(low);
95 CTRL_SET_ENABLE(low);
96 CTRL_SET_USR(low, counter_config[i].user);
97 CTRL_SET_KERN(low, counter_config[i].kernel);
98 CTRL_SET_UM(low, counter_config[i].unit_mask);
99 CTRL_SET_EVENT(low, counter_config[i].event);
100 CTRL_WRITE(low, high, msrs, i);
101 } else {
102 reset_value[i] = 0;
103 }
104 }
105}
106
107
108static int ppro_check_ctrs(struct pt_regs * const regs,
109 struct op_msrs const * const msrs)
110{
111 unsigned int low, high;
112 int i;
113
114 for (i = 0 ; i < NUM_COUNTERS; ++i) {
115 if (!reset_value[i])
116 continue;
117 CTR_READ(low, high, msrs, i);
118 if (CTR_OVERFLOWED(low)) {
119 oprofile_add_sample(regs, i);
120 CTR_32BIT_WRITE(reset_value[i], msrs, i);
121 }
122 }
123
124 /* Only P6 based Pentium M needs to re-unmask the apic vector, but it
125 * doesn't hurt other P6 variants */
126 apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
127
128 /* We can't work out if we really handled an interrupt. We
129 * might have caught a *second* counter just after it overflowed;
130 * the interrupt for this first counter then arrives
131 * and we don't find a counter that's overflowed, so we
132 * would return 0 and get dazed + confused. Instead we always
133 * assume we found an overflow. This sucks.
134 */
135 return 1;
136}
137
138
139static void ppro_start(struct op_msrs const * const msrs)
140{
141 unsigned int low,high;
142 int i;
143
144 for (i = 0; i < NUM_COUNTERS; ++i) {
145 if (reset_value[i]) {
146 CTRL_READ(low, high, msrs, i);
147 CTRL_SET_ACTIVE(low);
148 CTRL_WRITE(low, high, msrs, i);
149 }
150 }
151}
152
153
154static void ppro_stop(struct op_msrs const * const msrs)
155{
156 unsigned int low,high;
157 int i;
158
159 for (i = 0; i < NUM_COUNTERS; ++i) {
160 if (!reset_value[i])
161 continue;
162 CTRL_READ(low, high, msrs, i);
163 CTRL_SET_INACTIVE(low);
164 CTRL_WRITE(low, high, msrs, i);
165 }
166}
167
168static void ppro_shutdown(struct op_msrs const * const msrs)
169{
170 int i;
171
172 for (i = 0 ; i < NUM_COUNTERS ; ++i) {
173 if (CTR_IS_RESERVED(msrs,i))
174 release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
175 }
176 for (i = 0 ; i < NUM_CONTROLS ; ++i) {
177 if (CTRL_IS_RESERVED(msrs,i))
178 release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
179 }
180}
181
182
183struct op_x86_model_spec const op_ppro_spec = {
184 .num_counters = NUM_COUNTERS,
185 .num_controls = NUM_CONTROLS,
186 .fill_in_addresses = &ppro_fill_in_addresses,
187 .setup_ctrs = &ppro_setup_ctrs,
188 .check_ctrs = &ppro_check_ctrs,
189 .start = &ppro_start,
190 .stop = &ppro_stop,
191 .shutdown = &ppro_shutdown
192};
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
new file mode 100644
index 000000000000..abb1aa95b979
--- /dev/null
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -0,0 +1,51 @@
1/**
2 * @file op_x86_model.h
3 * interface to x86 model-specific MSR operations
4 *
5 * @remark Copyright 2002 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author Graydon Hoare
9 */
10
11#ifndef OP_X86_MODEL_H
12#define OP_X86_MODEL_H
13
14struct op_saved_msr {
15 unsigned int high;
16 unsigned int low;
17};
18
19struct op_msr {
20 unsigned long addr;
21 struct op_saved_msr saved;
22};
23
24struct op_msrs {
25 struct op_msr * counters;
26 struct op_msr * controls;
27};
28
29struct pt_regs;
30
31/* The model vtable abstracts the differences between
32 * various x86 CPU models' perfctr support.
33 */
34struct op_x86_model_spec {
35 unsigned int const num_counters;
36 unsigned int const num_controls;
37 void (*fill_in_addresses)(struct op_msrs * const msrs);
38 void (*setup_ctrs)(struct op_msrs const * const msrs);
39 int (*check_ctrs)(struct pt_regs * const regs,
40 struct op_msrs const * const msrs);
41 void (*start)(struct op_msrs const * const msrs);
42 void (*stop)(struct op_msrs const * const msrs);
43 void (*shutdown)(struct op_msrs const * const msrs);
44};
45
46extern struct op_x86_model_spec const op_ppro_spec;
47extern struct op_x86_model_spec const op_p4_spec;
48extern struct op_x86_model_spec const op_p4_ht2_spec;
49extern struct op_x86_model_spec const op_athlon_spec;
50
51#endif /* OP_X86_MODEL_H */