diff options
Diffstat (limited to 'arch/i386/oprofile')
-rw-r--r-- | arch/i386/oprofile/Kconfig | 23 | ||||
-rw-r--r-- | arch/i386/oprofile/Makefile | 12 | ||||
-rw-r--r-- | arch/i386/oprofile/backtrace.c | 111 | ||||
-rw-r--r-- | arch/i386/oprofile/init.c | 48 | ||||
-rw-r--r-- | arch/i386/oprofile/nmi_int.c | 427 | ||||
-rw-r--r-- | arch/i386/oprofile/nmi_timer_int.c | 55 | ||||
-rw-r--r-- | arch/i386/oprofile/op_counter.h | 29 | ||||
-rw-r--r-- | arch/i386/oprofile/op_model_athlon.c | 149 | ||||
-rw-r--r-- | arch/i386/oprofile/op_model_p4.c | 725 | ||||
-rw-r--r-- | arch/i386/oprofile/op_model_ppro.c | 143 | ||||
-rw-r--r-- | arch/i386/oprofile/op_x86_model.h | 50 |
11 files changed, 1772 insertions, 0 deletions
diff --git a/arch/i386/oprofile/Kconfig b/arch/i386/oprofile/Kconfig new file mode 100644 index 000000000000..5ade19801b97 --- /dev/null +++ b/arch/i386/oprofile/Kconfig | |||
@@ -0,0 +1,23 @@ | |||
1 | |||
2 | menu "Profiling support" | ||
3 | depends on EXPERIMENTAL | ||
4 | |||
5 | config PROFILING | ||
6 | bool "Profiling support (EXPERIMENTAL)" | ||
7 | help | ||
8 | Say Y here to enable the extended profiling support mechanisms used | ||
9 | by profilers such as OProfile. | ||
10 | |||
11 | |||
12 | config OPROFILE | ||
13 | tristate "OProfile system profiling (EXPERIMENTAL)" | ||
14 | depends on PROFILING | ||
15 | help | ||
16 | OProfile is a profiling system capable of profiling the | ||
17 | whole system, including the kernel, kernel modules, libraries, | ||
18 | and applications. | ||
19 | |||
20 | If unsure, say N. | ||
21 | |||
22 | endmenu | ||
23 | |||
diff --git a/arch/i386/oprofile/Makefile b/arch/i386/oprofile/Makefile new file mode 100644 index 000000000000..30f3eb366667 --- /dev/null +++ b/arch/i386/oprofile/Makefile | |||
@@ -0,0 +1,12 @@ | |||
1 | obj-$(CONFIG_OPROFILE) += oprofile.o | ||
2 | |||
3 | DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ | ||
4 | oprof.o cpu_buffer.o buffer_sync.o \ | ||
5 | event_buffer.o oprofile_files.o \ | ||
6 | oprofilefs.o oprofile_stats.o \ | ||
7 | timer_int.o ) | ||
8 | |||
9 | oprofile-y := $(DRIVER_OBJS) init.o backtrace.o | ||
10 | oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o \ | ||
11 | op_model_ppro.o op_model_p4.o | ||
12 | oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o | ||
diff --git a/arch/i386/oprofile/backtrace.c b/arch/i386/oprofile/backtrace.c new file mode 100644 index 000000000000..52d72e074f7f --- /dev/null +++ b/arch/i386/oprofile/backtrace.c | |||
@@ -0,0 +1,111 @@ | |||
1 | /** | ||
2 | * @file backtrace.c | ||
3 | * | ||
4 | * @remark Copyright 2002 OProfile authors | ||
5 | * @remark Read the file COPYING | ||
6 | * | ||
7 | * @author John Levon | ||
8 | * @author David Smith | ||
9 | */ | ||
10 | |||
11 | #include <linux/oprofile.h> | ||
12 | #include <linux/sched.h> | ||
13 | #include <linux/mm.h> | ||
14 | #include <asm/ptrace.h> | ||
15 | |||
/*
 * One link of the frame-pointer chain walked by the backtrace code:
 * the saved frame pointer of the caller, immediately followed by the
 * return address that gets recorded in the trace.
 */
struct frame_head {
	struct frame_head *ebp;
	unsigned long ret;
} __attribute__((packed));
20 | |||
21 | static struct frame_head * | ||
22 | dump_backtrace(struct frame_head * head) | ||
23 | { | ||
24 | oprofile_add_trace(head->ret); | ||
25 | |||
26 | /* frame pointers should strictly progress back up the stack | ||
27 | * (towards higher addresses) */ | ||
28 | if (head >= head->ebp) | ||
29 | return NULL; | ||
30 | |||
31 | return head->ebp; | ||
32 | } | ||
33 | |||
34 | /* check that the page(s) containing the frame head are present */ | ||
35 | static int pages_present(struct frame_head * head) | ||
36 | { | ||
37 | struct mm_struct * mm = current->mm; | ||
38 | |||
39 | /* FIXME: only necessary once per page */ | ||
40 | if (!check_user_page_readable(mm, (unsigned long)head)) | ||
41 | return 0; | ||
42 | |||
43 | return check_user_page_readable(mm, (unsigned long)(head + 1)); | ||
44 | } | ||
45 | |||
46 | /* | ||
47 | * | | /\ Higher addresses | ||
48 | * | | | ||
49 | * --------------- stack base (address of current_thread_info) | ||
50 | * | thread info | | ||
51 | * . . | ||
52 | * | stack | | ||
53 | * --------------- saved regs->ebp value if valid (frame_head address) | ||
54 | * . . | ||
55 | * --------------- struct pt_regs stored on stack (struct pt_regs *) | ||
56 | * | | | ||
57 | * . . | ||
58 | * | | | ||
59 | * --------------- %esp | ||
60 | * | | | ||
61 | * | | \/ Lower addresses | ||
62 | * | ||
63 | * Thus, &pt_regs <-> stack base restricts the valid(ish) ebp values | ||
64 | */ | ||
#ifdef CONFIG_FRAME_POINTER
/*
 * A kernel frame head is only followed when it lies strictly between
 * the saved pt_regs and the end of the THREAD_SIZE-aligned region that
 * contains them.
 */
static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs)
{
	unsigned long const lower = (unsigned long)regs;
	unsigned long const upper = (lower & ~(THREAD_SIZE - 1)) + THREAD_SIZE;
	unsigned long const addr = (unsigned long)head;

	if (addr <= lower)
		return 0;

	return addr < upper;
}
#else
/* without fp, it's just junk: never follow a kernel frame */
static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs)
{
	return 0;
}
#endif
81 | |||
82 | |||
83 | void | ||
84 | x86_backtrace(struct pt_regs * const regs, unsigned int depth) | ||
85 | { | ||
86 | struct frame_head *head; | ||
87 | |||
88 | #ifdef CONFIG_X86_64 | ||
89 | head = (struct frame_head *)regs->rbp; | ||
90 | #else | ||
91 | head = (struct frame_head *)regs->ebp; | ||
92 | #endif | ||
93 | |||
94 | if (!user_mode(regs)) { | ||
95 | while (depth-- && valid_kernel_stack(head, regs)) | ||
96 | head = dump_backtrace(head); | ||
97 | return; | ||
98 | } | ||
99 | |||
100 | #ifdef CONFIG_SMP | ||
101 | if (!spin_trylock(¤t->mm->page_table_lock)) | ||
102 | return; | ||
103 | #endif | ||
104 | |||
105 | while (depth-- && head && pages_present(head)) | ||
106 | head = dump_backtrace(head); | ||
107 | |||
108 | #ifdef CONFIG_SMP | ||
109 | spin_unlock(¤t->mm->page_table_lock); | ||
110 | #endif | ||
111 | } | ||
diff --git a/arch/i386/oprofile/init.c b/arch/i386/oprofile/init.c new file mode 100644 index 000000000000..c90332de582b --- /dev/null +++ b/arch/i386/oprofile/init.c | |||
@@ -0,0 +1,48 @@ | |||
1 | /** | ||
2 | * @file init.c | ||
3 | * | ||
4 | * @remark Copyright 2002 OProfile authors | ||
5 | * @remark Read the file COPYING | ||
6 | * | ||
7 | * @author John Levon <levon@movementarian.org> | ||
8 | */ | ||
9 | |||
10 | #include <linux/oprofile.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/errno.h> | ||
13 | |||
14 | /* We support CPUs that have performance counters like the Pentium Pro | ||
15 | * with the NMI mode driver. | ||
16 | */ | ||
17 | |||
18 | extern int nmi_init(struct oprofile_operations * ops); | ||
19 | extern int nmi_timer_init(struct oprofile_operations * ops); | ||
20 | extern void nmi_exit(void); | ||
21 | extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); | ||
22 | |||
23 | |||
24 | int __init oprofile_arch_init(struct oprofile_operations * ops) | ||
25 | { | ||
26 | int ret; | ||
27 | |||
28 | ret = -ENODEV; | ||
29 | |||
30 | #ifdef CONFIG_X86_LOCAL_APIC | ||
31 | ret = nmi_init(ops); | ||
32 | #endif | ||
33 | #ifdef CONFIG_X86_IO_APIC | ||
34 | if (ret < 0) | ||
35 | ret = nmi_timer_init(ops); | ||
36 | #endif | ||
37 | ops->backtrace = x86_backtrace; | ||
38 | |||
39 | return ret; | ||
40 | } | ||
41 | |||
42 | |||
/* Architecture teardown hook; only the NMI driver has anything to undo. */
void oprofile_arch_exit(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
	nmi_exit();
#endif
}
diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c new file mode 100644 index 000000000000..3492d961d3f1 --- /dev/null +++ b/arch/i386/oprofile/nmi_int.c | |||
@@ -0,0 +1,427 @@ | |||
1 | /** | ||
2 | * @file nmi_int.c | ||
3 | * | ||
4 | * @remark Copyright 2002 OProfile authors | ||
5 | * @remark Read the file COPYING | ||
6 | * | ||
7 | * @author John Levon <levon@movementarian.org> | ||
8 | */ | ||
9 | |||
10 | #include <linux/init.h> | ||
11 | #include <linux/notifier.h> | ||
12 | #include <linux/smp.h> | ||
13 | #include <linux/oprofile.h> | ||
14 | #include <linux/sysdev.h> | ||
15 | #include <linux/slab.h> | ||
16 | #include <asm/nmi.h> | ||
17 | #include <asm/msr.h> | ||
18 | #include <asm/apic.h> | ||
19 | |||
20 | #include "op_counter.h" | ||
21 | #include "op_x86_model.h" | ||
22 | |||
23 | static struct op_x86_model_spec const * model; | ||
24 | static struct op_msrs cpu_msrs[NR_CPUS]; | ||
25 | static unsigned long saved_lvtpc[NR_CPUS]; | ||
26 | |||
27 | static int nmi_start(void); | ||
28 | static void nmi_stop(void); | ||
29 | |||
30 | /* 0 == registered but off, 1 == registered and on */ | ||
31 | static int nmi_enabled = 0; | ||
32 | |||
33 | #ifdef CONFIG_PM | ||
34 | |||
35 | static int nmi_suspend(struct sys_device *dev, u32 state) | ||
36 | { | ||
37 | if (nmi_enabled == 1) | ||
38 | nmi_stop(); | ||
39 | return 0; | ||
40 | } | ||
41 | |||
42 | |||
43 | static int nmi_resume(struct sys_device *dev) | ||
44 | { | ||
45 | if (nmi_enabled == 1) | ||
46 | nmi_start(); | ||
47 | return 0; | ||
48 | } | ||
49 | |||
50 | |||
51 | static struct sysdev_class oprofile_sysclass = { | ||
52 | set_kset_name("oprofile"), | ||
53 | .resume = nmi_resume, | ||
54 | .suspend = nmi_suspend, | ||
55 | }; | ||
56 | |||
57 | |||
58 | static struct sys_device device_oprofile = { | ||
59 | .id = 0, | ||
60 | .cls = &oprofile_sysclass, | ||
61 | }; | ||
62 | |||
63 | |||
64 | static int __init init_driverfs(void) | ||
65 | { | ||
66 | int error; | ||
67 | if (!(error = sysdev_class_register(&oprofile_sysclass))) | ||
68 | error = sysdev_register(&device_oprofile); | ||
69 | return error; | ||
70 | } | ||
71 | |||
72 | |||
73 | static void exit_driverfs(void) | ||
74 | { | ||
75 | sysdev_unregister(&device_oprofile); | ||
76 | sysdev_class_unregister(&oprofile_sysclass); | ||
77 | } | ||
78 | |||
79 | #else | ||
80 | #define init_driverfs() do { } while (0) | ||
81 | #define exit_driverfs() do { } while (0) | ||
82 | #endif /* CONFIG_PM */ | ||
83 | |||
84 | |||
85 | static int nmi_callback(struct pt_regs * regs, int cpu) | ||
86 | { | ||
87 | return model->check_ctrs(regs, &cpu_msrs[cpu]); | ||
88 | } | ||
89 | |||
90 | |||
91 | static void nmi_cpu_save_registers(struct op_msrs * msrs) | ||
92 | { | ||
93 | unsigned int const nr_ctrs = model->num_counters; | ||
94 | unsigned int const nr_ctrls = model->num_controls; | ||
95 | struct op_msr * counters = msrs->counters; | ||
96 | struct op_msr * controls = msrs->controls; | ||
97 | unsigned int i; | ||
98 | |||
99 | for (i = 0; i < nr_ctrs; ++i) { | ||
100 | rdmsr(counters[i].addr, | ||
101 | counters[i].saved.low, | ||
102 | counters[i].saved.high); | ||
103 | } | ||
104 | |||
105 | for (i = 0; i < nr_ctrls; ++i) { | ||
106 | rdmsr(controls[i].addr, | ||
107 | controls[i].saved.low, | ||
108 | controls[i].saved.high); | ||
109 | } | ||
110 | } | ||
111 | |||
112 | |||
113 | static void nmi_save_registers(void * dummy) | ||
114 | { | ||
115 | int cpu = smp_processor_id(); | ||
116 | struct op_msrs * msrs = &cpu_msrs[cpu]; | ||
117 | model->fill_in_addresses(msrs); | ||
118 | nmi_cpu_save_registers(msrs); | ||
119 | } | ||
120 | |||
121 | |||
122 | static void free_msrs(void) | ||
123 | { | ||
124 | int i; | ||
125 | for (i = 0; i < NR_CPUS; ++i) { | ||
126 | kfree(cpu_msrs[i].counters); | ||
127 | cpu_msrs[i].counters = NULL; | ||
128 | kfree(cpu_msrs[i].controls); | ||
129 | cpu_msrs[i].controls = NULL; | ||
130 | } | ||
131 | } | ||
132 | |||
133 | |||
134 | static int allocate_msrs(void) | ||
135 | { | ||
136 | int success = 1; | ||
137 | size_t controls_size = sizeof(struct op_msr) * model->num_controls; | ||
138 | size_t counters_size = sizeof(struct op_msr) * model->num_counters; | ||
139 | |||
140 | int i; | ||
141 | for (i = 0; i < NR_CPUS; ++i) { | ||
142 | if (!cpu_online(i)) | ||
143 | continue; | ||
144 | |||
145 | cpu_msrs[i].counters = kmalloc(counters_size, GFP_KERNEL); | ||
146 | if (!cpu_msrs[i].counters) { | ||
147 | success = 0; | ||
148 | break; | ||
149 | } | ||
150 | cpu_msrs[i].controls = kmalloc(controls_size, GFP_KERNEL); | ||
151 | if (!cpu_msrs[i].controls) { | ||
152 | success = 0; | ||
153 | break; | ||
154 | } | ||
155 | } | ||
156 | |||
157 | if (!success) | ||
158 | free_msrs(); | ||
159 | |||
160 | return success; | ||
161 | } | ||
162 | |||
163 | |||
164 | static void nmi_cpu_setup(void * dummy) | ||
165 | { | ||
166 | int cpu = smp_processor_id(); | ||
167 | struct op_msrs * msrs = &cpu_msrs[cpu]; | ||
168 | spin_lock(&oprofilefs_lock); | ||
169 | model->setup_ctrs(msrs); | ||
170 | spin_unlock(&oprofilefs_lock); | ||
171 | saved_lvtpc[cpu] = apic_read(APIC_LVTPC); | ||
172 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
173 | } | ||
174 | |||
175 | |||
176 | static int nmi_setup(void) | ||
177 | { | ||
178 | if (!allocate_msrs()) | ||
179 | return -ENOMEM; | ||
180 | |||
181 | /* We walk a thin line between law and rape here. | ||
182 | * We need to be careful to install our NMI handler | ||
183 | * without actually triggering any NMIs as this will | ||
184 | * break the core code horrifically. | ||
185 | */ | ||
186 | if (reserve_lapic_nmi() < 0) { | ||
187 | free_msrs(); | ||
188 | return -EBUSY; | ||
189 | } | ||
190 | /* We need to serialize save and setup for HT because the subset | ||
191 | * of msrs are distinct for save and setup operations | ||
192 | */ | ||
193 | on_each_cpu(nmi_save_registers, NULL, 0, 1); | ||
194 | on_each_cpu(nmi_cpu_setup, NULL, 0, 1); | ||
195 | set_nmi_callback(nmi_callback); | ||
196 | nmi_enabled = 1; | ||
197 | return 0; | ||
198 | } | ||
199 | |||
200 | |||
201 | static void nmi_restore_registers(struct op_msrs * msrs) | ||
202 | { | ||
203 | unsigned int const nr_ctrs = model->num_counters; | ||
204 | unsigned int const nr_ctrls = model->num_controls; | ||
205 | struct op_msr * counters = msrs->counters; | ||
206 | struct op_msr * controls = msrs->controls; | ||
207 | unsigned int i; | ||
208 | |||
209 | for (i = 0; i < nr_ctrls; ++i) { | ||
210 | wrmsr(controls[i].addr, | ||
211 | controls[i].saved.low, | ||
212 | controls[i].saved.high); | ||
213 | } | ||
214 | |||
215 | for (i = 0; i < nr_ctrs; ++i) { | ||
216 | wrmsr(counters[i].addr, | ||
217 | counters[i].saved.low, | ||
218 | counters[i].saved.high); | ||
219 | } | ||
220 | } | ||
221 | |||
222 | |||
223 | static void nmi_cpu_shutdown(void * dummy) | ||
224 | { | ||
225 | unsigned int v; | ||
226 | int cpu = smp_processor_id(); | ||
227 | struct op_msrs * msrs = &cpu_msrs[cpu]; | ||
228 | |||
229 | /* restoring APIC_LVTPC can trigger an apic error because the delivery | ||
230 | * mode and vector nr combination can be illegal. That's by design: on | ||
231 | * power on apic lvt contain a zero vector nr which are legal only for | ||
232 | * NMI delivery mode. So inhibit apic err before restoring lvtpc | ||
233 | */ | ||
234 | v = apic_read(APIC_LVTERR); | ||
235 | apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); | ||
236 | apic_write(APIC_LVTPC, saved_lvtpc[cpu]); | ||
237 | apic_write(APIC_LVTERR, v); | ||
238 | nmi_restore_registers(msrs); | ||
239 | } | ||
240 | |||
241 | |||
242 | static void nmi_shutdown(void) | ||
243 | { | ||
244 | nmi_enabled = 0; | ||
245 | on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1); | ||
246 | unset_nmi_callback(); | ||
247 | release_lapic_nmi(); | ||
248 | free_msrs(); | ||
249 | } | ||
250 | |||
251 | |||
252 | static void nmi_cpu_start(void * dummy) | ||
253 | { | ||
254 | struct op_msrs const * msrs = &cpu_msrs[smp_processor_id()]; | ||
255 | model->start(msrs); | ||
256 | } | ||
257 | |||
258 | |||
259 | static int nmi_start(void) | ||
260 | { | ||
261 | on_each_cpu(nmi_cpu_start, NULL, 0, 1); | ||
262 | return 0; | ||
263 | } | ||
264 | |||
265 | |||
266 | static void nmi_cpu_stop(void * dummy) | ||
267 | { | ||
268 | struct op_msrs const * msrs = &cpu_msrs[smp_processor_id()]; | ||
269 | model->stop(msrs); | ||
270 | } | ||
271 | |||
272 | |||
273 | static void nmi_stop(void) | ||
274 | { | ||
275 | on_each_cpu(nmi_cpu_stop, NULL, 0, 1); | ||
276 | } | ||
277 | |||
278 | |||
279 | struct op_counter_config counter_config[OP_MAX_COUNTER]; | ||
280 | |||
281 | static int nmi_create_files(struct super_block * sb, struct dentry * root) | ||
282 | { | ||
283 | unsigned int i; | ||
284 | |||
285 | for (i = 0; i < model->num_counters; ++i) { | ||
286 | struct dentry * dir; | ||
287 | char buf[2]; | ||
288 | |||
289 | snprintf(buf, 2, "%d", i); | ||
290 | dir = oprofilefs_mkdir(sb, root, buf); | ||
291 | oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); | ||
292 | oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event); | ||
293 | oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count); | ||
294 | oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); | ||
295 | oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); | ||
296 | oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); | ||
297 | } | ||
298 | |||
299 | return 0; | ||
300 | } | ||
301 | |||
302 | |||
303 | static int __init p4_init(char ** cpu_type) | ||
304 | { | ||
305 | __u8 cpu_model = boot_cpu_data.x86_model; | ||
306 | |||
307 | if (cpu_model > 4) | ||
308 | return 0; | ||
309 | |||
310 | #ifndef CONFIG_SMP | ||
311 | *cpu_type = "i386/p4"; | ||
312 | model = &op_p4_spec; | ||
313 | return 1; | ||
314 | #else | ||
315 | switch (smp_num_siblings) { | ||
316 | case 1: | ||
317 | *cpu_type = "i386/p4"; | ||
318 | model = &op_p4_spec; | ||
319 | return 1; | ||
320 | |||
321 | case 2: | ||
322 | *cpu_type = "i386/p4-ht"; | ||
323 | model = &op_p4_ht2_spec; | ||
324 | return 1; | ||
325 | } | ||
326 | #endif | ||
327 | |||
328 | printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n"); | ||
329 | printk(KERN_INFO "oprofile: Reverting to timer mode.\n"); | ||
330 | return 0; | ||
331 | } | ||
332 | |||
333 | |||
334 | static int __init ppro_init(char ** cpu_type) | ||
335 | { | ||
336 | __u8 cpu_model = boot_cpu_data.x86_model; | ||
337 | |||
338 | if (cpu_model > 0xd) | ||
339 | return 0; | ||
340 | |||
341 | if (cpu_model == 9) { | ||
342 | *cpu_type = "i386/p6_mobile"; | ||
343 | } else if (cpu_model > 5) { | ||
344 | *cpu_type = "i386/piii"; | ||
345 | } else if (cpu_model > 2) { | ||
346 | *cpu_type = "i386/pii"; | ||
347 | } else { | ||
348 | *cpu_type = "i386/ppro"; | ||
349 | } | ||
350 | |||
351 | model = &op_ppro_spec; | ||
352 | return 1; | ||
353 | } | ||
354 | |||
355 | /* in order to get driverfs right */ | ||
356 | static int using_nmi; | ||
357 | |||
358 | int __init nmi_init(struct oprofile_operations *ops) | ||
359 | { | ||
360 | __u8 vendor = boot_cpu_data.x86_vendor; | ||
361 | __u8 family = boot_cpu_data.x86; | ||
362 | char *cpu_type; | ||
363 | |||
364 | if (!cpu_has_apic) | ||
365 | return -ENODEV; | ||
366 | |||
367 | switch (vendor) { | ||
368 | case X86_VENDOR_AMD: | ||
369 | /* Needs to be at least an Athlon (or hammer in 32bit mode) */ | ||
370 | |||
371 | switch (family) { | ||
372 | default: | ||
373 | return -ENODEV; | ||
374 | case 6: | ||
375 | model = &op_athlon_spec; | ||
376 | cpu_type = "i386/athlon"; | ||
377 | break; | ||
378 | case 0xf: | ||
379 | model = &op_athlon_spec; | ||
380 | /* Actually it could be i386/hammer too, but give | ||
381 | user space an consistent name. */ | ||
382 | cpu_type = "x86-64/hammer"; | ||
383 | break; | ||
384 | } | ||
385 | break; | ||
386 | |||
387 | case X86_VENDOR_INTEL: | ||
388 | switch (family) { | ||
389 | /* Pentium IV */ | ||
390 | case 0xf: | ||
391 | if (!p4_init(&cpu_type)) | ||
392 | return -ENODEV; | ||
393 | break; | ||
394 | |||
395 | /* A P6-class processor */ | ||
396 | case 6: | ||
397 | if (!ppro_init(&cpu_type)) | ||
398 | return -ENODEV; | ||
399 | break; | ||
400 | |||
401 | default: | ||
402 | return -ENODEV; | ||
403 | } | ||
404 | break; | ||
405 | |||
406 | default: | ||
407 | return -ENODEV; | ||
408 | } | ||
409 | |||
410 | init_driverfs(); | ||
411 | using_nmi = 1; | ||
412 | ops->create_files = nmi_create_files; | ||
413 | ops->setup = nmi_setup; | ||
414 | ops->shutdown = nmi_shutdown; | ||
415 | ops->start = nmi_start; | ||
416 | ops->stop = nmi_stop; | ||
417 | ops->cpu_type = cpu_type; | ||
418 | printk(KERN_INFO "oprofile: using NMI interrupt.\n"); | ||
419 | return 0; | ||
420 | } | ||
421 | |||
422 | |||
423 | void nmi_exit(void) | ||
424 | { | ||
425 | if (using_nmi) | ||
426 | exit_driverfs(); | ||
427 | } | ||
diff --git a/arch/i386/oprofile/nmi_timer_int.c b/arch/i386/oprofile/nmi_timer_int.c new file mode 100644 index 000000000000..b2e462abf337 --- /dev/null +++ b/arch/i386/oprofile/nmi_timer_int.c | |||
@@ -0,0 +1,55 @@ | |||
1 | /** | ||
2 | * @file nmi_timer_int.c | ||
3 | * | ||
4 | * @remark Copyright 2003 OProfile authors | ||
5 | * @remark Read the file COPYING | ||
6 | * | ||
7 | * @author Zwane Mwaikambo <zwane@linuxpower.ca> | ||
8 | */ | ||
9 | |||
10 | #include <linux/init.h> | ||
11 | #include <linux/smp.h> | ||
12 | #include <linux/irq.h> | ||
13 | #include <linux/oprofile.h> | ||
14 | #include <linux/rcupdate.h> | ||
15 | |||
16 | |||
17 | #include <asm/nmi.h> | ||
18 | #include <asm/apic.h> | ||
19 | #include <asm/ptrace.h> | ||
20 | |||
/* NMI handler for timer mode: log one sample against event 0. Always returns 1. */
static int nmi_timer_callback(struct pt_regs * regs, int cpu)
{
	oprofile_add_sample(regs, 0);

	return 1;
}
26 | |||
27 | static int timer_start(void) | ||
28 | { | ||
29 | disable_timer_nmi_watchdog(); | ||
30 | set_nmi_callback(nmi_timer_callback); | ||
31 | return 0; | ||
32 | } | ||
33 | |||
34 | |||
/*
 * Stop timer-mode profiling: re-enable the timer NMI watchdog, remove
 * our NMI callback, then call synchronize_kernel() before returning.
 */
static void timer_stop(void)
{
	enable_timer_nmi_watchdog();
	unset_nmi_callback();

	synchronize_kernel();
}
41 | |||
42 | |||
43 | int __init nmi_timer_init(struct oprofile_operations * ops) | ||
44 | { | ||
45 | extern int nmi_active; | ||
46 | |||
47 | if (nmi_active <= 0) | ||
48 | return -ENODEV; | ||
49 | |||
50 | ops->start = timer_start; | ||
51 | ops->stop = timer_stop; | ||
52 | ops->cpu_type = "timer"; | ||
53 | printk(KERN_INFO "oprofile: using NMI timer interrupt.\n"); | ||
54 | return 0; | ||
55 | } | ||
diff --git a/arch/i386/oprofile/op_counter.h b/arch/i386/oprofile/op_counter.h new file mode 100644 index 000000000000..2880b15c4675 --- /dev/null +++ b/arch/i386/oprofile/op_counter.h | |||
@@ -0,0 +1,29 @@ | |||
1 | /** | ||
2 | * @file op_counter.h | ||
3 | * | ||
4 | * @remark Copyright 2002 OProfile authors | ||
5 | * @remark Read the file COPYING | ||
6 | * | ||
7 | * @author John Levon | ||
8 | */ | ||
9 | |||
10 | #ifndef OP_COUNTER_H | ||
11 | #define OP_COUNTER_H | ||
12 | |||
13 | #define OP_MAX_COUNTER 8 | ||
14 | |||
/*
 * Configuration of one performance counter, as set via oprofilefs.
 * Every field is exported there as an unsigned long.
 */
struct op_counter_config {
	unsigned long count;		/* value the counter is reloaded with */
	unsigned long enabled;		/* non-zero: counter is in use */
	unsigned long event;		/* event selector */
	unsigned long kernel;		/* count while in kernel mode */
	unsigned long user;		/* count while in user mode */
	unsigned long unit_mask;	/* unit mask for the event */
};
26 | |||
27 | extern struct op_counter_config counter_config[]; | ||
28 | |||
29 | #endif /* OP_COUNTER_H */ | ||
diff --git a/arch/i386/oprofile/op_model_athlon.c b/arch/i386/oprofile/op_model_athlon.c new file mode 100644 index 000000000000..3ad9a72a5036 --- /dev/null +++ b/arch/i386/oprofile/op_model_athlon.c | |||
@@ -0,0 +1,149 @@ | |||
1 | /** | ||
2 | * @file op_model_athlon.c | ||
3 | * athlon / K7 model-specific MSR operations | ||
4 | * | ||
5 | * @remark Copyright 2002 OProfile authors | ||
6 | * @remark Read the file COPYING | ||
7 | * | ||
8 | * @author John Levon | ||
9 | * @author Philippe Elie | ||
10 | * @author Graydon Hoare | ||
11 | */ | ||
12 | |||
13 | #include <linux/oprofile.h> | ||
14 | #include <asm/ptrace.h> | ||
15 | #include <asm/msr.h> | ||
16 | |||
17 | #include "op_x86_model.h" | ||
18 | #include "op_counter.h" | ||
19 | |||
20 | #define NUM_COUNTERS 4 | ||
21 | #define NUM_CONTROLS 4 | ||
22 | |||
/*
 * Counter MSR helpers.  CTR_WRITE stores the negated count in the low
 * word and -1 in the high word; CTR_OVERFLOWED treats a clear bit 31 of
 * the low word as "overflowed".
 *
 * Every macro argument is parenthesized so that an expression argument
 * (e.g. "a | b") is evaluated as a unit.
 */
#define CTR_READ(l,h,msrs,c) do {rdmsr((msrs)->counters[(c)].addr, (l), (h));} while (0)
#define CTR_WRITE(l,msrs,c) do {wrmsr((msrs)->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))

/*
 * Event-select (control) MSR helpers operating on the low word:
 * bit 22 = counter active, bit 20 = enable bit set by CTRL_SET_ENABLE,
 * bit 17 = count in kernel mode, bit 16 = count in user mode,
 * bits 8 and up = unit mask, low bits = event.  CTRL_CLEAR keeps only
 * bit 21 and wipes everything else.
 */
#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs)->controls[(c)].addr, (l), (h));} while (0)
#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs)->controls[(c)].addr, (l), (h));} while (0)
#define CTRL_SET_ACTIVE(n) ((n) |= (1<<22))
#define CTRL_SET_INACTIVE(n) ((n) &= ~(1<<22))
#define CTRL_CLEAR(x) ((x) &= (1<<21))
#define CTRL_SET_ENABLE(val) ((val) |= 1<<20)
#define CTRL_SET_USR(val,u) ((val) |= (((u) & 1) << 16))
#define CTRL_SET_KERN(val,k) ((val) |= (((k) & 1) << 17))
#define CTRL_SET_UM(val, m) ((val) |= ((m) << 8))
#define CTRL_SET_EVENT(val, e) ((val) |= (e))
37 | |||
38 | static unsigned long reset_value[NUM_COUNTERS]; | ||
39 | |||
40 | static void athlon_fill_in_addresses(struct op_msrs * const msrs) | ||
41 | { | ||
42 | msrs->counters[0].addr = MSR_K7_PERFCTR0; | ||
43 | msrs->counters[1].addr = MSR_K7_PERFCTR1; | ||
44 | msrs->counters[2].addr = MSR_K7_PERFCTR2; | ||
45 | msrs->counters[3].addr = MSR_K7_PERFCTR3; | ||
46 | |||
47 | msrs->controls[0].addr = MSR_K7_EVNTSEL0; | ||
48 | msrs->controls[1].addr = MSR_K7_EVNTSEL1; | ||
49 | msrs->controls[2].addr = MSR_K7_EVNTSEL2; | ||
50 | msrs->controls[3].addr = MSR_K7_EVNTSEL3; | ||
51 | } | ||
52 | |||
53 | |||
54 | static void athlon_setup_ctrs(struct op_msrs const * const msrs) | ||
55 | { | ||
56 | unsigned int low, high; | ||
57 | int i; | ||
58 | |||
59 | /* clear all counters */ | ||
60 | for (i = 0 ; i < NUM_CONTROLS; ++i) { | ||
61 | CTRL_READ(low, high, msrs, i); | ||
62 | CTRL_CLEAR(low); | ||
63 | CTRL_WRITE(low, high, msrs, i); | ||
64 | } | ||
65 | |||
66 | /* avoid a false detection of ctr overflows in NMI handler */ | ||
67 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
68 | CTR_WRITE(1, msrs, i); | ||
69 | } | ||
70 | |||
71 | /* enable active counters */ | ||
72 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
73 | if (counter_config[i].enabled) { | ||
74 | reset_value[i] = counter_config[i].count; | ||
75 | |||
76 | CTR_WRITE(counter_config[i].count, msrs, i); | ||
77 | |||
78 | CTRL_READ(low, high, msrs, i); | ||
79 | CTRL_CLEAR(low); | ||
80 | CTRL_SET_ENABLE(low); | ||
81 | CTRL_SET_USR(low, counter_config[i].user); | ||
82 | CTRL_SET_KERN(low, counter_config[i].kernel); | ||
83 | CTRL_SET_UM(low, counter_config[i].unit_mask); | ||
84 | CTRL_SET_EVENT(low, counter_config[i].event); | ||
85 | CTRL_WRITE(low, high, msrs, i); | ||
86 | } else { | ||
87 | reset_value[i] = 0; | ||
88 | } | ||
89 | } | ||
90 | } | ||
91 | |||
92 | |||
93 | static int athlon_check_ctrs(struct pt_regs * const regs, | ||
94 | struct op_msrs const * const msrs) | ||
95 | { | ||
96 | unsigned int low, high; | ||
97 | int i; | ||
98 | |||
99 | for (i = 0 ; i < NUM_COUNTERS; ++i) { | ||
100 | CTR_READ(low, high, msrs, i); | ||
101 | if (CTR_OVERFLOWED(low)) { | ||
102 | oprofile_add_sample(regs, i); | ||
103 | CTR_WRITE(reset_value[i], msrs, i); | ||
104 | } | ||
105 | } | ||
106 | |||
107 | /* See op_model_ppro.c */ | ||
108 | return 1; | ||
109 | } | ||
110 | |||
111 | |||
112 | static void athlon_start(struct op_msrs const * const msrs) | ||
113 | { | ||
114 | unsigned int low, high; | ||
115 | int i; | ||
116 | for (i = 0 ; i < NUM_COUNTERS ; ++i) { | ||
117 | if (reset_value[i]) { | ||
118 | CTRL_READ(low, high, msrs, i); | ||
119 | CTRL_SET_ACTIVE(low); | ||
120 | CTRL_WRITE(low, high, msrs, i); | ||
121 | } | ||
122 | } | ||
123 | } | ||
124 | |||
125 | |||
126 | static void athlon_stop(struct op_msrs const * const msrs) | ||
127 | { | ||
128 | unsigned int low,high; | ||
129 | int i; | ||
130 | |||
131 | /* Subtle: stop on all counters to avoid race with | ||
132 | * setting our pm callback */ | ||
133 | for (i = 0 ; i < NUM_COUNTERS ; ++i) { | ||
134 | CTRL_READ(low, high, msrs, i); | ||
135 | CTRL_SET_INACTIVE(low); | ||
136 | CTRL_WRITE(low, high, msrs, i); | ||
137 | } | ||
138 | } | ||
139 | |||
140 | |||
141 | struct op_x86_model_spec const op_athlon_spec = { | ||
142 | .num_counters = NUM_COUNTERS, | ||
143 | .num_controls = NUM_CONTROLS, | ||
144 | .fill_in_addresses = &athlon_fill_in_addresses, | ||
145 | .setup_ctrs = &athlon_setup_ctrs, | ||
146 | .check_ctrs = &athlon_check_ctrs, | ||
147 | .start = &athlon_start, | ||
148 | .stop = &athlon_stop | ||
149 | }; | ||
diff --git a/arch/i386/oprofile/op_model_p4.c b/arch/i386/oprofile/op_model_p4.c new file mode 100644 index 000000000000..ac8a066035c2 --- /dev/null +++ b/arch/i386/oprofile/op_model_p4.c | |||
@@ -0,0 +1,725 @@ | |||
1 | /** | ||
2 | * @file op_model_p4.c | ||
3 | * P4 model-specific MSR operations | ||
4 | * | ||
5 | * @remark Copyright 2002 OProfile authors | ||
6 | * @remark Read the file COPYING | ||
7 | * | ||
8 | * @author Graydon Hoare | ||
9 | */ | ||
10 | |||
11 | #include <linux/oprofile.h> | ||
12 | #include <linux/smp.h> | ||
13 | #include <asm/msr.h> | ||
14 | #include <asm/ptrace.h> | ||
15 | #include <asm/fixmap.h> | ||
16 | #include <asm/apic.h> | ||
17 | |||
18 | #include "op_x86_model.h" | ||
19 | #include "op_counter.h" | ||
20 | |||
/* Sizes of the P4 event table and of the register sets handled here. */
enum {
	NUM_EVENTS = 39,

	/* without hyper-threading: one logical CPU owns every register */
	NUM_COUNTERS_NON_HT = 8,
	NUM_ESCRS_NON_HT = 45,
	NUM_CCCRS_NON_HT = 18,
	NUM_CONTROLS_NON_HT = NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT,

	/* with two hyper-threaded siblings: per-thread share */
	NUM_COUNTERS_HT2 = 4,
	NUM_ESCRS_HT2 = 23,
	NUM_CCCRS_HT2 = 9,
	NUM_CONTROLS_HT2 = NUM_ESCRS_HT2 + NUM_CCCRS_HT2
};

/* counters driven per logical CPU; lowered by setup_num_counters() on HT */
static unsigned int num_counters = NUM_COUNTERS_NON_HT;
34 | |||
35 | |||
/*
 * Whether the chip is hyper-threaded is only discovered at kernel boot
 * time, so the counter count has to be chosen at run time: on SMP builds
 * with exactly two siblings, switch num_counters to the HT value.
 * On non-SMP builds this is a no-op.
 */
static inline void setup_num_counters(void)
{
#ifdef CONFIG_SMP
	if (smp_num_siblings != 2)
		return;

	num_counters = NUM_COUNTERS_HT2;
#endif
}
46 | |||
/*
 * Stride used when walking a range of MSR addresses: 2 on SMP builds when
 * there are exactly two siblings (callers start at base + stagger, so each
 * sibling visits every other register), 1 in every other case.
 */
static inline int addr_increment(void)
{
#ifdef CONFIG_SMP
	return smp_num_siblings == 2 ? 2 : 1;
#else
	return 1;
#endif
}
55 | |||
56 | |||
57 | /* tables to simulate simplified hardware view of p4 registers */ | ||
/* One row of p4_counters[]: a virtual counter and the MSRs behind it. */
struct p4_counter_binding {
	int virt_counter;	/* CTR_* bit naming the virtual counter */
	int counter_address;	/* MSR address of the counter register */
	int cccr_address;	/* MSR address of the matching CCCR */
};
63 | |||
/* One row of p4_events[]: how to program a single event. */
struct p4_event_binding {
	/* value to put in CCCR */
	int escr_select;
	/* value to put in ESCR */
	int event_select;
	/* up to two (counter, ESCR) pairings able to count the event */
	struct {
		/* for this counter... */
		int virt_counter;
		/* use this ESCR */
		int escr_address;
	} bindings[2];
};
72 | |||
/*
 * nb: these CTR_* defines duplicate the defines in
 * event/i386.p4*events.
 *
 * Each virtual counter is one bit; bit n corresponds to p4_counters[n].
 */
#define CTR_BPU_0	0x01
#define CTR_MS_0	0x02
#define CTR_FLAME_0	0x04
#define CTR_IQ_4	0x08
#define CTR_BPU_2	0x10
#define CTR_MS_2	0x20
#define CTR_FLAME_2	0x40
#define CTR_IQ_5	0x80
85 | |||
86 | static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = { | ||
87 | { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 }, | ||
88 | { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 }, | ||
89 | { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 }, | ||
90 | { CTR_IQ_4, MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_CCCR4 }, | ||
91 | { CTR_BPU_2, MSR_P4_BPU_PERFCTR2, MSR_P4_BPU_CCCR2 }, | ||
92 | { CTR_MS_2, MSR_P4_MS_PERFCTR2, MSR_P4_MS_CCCR2 }, | ||
93 | { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 }, | ||
94 | { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 } | ||
95 | }; | ||
96 | |||
/*
 * Number of CCCRs that back no entry of p4_counters[].  The expansion is
 * parenthesized so the subtraction stays intact when the macro is used
 * inside a larger expression (e.g. 2 * NUM_UNUSED_CCCRS).
 */
#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
98 | |||
99 | /* All cccr we don't use. */ | ||
100 | static int p4_unused_cccr[NUM_UNUSED_CCCRS] = { | ||
101 | MSR_P4_BPU_CCCR1, MSR_P4_BPU_CCCR3, | ||
102 | MSR_P4_MS_CCCR1, MSR_P4_MS_CCCR3, | ||
103 | MSR_P4_FLAME_CCCR1, MSR_P4_FLAME_CCCR3, | ||
104 | MSR_P4_IQ_CCCR0, MSR_P4_IQ_CCCR1, | ||
105 | MSR_P4_IQ_CCCR2, MSR_P4_IQ_CCCR3 | ||
106 | }; | ||
107 | |||
108 | /* p4 event codes in libop/op_event.h are indices into this table. */ | ||
109 | |||
110 | static struct p4_event_binding p4_events[NUM_EVENTS] = { | ||
111 | |||
112 | { /* BRANCH_RETIRED */ | ||
113 | 0x05, 0x06, | ||
114 | { {CTR_IQ_4, MSR_P4_CRU_ESCR2}, | ||
115 | {CTR_IQ_5, MSR_P4_CRU_ESCR3} } | ||
116 | }, | ||
117 | |||
118 | { /* MISPRED_BRANCH_RETIRED */ | ||
119 | 0x04, 0x03, | ||
120 | { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, | ||
121 | { CTR_IQ_5, MSR_P4_CRU_ESCR1} } | ||
122 | }, | ||
123 | |||
124 | { /* TC_DELIVER_MODE */ | ||
125 | 0x01, 0x01, | ||
126 | { { CTR_MS_0, MSR_P4_TC_ESCR0}, | ||
127 | { CTR_MS_2, MSR_P4_TC_ESCR1} } | ||
128 | }, | ||
129 | |||
130 | { /* BPU_FETCH_REQUEST */ | ||
131 | 0x00, 0x03, | ||
132 | { { CTR_BPU_0, MSR_P4_BPU_ESCR0}, | ||
133 | { CTR_BPU_2, MSR_P4_BPU_ESCR1} } | ||
134 | }, | ||
135 | |||
136 | { /* ITLB_REFERENCE */ | ||
137 | 0x03, 0x18, | ||
138 | { { CTR_BPU_0, MSR_P4_ITLB_ESCR0}, | ||
139 | { CTR_BPU_2, MSR_P4_ITLB_ESCR1} } | ||
140 | }, | ||
141 | |||
142 | { /* MEMORY_CANCEL */ | ||
143 | 0x05, 0x02, | ||
144 | { { CTR_FLAME_0, MSR_P4_DAC_ESCR0}, | ||
145 | { CTR_FLAME_2, MSR_P4_DAC_ESCR1} } | ||
146 | }, | ||
147 | |||
148 | { /* MEMORY_COMPLETE */ | ||
149 | 0x02, 0x08, | ||
150 | { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, | ||
151 | { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } | ||
152 | }, | ||
153 | |||
154 | { /* LOAD_PORT_REPLAY */ | ||
155 | 0x02, 0x04, | ||
156 | { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, | ||
157 | { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } | ||
158 | }, | ||
159 | |||
160 | { /* STORE_PORT_REPLAY */ | ||
161 | 0x02, 0x05, | ||
162 | { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, | ||
163 | { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } | ||
164 | }, | ||
165 | |||
166 | { /* MOB_LOAD_REPLAY */ | ||
167 | 0x02, 0x03, | ||
168 | { { CTR_BPU_0, MSR_P4_MOB_ESCR0}, | ||
169 | { CTR_BPU_2, MSR_P4_MOB_ESCR1} } | ||
170 | }, | ||
171 | |||
172 | { /* PAGE_WALK_TYPE */ | ||
173 | 0x04, 0x01, | ||
174 | { { CTR_BPU_0, MSR_P4_PMH_ESCR0}, | ||
175 | { CTR_BPU_2, MSR_P4_PMH_ESCR1} } | ||
176 | }, | ||
177 | |||
178 | { /* BSQ_CACHE_REFERENCE */ | ||
179 | 0x07, 0x0c, | ||
180 | { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, | ||
181 | { CTR_BPU_2, MSR_P4_BSU_ESCR1} } | ||
182 | }, | ||
183 | |||
184 | { /* IOQ_ALLOCATION */ | ||
185 | 0x06, 0x03, | ||
186 | { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, | ||
187 | { 0, 0 } } | ||
188 | }, | ||
189 | |||
190 | { /* IOQ_ACTIVE_ENTRIES */ | ||
191 | 0x06, 0x1a, | ||
192 | { { CTR_BPU_2, MSR_P4_FSB_ESCR1}, | ||
193 | { 0, 0 } } | ||
194 | }, | ||
195 | |||
196 | { /* FSB_DATA_ACTIVITY */ | ||
197 | 0x06, 0x17, | ||
198 | { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, | ||
199 | { CTR_BPU_2, MSR_P4_FSB_ESCR1} } | ||
200 | }, | ||
201 | |||
202 | { /* BSQ_ALLOCATION */ | ||
203 | 0x07, 0x05, | ||
204 | { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, | ||
205 | { 0, 0 } } | ||
206 | }, | ||
207 | |||
208 | { /* BSQ_ACTIVE_ENTRIES */ | ||
209 | 0x07, 0x06, | ||
210 | { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */}, | ||
211 | { 0, 0 } } | ||
212 | }, | ||
213 | |||
214 | { /* X87_ASSIST */ | ||
215 | 0x05, 0x03, | ||
216 | { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, | ||
217 | { CTR_IQ_5, MSR_P4_CRU_ESCR3} } | ||
218 | }, | ||
219 | |||
220 | { /* SSE_INPUT_ASSIST */ | ||
221 | 0x01, 0x34, | ||
222 | { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, | ||
223 | { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } | ||
224 | }, | ||
225 | |||
226 | { /* PACKED_SP_UOP */ | ||
227 | 0x01, 0x08, | ||
228 | { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, | ||
229 | { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } | ||
230 | }, | ||
231 | |||
232 | { /* PACKED_DP_UOP */ | ||
233 | 0x01, 0x0c, | ||
234 | { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, | ||
235 | { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } | ||
236 | }, | ||
237 | |||
238 | { /* SCALAR_SP_UOP */ | ||
239 | 0x01, 0x0a, | ||
240 | { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, | ||
241 | { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } | ||
242 | }, | ||
243 | |||
244 | { /* SCALAR_DP_UOP */ | ||
245 | 0x01, 0x0e, | ||
246 | { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, | ||
247 | { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } | ||
248 | }, | ||
249 | |||
250 | { /* 64BIT_MMX_UOP */ | ||
251 | 0x01, 0x02, | ||
252 | { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, | ||
253 | { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } | ||
254 | }, | ||
255 | |||
256 | { /* 128BIT_MMX_UOP */ | ||
257 | 0x01, 0x1a, | ||
258 | { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, | ||
259 | { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } | ||
260 | }, | ||
261 | |||
262 | { /* X87_FP_UOP */ | ||
263 | 0x01, 0x04, | ||
264 | { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, | ||
265 | { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } | ||
266 | }, | ||
267 | |||
268 | { /* X87_SIMD_MOVES_UOP */ | ||
269 | 0x01, 0x2e, | ||
270 | { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, | ||
271 | { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } | ||
272 | }, | ||
273 | |||
274 | { /* MACHINE_CLEAR */ | ||
275 | 0x05, 0x02, | ||
276 | { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, | ||
277 | { CTR_IQ_5, MSR_P4_CRU_ESCR3} } | ||
278 | }, | ||
279 | |||
280 | { /* GLOBAL_POWER_EVENTS */ | ||
281 | 0x06, 0x13 /* older manual says 0x05, newer 0x13 */, | ||
282 | { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, | ||
283 | { CTR_BPU_2, MSR_P4_FSB_ESCR1} } | ||
284 | }, | ||
285 | |||
286 | { /* TC_MS_XFER */ | ||
287 | 0x00, 0x05, | ||
288 | { { CTR_MS_0, MSR_P4_MS_ESCR0}, | ||
289 | { CTR_MS_2, MSR_P4_MS_ESCR1} } | ||
290 | }, | ||
291 | |||
292 | { /* UOP_QUEUE_WRITES */ | ||
293 | 0x00, 0x09, | ||
294 | { { CTR_MS_0, MSR_P4_MS_ESCR0}, | ||
295 | { CTR_MS_2, MSR_P4_MS_ESCR1} } | ||
296 | }, | ||
297 | |||
298 | { /* FRONT_END_EVENT */ | ||
299 | 0x05, 0x08, | ||
300 | { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, | ||
301 | { CTR_IQ_5, MSR_P4_CRU_ESCR3} } | ||
302 | }, | ||
303 | |||
304 | { /* EXECUTION_EVENT */ | ||
305 | 0x05, 0x0c, | ||
306 | { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, | ||
307 | { CTR_IQ_5, MSR_P4_CRU_ESCR3} } | ||
308 | }, | ||
309 | |||
310 | { /* REPLAY_EVENT */ | ||
311 | 0x05, 0x09, | ||
312 | { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, | ||
313 | { CTR_IQ_5, MSR_P4_CRU_ESCR3} } | ||
314 | }, | ||
315 | |||
316 | { /* INSTR_RETIRED */ | ||
317 | 0x04, 0x02, | ||
318 | { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, | ||
319 | { CTR_IQ_5, MSR_P4_CRU_ESCR1} } | ||
320 | }, | ||
321 | |||
322 | { /* UOPS_RETIRED */ | ||
323 | 0x04, 0x01, | ||
324 | { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, | ||
325 | { CTR_IQ_5, MSR_P4_CRU_ESCR1} } | ||
326 | }, | ||
327 | |||
328 | { /* UOP_TYPE */ | ||
329 | 0x02, 0x02, | ||
330 | { { CTR_IQ_4, MSR_P4_RAT_ESCR0}, | ||
331 | { CTR_IQ_5, MSR_P4_RAT_ESCR1} } | ||
332 | }, | ||
333 | |||
334 | { /* RETIRED_MISPRED_BRANCH_TYPE */ | ||
335 | 0x02, 0x05, | ||
336 | { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, | ||
337 | { CTR_MS_2, MSR_P4_TBPU_ESCR1} } | ||
338 | }, | ||
339 | |||
340 | { /* RETIRED_BRANCH_TYPE */ | ||
341 | 0x02, 0x04, | ||
342 | { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, | ||
343 | { CTR_MS_2, MSR_P4_TBPU_ESCR1} } | ||
344 | } | ||
345 | }; | ||
346 | |||
347 | |||
/*
 * Bit helpers for the P4 performance-monitoring MSRs.  Every macro
 * argument and every shift sub-expression is parenthesized so the macros
 * expand correctly whatever expression the caller passes.
 */

/* non-zero when bit 7 of the (low word of the) MISC_ENABLE value is set */
#define MISC_PMC_ENABLED_P(x) ((x) & (1 << 7))

/* ESCR: ESCR_CLEAR keeps only the reserved bits, the setters OR fields in */
#define ESCR_RESERVED_BITS 0x80000003
#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
/* read/write the ESCR named by binding i of event binding ev */
#define ESCR_READ(escr,high,ev,i) do {rdmsr((ev)->bindings[(i)].escr_address, (escr), (high));} while (0)
#define ESCR_WRITE(escr,high,ev,i) do {wrmsr((ev)->bindings[(i)].escr_address, (escr), (high));} while (0)

/* CCCR: CCCR_CLEAR keeps only the reserved bits, the setters OR fields in */
#define CCCR_RESERVED_BITS 0x38030FFF
#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
/* read/write the CCCR of p4_counters[i] */
#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))

/*
 * Counter of p4_counters[i].  CTR_WRITE stores the negated count in the
 * low word and all ones in the high word; CTR_OVERFLOW_P is true once
 * bit 31 of the low word reads as clear.
 */
#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
377 | |||
378 | |||
/*
 * Compute the "stagger" of the current CPU.  It is used throughout this
 * module as an extra array offset that selects the "even" or "odd" part
 * of all the divided resources.
 *
 * On SMP builds the result is 1 when the current CPU is not the first CPU
 * of its sibling map and 0 when it is; on non-SMP builds it is always 0.
 */
static unsigned int get_stagger(void)
{
	unsigned int stagger = 0;
#ifdef CONFIG_SMP
	int cpu = smp_processor_id();

	stagger = (cpu != first_cpu(cpu_sibling_map[cpu]));
#endif
	return stagger;
}
390 | |||
391 | |||
/* finally, mediate access to a real hardware counter
   by passing a "virtual" counter number to this macro,
   along with your stagger setting. */
395 | #define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger))) | ||
396 | |||
397 | static unsigned long reset_value[NUM_COUNTERS_NON_HT]; | ||
398 | |||
399 | |||
400 | static void p4_fill_in_addresses(struct op_msrs * const msrs) | ||
401 | { | ||
402 | unsigned int i; | ||
403 | unsigned int addr, stag; | ||
404 | |||
405 | setup_num_counters(); | ||
406 | stag = get_stagger(); | ||
407 | |||
408 | /* the counter registers we pay attention to */ | ||
409 | for (i = 0; i < num_counters; ++i) { | ||
410 | msrs->counters[i].addr = | ||
411 | p4_counters[VIRT_CTR(stag, i)].counter_address; | ||
412 | } | ||
413 | |||
414 | /* FIXME: bad feeling, we don't save the 10 counters we don't use. */ | ||
415 | |||
416 | /* 18 CCCR registers */ | ||
417 | for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag; | ||
418 | addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) { | ||
419 | msrs->controls[i].addr = addr; | ||
420 | } | ||
421 | |||
422 | /* 43 ESCR registers in three or four discontiguous group */ | ||
423 | for (addr = MSR_P4_BSU_ESCR0 + stag; | ||
424 | addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) { | ||
425 | msrs->controls[i].addr = addr; | ||
426 | } | ||
427 | |||
428 | /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1 | ||
429 | * to avoid special case in nmi_{save|restore}_registers() */ | ||
430 | if (boot_cpu_data.x86_model >= 0x3) { | ||
431 | for (addr = MSR_P4_BSU_ESCR0 + stag; | ||
432 | addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) { | ||
433 | msrs->controls[i].addr = addr; | ||
434 | } | ||
435 | } else { | ||
436 | for (addr = MSR_P4_IQ_ESCR0 + stag; | ||
437 | addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) { | ||
438 | msrs->controls[i].addr = addr; | ||
439 | } | ||
440 | } | ||
441 | |||
442 | for (addr = MSR_P4_RAT_ESCR0 + stag; | ||
443 | addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { | ||
444 | msrs->controls[i].addr = addr; | ||
445 | } | ||
446 | |||
447 | for (addr = MSR_P4_MS_ESCR0 + stag; | ||
448 | addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { | ||
449 | msrs->controls[i].addr = addr; | ||
450 | } | ||
451 | |||
452 | for (addr = MSR_P4_IX_ESCR0 + stag; | ||
453 | addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { | ||
454 | msrs->controls[i].addr = addr; | ||
455 | } | ||
456 | |||
457 | /* there are 2 remaining non-contiguously located ESCRs */ | ||
458 | |||
459 | if (num_counters == NUM_COUNTERS_NON_HT) { | ||
460 | /* standard non-HT CPUs handle both remaining ESCRs*/ | ||
461 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; | ||
462 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; | ||
463 | |||
464 | } else if (stag == 0) { | ||
465 | /* HT CPUs give the first remainder to the even thread, as | ||
466 | the 32nd control register */ | ||
467 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; | ||
468 | |||
469 | } else { | ||
470 | /* and two copies of the second to the odd thread, | ||
471 | for the 22st and 23nd control registers */ | ||
472 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; | ||
473 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; | ||
474 | } | ||
475 | } | ||
476 | |||
477 | |||
478 | static void pmc_setup_one_p4_counter(unsigned int ctr) | ||
479 | { | ||
480 | int i; | ||
481 | int const maxbind = 2; | ||
482 | unsigned int cccr = 0; | ||
483 | unsigned int escr = 0; | ||
484 | unsigned int high = 0; | ||
485 | unsigned int counter_bit; | ||
486 | struct p4_event_binding *ev = NULL; | ||
487 | unsigned int stag; | ||
488 | |||
489 | stag = get_stagger(); | ||
490 | |||
491 | /* convert from counter *number* to counter *bit* */ | ||
492 | counter_bit = 1 << VIRT_CTR(stag, ctr); | ||
493 | |||
494 | /* find our event binding structure. */ | ||
495 | if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) { | ||
496 | printk(KERN_ERR | ||
497 | "oprofile: P4 event code 0x%lx out of range\n", | ||
498 | counter_config[ctr].event); | ||
499 | return; | ||
500 | } | ||
501 | |||
502 | ev = &(p4_events[counter_config[ctr].event - 1]); | ||
503 | |||
504 | for (i = 0; i < maxbind; i++) { | ||
505 | if (ev->bindings[i].virt_counter & counter_bit) { | ||
506 | |||
507 | /* modify ESCR */ | ||
508 | ESCR_READ(escr, high, ev, i); | ||
509 | ESCR_CLEAR(escr); | ||
510 | if (stag == 0) { | ||
511 | ESCR_SET_USR_0(escr, counter_config[ctr].user); | ||
512 | ESCR_SET_OS_0(escr, counter_config[ctr].kernel); | ||
513 | } else { | ||
514 | ESCR_SET_USR_1(escr, counter_config[ctr].user); | ||
515 | ESCR_SET_OS_1(escr, counter_config[ctr].kernel); | ||
516 | } | ||
517 | ESCR_SET_EVENT_SELECT(escr, ev->event_select); | ||
518 | ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask); | ||
519 | ESCR_WRITE(escr, high, ev, i); | ||
520 | |||
521 | /* modify CCCR */ | ||
522 | CCCR_READ(cccr, high, VIRT_CTR(stag, ctr)); | ||
523 | CCCR_CLEAR(cccr); | ||
524 | CCCR_SET_REQUIRED_BITS(cccr); | ||
525 | CCCR_SET_ESCR_SELECT(cccr, ev->escr_select); | ||
526 | if (stag == 0) { | ||
527 | CCCR_SET_PMI_OVF_0(cccr); | ||
528 | } else { | ||
529 | CCCR_SET_PMI_OVF_1(cccr); | ||
530 | } | ||
531 | CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr)); | ||
532 | return; | ||
533 | } | ||
534 | } | ||
535 | |||
536 | printk(KERN_ERR | ||
537 | "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n", | ||
538 | counter_config[ctr].event, stag, ctr); | ||
539 | } | ||
540 | |||
541 | |||
542 | static void p4_setup_ctrs(struct op_msrs const * const msrs) | ||
543 | { | ||
544 | unsigned int i; | ||
545 | unsigned int low, high; | ||
546 | unsigned int addr; | ||
547 | unsigned int stag; | ||
548 | |||
549 | stag = get_stagger(); | ||
550 | |||
551 | rdmsr(MSR_IA32_MISC_ENABLE, low, high); | ||
552 | if (! MISC_PMC_ENABLED_P(low)) { | ||
553 | printk(KERN_ERR "oprofile: P4 PMC not available\n"); | ||
554 | return; | ||
555 | } | ||
556 | |||
557 | /* clear the cccrs we will use */ | ||
558 | for (i = 0 ; i < num_counters ; i++) { | ||
559 | rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); | ||
560 | CCCR_CLEAR(low); | ||
561 | CCCR_SET_REQUIRED_BITS(low); | ||
562 | wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); | ||
563 | } | ||
564 | |||
565 | /* clear cccrs outside our concern */ | ||
566 | for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) { | ||
567 | rdmsr(p4_unused_cccr[i], low, high); | ||
568 | CCCR_CLEAR(low); | ||
569 | CCCR_SET_REQUIRED_BITS(low); | ||
570 | wrmsr(p4_unused_cccr[i], low, high); | ||
571 | } | ||
572 | |||
573 | /* clear all escrs (including those outside our concern) */ | ||
574 | for (addr = MSR_P4_BSU_ESCR0 + stag; | ||
575 | addr < MSR_P4_IQ_ESCR0; addr += addr_increment()) { | ||
576 | wrmsr(addr, 0, 0); | ||
577 | } | ||
578 | |||
579 | /* On older models clear also MSR_P4_IQ_ESCR0/1 */ | ||
580 | if (boot_cpu_data.x86_model < 0x3) { | ||
581 | wrmsr(MSR_P4_IQ_ESCR0, 0, 0); | ||
582 | wrmsr(MSR_P4_IQ_ESCR1, 0, 0); | ||
583 | } | ||
584 | |||
585 | for (addr = MSR_P4_RAT_ESCR0 + stag; | ||
586 | addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { | ||
587 | wrmsr(addr, 0, 0); | ||
588 | } | ||
589 | |||
590 | for (addr = MSR_P4_MS_ESCR0 + stag; | ||
591 | addr <= MSR_P4_TC_ESCR1; addr += addr_increment()){ | ||
592 | wrmsr(addr, 0, 0); | ||
593 | } | ||
594 | |||
595 | for (addr = MSR_P4_IX_ESCR0 + stag; | ||
596 | addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()){ | ||
597 | wrmsr(addr, 0, 0); | ||
598 | } | ||
599 | |||
600 | if (num_counters == NUM_COUNTERS_NON_HT) { | ||
601 | wrmsr(MSR_P4_CRU_ESCR4, 0, 0); | ||
602 | wrmsr(MSR_P4_CRU_ESCR5, 0, 0); | ||
603 | } else if (stag == 0) { | ||
604 | wrmsr(MSR_P4_CRU_ESCR4, 0, 0); | ||
605 | } else { | ||
606 | wrmsr(MSR_P4_CRU_ESCR5, 0, 0); | ||
607 | } | ||
608 | |||
609 | /* setup all counters */ | ||
610 | for (i = 0 ; i < num_counters ; ++i) { | ||
611 | if (counter_config[i].enabled) { | ||
612 | reset_value[i] = counter_config[i].count; | ||
613 | pmc_setup_one_p4_counter(i); | ||
614 | CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); | ||
615 | } else { | ||
616 | reset_value[i] = 0; | ||
617 | } | ||
618 | } | ||
619 | } | ||
620 | |||
621 | |||
622 | static int p4_check_ctrs(struct pt_regs * const regs, | ||
623 | struct op_msrs const * const msrs) | ||
624 | { | ||
625 | unsigned long ctr, low, high, stag, real; | ||
626 | int i; | ||
627 | |||
628 | stag = get_stagger(); | ||
629 | |||
630 | for (i = 0; i < num_counters; ++i) { | ||
631 | |||
632 | if (!reset_value[i]) | ||
633 | continue; | ||
634 | |||
635 | /* | ||
636 | * there is some eccentricity in the hardware which | ||
637 | * requires that we perform 2 extra corrections: | ||
638 | * | ||
639 | * - check both the CCCR:OVF flag for overflow and the | ||
640 | * counter high bit for un-flagged overflows. | ||
641 | * | ||
642 | * - write the counter back twice to ensure it gets | ||
643 | * updated properly. | ||
644 | * | ||
645 | * the former seems to be related to extra NMIs happening | ||
646 | * during the current NMI; the latter is reported as errata | ||
647 | * N15 in intel doc 249199-029, pentium 4 specification | ||
648 | * update, though their suggested work-around does not | ||
649 | * appear to solve the problem. | ||
650 | */ | ||
651 | |||
652 | real = VIRT_CTR(stag, i); | ||
653 | |||
654 | CCCR_READ(low, high, real); | ||
655 | CTR_READ(ctr, high, real); | ||
656 | if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) { | ||
657 | oprofile_add_sample(regs, i); | ||
658 | CTR_WRITE(reset_value[i], real); | ||
659 | CCCR_CLEAR_OVF(low); | ||
660 | CCCR_WRITE(low, high, real); | ||
661 | CTR_WRITE(reset_value[i], real); | ||
662 | } | ||
663 | } | ||
664 | |||
665 | /* P4 quirk: you have to re-unmask the apic vector */ | ||
666 | apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); | ||
667 | |||
668 | /* See op_model_ppro.c */ | ||
669 | return 1; | ||
670 | } | ||
671 | |||
672 | |||
673 | static void p4_start(struct op_msrs const * const msrs) | ||
674 | { | ||
675 | unsigned int low, high, stag; | ||
676 | int i; | ||
677 | |||
678 | stag = get_stagger(); | ||
679 | |||
680 | for (i = 0; i < num_counters; ++i) { | ||
681 | if (!reset_value[i]) | ||
682 | continue; | ||
683 | CCCR_READ(low, high, VIRT_CTR(stag, i)); | ||
684 | CCCR_SET_ENABLE(low); | ||
685 | CCCR_WRITE(low, high, VIRT_CTR(stag, i)); | ||
686 | } | ||
687 | } | ||
688 | |||
689 | |||
690 | static void p4_stop(struct op_msrs const * const msrs) | ||
691 | { | ||
692 | unsigned int low, high, stag; | ||
693 | int i; | ||
694 | |||
695 | stag = get_stagger(); | ||
696 | |||
697 | for (i = 0; i < num_counters; ++i) { | ||
698 | CCCR_READ(low, high, VIRT_CTR(stag, i)); | ||
699 | CCCR_SET_DISABLE(low); | ||
700 | CCCR_WRITE(low, high, VIRT_CTR(stag, i)); | ||
701 | } | ||
702 | } | ||
703 | |||
704 | |||
#ifdef CONFIG_SMP
/* Model operations table for a P4 with two hyper-threaded siblings
 * (SMP builds only); it differs from op_p4_spec only in the counts. */
struct op_x86_model_spec const op_p4_ht2_spec = {
	.num_counters		= NUM_COUNTERS_HT2,
	.num_controls		= NUM_CONTROLS_HT2,
	.fill_in_addresses	= p4_fill_in_addresses,
	.setup_ctrs		= p4_setup_ctrs,
	.check_ctrs		= p4_check_ctrs,
	.start			= p4_start,
	.stop			= p4_stop
};
#endif
716 | |||
717 | struct op_x86_model_spec const op_p4_spec = { | ||
718 | .num_counters = NUM_COUNTERS_NON_HT, | ||
719 | .num_controls = NUM_CONTROLS_NON_HT, | ||
720 | .fill_in_addresses = &p4_fill_in_addresses, | ||
721 | .setup_ctrs = &p4_setup_ctrs, | ||
722 | .check_ctrs = &p4_check_ctrs, | ||
723 | .start = &p4_start, | ||
724 | .stop = &p4_stop | ||
725 | }; | ||
diff --git a/arch/i386/oprofile/op_model_ppro.c b/arch/i386/oprofile/op_model_ppro.c new file mode 100644 index 000000000000..d719015fc044 --- /dev/null +++ b/arch/i386/oprofile/op_model_ppro.c | |||
@@ -0,0 +1,143 @@ | |||
1 | /** | ||
 * @file op_model_ppro.c
3 | * pentium pro / P6 model-specific MSR operations | ||
4 | * | ||
5 | * @remark Copyright 2002 OProfile authors | ||
6 | * @remark Read the file COPYING | ||
7 | * | ||
8 | * @author John Levon | ||
9 | * @author Philippe Elie | ||
10 | * @author Graydon Hoare | ||
11 | */ | ||
12 | |||
13 | #include <linux/oprofile.h> | ||
14 | #include <asm/ptrace.h> | ||
15 | #include <asm/msr.h> | ||
16 | #include <asm/apic.h> | ||
17 | |||
18 | #include "op_x86_model.h" | ||
19 | #include "op_counter.h" | ||
20 | |||
#define NUM_COUNTERS 2
#define NUM_CONTROLS 2

/*
 * Counter access.  CTR_WRITE stores the negated count in the low word and
 * all ones in the high word; CTR_OVERFLOWED is true once bit 31 of the
 * low word reads as clear.
 *
 * All macro arguments are parenthesized so that compound expressions
 * (e.g. a unit mask written as "a | b") expand as intended.
 */
#define CTR_READ(l,h,msrs,c) do {rdmsr((msrs)->counters[(c)].addr, (l), (h));} while (0)
#define CTR_WRITE(l,msrs,c) do {wrmsr((msrs)->counters[(c)].addr, -(u32)(l), -1);} while (0)
#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))

/* Control (event select) register access and field setters. */
#define CTRL_READ(l,h,msrs,c) do {rdmsr(((msrs)->controls[(c)].addr), (l), (h));} while (0)
#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(((msrs)->controls[(c)].addr), (l), (h));} while (0)
#define CTRL_SET_ACTIVE(n) ((n) |= (1<<22))
#define CTRL_SET_INACTIVE(n) ((n) &= ~(1<<22))
/* keeps bit 21 only, clears everything else */
#define CTRL_CLEAR(x) ((x) &= (1<<21))
#define CTRL_SET_ENABLE(val) ((val) |= (1<<20))
#define CTRL_SET_USR(val,u) ((val) |= (((u) & 1) << 16))
#define CTRL_SET_KERN(val,k) ((val) |= (((k) & 1) << 17))
#define CTRL_SET_UM(val, m) ((val) |= ((m) << 8))
#define CTRL_SET_EVENT(val, e) ((val) |= (e))

/* per-counter value to reload after an overflow */
static unsigned long reset_value[NUM_COUNTERS];
40 | |||
41 | static void ppro_fill_in_addresses(struct op_msrs * const msrs) | ||
42 | { | ||
43 | msrs->counters[0].addr = MSR_P6_PERFCTR0; | ||
44 | msrs->counters[1].addr = MSR_P6_PERFCTR1; | ||
45 | |||
46 | msrs->controls[0].addr = MSR_P6_EVNTSEL0; | ||
47 | msrs->controls[1].addr = MSR_P6_EVNTSEL1; | ||
48 | } | ||
49 | |||
50 | |||
51 | static void ppro_setup_ctrs(struct op_msrs const * const msrs) | ||
52 | { | ||
53 | unsigned int low, high; | ||
54 | int i; | ||
55 | |||
56 | /* clear all counters */ | ||
57 | for (i = 0 ; i < NUM_CONTROLS; ++i) { | ||
58 | CTRL_READ(low, high, msrs, i); | ||
59 | CTRL_CLEAR(low); | ||
60 | CTRL_WRITE(low, high, msrs, i); | ||
61 | } | ||
62 | |||
63 | /* avoid a false detection of ctr overflows in NMI handler */ | ||
64 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
65 | CTR_WRITE(1, msrs, i); | ||
66 | } | ||
67 | |||
68 | /* enable active counters */ | ||
69 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
70 | if (counter_config[i].enabled) { | ||
71 | reset_value[i] = counter_config[i].count; | ||
72 | |||
73 | CTR_WRITE(counter_config[i].count, msrs, i); | ||
74 | |||
75 | CTRL_READ(low, high, msrs, i); | ||
76 | CTRL_CLEAR(low); | ||
77 | CTRL_SET_ENABLE(low); | ||
78 | CTRL_SET_USR(low, counter_config[i].user); | ||
79 | CTRL_SET_KERN(low, counter_config[i].kernel); | ||
80 | CTRL_SET_UM(low, counter_config[i].unit_mask); | ||
81 | CTRL_SET_EVENT(low, counter_config[i].event); | ||
82 | CTRL_WRITE(low, high, msrs, i); | ||
83 | } | ||
84 | } | ||
85 | } | ||
86 | |||
87 | |||
88 | static int ppro_check_ctrs(struct pt_regs * const regs, | ||
89 | struct op_msrs const * const msrs) | ||
90 | { | ||
91 | unsigned int low, high; | ||
92 | int i; | ||
93 | |||
94 | for (i = 0 ; i < NUM_COUNTERS; ++i) { | ||
95 | CTR_READ(low, high, msrs, i); | ||
96 | if (CTR_OVERFLOWED(low)) { | ||
97 | oprofile_add_sample(regs, i); | ||
98 | CTR_WRITE(reset_value[i], msrs, i); | ||
99 | } | ||
100 | } | ||
101 | |||
102 | /* Only P6 based Pentium M need to re-unmask the apic vector but it | ||
103 | * doesn't hurt other P6 variant */ | ||
104 | apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); | ||
105 | |||
106 | /* We can't work out if we really handled an interrupt. We | ||
107 | * might have caught a *second* counter just after overflowing | ||
108 | * the interrupt for this counter then arrives | ||
109 | * and we don't find a counter that's overflowed, so we | ||
110 | * would return 0 and get dazed + confused. Instead we always | ||
111 | * assume we found an overflow. This sucks. | ||
112 | */ | ||
113 | return 1; | ||
114 | } | ||
115 | |||
116 | |||
/* Set the active bit (bit 22) in control register 0; control register 1
 * is not touched here. */
static void ppro_start(struct op_msrs const * const msrs)
{
	unsigned int lo;
	unsigned int hi;

	CTRL_READ(lo, hi, msrs, 0);
	CTRL_SET_ACTIVE(lo);
	CTRL_WRITE(lo, hi, msrs, 0);
}
124 | |||
125 | |||
/* Clear the active bit (bit 22) in control register 0; control register 1
 * is not touched here. */
static void ppro_stop(struct op_msrs const * const msrs)
{
	unsigned int lo;
	unsigned int hi;

	CTRL_READ(lo, hi, msrs, 0);
	CTRL_SET_INACTIVE(lo);
	CTRL_WRITE(lo, hi, msrs, 0);
}
133 | |||
134 | |||
135 | struct op_x86_model_spec const op_ppro_spec = { | ||
136 | .num_counters = NUM_COUNTERS, | ||
137 | .num_controls = NUM_CONTROLS, | ||
138 | .fill_in_addresses = &ppro_fill_in_addresses, | ||
139 | .setup_ctrs = &ppro_setup_ctrs, | ||
140 | .check_ctrs = &ppro_check_ctrs, | ||
141 | .start = &ppro_start, | ||
142 | .stop = &ppro_stop | ||
143 | }; | ||
diff --git a/arch/i386/oprofile/op_x86_model.h b/arch/i386/oprofile/op_x86_model.h new file mode 100644 index 000000000000..123b7e90a9ee --- /dev/null +++ b/arch/i386/oprofile/op_x86_model.h | |||
@@ -0,0 +1,50 @@ | |||
1 | /** | ||
2 | * @file op_x86_model.h | ||
3 | * interface to x86 model-specific MSR operations | ||
4 | * | ||
5 | * @remark Copyright 2002 OProfile authors | ||
6 | * @remark Read the file COPYING | ||
7 | * | ||
8 | * @author Graydon Hoare | ||
9 | */ | ||
10 | |||
#ifndef OP_X86_MODEL_H
#define OP_X86_MODEL_H

/* The two 32-bit halves of an MSR value. */
struct op_saved_msr {
	unsigned int high;
	unsigned int low;
};

/* One MSR: its address plus a slot for a saved value. */
struct op_msr {
	unsigned long addr;
	struct op_saved_msr saved;
};

/* The counter MSRs and control MSRs a model works with. */
struct op_msrs {
	struct op_msr *counters;
	struct op_msr *controls;
};

struct pt_regs;

/*
 * The model vtable abstracts the differences between
 * various x86 CPU models' perfctr support.
 */
struct op_x86_model_spec {
	/* number of entries in op_msrs.counters / op_msrs.controls */
	unsigned int const num_counters;
	unsigned int const num_controls;

	/* store the model's MSR addresses into msrs */
	void (*fill_in_addresses)(struct op_msrs * const msrs);
	/* program the counters from the current configuration */
	void (*setup_ctrs)(struct op_msrs const * const msrs);
	/* look for overflowed counters and record samples */
	int (*check_ctrs)(struct pt_regs * const regs,
			  struct op_msrs const * const msrs);
	/* start / stop counting */
	void (*start)(struct op_msrs const * const msrs);
	void (*stop)(struct op_msrs const * const msrs);
};

extern struct op_x86_model_spec const op_ppro_spec;
extern struct op_x86_model_spec const op_p4_spec;
extern struct op_x86_model_spec const op_p4_ht2_spec;
extern struct op_x86_model_spec const op_athlon_spec;

#endif /* OP_X86_MODEL_H */