diff options
-rw-r--r-- | arch/arm/oprofile/common.c | 333 |
1 files changed, 281 insertions, 52 deletions
diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c index 3fcd752d6146..aad83df89bac 100644 --- a/arch/arm/oprofile/common.c +++ b/arch/arm/oprofile/common.c | |||
@@ -2,32 +2,183 @@ | |||
2 | * @file common.c | 2 | * @file common.c |
3 | * | 3 | * |
4 | * @remark Copyright 2004 Oprofile Authors | 4 | * @remark Copyright 2004 Oprofile Authors |
5 | * @remark Copyright 2010 ARM Ltd. | ||
5 | * @remark Read the file COPYING | 6 | * @remark Read the file COPYING |
6 | * | 7 | * |
7 | * @author Zwane Mwaikambo | 8 | * @author Zwane Mwaikambo |
9 | * @author Will Deacon [move to perf] | ||
8 | */ | 10 | */ |
9 | 11 | ||
12 | #include <linux/cpumask.h> | ||
13 | #include <linux/errno.h> | ||
10 | #include <linux/init.h> | 14 | #include <linux/init.h> |
15 | #include <linux/mutex.h> | ||
11 | #include <linux/oprofile.h> | 16 | #include <linux/oprofile.h> |
12 | #include <linux/errno.h> | 17 | #include <linux/perf_event.h> |
13 | #include <linux/slab.h> | 18 | #include <linux/slab.h> |
14 | #include <linux/sysdev.h> | 19 | #include <linux/sysdev.h> |
15 | #include <linux/mutex.h> | 20 | #include <asm/stacktrace.h> |
21 | #include <linux/uaccess.h> | ||
22 | |||
23 | #include <asm/perf_event.h> | ||
24 | #include <asm/ptrace.h> | ||
16 | 25 | ||
17 | #include "op_counter.h" | 26 | #ifdef CONFIG_HW_PERF_EVENTS |
18 | #include "op_arm_model.h" | 27 | /* |
28 | * Per performance monitor configuration as set via oprofilefs. | ||
29 | */ | ||
30 | struct op_counter_config { | ||
31 | unsigned long count; | ||
32 | unsigned long enabled; | ||
33 | unsigned long event; | ||
34 | unsigned long unit_mask; | ||
35 | unsigned long kernel; | ||
36 | unsigned long user; | ||
37 | struct perf_event_attr attr; | ||
38 | }; | ||
19 | 39 | ||
20 | static struct op_arm_model_spec *op_arm_model; | ||
21 | static int op_arm_enabled; | 40 | static int op_arm_enabled; |
22 | static DEFINE_MUTEX(op_arm_mutex); | 41 | static DEFINE_MUTEX(op_arm_mutex); |
23 | 42 | ||
24 | struct op_counter_config *counter_config; | 43 | static struct op_counter_config *counter_config; |
44 | static struct perf_event **perf_events[nr_cpumask_bits]; | ||
45 | static int perf_num_counters; | ||
46 | |||
47 | /* | ||
48 | * Overflow callback for oprofile. | ||
49 | */ | ||
50 | static void op_overflow_handler(struct perf_event *event, int unused, | ||
51 | struct perf_sample_data *data, struct pt_regs *regs) | ||
52 | { | ||
53 | int id; | ||
54 | u32 cpu = smp_processor_id(); | ||
55 | |||
56 | for (id = 0; id < perf_num_counters; ++id) | ||
57 | if (perf_events[cpu][id] == event) | ||
58 | break; | ||
59 | |||
60 | if (id != perf_num_counters) | ||
61 | oprofile_add_sample(regs, id); | ||
62 | else | ||
63 | pr_warning("oprofile: ignoring spurious overflow " | ||
64 | "on cpu %u\n", cpu); | ||
65 | } | ||
66 | |||
67 | /* | ||
68 | * Called by op_arm_setup to create perf attributes to mirror the oprofile | ||
69 | * settings in counter_config. Attributes are created as `pinned' events and | ||
70 | * so are permanently scheduled on the PMU. | ||
71 | */ | ||
72 | static void op_perf_setup(void) | ||
73 | { | ||
74 | int i; | ||
75 | u32 size = sizeof(struct perf_event_attr); | ||
76 | struct perf_event_attr *attr; | ||
77 | |||
78 | for (i = 0; i < perf_num_counters; ++i) { | ||
79 | attr = &counter_config[i].attr; | ||
80 | memset(attr, 0, size); | ||
81 | attr->type = PERF_TYPE_RAW; | ||
82 | attr->size = size; | ||
83 | attr->config = counter_config[i].event; | ||
84 | attr->sample_period = counter_config[i].count; | ||
85 | attr->pinned = 1; | ||
86 | } | ||
87 | } | ||
88 | |||
89 | static int op_create_counter(int cpu, int event) | ||
90 | { | ||
91 | int ret = 0; | ||
92 | struct perf_event *pevent; | ||
93 | |||
94 | if (!counter_config[event].enabled || (perf_events[cpu][event] != NULL)) | ||
95 | return ret; | ||
96 | |||
97 | pevent = perf_event_create_kernel_counter(&counter_config[event].attr, | ||
98 | cpu, -1, | ||
99 | op_overflow_handler); | ||
100 | |||
101 | if (IS_ERR(pevent)) { | ||
102 | ret = PTR_ERR(pevent); | ||
103 | } else if (pevent->state != PERF_EVENT_STATE_ACTIVE) { | ||
104 | pr_warning("oprofile: failed to enable event %d " | ||
105 | "on CPU %d\n", event, cpu); | ||
106 | ret = -EBUSY; | ||
107 | } else { | ||
108 | perf_events[cpu][event] = pevent; | ||
109 | } | ||
110 | |||
111 | return ret; | ||
112 | } | ||
113 | |||
114 | static void op_destroy_counter(int cpu, int event) | ||
115 | { | ||
116 | struct perf_event *pevent = perf_events[cpu][event]; | ||
117 | |||
118 | if (pevent) { | ||
119 | perf_event_release_kernel(pevent); | ||
120 | perf_events[cpu][event] = NULL; | ||
121 | } | ||
122 | } | ||
123 | |||
124 | /* | ||
125 | * Called by op_arm_start to create active perf events based on the | ||
126 | * previously configured attributes. | ||
127 | */ | ||
128 | static int op_perf_start(void) | ||
129 | { | ||
130 | int cpu, event, ret = 0; | ||
131 | |||
132 | for_each_online_cpu(cpu) { | ||
133 | for (event = 0; event < perf_num_counters; ++event) { | ||
134 | ret = op_create_counter(cpu, event); | ||
135 | if (ret) | ||
136 | goto out; | ||
137 | } | ||
138 | } | ||
139 | |||
140 | out: | ||
141 | return ret; | ||
142 | } | ||
143 | |||
144 | /* | ||
145 | * Called by op_arm_stop at the end of a profiling run. | ||
146 | */ | ||
147 | static void op_perf_stop(void) | ||
148 | { | ||
149 | int cpu, event; | ||
150 | |||
151 | for_each_online_cpu(cpu) | ||
152 | for (event = 0; event < perf_num_counters; ++event) | ||
153 | op_destroy_counter(cpu, event); | ||
154 | } | ||
155 | |||
156 | |||
157 | static char *op_name_from_perf_id(enum arm_perf_pmu_ids id) | ||
158 | { | ||
159 | switch (id) { | ||
160 | case ARM_PERF_PMU_ID_XSCALE1: | ||
161 | return "arm/xscale1"; | ||
162 | case ARM_PERF_PMU_ID_XSCALE2: | ||
163 | return "arm/xscale2"; | ||
164 | case ARM_PERF_PMU_ID_V6: | ||
165 | return "arm/armv6"; | ||
166 | case ARM_PERF_PMU_ID_V6MP: | ||
167 | return "arm/mpcore"; | ||
168 | case ARM_PERF_PMU_ID_CA8: | ||
169 | return "arm/armv7"; | ||
170 | case ARM_PERF_PMU_ID_CA9: | ||
171 | return "arm/armv7-ca9"; | ||
172 | default: | ||
173 | return NULL; | ||
174 | } | ||
175 | } | ||
25 | 176 | ||
26 | static int op_arm_create_files(struct super_block *sb, struct dentry *root) | 177 | static int op_arm_create_files(struct super_block *sb, struct dentry *root) |
27 | { | 178 | { |
28 | unsigned int i; | 179 | unsigned int i; |
29 | 180 | ||
30 | for (i = 0; i < op_arm_model->num_counters; i++) { | 181 | for (i = 0; i < perf_num_counters; i++) { |
31 | struct dentry *dir; | 182 | struct dentry *dir; |
32 | char buf[4]; | 183 | char buf[4]; |
33 | 184 | ||
@@ -46,12 +197,10 @@ static int op_arm_create_files(struct super_block *sb, struct dentry *root) | |||
46 | 197 | ||
47 | static int op_arm_setup(void) | 198 | static int op_arm_setup(void) |
48 | { | 199 | { |
49 | int ret; | ||
50 | |||
51 | spin_lock(&oprofilefs_lock); | 200 | spin_lock(&oprofilefs_lock); |
52 | ret = op_arm_model->setup_ctrs(); | 201 | op_perf_setup(); |
53 | spin_unlock(&oprofilefs_lock); | 202 | spin_unlock(&oprofilefs_lock); |
54 | return ret; | 203 | return 0; |
55 | } | 204 | } |
56 | 205 | ||
57 | static int op_arm_start(void) | 206 | static int op_arm_start(void) |
@@ -60,8 +209,9 @@ static int op_arm_start(void) | |||
60 | 209 | ||
61 | mutex_lock(&op_arm_mutex); | 210 | mutex_lock(&op_arm_mutex); |
62 | if (!op_arm_enabled) { | 211 | if (!op_arm_enabled) { |
63 | ret = op_arm_model->start(); | 212 | ret = 0; |
64 | op_arm_enabled = !ret; | 213 | op_perf_start(); |
214 | op_arm_enabled = 1; | ||
65 | } | 215 | } |
66 | mutex_unlock(&op_arm_mutex); | 216 | mutex_unlock(&op_arm_mutex); |
67 | return ret; | 217 | return ret; |
@@ -71,7 +221,7 @@ static void op_arm_stop(void) | |||
71 | { | 221 | { |
72 | mutex_lock(&op_arm_mutex); | 222 | mutex_lock(&op_arm_mutex); |
73 | if (op_arm_enabled) | 223 | if (op_arm_enabled) |
74 | op_arm_model->stop(); | 224 | op_perf_stop(); |
75 | op_arm_enabled = 0; | 225 | op_arm_enabled = 0; |
76 | mutex_unlock(&op_arm_mutex); | 226 | mutex_unlock(&op_arm_mutex); |
77 | } | 227 | } |
@@ -81,7 +231,7 @@ static int op_arm_suspend(struct sys_device *dev, pm_message_t state) | |||
81 | { | 231 | { |
82 | mutex_lock(&op_arm_mutex); | 232 | mutex_lock(&op_arm_mutex); |
83 | if (op_arm_enabled) | 233 | if (op_arm_enabled) |
84 | op_arm_model->stop(); | 234 | op_perf_stop(); |
85 | mutex_unlock(&op_arm_mutex); | 235 | mutex_unlock(&op_arm_mutex); |
86 | return 0; | 236 | return 0; |
87 | } | 237 | } |
@@ -89,7 +239,7 @@ static int op_arm_suspend(struct sys_device *dev, pm_message_t state) | |||
89 | static int op_arm_resume(struct sys_device *dev) | 239 | static int op_arm_resume(struct sys_device *dev) |
90 | { | 240 | { |
91 | mutex_lock(&op_arm_mutex); | 241 | mutex_lock(&op_arm_mutex); |
92 | if (op_arm_enabled && op_arm_model->start()) | 242 | if (op_arm_enabled && op_perf_start()) |
93 | op_arm_enabled = 0; | 243 | op_arm_enabled = 0; |
94 | mutex_unlock(&op_arm_mutex); | 244 | mutex_unlock(&op_arm_mutex); |
95 | return 0; | 245 | return 0; |
@@ -126,58 +276,137 @@ static void exit_driverfs(void) | |||
126 | #define exit_driverfs() do { } while (0) | 276 | #define exit_driverfs() do { } while (0) |
127 | #endif /* CONFIG_PM */ | 277 | #endif /* CONFIG_PM */ |
128 | 278 | ||
129 | int __init oprofile_arch_init(struct oprofile_operations *ops) | 279 | static int report_trace(struct stackframe *frame, void *d) |
130 | { | 280 | { |
131 | struct op_arm_model_spec *spec = NULL; | 281 | unsigned int *depth = d; |
132 | int ret = -ENODEV; | ||
133 | 282 | ||
134 | ops->backtrace = arm_backtrace; | 283 | if (*depth) { |
284 | oprofile_add_trace(frame->pc); | ||
285 | (*depth)--; | ||
286 | } | ||
135 | 287 | ||
136 | #ifdef CONFIG_CPU_XSCALE | 288 | return *depth == 0; |
137 | spec = &op_xscale_spec; | 289 | } |
138 | #endif | ||
139 | 290 | ||
140 | #ifdef CONFIG_OPROFILE_ARMV6 | 291 | /* |
141 | spec = &op_armv6_spec; | 292 | * The registers we're interested in are at the end of the variable |
142 | #endif | 293 | * length saved register structure. The fp points at the end of this |
294 | * structure so the address of this struct is: | ||
295 | * (struct frame_tail *)(xxx->fp)-1 | ||
296 | */ | ||
297 | struct frame_tail { | ||
298 | struct frame_tail *fp; | ||
299 | unsigned long sp; | ||
300 | unsigned long lr; | ||
301 | } __attribute__((packed)); | ||
143 | 302 | ||
144 | #ifdef CONFIG_OPROFILE_MPCORE | 303 | static struct frame_tail* user_backtrace(struct frame_tail *tail) |
145 | spec = &op_mpcore_spec; | 304 | { |
146 | #endif | 305 | struct frame_tail buftail[2]; |
147 | 306 | ||
148 | #ifdef CONFIG_OPROFILE_ARMV7 | 307 | /* Also check accessibility of one struct frame_tail beyond */ |
149 | spec = &op_armv7_spec; | 308 | if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) |
150 | #endif | 309 | return NULL; |
310 | if (__copy_from_user_inatomic(buftail, tail, sizeof(buftail))) | ||
311 | return NULL; | ||
151 | 312 | ||
152 | if (spec) { | 313 | oprofile_add_trace(buftail[0].lr); |
153 | ret = spec->init(); | ||
154 | if (ret < 0) | ||
155 | return ret; | ||
156 | 314 | ||
157 | counter_config = kcalloc(spec->num_counters, sizeof(struct op_counter_config), | 315 | /* frame pointers should strictly progress back up the stack |
158 | GFP_KERNEL); | 316 | * (towards higher addresses) */ |
159 | if (!counter_config) | 317 | if (tail >= buftail[0].fp) |
160 | return -ENOMEM; | 318 | return NULL; |
319 | |||
320 | return buftail[0].fp-1; | ||
321 | } | ||
322 | |||
323 | static void arm_backtrace(struct pt_regs * const regs, unsigned int depth) | ||
324 | { | ||
325 | struct frame_tail *tail = ((struct frame_tail *) regs->ARM_fp) - 1; | ||
326 | |||
327 | if (!user_mode(regs)) { | ||
328 | struct stackframe frame; | ||
329 | frame.fp = regs->ARM_fp; | ||
330 | frame.sp = regs->ARM_sp; | ||
331 | frame.lr = regs->ARM_lr; | ||
332 | frame.pc = regs->ARM_pc; | ||
333 | walk_stackframe(&frame, report_trace, &depth); | ||
334 | return; | ||
335 | } | ||
336 | |||
337 | while (depth-- && tail && !((unsigned long) tail & 3)) | ||
338 | tail = user_backtrace(tail); | ||
339 | } | ||
340 | |||
341 | int __init oprofile_arch_init(struct oprofile_operations *ops) | ||
342 | { | ||
343 | int cpu, ret = 0; | ||
344 | |||
345 | perf_num_counters = armpmu_get_max_events(); | ||
346 | |||
347 | counter_config = kcalloc(perf_num_counters, | ||
348 | sizeof(struct op_counter_config), GFP_KERNEL); | ||
161 | 349 | ||
162 | op_arm_model = spec; | 350 | if (!counter_config) { |
163 | init_driverfs(); | 351 | pr_info("oprofile: failed to allocate %d " |
164 | ops->create_files = op_arm_create_files; | 352 | "counters\n", perf_num_counters); |
165 | ops->setup = op_arm_setup; | 353 | return -ENOMEM; |
166 | ops->shutdown = op_arm_stop; | ||
167 | ops->start = op_arm_start; | ||
168 | ops->stop = op_arm_stop; | ||
169 | ops->cpu_type = op_arm_model->name; | ||
170 | printk(KERN_INFO "oprofile: using %s\n", spec->name); | ||
171 | } | 354 | } |
172 | 355 | ||
356 | for_each_possible_cpu(cpu) { | ||
357 | perf_events[cpu] = kcalloc(perf_num_counters, | ||
358 | sizeof(struct perf_event *), GFP_KERNEL); | ||
359 | if (!perf_events[cpu]) { | ||
360 | pr_info("oprofile: failed to allocate %d perf events " | ||
361 | "for cpu %d\n", perf_num_counters, cpu); | ||
362 | while (--cpu >= 0) | ||
363 | kfree(perf_events[cpu]); | ||
364 | return -ENOMEM; | ||
365 | } | ||
366 | } | ||
367 | |||
368 | init_driverfs(); | ||
369 | ops->backtrace = arm_backtrace; | ||
370 | ops->create_files = op_arm_create_files; | ||
371 | ops->setup = op_arm_setup; | ||
372 | ops->start = op_arm_start; | ||
373 | ops->stop = op_arm_stop; | ||
374 | ops->shutdown = op_arm_stop; | ||
375 | ops->cpu_type = op_name_from_perf_id(armpmu_get_pmu_id()); | ||
376 | |||
377 | if (!ops->cpu_type) | ||
378 | ret = -ENODEV; | ||
379 | else | ||
380 | pr_info("oprofile: using %s\n", ops->cpu_type); | ||
381 | |||
173 | return ret; | 382 | return ret; |
174 | } | 383 | } |
175 | 384 | ||
176 | void oprofile_arch_exit(void) | 385 | void oprofile_arch_exit(void) |
177 | { | 386 | { |
178 | if (op_arm_model) { | 387 | int cpu, id; |
388 | struct perf_event *event; | ||
389 | |||
390 | if (*perf_events) { | ||
179 | exit_driverfs(); | 391 | exit_driverfs(); |
180 | op_arm_model = NULL; | 392 | for_each_possible_cpu(cpu) { |
393 | for (id = 0; id < perf_num_counters; ++id) { | ||
394 | event = perf_events[cpu][id]; | ||
395 | if (event != NULL) | ||
396 | perf_event_release_kernel(event); | ||
397 | } | ||
398 | kfree(perf_events[cpu]); | ||
399 | } | ||
181 | } | 400 | } |
182 | kfree(counter_config); | 401 | |
402 | if (counter_config) | ||
403 | kfree(counter_config); | ||
404 | } | ||
405 | #else | ||
406 | int __init oprofile_arch_init(struct oprofile_operations *ops) | ||
407 | { | ||
408 | pr_info("oprofile: hardware counters not available\n"); | ||
409 | return -ENODEV; | ||
183 | } | 410 | } |
411 | void oprofile_arch_exit(void) {} | ||
412 | #endif /* CONFIG_HW_PERF_EVENTS */ | ||