 arch/arm/oprofile/common.c | 333 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 281 insertions(+), 52 deletions(-)

diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c
index 3fcd752d6146..aad83df89bac 100644
--- a/arch/arm/oprofile/common.c
+++ b/arch/arm/oprofile/common.c
@@ -2,32 +2,183 @@
  * @file common.c
  *
  * @remark Copyright 2004 Oprofile Authors
+ * @remark Copyright 2010 ARM Ltd.
  * @remark Read the file COPYING
  *
  * @author Zwane Mwaikambo
+ * @author Will Deacon [move to perf]
  */
 
+#include <linux/cpumask.h>
+#include <linux/errno.h>
 #include <linux/init.h>
+#include <linux/mutex.h>
 #include <linux/oprofile.h>
-#include <linux/errno.h>
+#include <linux/perf_event.h>
 #include <linux/slab.h>
 #include <linux/sysdev.h>
-#include <linux/mutex.h>
+#include <asm/stacktrace.h>
+#include <linux/uaccess.h>
+
+#include <asm/perf_event.h>
+#include <asm/ptrace.h>
 
-#include "op_counter.h"
-#include "op_arm_model.h"
+#ifdef CONFIG_HW_PERF_EVENTS
+/*
+ * Per performance monitor configuration as set via oprofilefs.
+ */
+struct op_counter_config {
+	unsigned long count;
+	unsigned long enabled;
+	unsigned long event;
+	unsigned long unit_mask;
+	unsigned long kernel;
+	unsigned long user;
+	struct perf_event_attr attr;
+};
 
-static struct op_arm_model_spec *op_arm_model;
 static int op_arm_enabled;
 static DEFINE_MUTEX(op_arm_mutex);
 
-struct op_counter_config *counter_config;
+static struct op_counter_config *counter_config;
+static struct perf_event **perf_events[nr_cpumask_bits];
+static int perf_num_counters;
+
+/*
+ * Overflow callback for oprofile.
+ */
+static void op_overflow_handler(struct perf_event *event, int unused,
+			struct perf_sample_data *data, struct pt_regs *regs)
+{
+	int id;
+	u32 cpu = smp_processor_id();
+
+	for (id = 0; id < perf_num_counters; ++id)
+		if (perf_events[cpu][id] == event)
+			break;
+
+	if (id != perf_num_counters)
+		oprofile_add_sample(regs, id);
+	else
+		pr_warning("oprofile: ignoring spurious overflow "
+				"on cpu %u\n", cpu);
+}
+
+/*
+ * Called by op_arm_setup to create perf attributes to mirror the oprofile
+ * settings in counter_config. Attributes are created as `pinned' events and
+ * so are permanently scheduled on the PMU.
+ */
+static void op_perf_setup(void)
+{
+	int i;
+	u32 size = sizeof(struct perf_event_attr);
+	struct perf_event_attr *attr;
+
+	for (i = 0; i < perf_num_counters; ++i) {
+		attr = &counter_config[i].attr;
+		memset(attr, 0, size);
+		attr->type = PERF_TYPE_RAW;
+		attr->size = size;
+		attr->config = counter_config[i].event;
+		attr->sample_period = counter_config[i].count;
+		attr->pinned = 1;
+	}
+}
+
+static int op_create_counter(int cpu, int event)
+{
+	int ret = 0;
+	struct perf_event *pevent;
+
+	if (!counter_config[event].enabled || (perf_events[cpu][event] != NULL))
+		return ret;
+
+	pevent = perf_event_create_kernel_counter(&counter_config[event].attr,
+						  cpu, -1,
+						  op_overflow_handler);
+
+	if (IS_ERR(pevent)) {
+		ret = PTR_ERR(pevent);
+	} else if (pevent->state != PERF_EVENT_STATE_ACTIVE) {
+		pr_warning("oprofile: failed to enable event %d "
+				"on CPU %d\n", event, cpu);
+		ret = -EBUSY;
+	} else {
+		perf_events[cpu][event] = pevent;
+	}
+
+	return ret;
+}
+
+static void op_destroy_counter(int cpu, int event)
+{
+	struct perf_event *pevent = perf_events[cpu][event];
+
+	if (pevent) {
+		perf_event_release_kernel(pevent);
+		perf_events[cpu][event] = NULL;
+	}
+}
+
+/*
+ * Called by op_arm_start to create active perf events based on the
+ * previously configured attributes.
+ */
+static int op_perf_start(void)
+{
+	int cpu, event, ret = 0;
+
+	for_each_online_cpu(cpu) {
+		for (event = 0; event < perf_num_counters; ++event) {
+			ret = op_create_counter(cpu, event);
+			if (ret)
+				goto out;
+		}
+	}
+
+out:
+	return ret;
+}
+
+/*
+ * Called by op_arm_stop at the end of a profiling run.
+ */
+static void op_perf_stop(void)
+{
+	int cpu, event;
+
+	for_each_online_cpu(cpu)
+		for (event = 0; event < perf_num_counters; ++event)
+			op_destroy_counter(cpu, event);
+}
+
+
+static char *op_name_from_perf_id(enum arm_perf_pmu_ids id)
+{
+	switch (id) {
+	case ARM_PERF_PMU_ID_XSCALE1:
+		return "arm/xscale1";
+	case ARM_PERF_PMU_ID_XSCALE2:
+		return "arm/xscale2";
+	case ARM_PERF_PMU_ID_V6:
+		return "arm/armv6";
+	case ARM_PERF_PMU_ID_V6MP:
+		return "arm/mpcore";
+	case ARM_PERF_PMU_ID_CA8:
+		return "arm/armv7";
+	case ARM_PERF_PMU_ID_CA9:
+		return "arm/armv7-ca9";
+	default:
+		return NULL;
+	}
+}
 
 static int op_arm_create_files(struct super_block *sb, struct dentry *root)
 {
 	unsigned int i;
 
-	for (i = 0; i < op_arm_model->num_counters; i++) {
+	for (i = 0; i < perf_num_counters; i++) {
 		struct dentry *dir;
 		char buf[4];
 
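The in-kernel perf API drives everything above: op_perf_setup() only fills in
per-counter perf_event_attr templates, and no PMU state is touched until
op_create_counter() hands an attr to perf_event_create_kernel_counter(). A
minimal sketch of the same pattern, using the four-argument signature this
patch relies on (the cycle-counter event and sample period below are
illustrative, not taken from the patch):

	static struct perf_event *cycle_counter_sketch(int cpu)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.type          = PERF_TYPE_HARDWARE;
		attr.config        = PERF_COUNT_HW_CPU_CYCLES;
		attr.size          = sizeof(attr);
		attr.sample_period = 1000000;	/* overflow every 1M cycles */
		attr.pinned        = 1;		/* always scheduled on the PMU */

		/* cpu >= 0, pid == -1: count on that CPU, whatever runs there */
		return perf_event_create_kernel_counter(&attr, cpu, -1,
							op_overflow_handler);
	}

On error the call returns an ERR_PTR() value rather than NULL, which is why
op_create_counter() tests IS_ERR() and additionally checks that the pinned
event actually reached PERF_EVENT_STATE_ACTIVE instead of having been kicked
off an already-busy PMU.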
@@ -46,12 +197,10 @@ static int op_arm_create_files(struct super_block *sb, struct dentry *root)
 
 static int op_arm_setup(void)
 {
-	int ret;
-
 	spin_lock(&oprofilefs_lock);
-	ret = op_arm_model->setup_ctrs();
+	op_perf_setup();
 	spin_unlock(&oprofilefs_lock);
-	return ret;
+	return 0;
 }
 
 static int op_arm_start(void)
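The split between op_arm_setup() and op_arm_start() is deliberate:
oprofilefs_lock is a spinlock, so op_perf_setup() must not sleep. It only
copies the oprofilefs settings into each counter's attr; the blocking
perf_event_create_kernel_counter() calls happen later, from op_arm_start().
For a counter configured via oprofilefs with, say, event 0x44 and count
100000 (values chosen purely for illustration), op_perf_setup() leaves behind
the equivalent of:

	struct perf_event_attr attr = {
		.type          = PERF_TYPE_RAW,	/* raw PMU event number */
		.size          = sizeof(struct perf_event_attr),
		.config        = 0x44,		/* counter_config[i].event */
		.sample_period = 100000,	/* counter_config[i].count */
		.pinned        = 1,
	};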
@@ -60,8 +209,9 @@ static int op_arm_start(void)
 
 	mutex_lock(&op_arm_mutex);
 	if (!op_arm_enabled) {
-		ret = op_arm_model->start();
-		op_arm_enabled = !ret;
+		ret = 0;
+		op_perf_start();
+		op_arm_enabled = 1;
 	}
 	mutex_unlock(&op_arm_mutex);
 	return ret;
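Note that op_arm_start() discards op_perf_start()'s return value: ret is
forced to 0 and op_arm_enabled is set unconditionally, so a counter that
fails to schedule only shows up as the pr_warning() in op_create_counter().
A stricter variant, mirroring the error handling of the op_arm_model code
being removed (a sketch, not part of this patch), would be:

	mutex_lock(&op_arm_mutex);
	if (!op_arm_enabled) {
		ret = op_perf_start();
		if (ret)
			op_perf_stop();	/* tear down partially created counters */
		else
			op_arm_enabled = 1;
	}
	mutex_unlock(&op_arm_mutex);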
@@ -71,7 +221,7 @@ static void op_arm_stop(void)
 {
 	mutex_lock(&op_arm_mutex);
 	if (op_arm_enabled)
-		op_arm_model->stop();
+		op_perf_stop();
 	op_arm_enabled = 0;
 	mutex_unlock(&op_arm_mutex);
 }
@@ -81,7 +231,7 @@ static int op_arm_suspend(struct sys_device *dev, pm_message_t state)
 {
 	mutex_lock(&op_arm_mutex);
 	if (op_arm_enabled)
-		op_arm_model->stop();
+		op_perf_stop();
 	mutex_unlock(&op_arm_mutex);
 	return 0;
 }
@@ -89,7 +239,7 @@ static int op_arm_suspend(struct sys_device *dev, pm_message_t state)
 static int op_arm_resume(struct sys_device *dev)
 {
 	mutex_lock(&op_arm_mutex);
-	if (op_arm_enabled && op_arm_model->start())
+	if (op_arm_enabled && op_perf_start())
 		op_arm_enabled = 0;
 	mutex_unlock(&op_arm_mutex);
 	return 0;
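Unlike op_arm_start(), the resume path does consume op_perf_start()'s return
value: if the counters cannot be re-created after a suspend/resume cycle,
op_arm_enabled is cleared so that profiling is reported as stopped rather
than left silently broken.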
@@ -126,58 +276,137 @@ static void exit_driverfs(void)
 #define exit_driverfs() do { } while (0)
 #endif /* CONFIG_PM */
 
-int __init oprofile_arch_init(struct oprofile_operations *ops)
+static int report_trace(struct stackframe *frame, void *d)
 {
-	struct op_arm_model_spec *spec = NULL;
-	int ret = -ENODEV;
+	unsigned int *depth = d;
 
-	ops->backtrace = arm_backtrace;
+	if (*depth) {
+		oprofile_add_trace(frame->pc);
+		(*depth)--;
+	}
 
-#ifdef CONFIG_CPU_XSCALE
-	spec = &op_xscale_spec;
-#endif
+	return *depth == 0;
+}
 
-#ifdef CONFIG_OPROFILE_ARMV6
-	spec = &op_armv6_spec;
-#endif
+/*
+ * The registers we're interested in are at the end of the variable
+ * length saved register structure. The fp points at the end of this
+ * structure so the address of this struct is:
+ * (struct frame_tail *)(xxx->fp)-1
+ */
+struct frame_tail {
+	struct frame_tail *fp;
+	unsigned long sp;
+	unsigned long lr;
+} __attribute__((packed));
 
-#ifdef CONFIG_OPROFILE_MPCORE
-	spec = &op_mpcore_spec;
-#endif
+static struct frame_tail* user_backtrace(struct frame_tail *tail)
+{
+	struct frame_tail buftail[2];
 
-#ifdef CONFIG_OPROFILE_ARMV7
-	spec = &op_armv7_spec;
-#endif
+	/* Also check accessibility of one struct frame_tail beyond */
+	if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
+		return NULL;
+	if (__copy_from_user_inatomic(buftail, tail, sizeof(buftail)))
+		return NULL;
 
-	if (spec) {
-		ret = spec->init();
-		if (ret < 0)
-			return ret;
+	oprofile_add_trace(buftail[0].lr);
 
-		counter_config = kcalloc(spec->num_counters, sizeof(struct op_counter_config),
-					 GFP_KERNEL);
-		if (!counter_config)
-			return -ENOMEM;
+	/* frame pointers should strictly progress back up the stack
+	 * (towards higher addresses) */
+	if (tail >= buftail[0].fp)
+		return NULL;
+
+	return buftail[0].fp-1;
+}
+
+static void arm_backtrace(struct pt_regs * const regs, unsigned int depth)
+{
+	struct frame_tail *tail = ((struct frame_tail *) regs->ARM_fp) - 1;
+
+	if (!user_mode(regs)) {
+		struct stackframe frame;
+		frame.fp = regs->ARM_fp;
+		frame.sp = regs->ARM_sp;
+		frame.lr = regs->ARM_lr;
+		frame.pc = regs->ARM_pc;
+		walk_stackframe(&frame, report_trace, &depth);
+		return;
+	}
+
+	while (depth-- && tail && !((unsigned long) tail & 3))
+		tail = user_backtrace(tail);
+}
+
+int __init oprofile_arch_init(struct oprofile_operations *ops)
+{
+	int cpu, ret = 0;
+
+	perf_num_counters = armpmu_get_max_events();
+
+	counter_config = kcalloc(perf_num_counters,
+			sizeof(struct op_counter_config), GFP_KERNEL);
 
-		op_arm_model = spec;
-		init_driverfs();
-		ops->create_files = op_arm_create_files;
-		ops->setup = op_arm_setup;
-		ops->shutdown = op_arm_stop;
-		ops->start = op_arm_start;
-		ops->stop = op_arm_stop;
-		ops->cpu_type = op_arm_model->name;
-		printk(KERN_INFO "oprofile: using %s\n", spec->name);
+	if (!counter_config) {
+		pr_info("oprofile: failed to allocate %d "
+				"counters\n", perf_num_counters);
+		return -ENOMEM;
 	}
 
+	for_each_possible_cpu(cpu) {
+		perf_events[cpu] = kcalloc(perf_num_counters,
+				sizeof(struct perf_event *), GFP_KERNEL);
+		if (!perf_events[cpu]) {
+			pr_info("oprofile: failed to allocate %d perf events "
+					"for cpu %d\n", perf_num_counters, cpu);
+			while (--cpu >= 0)
+				kfree(perf_events[cpu]);
+			return -ENOMEM;
+		}
+	}
+
+	init_driverfs();
+	ops->backtrace = arm_backtrace;
+	ops->create_files = op_arm_create_files;
+	ops->setup = op_arm_setup;
+	ops->start = op_arm_start;
+	ops->stop = op_arm_stop;
+	ops->shutdown = op_arm_stop;
+	ops->cpu_type = op_name_from_perf_id(armpmu_get_pmu_id());
+
+	if (!ops->cpu_type)
+		ret = -ENODEV;
+	else
+		pr_info("oprofile: using %s\n", ops->cpu_type);
+
 	return ret;
 }
 
 void oprofile_arch_exit(void)
 {
-	if (op_arm_model) {
+	int cpu, id;
+	struct perf_event *event;
+
+	if (*perf_events) {
 		exit_driverfs();
-		op_arm_model = NULL;
+		for_each_possible_cpu(cpu) {
+			for (id = 0; id < perf_num_counters; ++id) {
+				event = perf_events[cpu][id];
+				if (event != NULL)
+					perf_event_release_kernel(event);
+			}
+			kfree(perf_events[cpu]);
+		}
 	}
-	kfree(counter_config);
+
+	if (counter_config)
+		kfree(counter_config);
+}
+#else
+int __init oprofile_arch_init(struct oprofile_operations *ops)
+{
+	pr_info("oprofile: hardware counters not available\n");
+	return -ENODEV;
 }
+void oprofile_arch_exit(void) {}
+#endif /* CONFIG_HW_PERF_EVENTS */
