Diffstat (limited to 'arch/arm/kernel')
 -rw-r--r--   arch/arm/kernel/perf_event.c | 198
 1 file changed, 92 insertions, 106 deletions
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index ecbb0288e5dd..ad19c276b10f 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -221,46 +221,56 @@ again:
 }
 
 static void
-armpmu_disable(struct perf_event *event)
+armpmu_read(struct perf_event *event)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
-	int idx = hwc->idx;
-
-	WARN_ON(idx < 0);
-
-	clear_bit(idx, cpuc->active_mask);
-	armpmu->disable(hwc, idx);
-
-	barrier();
 
-	armpmu_event_update(event, hwc, idx);
-	cpuc->events[idx] = NULL;
-	clear_bit(idx, cpuc->used_mask);
+	/* Don't read disabled counters! */
+	if (hwc->idx < 0)
+		return;
 
-	perf_event_update_userpage(event);
+	armpmu_event_update(event, hwc, hwc->idx);
 }
 
 static void
-armpmu_read(struct perf_event *event)
+armpmu_stop(struct perf_event *event, int flags)
 {
 	struct hw_perf_event *hwc = &event->hw;
 
-	/* Don't read disabled counters! */
-	if (hwc->idx < 0)
+	if (!armpmu)
 		return;
 
-	armpmu_event_update(event, hwc, hwc->idx);
+	/*
+	 * ARM pmu always has to update the counter, so ignore
+	 * PERF_EF_UPDATE, see comments in armpmu_start().
+	 */
+	if (!(hwc->state & PERF_HES_STOPPED)) {
+		armpmu->disable(hwc, hwc->idx);
+		barrier(); /* why? */
+		armpmu_event_update(event, hwc, hwc->idx);
+		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	}
 }
 
 static void
-armpmu_unthrottle(struct perf_event *event)
+armpmu_start(struct perf_event *event, int flags)
 {
 	struct hw_perf_event *hwc = &event->hw;
 
+	if (!armpmu)
+		return;
+
+	/*
+	 * ARM pmu always has to reprogram the period, so ignore
+	 * PERF_EF_RELOAD, see the comment below.
+	 */
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+	hwc->state = 0;
 	/*
 	 * Set the period again. Some counters can't be stopped, so when we
-	 * were throttled we simply disabled the IRQ source and the counter
+	 * were stopped we simply disabled the IRQ source and the counter
 	 * may have been left counting. If we don't do this step then we may
 	 * get an interrupt too soon or *way* too late if the overflow has
 	 * happened since disabling.
@@ -269,14 +279,33 @@ armpmu_unthrottle(struct perf_event *event)
 	armpmu->enable(hwc, hwc->idx);
 }
 
+static void
+armpmu_del(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	WARN_ON(idx < 0);
+
+	clear_bit(idx, cpuc->active_mask);
+	armpmu_stop(event, PERF_EF_UPDATE);
+	cpuc->events[idx] = NULL;
+	clear_bit(idx, cpuc->used_mask);
+
+	perf_event_update_userpage(event);
+}
+
 static int
-armpmu_enable(struct perf_event *event)
+armpmu_add(struct perf_event *event, int flags)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	int idx;
 	int err = 0;
 
+	perf_pmu_disable(event->pmu);
+
 	/* If we don't have a space for the counter then finish early. */
 	idx = armpmu->get_event_idx(cpuc, hwc);
 	if (idx < 0) {
@@ -293,25 +322,19 @@ armpmu_enable(struct perf_event *event)
 	cpuc->events[idx] = event;
 	set_bit(idx, cpuc->active_mask);
 
-	/* Set the period for the event. */
-	armpmu_event_set_period(event, hwc, idx);
-
-	/* Enable the event. */
-	armpmu->enable(hwc, idx);
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	if (flags & PERF_EF_START)
+		armpmu_start(event, PERF_EF_RELOAD);
 
 	/* Propagate our changes to the userspace mapping. */
 	perf_event_update_userpage(event);
 
 out:
+	perf_pmu_enable(event->pmu);
 	return err;
 }
 
-static struct pmu pmu = {
-	.enable     = armpmu_enable,
-	.disable    = armpmu_disable,
-	.unthrottle = armpmu_unthrottle,
-	.read       = armpmu_read,
-};
+static struct pmu pmu;
 
 static int
 validate_event(struct cpu_hw_events *cpuc,
@@ -491,20 +514,29 @@ __hw_perf_event_init(struct perf_event *event)
 	return err;
 }
 
-const struct pmu *
-hw_perf_event_init(struct perf_event *event)
+static int armpmu_event_init(struct perf_event *event)
 {
 	int err = 0;
 
+	switch (event->attr.type) {
+	case PERF_TYPE_RAW:
+	case PERF_TYPE_HARDWARE:
+	case PERF_TYPE_HW_CACHE:
+		break;
+
+	default:
+		return -ENOENT;
+	}
+
 	if (!armpmu)
-		return ERR_PTR(-ENODEV);
+		return -ENODEV;
 
 	event->destroy = hw_perf_event_destroy;
 
 	if (!atomic_inc_not_zero(&active_events)) {
-		if (atomic_read(&active_events) > perf_max_events) {
+		if (atomic_read(&active_events) > armpmu->num_events) {
 			atomic_dec(&active_events);
-			return ERR_PTR(-ENOSPC);
+			return -ENOSPC;
 		}
 
 		mutex_lock(&pmu_reserve_mutex);
@@ -518,17 +550,16 @@ hw_perf_event_init(struct perf_event *event)
 	}
 
 	if (err)
-		return ERR_PTR(err);
+		return err;
 
 	err = __hw_perf_event_init(event);
 	if (err)
 		hw_perf_event_destroy(event);
 
-	return err ? ERR_PTR(err) : &pmu;
+	return err;
 }
 
-void
-hw_perf_enable(void)
+static void armpmu_enable(struct pmu *pmu)
 {
 	/* Enable all of the perf events on hardware. */
 	int idx;
@@ -549,13 +580,23 @@ hw_perf_enable(void)
 	armpmu->start();
 }
 
-void
-hw_perf_disable(void)
+static void armpmu_disable(struct pmu *pmu)
 {
 	if (armpmu)
 		armpmu->stop();
 }
 
+static struct pmu pmu = {
+	.pmu_enable  = armpmu_enable,
+	.pmu_disable = armpmu_disable,
+	.event_init  = armpmu_event_init,
+	.add         = armpmu_add,
+	.del         = armpmu_del,
+	.start       = armpmu_start,
+	.stop        = armpmu_stop,
+	.read        = armpmu_read,
+};
+
 /*
  * ARMv6 Performance counter handling code.
  *
@@ -2933,14 +2974,12 @@ init_hw_perf_events(void)
 			armpmu = &armv6pmu;
 			memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
 					sizeof(armv6_perf_cache_map));
-			perf_max_events = armv6pmu.num_events;
 			break;
 		case 0xB020:	/* ARM11mpcore */
 			armpmu = &armv6mpcore_pmu;
 			memcpy(armpmu_perf_cache_map,
 			       armv6mpcore_perf_cache_map,
 			       sizeof(armv6mpcore_perf_cache_map));
-			perf_max_events = armv6mpcore_pmu.num_events;
 			break;
 		case 0xC080:	/* Cortex-A8 */
 			armv7pmu.id = ARM_PERF_PMU_ID_CA8;
@@ -2952,7 +2991,6 @@ init_hw_perf_events(void)
 			/* Reset PMNC and read the nb of CNTx counters
 			    supported */
 			armv7pmu.num_events = armv7_reset_read_pmnc();
-			perf_max_events = armv7pmu.num_events;
 			break;
 		case 0xC090:	/* Cortex-A9 */
 			armv7pmu.id = ARM_PERF_PMU_ID_CA9;
@@ -2964,7 +3002,6 @@ init_hw_perf_events(void)
 			/* Reset PMNC and read the nb of CNTx counters
 			    supported */
 			armv7pmu.num_events = armv7_reset_read_pmnc();
-			perf_max_events = armv7pmu.num_events;
 			break;
 		}
 	/* Intel CPUs [xscale]. */
@@ -2975,13 +3012,11 @@ init_hw_perf_events(void)
 			armpmu = &xscale1pmu;
 			memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
 					sizeof(xscale_perf_cache_map));
-			perf_max_events = xscale1pmu.num_events;
 			break;
 		case 2:
 			armpmu = &xscale2pmu;
 			memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
 					sizeof(xscale_perf_cache_map));
-			perf_max_events = xscale2pmu.num_events;
 			break;
 		}
 	}
@@ -2991,9 +3026,10 @@ init_hw_perf_events(void)
 				arm_pmu_names[armpmu->id], armpmu->num_events);
 	} else {
 		pr_info("no hardware support available\n");
-		perf_max_events = -1;
 	}
 
+	perf_pmu_register(&pmu);
+
 	return 0;
 }
 arch_initcall(init_hw_perf_events);
@@ -3001,13 +3037,6 @@ arch_initcall(init_hw_perf_events);
 /*
  * Callchain handling code.
  */
-static inline void
-callchain_store(struct perf_callchain_entry *entry,
-		u64 ip)
-{
-	if (entry->nr < PERF_MAX_STACK_DEPTH)
-		entry->ip[entry->nr++] = ip;
-}
 
 /*
  * The registers we're interested in are at the end of the variable
@@ -3039,7 +3068,7 @@ user_backtrace(struct frame_tail *tail,
 	if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
 		return NULL;
 
-	callchain_store(entry, buftail.lr);
+	perf_callchain_store(entry, buftail.lr);
 
 	/*
 	 * Frame pointers should strictly progress back up the stack
@@ -3051,16 +3080,11 @@ user_backtrace(struct frame_tail *tail,
 	return buftail.fp - 1;
 }
 
-static void
-perf_callchain_user(struct pt_regs *regs,
-		    struct perf_callchain_entry *entry)
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
 	struct frame_tail *tail;
 
-	callchain_store(entry, PERF_CONTEXT_USER);
-
-	if (!user_mode(regs))
-		regs = task_pt_regs(current);
 
 	tail = (struct frame_tail *)regs->ARM_fp - 1;
 
@@ -3078,56 +3102,18 @@ callchain_trace(struct stackframe *fr,
 	       void *data)
 {
 	struct perf_callchain_entry *entry = data;
-	callchain_store(entry, fr->pc);
+	perf_callchain_store(entry, fr->pc);
 	return 0;
 }
 
-static void
-perf_callchain_kernel(struct pt_regs *regs,
-		      struct perf_callchain_entry *entry)
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
 	struct stackframe fr;
 
-	callchain_store(entry, PERF_CONTEXT_KERNEL);
 	fr.fp = regs->ARM_fp;
 	fr.sp = regs->ARM_sp;
 	fr.lr = regs->ARM_lr;
 	fr.pc = regs->ARM_pc;
 	walk_stackframe(&fr, callchain_trace, entry);
 }
-
-static void
-perf_do_callchain(struct pt_regs *regs,
-		  struct perf_callchain_entry *entry)
-{
-	int is_user;
-
-	if (!regs)
-		return;
-
-	is_user = user_mode(regs);
-
-	if (!current || !current->pid)
-		return;
-
-	if (is_user && current->state != TASK_RUNNING)
-		return;
-
-	if (!is_user)
-		perf_callchain_kernel(regs, entry);
-
-	if (current->mm)
-		perf_callchain_user(regs, entry);
-}
-
-static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
-
-struct perf_callchain_entry *
-perf_callchain(struct pt_regs *regs)
-{
-	struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry);
-
-	entry->nr = 0;
-	perf_do_callchain(regs, entry);
-	return entry;
-}
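
For readers unfamiliar with the reworked pmu API that this patch adopts, the stub skeleton below condenses the callback contract: add()/del() put an event onto and take it off a counter, start()/stop() run and halt an already-added event, and the PERF_HES_STOPPED/PERF_HES_UPTODATE bits in hwc->state track whether the counter is running and whether its count has been folded back. This is an illustrative sketch only, not part of the patch: the mypmu_* names are hypothetical, the bodies are stubs, and it assumes the one-argument perf_pmu_register() form used here (later kernels also take a name and type).

	#include <linux/module.h>
	#include <linux/perf_event.h>

	static void mypmu_start(struct perf_event *event, int flags)
	{
		/* reprogram the period and let the hardware counter run */
		event->hw.state = 0;
	}

	static void mypmu_stop(struct perf_event *event, int flags)
	{
		/* halt the counter and fold its count into the event */
		if (!(event->hw.state & PERF_HES_STOPPED))
			event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
	}

	static int mypmu_add(struct perf_event *event, int flags)
	{
		/* claim a counter; start it only if the core asked us to */
		event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
		if (flags & PERF_EF_START)
			mypmu_start(event, PERF_EF_RELOAD);
		return 0;
	}

	static void mypmu_del(struct perf_event *event, int flags)
	{
		/* stop counting (updating the count) and release the counter */
		mypmu_stop(event, PERF_EF_UPDATE);
	}

	static void mypmu_read(struct perf_event *event)
	{
		/* fold the current hardware count into event->count */
	}

	static int mypmu_event_init(struct perf_event *event)
	{
		/* claim only the event types this PMU understands */
		return event->attr.type == PERF_TYPE_HARDWARE ? 0 : -ENOENT;
	}

	static struct pmu mypmu = {
		.event_init = mypmu_event_init,
		.add        = mypmu_add,
		.del        = mypmu_del,
		.start      = mypmu_start,
		.stop       = mypmu_stop,
		.read       = mypmu_read,
	};

	static int __init mypmu_init(void)
	{
		/* one-argument form as used in this patch; later kernels differ */
		return perf_pmu_register(&mypmu);
	}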