author     Robert Richter <robert.richter@amd.com>    2010-10-15 06:45:00 -0400
committer  Robert Richter <robert.richter@amd.com>    2010-10-15 06:45:00 -0400
commit     6268464b370e234e0255330190f9bd5d19386ad7 (patch)
tree       5742641092ce64227dd2086d78baaede57da1f80 /arch/arm/kernel/perf_event.c
parent     7df01d96b295e400167e78061b81d4c91630b12d (diff)
parent     0fdf13606b67f830559abdaad15980c7f4f05ec4 (diff)
Merge remote branch 'tip/perf/core' into oprofile/core

Conflicts:
        arch/arm/oprofile/common.c
        kernel/perf_event.c
Diffstat (limited to 'arch/arm/kernel/perf_event.c')

-rw-r--r--  arch/arm/kernel/perf_event.c | 198
1 file changed, 92 insertions(+), 106 deletions(-)
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index ef3bc331518f..6cc6521881aa 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -227,46 +227,56 @@ again:
 }
 
 static void
-armpmu_disable(struct perf_event *event)
+armpmu_read(struct perf_event *event)
 {
-        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
         struct hw_perf_event *hwc = &event->hw;
-        int idx = hwc->idx;
-
-        WARN_ON(idx < 0);
-
-        clear_bit(idx, cpuc->active_mask);
-        armpmu->disable(hwc, idx);
-
-        barrier();
 
-        armpmu_event_update(event, hwc, idx);
-        cpuc->events[idx] = NULL;
-        clear_bit(idx, cpuc->used_mask);
+        /* Don't read disabled counters! */
+        if (hwc->idx < 0)
+                return;
 
-        perf_event_update_userpage(event);
+        armpmu_event_update(event, hwc, hwc->idx);
 }
 
 static void
-armpmu_read(struct perf_event *event)
+armpmu_stop(struct perf_event *event, int flags)
 {
         struct hw_perf_event *hwc = &event->hw;
 
-        /* Don't read disabled counters! */
-        if (hwc->idx < 0)
+        if (!armpmu)
                 return;
 
-        armpmu_event_update(event, hwc, hwc->idx);
+        /*
+         * ARM pmu always has to update the counter, so ignore
+         * PERF_EF_UPDATE, see comments in armpmu_start().
+         */
+        if (!(hwc->state & PERF_HES_STOPPED)) {
+                armpmu->disable(hwc, hwc->idx);
+                barrier(); /* why? */
+                armpmu_event_update(event, hwc, hwc->idx);
+                hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+        }
 }
 
 static void
-armpmu_unthrottle(struct perf_event *event)
+armpmu_start(struct perf_event *event, int flags)
 {
         struct hw_perf_event *hwc = &event->hw;
 
+        if (!armpmu)
+                return;
+
+        /*
+         * ARM pmu always has to reprogram the period, so ignore
+         * PERF_EF_RELOAD, see the comment below.
+         */
+        if (flags & PERF_EF_RELOAD)
+                WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+        hwc->state = 0;
         /*
          * Set the period again. Some counters can't be stopped, so when we
-         * were throttled we simply disabled the IRQ source and the counter
+         * were stopped we simply disabled the IRQ source and the counter
          * may have been left counting. If we don't do this step then we may
          * get an interrupt too soon or *way* too late if the overflow has
          * happened since disabling.
@@ -275,14 +285,33 @@ armpmu_unthrottle(struct perf_event *event)
         armpmu->enable(hwc, hwc->idx);
 }
 
+static void
+armpmu_del(struct perf_event *event, int flags)
+{
+        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+        struct hw_perf_event *hwc = &event->hw;
+        int idx = hwc->idx;
+
+        WARN_ON(idx < 0);
+
+        clear_bit(idx, cpuc->active_mask);
+        armpmu_stop(event, PERF_EF_UPDATE);
+        cpuc->events[idx] = NULL;
+        clear_bit(idx, cpuc->used_mask);
+
+        perf_event_update_userpage(event);
+}
+
 static int
-armpmu_enable(struct perf_event *event)
+armpmu_add(struct perf_event *event, int flags)
 {
         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
         struct hw_perf_event *hwc = &event->hw;
         int idx;
         int err = 0;
 
+        perf_pmu_disable(event->pmu);
+
         /* If we don't have a space for the counter then finish early. */
         idx = armpmu->get_event_idx(cpuc, hwc);
         if (idx < 0) {
@@ -299,25 +328,19 @@ armpmu_enable(struct perf_event *event)
         cpuc->events[idx] = event;
         set_bit(idx, cpuc->active_mask);
 
-        /* Set the period for the event. */
-        armpmu_event_set_period(event, hwc, idx);
-
-        /* Enable the event. */
-        armpmu->enable(hwc, idx);
+        hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+        if (flags & PERF_EF_START)
+                armpmu_start(event, PERF_EF_RELOAD);
 
         /* Propagate our changes to the userspace mapping. */
         perf_event_update_userpage(event);
 
 out:
+        perf_pmu_enable(event->pmu);
         return err;
 }
 
-static struct pmu pmu = {
-        .enable     = armpmu_enable,
-        .disable    = armpmu_disable,
-        .unthrottle = armpmu_unthrottle,
-        .read       = armpmu_read,
-};
+static struct pmu pmu;
 
 static int
 validate_event(struct cpu_hw_events *cpuc,
@@ -497,20 +520,29 @@ __hw_perf_event_init(struct perf_event *event)
         return err;
 }
 
-const struct pmu *
-hw_perf_event_init(struct perf_event *event)
+static int armpmu_event_init(struct perf_event *event)
 {
         int err = 0;
 
+        switch (event->attr.type) {
+        case PERF_TYPE_RAW:
+        case PERF_TYPE_HARDWARE:
+        case PERF_TYPE_HW_CACHE:
+                break;
+
+        default:
+                return -ENOENT;
+        }
+
         if (!armpmu)
-                return ERR_PTR(-ENODEV);
+                return -ENODEV;
 
         event->destroy = hw_perf_event_destroy;
 
         if (!atomic_inc_not_zero(&active_events)) {
-                if (atomic_read(&active_events) > perf_max_events) {
+                if (atomic_read(&active_events) > armpmu->num_events) {
                         atomic_dec(&active_events);
-                        return ERR_PTR(-ENOSPC);
+                        return -ENOSPC;
                 }
 
                 mutex_lock(&pmu_reserve_mutex);
@@ -524,17 +556,16 @@ hw_perf_event_init(struct perf_event *event)
         }
 
         if (err)
-                return ERR_PTR(err);
+                return err;
 
         err = __hw_perf_event_init(event);
         if (err)
                 hw_perf_event_destroy(event);
 
-        return err ? ERR_PTR(err) : &pmu;
+        return err;
 }
 
-void
-hw_perf_enable(void)
+static void armpmu_enable(struct pmu *pmu)
 {
         /* Enable all of the perf events on hardware. */
         int idx;
@@ -555,13 +586,23 @@ hw_perf_enable(void)
                 armpmu->start();
 }
 
-void
-hw_perf_disable(void)
+static void armpmu_disable(struct pmu *pmu)
 {
         if (armpmu)
                 armpmu->stop();
 }
 
+static struct pmu pmu = {
+        .pmu_enable  = armpmu_enable,
+        .pmu_disable = armpmu_disable,
+        .event_init  = armpmu_event_init,
+        .add         = armpmu_add,
+        .del         = armpmu_del,
+        .start       = armpmu_start,
+        .stop        = armpmu_stop,
+        .read        = armpmu_read,
+};
+
 /*
  * ARMv6 Performance counter handling code.
  *
@@ -2939,14 +2980,12 @@ init_hw_perf_events(void)
                 armpmu = &armv6pmu;
                 memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
                         sizeof(armv6_perf_cache_map));
-                perf_max_events = armv6pmu.num_events;
                 break;
         case 0xB020:    /* ARM11mpcore */
                 armpmu = &armv6mpcore_pmu;
                 memcpy(armpmu_perf_cache_map,
                        armv6mpcore_perf_cache_map,
                        sizeof(armv6mpcore_perf_cache_map));
-                perf_max_events = armv6mpcore_pmu.num_events;
                 break;
         case 0xC080:    /* Cortex-A8 */
                 armv7pmu.id = ARM_PERF_PMU_ID_CA8;
@@ -2958,7 +2997,6 @@ init_hw_perf_events(void)
                 /* Reset PMNC and read the nb of CNTx counters
                     supported */
                 armv7pmu.num_events = armv7_reset_read_pmnc();
-                perf_max_events = armv7pmu.num_events;
                 break;
         case 0xC090:    /* Cortex-A9 */
                 armv7pmu.id = ARM_PERF_PMU_ID_CA9;
@@ -2970,7 +3008,6 @@ init_hw_perf_events(void)
                 /* Reset PMNC and read the nb of CNTx counters
                     supported */
                 armv7pmu.num_events = armv7_reset_read_pmnc();
-                perf_max_events = armv7pmu.num_events;
                 break;
         }
         /* Intel CPUs [xscale]. */
@@ -2981,13 +3018,11 @@ init_hw_perf_events(void)
                         armpmu = &xscale1pmu;
                         memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
                                         sizeof(xscale_perf_cache_map));
-                        perf_max_events = xscale1pmu.num_events;
                         break;
                 case 2:
                         armpmu = &xscale2pmu;
                         memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
                                         sizeof(xscale_perf_cache_map));
-                        perf_max_events = xscale2pmu.num_events;
                         break;
                 }
         }
@@ -2997,9 +3032,10 @@ init_hw_perf_events(void)
                                 arm_pmu_names[armpmu->id], armpmu->num_events);
         } else {
                 pr_info("no hardware support available\n");
-                perf_max_events = -1;
         }
 
+        perf_pmu_register(&pmu);
+
         return 0;
 }
 arch_initcall(init_hw_perf_events);
@@ -3007,13 +3043,6 @@ arch_initcall(init_hw_perf_events);
 /*
  * Callchain handling code.
  */
-static inline void
-callchain_store(struct perf_callchain_entry *entry,
-                u64 ip)
-{
-        if (entry->nr < PERF_MAX_STACK_DEPTH)
-                entry->ip[entry->nr++] = ip;
-}
 
 /*
  * The registers we're interested in are at the end of the variable
@@ -3045,7 +3074,7 @@ user_backtrace(struct frame_tail *tail,
         if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
                 return NULL;
 
-        callchain_store(entry, buftail.lr);
+        perf_callchain_store(entry, buftail.lr);
 
         /*
          * Frame pointers should strictly progress back up the stack
@@ -3057,16 +3086,11 @@ user_backtrace(struct frame_tail *tail,
         return buftail.fp - 1;
 }
 
-static void
-perf_callchain_user(struct pt_regs *regs,
-                    struct perf_callchain_entry *entry)
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
         struct frame_tail *tail;
 
-        callchain_store(entry, PERF_CONTEXT_USER);
-
-        if (!user_mode(regs))
-                regs = task_pt_regs(current);
 
         tail = (struct frame_tail *)regs->ARM_fp - 1;
 
@@ -3084,56 +3108,18 @@ callchain_trace(struct stackframe *fr,
                 void *data)
 {
         struct perf_callchain_entry *entry = data;
-        callchain_store(entry, fr->pc);
+        perf_callchain_store(entry, fr->pc);
         return 0;
 }
 
-static void
-perf_callchain_kernel(struct pt_regs *regs,
-                      struct perf_callchain_entry *entry)
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
         struct stackframe fr;
 
-        callchain_store(entry, PERF_CONTEXT_KERNEL);
         fr.fp = regs->ARM_fp;
         fr.sp = regs->ARM_sp;
         fr.lr = regs->ARM_lr;
         fr.pc = regs->ARM_pc;
         walk_stackframe(&fr, callchain_trace, entry);
 }
-
-static void
-perf_do_callchain(struct pt_regs *regs,
-                  struct perf_callchain_entry *entry)
-{
-        int is_user;
-
-        if (!regs)
-                return;
-
-        is_user = user_mode(regs);
-
-        if (!current || !current->pid)
-                return;
-
-        if (is_user && current->state != TASK_RUNNING)
-                return;
-
-        if (!is_user)
-                perf_callchain_kernel(regs, entry);
-
-        if (current->mm)
-                perf_callchain_user(regs, entry);
-}
-
-static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
-
-struct perf_callchain_entry *
-perf_callchain(struct pt_regs *regs)
-{
-        struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry);
-
-        entry->nr = 0;
-        perf_do_callchain(regs, entry);
-        return entry;
-}
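The reworked callbacks above are only ever reached through the core perf syscall path. What follows is a minimal user-space sketch, not part of this commit, of driving a hardware counter via perf_event_open(2): creating the event lands in the PMU's event_init callback, and the enable/disable ioctls are what eventually reach the arch-level add/start and stop/del callbacks on ARM. The choice of event, the busy loop, and the thin error handling are illustrative only.

/* perf_cycles.c - count CPU cycles for a short busy loop. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <asm/unistd.h>
#include <linux/perf_event.h>

/* glibc provides no wrapper for perf_event_open, so invoke the syscall directly. */
static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
                            int cpu, int group_fd, unsigned long flags)
{
        return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
        struct perf_event_attr attr;
        long long count;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.type = PERF_TYPE_HARDWARE;         /* accepted by armpmu_event_init() */
        attr.size = sizeof(attr);
        attr.config = PERF_COUNT_HW_CPU_CYCLES;
        attr.disabled = 1;                      /* create stopped, enable explicitly */
        attr.exclude_kernel = 1;
        attr.exclude_hv = 1;

        fd = perf_event_open(&attr, 0, -1, -1, 0);  /* this task, any CPU */
        if (fd < 0) {
                perror("perf_event_open");
                return EXIT_FAILURE;
        }

        ioctl(fd, PERF_EVENT_IOC_RESET, 0);
        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);    /* core schedules the event in: pmu->add()/start() */

        for (volatile int i = 0; i < 1000000; i++)
                ;                               /* some work to count */

        ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);   /* event scheduled out: pmu->stop()/del() */

        if (read(fd, &count, sizeof(count)) == sizeof(count))
                printf("cycles: %lld\n", count);

        close(fd);
        return 0;
}

Build with any C compiler on a perf-capable Linux system; the reported count depends on the CPU and on how much of the loop the compiler keeps.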