author     Arnd Bergmann <arnd@arndb.de>   2014-11-20 07:49:52 -0500
committer  Arnd Bergmann <arnd@arndb.de>   2014-11-20 07:49:52 -0500
commit     b9e0e5a9e075575cc47940da8271d4908d3ae9c3 (patch)
tree       1ad6cafc584265817fc7ff772dc6c27745c7e55d /drivers/bus
parent     c3e6dc65f2ce83dacc0a18104bf44931e7eb8a5d (diff)
parent     af66abfe2ec8bd82211e9e4f036a64c902ff4cdb (diff)
Merge tag 'arm-perf-3.19' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux into next/drivers
Pull "ARM: perf: updates for 3.19" from Will Deacon:
This patch series takes us slightly further on the road to big.LITTLE
support in perf. The main change enabling this is moving the CCI PMU
driver away from the arm-pmu abstraction, allowing the arch code to
focus specifically on support for CPU PMUs.
* tag 'arm-perf-3.19' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux:
arm: perf: fold hotplug notifier into arm_pmu
arm: perf: dynamically allocate cpu hardware data
arm: perf: fold percpu_pmu into pmu_hw_events
arm: perf: kill get_hw_events()
arm: perf: limit size of accounting data
arm: perf: use IDR types for CPU PMUs
arm: perf: make PMU probing data-driven
arm: perf: add missing pr_info newlines
arm: perf: factor out callchain code
ARM: perf: use pr_* instead of printk
ARM: perf: remove useless return and check of idx in counter handling
bus: cci: move away from arm_pmu framework
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
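
In concrete terms, "moving the CCI PMU driver away from the arm-pmu abstraction" means arm-cci.c now fills in a plain struct pmu and registers it with the perf core itself, instead of populating an arm_pmu and going through armpmu_register(). A condensed before/after sketch of the registration path (callback names are taken from the arm-cci.c diff below; the bodies are trimmed and the _old/_new suffixes are only labels for this comparison, not names used in the tree):

/* Before: the driver leaned on the arch-specific arm_pmu framework. */
static int cci_pmu_init_old(struct arm_pmu *cci_pmu, struct platform_device *pdev)
{
	*cci_pmu = (struct arm_pmu){
		.name       = pmu_names[probe_cci_revision()],
		.handle_irq = pmu_handle_irq,
		.enable     = pmu_enable_event,
		.disable    = pmu_disable_event,
		/* ... remaining arm_pmu callbacks ... */
	};
	cci_pmu->plat_device = pdev;
	return armpmu_register(cci_pmu, -1);
}

/* After: a self-contained uncore PMU registered directly with the perf core. */
static int cci_pmu_init_new(struct cci_pmu *cci_pmu, struct platform_device *pdev)
{
	cci_pmu->pmu = (struct pmu) {
		.name        = pmu_names[probe_cci_revision()],
		.task_ctx_nr = perf_invalid_context,	/* uncore: no per-task context */
		.event_init  = cci_pmu_event_init,
		.add         = cci_pmu_add,
		.del         = cci_pmu_del,
		.start       = cci_pmu_start,
		.stop        = cci_pmu_stop,
		.read        = pmu_read,
	};
	cci_pmu->plat_device = pdev;
	return perf_pmu_register(&cci_pmu->pmu, pmu_names[probe_cci_revision()], -1);
}

The per-counter bookkeeping that arm_pmu used to provide (the used_mask, the event array, the overflow-IRQ plumbing) moves into the driver's own struct cci_pmu_hw_events, as the diff below shows.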
Diffstat (limited to 'drivers/bus')
-rw-r--r--  drivers/bus/arm-cci.c  552
1 file changed, 444 insertions, 108 deletions
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index 7af78df241f2..860da40b78ef 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -16,17 +16,17 @@
 
 #include <linux/arm-cci.h>
 #include <linux/io.h>
+#include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
+#include <linux/perf_event.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 
 #include <asm/cacheflush.h>
-#include <asm/irq_regs.h>
-#include <asm/pmu.h>
 #include <asm/smp_plat.h>
 
 #define DRIVER_NAME "CCI-400"
@@ -98,6 +98,8 @@ static unsigned long cci_ctrl_phys;
 
 #define CCI_PMU_CNTR_BASE(idx) ((idx) * SZ_4K)
 
+#define CCI_PMU_CNTR_MASK ((1ULL << 32) -1)
+
 /*
  * Instead of an event id to monitor CCI cycles, a dedicated counter is
  * provided. Use 0xff to represent CCI cycles and hope that no future revisions
@@ -170,18 +172,29 @@ static char *const pmu_names[] = {
 	[CCI_REV_R1] = "CCI_400_r1",
 };
 
-struct cci_pmu_drv_data {
+struct cci_pmu_hw_events {
+	struct perf_event *events[CCI_PMU_MAX_HW_EVENTS];
+	unsigned long used_mask[BITS_TO_LONGS(CCI_PMU_MAX_HW_EVENTS)];
+	raw_spinlock_t pmu_lock;
+};
+
+struct cci_pmu {
 	void __iomem *base;
-	struct arm_pmu *cci_pmu;
+	struct pmu pmu;
 	int nr_irqs;
 	int irqs[CCI_PMU_MAX_HW_EVENTS];
 	unsigned long active_irqs;
-	struct perf_event *events[CCI_PMU_MAX_HW_EVENTS];
-	unsigned long used_mask[BITS_TO_LONGS(CCI_PMU_MAX_HW_EVENTS)];
 	struct pmu_port_event_ranges *port_ranges;
-	struct pmu_hw_events hw_events;
+	struct cci_pmu_hw_events hw_events;
+	struct platform_device *plat_device;
+	int num_events;
+	atomic_t active_events;
+	struct mutex reserve_mutex;
+	cpumask_t cpus;
 };
-static struct cci_pmu_drv_data *pmu;
+static struct cci_pmu *pmu;
+
+#define to_cci_pmu(c)	(container_of(c, struct cci_pmu, pmu))
 
 static bool is_duplicate_irq(int irq, int *irqs, int nr_irqs)
 {
@@ -252,7 +265,7 @@ static int pmu_validate_hw_event(u8 hw_event)
 	return -ENOENT;
 }
 
-static int pmu_is_valid_counter(struct arm_pmu *cci_pmu, int idx)
+static int pmu_is_valid_counter(struct cci_pmu *cci_pmu, int idx)
 {
 	return CCI_PMU_CYCLE_CNTR_IDX <= idx &&
 		idx <= CCI_PMU_CNTR_LAST(cci_pmu);
@@ -293,14 +306,9 @@ static u32 pmu_get_max_counters(void)
 	return n_cnts + 1;
 }
 
-static struct pmu_hw_events *pmu_get_hw_events(void)
-{
-	return &pmu->hw_events;
-}
-
-static int pmu_get_event_idx(struct pmu_hw_events *hw, struct perf_event *event)
+static int pmu_get_event_idx(struct cci_pmu_hw_events *hw, struct perf_event *event)
 {
-	struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
 	struct hw_perf_event *hw_event = &event->hw;
 	unsigned long cci_event = hw_event->config_base & CCI_PMU_EVENT_MASK;
 	int idx;
@@ -336,7 +344,7 @@ static int pmu_map_event(struct perf_event *event)
 	return mapping;
 }
 
-static int pmu_request_irq(struct arm_pmu *cci_pmu, irq_handler_t handler)
+static int pmu_request_irq(struct cci_pmu *cci_pmu, irq_handler_t handler)
 {
 	int i;
 	struct platform_device *pmu_device = cci_pmu->plat_device;
@@ -371,17 +379,91 @@ static int pmu_request_irq(struct arm_pmu *cci_pmu, irq_handler_t handler)
 	return 0;
 }
 
+static void pmu_free_irq(struct cci_pmu *cci_pmu)
+{
+	int i;
+
+	for (i = 0; i < pmu->nr_irqs; i++) {
+		if (!test_and_clear_bit(i, &pmu->active_irqs))
+			continue;
+
+		free_irq(pmu->irqs[i], cci_pmu);
+	}
+}
+
+static u32 pmu_read_counter(struct perf_event *event)
+{
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+	struct hw_perf_event *hw_counter = &event->hw;
+	int idx = hw_counter->idx;
+	u32 value;
+
+	if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
+		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+		return 0;
+	}
+	value = pmu_read_register(idx, CCI_PMU_CNTR);
+
+	return value;
+}
+
+static void pmu_write_counter(struct perf_event *event, u32 value)
+{
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+	struct hw_perf_event *hw_counter = &event->hw;
+	int idx = hw_counter->idx;
+
+	if (unlikely(!pmu_is_valid_counter(cci_pmu, idx)))
+		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+	else
+		pmu_write_register(value, idx, CCI_PMU_CNTR);
+}
+
+static u64 pmu_event_update(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 delta, prev_raw_count, new_raw_count;
+
+	do {
+		prev_raw_count = local64_read(&hwc->prev_count);
+		new_raw_count = pmu_read_counter(event);
+	} while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+		 new_raw_count) != prev_raw_count);
+
+	delta = (new_raw_count - prev_raw_count) & CCI_PMU_CNTR_MASK;
+
+	local64_add(delta, &event->count);
+
+	return new_raw_count;
+}
+
+static void pmu_read(struct perf_event *event)
+{
+	pmu_event_update(event);
+}
+
+void pmu_event_set_period(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	/*
+	 * The CCI PMU counters have a period of 2^32. To account for the
+	 * possiblity of extreme interrupt latency we program for a period of
+	 * half that. Hopefully we can handle the interrupt before another 2^31
+	 * events occur and the counter overtakes its previous value.
+	 */
+	u64 val = 1ULL << 31;
+	local64_set(&hwc->prev_count, val);
+	pmu_write_counter(event, val);
+}
+
 static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
 {
 	unsigned long flags;
-	struct arm_pmu *cci_pmu = (struct arm_pmu *)dev;
-	struct pmu_hw_events *events = cci_pmu->get_hw_events();
-	struct perf_sample_data data;
-	struct pt_regs *regs;
+	struct cci_pmu *cci_pmu = dev;
+	struct cci_pmu_hw_events *events = &pmu->hw_events;
 	int idx, handled = IRQ_NONE;
 
 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
-	regs = get_irq_regs();
 	/*
 	 * Iterate over counters and update the corresponding perf events.
 	 * This should work regardless of whether we have per-counter overflow
@@ -403,154 +485,407 @@ static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
 
 		pmu_write_register(CCI_PMU_OVRFLW_FLAG, idx, CCI_PMU_OVRFLW);
 
+		pmu_event_update(event);
+		pmu_event_set_period(event);
 		handled = IRQ_HANDLED;
-
-		armpmu_event_update(event);
-		perf_sample_data_init(&data, 0, hw_counter->last_period);
-		if (!armpmu_event_set_period(event))
-			continue;
-
-		if (perf_event_overflow(event, &data, regs))
-			cci_pmu->disable(event);
 	}
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 
 	return IRQ_RETVAL(handled);
 }
 
-static void pmu_free_irq(struct arm_pmu *cci_pmu)
+static int cci_pmu_get_hw(struct cci_pmu *cci_pmu)
 {
-	int i;
+	int ret = pmu_request_irq(cci_pmu, pmu_handle_irq);
+	if (ret) {
+		pmu_free_irq(cci_pmu);
+		return ret;
+	}
+	return 0;
+}
 
-	for (i = 0; i < pmu->nr_irqs; i++) {
-		if (!test_and_clear_bit(i, &pmu->active_irqs))
-			continue;
+static void cci_pmu_put_hw(struct cci_pmu *cci_pmu)
+{
+	pmu_free_irq(cci_pmu);
+}
 
-		free_irq(pmu->irqs[i], cci_pmu);
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+	atomic_t *active_events = &cci_pmu->active_events;
+	struct mutex *reserve_mutex = &cci_pmu->reserve_mutex;
+
+	if (atomic_dec_and_mutex_lock(active_events, reserve_mutex)) {
+		cci_pmu_put_hw(cci_pmu);
+		mutex_unlock(reserve_mutex);
 	}
 }
 
-static void pmu_enable_event(struct perf_event *event)
+static void cci_pmu_enable(struct pmu *pmu)
 {
+	struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
+	struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
+	int enabled = bitmap_weight(hw_events->used_mask, cci_pmu->num_events);
 	unsigned long flags;
-	struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *events = cci_pmu->get_hw_events();
-	struct hw_perf_event *hw_counter = &event->hw;
-	int idx = hw_counter->idx;
+	u32 val;
+
+	if (!enabled)
+		return;
+
+	raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
+
+	/* Enable all the PMU counters. */
+	val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN;
+	writel(val, cci_ctrl_base + CCI_PMCR);
+	raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
+
+}
+
+static void cci_pmu_disable(struct pmu *pmu)
+{
+	struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
+	struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
+	unsigned long flags;
+	u32 val;
+
+	raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
+
+	/* Disable all the PMU counters. */
+	val = readl_relaxed(cci_ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN;
+	writel(val, cci_ctrl_base + CCI_PMCR);
+	raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
+}
+
+static void cci_pmu_start(struct perf_event *event, int pmu_flags)
+{
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+	struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	unsigned long flags;
+
+	/*
+	 * To handle interrupt latency, we always reprogram the period
+	 * regardlesss of PERF_EF_RELOAD.
+	 */
+	if (pmu_flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+	hwc->state = 0;
 
 	if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
 		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
 		return;
 	}
 
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+	raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
 
 	/* Configure the event to count, unless you are counting cycles */
 	if (idx != CCI_PMU_CYCLE_CNTR_IDX)
-		pmu_set_event(idx, hw_counter->config_base);
+		pmu_set_event(idx, hwc->config_base);
 
+	pmu_event_set_period(event);
 	pmu_enable_counter(idx);
 
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
 }
 
-static void pmu_disable_event(struct perf_event *event)
+static void cci_pmu_stop(struct perf_event *event, int pmu_flags)
 {
-	struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hw_counter = &event->hw;
-	int idx = hw_counter->idx;
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (hwc->state & PERF_HES_STOPPED)
+		return;
 
 	if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
 		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
 		return;
 	}
 
+	/*
+	 * We always reprogram the counter, so ignore PERF_EF_UPDATE. See
+	 * cci_pmu_start()
+	 */
 	pmu_disable_counter(idx);
+	pmu_event_update(event);
+	hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
 }
 
-static void pmu_start(struct arm_pmu *cci_pmu)
+static int cci_pmu_add(struct perf_event *event, int flags)
 {
-	u32 val;
-	unsigned long flags;
-	struct pmu_hw_events *events = cci_pmu->get_hw_events();
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+	struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+	int err = 0;
 
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+	perf_pmu_disable(event->pmu);
 
-	/* Enable all the PMU counters. */
-	val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN;
-	writel(val, cci_ctrl_base + CCI_PMCR);
+	/* If we don't have a space for the counter then finish early. */
+	idx = pmu_get_event_idx(hw_events, event);
+	if (idx < 0) {
+		err = idx;
+		goto out;
+	}
 
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+	event->hw.idx = idx;
+	hw_events->events[idx] = event;
+
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	if (flags & PERF_EF_START)
+		cci_pmu_start(event, PERF_EF_RELOAD);
+
+	/* Propagate our changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+
+out:
+	perf_pmu_enable(event->pmu);
+	return err;
 }
 
-static void pmu_stop(struct arm_pmu *cci_pmu)
+static void cci_pmu_del(struct perf_event *event, int flags)
 {
-	u32 val;
-	unsigned long flags;
-	struct pmu_hw_events *events = cci_pmu->get_hw_events();
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+	struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
 
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+	cci_pmu_stop(event, PERF_EF_UPDATE);
+	hw_events->events[idx] = NULL;
+	clear_bit(idx, hw_events->used_mask);
 
-	/* Disable all the PMU counters. */
-	val = readl_relaxed(cci_ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN;
-	writel(val, cci_ctrl_base + CCI_PMCR);
+	perf_event_update_userpage(event);
+}
 
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+static int
+validate_event(struct cci_pmu_hw_events *hw_events,
+	       struct perf_event *event)
+{
+	if (is_software_event(event))
+		return 1;
+
+	if (event->state < PERF_EVENT_STATE_OFF)
+		return 1;
+
+	if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
+		return 1;
+
+	return pmu_get_event_idx(hw_events, event) >= 0;
 }
 
-static u32 pmu_read_counter(struct perf_event *event)
+static int
+validate_group(struct perf_event *event)
 {
-	struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hw_counter = &event->hw;
-	int idx = hw_counter->idx;
-	u32 value;
+	struct perf_event *sibling, *leader = event->group_leader;
+	struct cci_pmu_hw_events fake_pmu = {
+		/*
+		 * Initialise the fake PMU. We only need to populate the
+		 * used_mask for the purposes of validation.
+		 */
+		.used_mask = CPU_BITS_NONE,
+	};
 
-	if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
-		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
-		return 0;
+	if (!validate_event(&fake_pmu, leader))
+		return -EINVAL;
+
+	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
+		if (!validate_event(&fake_pmu, sibling))
+			return -EINVAL;
 	}
-	value = pmu_read_register(idx, CCI_PMU_CNTR);
 
-	return value;
+	if (!validate_event(&fake_pmu, event))
+		return -EINVAL;
+
+	return 0;
 }
 
-static void pmu_write_counter(struct perf_event *event, u32 value)
+static int
+__hw_perf_event_init(struct perf_event *event)
 {
-	struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hw_counter = &event->hw;
-	int idx = hw_counter->idx;
+	struct hw_perf_event *hwc = &event->hw;
+	int mapping;
 
-	if (unlikely(!pmu_is_valid_counter(cci_pmu, idx)))
-		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
-	else
-		pmu_write_register(value, idx, CCI_PMU_CNTR);
+	mapping = pmu_map_event(event);
+
+	if (mapping < 0) {
+		pr_debug("event %x:%llx not supported\n", event->attr.type,
+			 event->attr.config);
+		return mapping;
+	}
+
+	/*
+	 * We don't assign an index until we actually place the event onto
+	 * hardware. Use -1 to signify that we haven't decided where to put it
+	 * yet.
+	 */
+	hwc->idx = -1;
+	hwc->config_base = 0;
+	hwc->config = 0;
+	hwc->event_base = 0;
+
+	/*
+	 * Store the event encoding into the config_base field.
+	 */
+	hwc->config_base |= (unsigned long)mapping;
+
+	/*
+	 * Limit the sample_period to half of the counter width. That way, the
+	 * new counter value is far less likely to overtake the previous one
+	 * unless you have some serious IRQ latency issues.
+	 */
+	hwc->sample_period = CCI_PMU_CNTR_MASK >> 1;
+	hwc->last_period = hwc->sample_period;
+	local64_set(&hwc->period_left, hwc->sample_period);
+
+	if (event->group_leader != event) {
+		if (validate_group(event) != 0)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int cci_pmu_event_init(struct perf_event *event)
+{
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+	atomic_t *active_events = &cci_pmu->active_events;
+	int err = 0;
+	int cpu;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* Shared by all CPUs, no meaningful state to sample */
+	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+		return -EOPNOTSUPP;
+
+	/* We have no filtering of any kind */
+	if (event->attr.exclude_user ||
+	    event->attr.exclude_kernel ||
+	    event->attr.exclude_hv ||
+	    event->attr.exclude_idle ||
+	    event->attr.exclude_host ||
+	    event->attr.exclude_guest)
+		return -EINVAL;
+
+	/*
+	 * Following the example set by other "uncore" PMUs, we accept any CPU
+	 * and rewrite its affinity dynamically rather than having perf core
+	 * handle cpu == -1 and pid == -1 for this case.
+	 *
+	 * The perf core will pin online CPUs for the duration of this call and
+	 * the event being installed into its context, so the PMU's CPU can't
+	 * change under our feet.
+	 */
+	cpu = cpumask_first(&cci_pmu->cpus);
+	if (event->cpu < 0 || cpu < 0)
+		return -EINVAL;
+	event->cpu = cpu;
+
+	event->destroy = hw_perf_event_destroy;
+	if (!atomic_inc_not_zero(active_events)) {
+		mutex_lock(&cci_pmu->reserve_mutex);
+		if (atomic_read(active_events) == 0)
+			err = cci_pmu_get_hw(cci_pmu);
+		if (!err)
+			atomic_inc(active_events);
+		mutex_unlock(&cci_pmu->reserve_mutex);
+	}
+	if (err)
+		return err;
+
+	err = __hw_perf_event_init(event);
+	if (err)
+		hw_perf_event_destroy(event);
+
+	return err;
 }
 
-static int cci_pmu_init(struct arm_pmu *cci_pmu, struct platform_device *pdev)
+static ssize_t pmu_attr_cpumask_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
 {
-	*cci_pmu = (struct arm_pmu){
-		.name = pmu_names[probe_cci_revision()],
-		.max_period = (1LLU << 32) - 1,
-		.get_hw_events = pmu_get_hw_events,
-		.get_event_idx = pmu_get_event_idx,
-		.map_event = pmu_map_event,
-		.request_irq = pmu_request_irq,
-		.handle_irq = pmu_handle_irq,
-		.free_irq = pmu_free_irq,
-		.enable = pmu_enable_event,
-		.disable = pmu_disable_event,
-		.start = pmu_start,
-		.stop = pmu_stop,
-		.read_counter = pmu_read_counter,
-		.write_counter = pmu_write_counter,
+	int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &pmu->cpus);
+
+	buf[n++] = '\n';
+	buf[n] = '\0';
+	return n;
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, pmu_attr_cpumask_show, NULL);
+
+static struct attribute *pmu_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static struct attribute_group pmu_attr_group = {
+	.attrs = pmu_attrs,
+};
+
+static const struct attribute_group *pmu_attr_groups[] = {
+	&pmu_attr_group,
+	NULL
+};
+
+static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev)
+{
+	char *name = pmu_names[probe_cci_revision()];
+	cci_pmu->pmu = (struct pmu) {
+		.name = pmu_names[probe_cci_revision()],
+		.task_ctx_nr = perf_invalid_context,
+		.pmu_enable = cci_pmu_enable,
+		.pmu_disable = cci_pmu_disable,
+		.event_init = cci_pmu_event_init,
+		.add = cci_pmu_add,
+		.del = cci_pmu_del,
+		.start = cci_pmu_start,
+		.stop = cci_pmu_stop,
+		.read = pmu_read,
+		.attr_groups = pmu_attr_groups,
 	};
 
 	cci_pmu->plat_device = pdev;
 	cci_pmu->num_events = pmu_get_max_counters();
 
-	return armpmu_register(cci_pmu, -1);
+	return perf_pmu_register(&cci_pmu->pmu, name, -1);
 }
 
+static int cci_pmu_cpu_notifier(struct notifier_block *self,
+				unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (long)hcpu;
+	unsigned int target;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_DOWN_PREPARE:
+		if (!cpumask_test_and_clear_cpu(cpu, &pmu->cpus))
+			break;
+		target = cpumask_any_but(cpu_online_mask, cpu);
+		if (target < 0) // UP, last CPU
+			break;
+		/*
+		 * TODO: migrate context once core races on event->ctx have
+		 * been fixed.
+		 */
+		cpumask_set_cpu(target, &pmu->cpus);
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block cci_pmu_cpu_nb = {
+	.notifier_call	= cci_pmu_cpu_notifier,
+	/*
+	 * to migrate uncore events, our notifier should be executed
+	 * before perf core's notifier.
+	 */
+	.priority	= CPU_PRI_PERF + 1,
+};
+
 static const struct of_device_id arm_cci_pmu_matches[] = {
 	{
 		.compatible = "arm,cci-400-pmu",
@@ -604,15 +939,16 @@ static int cci_pmu_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	pmu->cci_pmu = devm_kzalloc(&pdev->dev, sizeof(*(pmu->cci_pmu)), GFP_KERNEL);
-	if (!pmu->cci_pmu)
-		return -ENOMEM;
-
-	pmu->hw_events.events = pmu->events;
-	pmu->hw_events.used_mask = pmu->used_mask;
 	raw_spin_lock_init(&pmu->hw_events.pmu_lock);
+	mutex_init(&pmu->reserve_mutex);
+	atomic_set(&pmu->active_events, 0);
+	cpumask_set_cpu(smp_processor_id(), &pmu->cpus);
+
+	ret = register_cpu_notifier(&cci_pmu_cpu_nb);
+	if (ret)
+		return ret;
 
-	ret = cci_pmu_init(pmu->cci_pmu, pdev);
+	ret = cci_pmu_init(pmu, pdev);
 	if (ret)
 		return ret;
 